/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2010 Hudson River Trading LLC
 * Written by: John H. Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

309417fa9eSJayachandran C. #include <sys/cdefs.h>
319417fa9eSJayachandran C. #include "opt_vm.h"
329417fa9eSJayachandran C.
339417fa9eSJayachandran C. #include <sys/param.h>
349417fa9eSJayachandran C. #include <sys/systm.h>
359417fa9eSJayachandran C. #include <sys/bus.h>
369417fa9eSJayachandran C. #include <sys/kernel.h>
379417fa9eSJayachandran C. #include <sys/lock.h>
389417fa9eSJayachandran C. #include <sys/mutex.h>
399417fa9eSJayachandran C. #include <sys/smp.h>
409417fa9eSJayachandran C. #include <sys/vmmeter.h>
419417fa9eSJayachandran C. #include <vm/vm.h>
429417fa9eSJayachandran C. #include <vm/pmap.h>
439417fa9eSJayachandran C. #include <vm/vm_param.h>
449417fa9eSJayachandran C. #include <vm/vm_page.h>
459417fa9eSJayachandran C. #include <vm/vm_phys.h>
469417fa9eSJayachandran C.
479417fa9eSJayachandran C. #include <contrib/dev/acpica/include/acpi.h>
489417fa9eSJayachandran C. #include <contrib/dev/acpica/include/aclocal.h>
499417fa9eSJayachandran C. #include <contrib/dev/acpica/include/actables.h>
509417fa9eSJayachandran C.
519417fa9eSJayachandran C. #include <machine/md_var.h>
529417fa9eSJayachandran C.
539417fa9eSJayachandran C. #include <dev/acpica/acpivar.h>
549417fa9eSJayachandran C.
559417fa9eSJayachandran C. #if MAXMEMDOM > 1
569417fa9eSJayachandran C. static struct cpu_info {
57bab8274cSDimitry Andric bool enabled:1;
58bab8274cSDimitry Andric bool has_memory:1;
599417fa9eSJayachandran C. int domain;
6013aacaeeSJayachandran C. int id;
619417fa9eSJayachandran C. } *cpus;
629417fa9eSJayachandran C.
639417fa9eSJayachandran C. static int max_cpus;
649417fa9eSJayachandran C. static int last_cpu;
659417fa9eSJayachandran C.
669417fa9eSJayachandran C. struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1];
679417fa9eSJayachandran C. int num_mem;
689417fa9eSJayachandran C.
699417fa9eSJayachandran C. static ACPI_TABLE_SRAT *srat;
709417fa9eSJayachandran C. static vm_paddr_t srat_physaddr;
719417fa9eSJayachandran C.
729417fa9eSJayachandran C. static int domain_pxm[MAXMEMDOM];
739417fa9eSJayachandran C. static int ndomain;
749417fa9eSJayachandran C. static vm_paddr_t maxphyaddr;
759417fa9eSJayachandran C.
769417fa9eSJayachandran C. static ACPI_TABLE_SLIT *slit;
779417fa9eSJayachandran C. static vm_paddr_t slit_physaddr;
789417fa9eSJayachandran C. static int vm_locality_table[MAXMEMDOM * MAXMEMDOM];
799417fa9eSJayachandran C.
809417fa9eSJayachandran C. static void srat_walk_table(acpi_subtable_handler *handler, void *arg);
819417fa9eSJayachandran C.
829417fa9eSJayachandran C. /*
839417fa9eSJayachandran C. * SLIT parsing.
849417fa9eSJayachandran C. */
859417fa9eSJayachandran C.
869417fa9eSJayachandran C. static void
slit_parse_table(ACPI_TABLE_SLIT * s)879417fa9eSJayachandran C. slit_parse_table(ACPI_TABLE_SLIT *s)
889417fa9eSJayachandran C. {
899417fa9eSJayachandran C. int i, j;
909417fa9eSJayachandran C. int i_domain, j_domain;
919417fa9eSJayachandran C. int offset = 0;
929417fa9eSJayachandran C. uint8_t e;
939417fa9eSJayachandran C.
949417fa9eSJayachandran C. /*
959417fa9eSJayachandran C. * This maps the SLIT data into the VM-domain centric view.
969417fa9eSJayachandran C. * There may be sparse entries in the PXM namespace, so
979417fa9eSJayachandran C. * remap them to a VM-domain ID and if it doesn't exist,
989417fa9eSJayachandran C. * skip it.
999417fa9eSJayachandran C. *
1009417fa9eSJayachandran C. * It should result in a packed 2d array of VM-domain
1019417fa9eSJayachandran C. * locality information entries.
1029417fa9eSJayachandran C. */
1039417fa9eSJayachandran C.
1049417fa9eSJayachandran C. if (bootverbose)
1059417fa9eSJayachandran C. printf("SLIT.Localities: %d\n", (int) s->LocalityCount);
1069417fa9eSJayachandran C. for (i = 0; i < s->LocalityCount; i++) {
1079417fa9eSJayachandran C. i_domain = acpi_map_pxm_to_vm_domainid(i);
1089417fa9eSJayachandran C. if (i_domain < 0)
1099417fa9eSJayachandran C. continue;
1109417fa9eSJayachandran C.
1119417fa9eSJayachandran C. if (bootverbose)
1129417fa9eSJayachandran C. printf("%d: ", i);
1139417fa9eSJayachandran C. for (j = 0; j < s->LocalityCount; j++) {
1149417fa9eSJayachandran C. j_domain = acpi_map_pxm_to_vm_domainid(j);
1159417fa9eSJayachandran C. if (j_domain < 0)
1169417fa9eSJayachandran C. continue;
1179417fa9eSJayachandran C. e = s->Entry[i * s->LocalityCount + j];
1189417fa9eSJayachandran C. if (bootverbose)
1199417fa9eSJayachandran C. printf("%d ", (int) e);
1209417fa9eSJayachandran C. /* 255 == "no locality information" */
1219417fa9eSJayachandran C. if (e == 255)
1229417fa9eSJayachandran C. vm_locality_table[offset] = -1;
1239417fa9eSJayachandran C. else
1249417fa9eSJayachandran C. vm_locality_table[offset] = e;
1259417fa9eSJayachandran C. offset++;
1269417fa9eSJayachandran C. }
1279417fa9eSJayachandran C. if (bootverbose)
1289417fa9eSJayachandran C. printf("\n");
1299417fa9eSJayachandran C. }
1309417fa9eSJayachandran C. }
1319417fa9eSJayachandran C.
1329417fa9eSJayachandran C. /*
1339417fa9eSJayachandran C. * Look for an ACPI System Locality Distance Information Table ("SLIT")
1349417fa9eSJayachandran C. */
1359417fa9eSJayachandran C. static int
parse_slit(void)1369417fa9eSJayachandran C. parse_slit(void)
1379417fa9eSJayachandran C. {
1389417fa9eSJayachandran C.
1399417fa9eSJayachandran C. if (resource_disabled("slit", 0)) {
1409417fa9eSJayachandran C. return (-1);
1419417fa9eSJayachandran C. }
1429417fa9eSJayachandran C.
1439417fa9eSJayachandran C. slit_physaddr = acpi_find_table(ACPI_SIG_SLIT);
1449417fa9eSJayachandran C. if (slit_physaddr == 0) {
1459417fa9eSJayachandran C. return (-1);
1469417fa9eSJayachandran C. }
1479417fa9eSJayachandran C.
1489417fa9eSJayachandran C. /*
1499417fa9eSJayachandran C. * Make a pass over the table to populate the cpus[] and
1509417fa9eSJayachandran C. * mem_info[] tables.
1519417fa9eSJayachandran C. */
1529417fa9eSJayachandran C. slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT);
1539417fa9eSJayachandran C. slit_parse_table(slit);
1549417fa9eSJayachandran C. acpi_unmap_table(slit);
1559417fa9eSJayachandran C. slit = NULL;
1569417fa9eSJayachandran C.
1579417fa9eSJayachandran C. return (0);
1589417fa9eSJayachandran C. }
1599417fa9eSJayachandran C.
1609417fa9eSJayachandran C. /*
1619417fa9eSJayachandran C. * SRAT parsing.
1629417fa9eSJayachandran C. */
1639417fa9eSJayachandran C.
1649417fa9eSJayachandran C. /*
1659417fa9eSJayachandran C. * Returns true if a memory range overlaps with at least one range in
1669417fa9eSJayachandran C. * phys_avail[].
1679417fa9eSJayachandran C. */
1689417fa9eSJayachandran C. static int
overlaps_phys_avail(vm_paddr_t start,vm_paddr_t end)1699417fa9eSJayachandran C. overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end)
1709417fa9eSJayachandran C. {
1719417fa9eSJayachandran C. int i;
1729417fa9eSJayachandran C.
1739417fa9eSJayachandran C. for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) {
1749417fa9eSJayachandran C. if (phys_avail[i + 1] <= start)
1759417fa9eSJayachandran C. continue;
1769417fa9eSJayachandran C. if (phys_avail[i] < end)
1779417fa9eSJayachandran C. return (1);
1789417fa9eSJayachandran C. break;
1799417fa9eSJayachandran C. }
1809417fa9eSJayachandran C. return (0);
1819417fa9eSJayachandran C. }
1829417fa9eSJayachandran C.
/*
 * On x86 we can use the cpuid to index the cpus array, but on arm64
 * we have an ACPI Processor UID with a larger range.
 *
 * Use this variable to indicate if the cpus can be stored by index:
 * when it is non-zero cpus[] is indexed directly by ID, otherwise
 * entries are appended and located by a linear search on their 'id'.
 */
#ifdef __aarch64__
static const int cpus_use_indexing = 0;
#else
static const int cpus_use_indexing = 1;
#endif

19513aacaeeSJayachandran C. /*
19613aacaeeSJayachandran C. * Find CPU by processor ID (APIC ID on x86, Processor UID on arm64)
1979417fa9eSJayachandran C. */
1989417fa9eSJayachandran C. static struct cpu_info *
cpu_find(int cpuid)1999417fa9eSJayachandran C. cpu_find(int cpuid)
2009417fa9eSJayachandran C. {
20113aacaeeSJayachandran C. int i;
2029417fa9eSJayachandran C.
20313aacaeeSJayachandran C. if (cpus_use_indexing) {
2049417fa9eSJayachandran C. if (cpuid <= last_cpu && cpus[cpuid].enabled)
2059417fa9eSJayachandran C. return (&cpus[cpuid]);
20613aacaeeSJayachandran C. } else {
20713aacaeeSJayachandran C. for (i = 0; i <= last_cpu; i++)
20813aacaeeSJayachandran C. if (cpus[i].id == cpuid)
20913aacaeeSJayachandran C. return (&cpus[i]);
21013aacaeeSJayachandran C. }
2119417fa9eSJayachandran C. return (NULL);
2129417fa9eSJayachandran C. }
2139417fa9eSJayachandran C.
2149417fa9eSJayachandran C. /*
2159417fa9eSJayachandran C. * Find CPU by pcpu pointer.
2169417fa9eSJayachandran C. */
2179417fa9eSJayachandran C. static struct cpu_info *
cpu_get_info(struct pcpu * pc)2189417fa9eSJayachandran C. cpu_get_info(struct pcpu *pc)
2199417fa9eSJayachandran C. {
2209417fa9eSJayachandran C. struct cpu_info *cpup;
2219417fa9eSJayachandran C. int id;
2229417fa9eSJayachandran C.
22313aacaeeSJayachandran C. #ifdef __aarch64__
22413aacaeeSJayachandran C. id = pc->pc_acpi_id;
22513aacaeeSJayachandran C. #else
2269417fa9eSJayachandran C. id = pc->pc_apic_id;
22713aacaeeSJayachandran C. #endif
2289417fa9eSJayachandran C. cpup = cpu_find(id);
2299417fa9eSJayachandran C. if (cpup == NULL)
23013aacaeeSJayachandran C. panic("SRAT: CPU with ID %u is not known", id);
2319417fa9eSJayachandran C. return (cpup);
2329417fa9eSJayachandran C. }
2339417fa9eSJayachandran C.
2349417fa9eSJayachandran C. /*
2359417fa9eSJayachandran C. * Add proximity information for a new CPU.
2369417fa9eSJayachandran C. */
2379417fa9eSJayachandran C. static struct cpu_info *
cpu_add(int cpuid,int domain)2389417fa9eSJayachandran C. cpu_add(int cpuid, int domain)
2399417fa9eSJayachandran C. {
2409417fa9eSJayachandran C. struct cpu_info *cpup;
2419417fa9eSJayachandran C.
24213aacaeeSJayachandran C. if (cpus_use_indexing) {
2439417fa9eSJayachandran C. if (cpuid >= max_cpus)
2449417fa9eSJayachandran C. return (NULL);
2459417fa9eSJayachandran C. last_cpu = imax(last_cpu, cpuid);
2469417fa9eSJayachandran C. cpup = &cpus[cpuid];
24713aacaeeSJayachandran C. } else {
24813aacaeeSJayachandran C. if (last_cpu >= max_cpus - 1)
24913aacaeeSJayachandran C. return (NULL);
25013aacaeeSJayachandran C. cpup = &cpus[++last_cpu];
25113aacaeeSJayachandran C. }
2529417fa9eSJayachandran C. cpup->domain = domain;
25313aacaeeSJayachandran C. cpup->id = cpuid;
2549417fa9eSJayachandran C. cpup->enabled = 1;
2559417fa9eSJayachandran C. return (cpup);
2569417fa9eSJayachandran C. }
2579417fa9eSJayachandran C.
2589417fa9eSJayachandran C. static void
srat_parse_entry(ACPI_SUBTABLE_HEADER * entry,void * arg)2599417fa9eSJayachandran C. srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg)
2609417fa9eSJayachandran C. {
2619417fa9eSJayachandran C. ACPI_SRAT_CPU_AFFINITY *cpu;
2629417fa9eSJayachandran C. ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic;
2639417fa9eSJayachandran C. ACPI_SRAT_MEM_AFFINITY *mem;
26413aacaeeSJayachandran C. ACPI_SRAT_GICC_AFFINITY *gicc;
2659417fa9eSJayachandran C. static struct cpu_info *cpup;
2660f8b212aSMark Johnston uint64_t base, length;
2679417fa9eSJayachandran C. int domain, i, slot;
2689417fa9eSJayachandran C.
2699417fa9eSJayachandran C. switch (entry->Type) {
2709417fa9eSJayachandran C. case ACPI_SRAT_TYPE_CPU_AFFINITY:
2719417fa9eSJayachandran C. cpu = (ACPI_SRAT_CPU_AFFINITY *)entry;
2729417fa9eSJayachandran C. domain = cpu->ProximityDomainLo |
2739417fa9eSJayachandran C. cpu->ProximityDomainHi[0] << 8 |
2749417fa9eSJayachandran C. cpu->ProximityDomainHi[1] << 16 |
2759417fa9eSJayachandran C. cpu->ProximityDomainHi[2] << 24;
2769417fa9eSJayachandran C. if (bootverbose)
2779417fa9eSJayachandran C. printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
2789417fa9eSJayachandran C. cpu->ApicId, domain,
2799417fa9eSJayachandran C. (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ?
2809417fa9eSJayachandran C. "enabled" : "disabled");
2819417fa9eSJayachandran C. if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED))
2829417fa9eSJayachandran C. break;
2839417fa9eSJayachandran C. cpup = cpu_find(cpu->ApicId);
2849417fa9eSJayachandran C. if (cpup != NULL) {
2859417fa9eSJayachandran C. printf("SRAT: Duplicate local APIC ID %u\n",
2869417fa9eSJayachandran C. cpu->ApicId);
2879417fa9eSJayachandran C. *(int *)arg = ENXIO;
2889417fa9eSJayachandran C. break;
2899417fa9eSJayachandran C. }
2909417fa9eSJayachandran C. cpup = cpu_add(cpu->ApicId, domain);
2919417fa9eSJayachandran C. if (cpup == NULL)
2929417fa9eSJayachandran C. printf("SRAT: Ignoring local APIC ID %u (too high)\n",
2939417fa9eSJayachandran C. cpu->ApicId);
2949417fa9eSJayachandran C. break;
2959417fa9eSJayachandran C. case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
2969417fa9eSJayachandran C. x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry;
2979417fa9eSJayachandran C. if (bootverbose)
2989417fa9eSJayachandran C. printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
2999417fa9eSJayachandran C. x2apic->ApicId, x2apic->ProximityDomain,
3009417fa9eSJayachandran C. (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ?
3019417fa9eSJayachandran C. "enabled" : "disabled");
3029417fa9eSJayachandran C. if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED))
3039417fa9eSJayachandran C. break;
3049417fa9eSJayachandran C. KASSERT(cpu_find(x2apic->ApicId) == NULL,
3059417fa9eSJayachandran C. ("Duplicate local APIC ID %u", x2apic->ApicId));
3069417fa9eSJayachandran C. cpup = cpu_add(x2apic->ApicId, x2apic->ProximityDomain);
3079417fa9eSJayachandran C. if (cpup == NULL)
3089417fa9eSJayachandran C. printf("SRAT: Ignoring local APIC ID %u (too high)\n",
3099417fa9eSJayachandran C. x2apic->ApicId);
3109417fa9eSJayachandran C. break;
31113aacaeeSJayachandran C. case ACPI_SRAT_TYPE_GICC_AFFINITY:
31213aacaeeSJayachandran C. gicc = (ACPI_SRAT_GICC_AFFINITY *)entry;
31313aacaeeSJayachandran C. if (bootverbose)
31413aacaeeSJayachandran C. printf("SRAT: Found CPU UID %u domain %d: %s\n",
31513aacaeeSJayachandran C. gicc->AcpiProcessorUid, gicc->ProximityDomain,
31613aacaeeSJayachandran C. (gicc->Flags & ACPI_SRAT_GICC_ENABLED) ?
31713aacaeeSJayachandran C. "enabled" : "disabled");
31813aacaeeSJayachandran C. if (!(gicc->Flags & ACPI_SRAT_GICC_ENABLED))
31913aacaeeSJayachandran C. break;
32013aacaeeSJayachandran C. KASSERT(cpu_find(gicc->AcpiProcessorUid) == NULL,
32113aacaeeSJayachandran C. ("Duplicate CPU UID %u", gicc->AcpiProcessorUid));
32213aacaeeSJayachandran C. cpup = cpu_add(gicc->AcpiProcessorUid, gicc->ProximityDomain);
32313aacaeeSJayachandran C. if (cpup == NULL)
32413aacaeeSJayachandran C. printf("SRAT: Ignoring CPU UID %u (too high)\n",
32513aacaeeSJayachandran C. gicc->AcpiProcessorUid);
32613aacaeeSJayachandran C. break;
3279417fa9eSJayachandran C. case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
3289417fa9eSJayachandran C. mem = (ACPI_SRAT_MEM_AFFINITY *)entry;
3290f8b212aSMark Johnston base = mem->BaseAddress;
3300f8b212aSMark Johnston length = mem->Length;
3310f8b212aSMark Johnston domain = mem->ProximityDomain;
3320f8b212aSMark Johnston
3339417fa9eSJayachandran C. if (bootverbose)
3349417fa9eSJayachandran C. printf(
3359417fa9eSJayachandran C. "SRAT: Found memory domain %d addr 0x%jx len 0x%jx: %s\n",
3360f8b212aSMark Johnston domain, (uintmax_t)base, (uintmax_t)length,
3379417fa9eSJayachandran C. (mem->Flags & ACPI_SRAT_MEM_ENABLED) ?
3389417fa9eSJayachandran C. "enabled" : "disabled");
3399417fa9eSJayachandran C. if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED))
3409417fa9eSJayachandran C. break;
3410f8b212aSMark Johnston if (base >= maxphyaddr ||
3420f8b212aSMark Johnston !overlaps_phys_avail(base, base + length)) {
3439417fa9eSJayachandran C. printf("SRAT: Ignoring memory at addr 0x%jx\n",
3440f8b212aSMark Johnston (uintmax_t)base);
3459417fa9eSJayachandran C. break;
3469417fa9eSJayachandran C. }
3479417fa9eSJayachandran C. if (num_mem == VM_PHYSSEG_MAX) {
3489417fa9eSJayachandran C. printf("SRAT: Too many memory regions\n");
3499417fa9eSJayachandran C. *(int *)arg = ENXIO;
3509417fa9eSJayachandran C. break;
3519417fa9eSJayachandran C. }
3529417fa9eSJayachandran C. slot = num_mem;
3539417fa9eSJayachandran C. for (i = 0; i < num_mem; i++) {
3540f8b212aSMark Johnston if (mem_info[i].domain == domain) {
3550f8b212aSMark Johnston /* Try to extend an existing segment. */
3560f8b212aSMark Johnston if (base == mem_info[i].end) {
3570f8b212aSMark Johnston mem_info[i].end += length;
3580f8b212aSMark Johnston return;
3590f8b212aSMark Johnston }
3600f8b212aSMark Johnston if (base + length == mem_info[i].start) {
3610f8b212aSMark Johnston mem_info[i].start -= length;
3620f8b212aSMark Johnston return;
3630f8b212aSMark Johnston }
3640f8b212aSMark Johnston }
3650f8b212aSMark Johnston if (mem_info[i].end <= base)
3669417fa9eSJayachandran C. continue;
3670f8b212aSMark Johnston if (mem_info[i].start < base + length) {
3689417fa9eSJayachandran C. printf("SRAT: Overlapping memory entries\n");
3699417fa9eSJayachandran C. *(int *)arg = ENXIO;
3709417fa9eSJayachandran C. return;
3719417fa9eSJayachandran C. }
3729417fa9eSJayachandran C. slot = i;
3739417fa9eSJayachandran C. }
3749417fa9eSJayachandran C. for (i = num_mem; i > slot; i--)
3759417fa9eSJayachandran C. mem_info[i] = mem_info[i - 1];
3760f8b212aSMark Johnston mem_info[slot].start = base;
3770f8b212aSMark Johnston mem_info[slot].end = base + length;
3780f8b212aSMark Johnston mem_info[slot].domain = domain;
3799417fa9eSJayachandran C. num_mem++;
3809417fa9eSJayachandran C. break;
3819417fa9eSJayachandran C. }
3829417fa9eSJayachandran C. }
3839417fa9eSJayachandran C.
3849417fa9eSJayachandran C. /*
3859417fa9eSJayachandran C. * Ensure each memory domain has at least one CPU and that each CPU
3869417fa9eSJayachandran C. * has at least one memory domain.
3879417fa9eSJayachandran C. */
3889417fa9eSJayachandran C. static int
check_domains(void)3899417fa9eSJayachandran C. check_domains(void)
3909417fa9eSJayachandran C. {
3919417fa9eSJayachandran C. int found, i, j;
3929417fa9eSJayachandran C.
3939417fa9eSJayachandran C. for (i = 0; i < num_mem; i++) {
3949417fa9eSJayachandran C. found = 0;
3959417fa9eSJayachandran C. for (j = 0; j <= last_cpu; j++)
3969417fa9eSJayachandran C. if (cpus[j].enabled &&
3979417fa9eSJayachandran C. cpus[j].domain == mem_info[i].domain) {
3989417fa9eSJayachandran C. cpus[j].has_memory = 1;
3999417fa9eSJayachandran C. found++;
4009417fa9eSJayachandran C. }
4019417fa9eSJayachandran C. if (!found) {
4029417fa9eSJayachandran C. printf("SRAT: No CPU found for memory domain %d\n",
4039417fa9eSJayachandran C. mem_info[i].domain);
4049417fa9eSJayachandran C. return (ENXIO);
4059417fa9eSJayachandran C. }
4069417fa9eSJayachandran C. }
4079417fa9eSJayachandran C. for (i = 0; i <= last_cpu; i++)
4089417fa9eSJayachandran C. if (cpus[i].enabled && !cpus[i].has_memory) {
4099417fa9eSJayachandran C. found = 0;
4109417fa9eSJayachandran C. for (j = 0; j < num_mem && !found; j++) {
4119417fa9eSJayachandran C. if (mem_info[j].domain == cpus[i].domain)
4129417fa9eSJayachandran C. found = 1;
4139417fa9eSJayachandran C. }
4149417fa9eSJayachandran C. if (!found) {
4159417fa9eSJayachandran C. if (bootverbose)
4169417fa9eSJayachandran C. printf("SRAT: mem dom %d is empty\n",
4179417fa9eSJayachandran C. cpus[i].domain);
4189417fa9eSJayachandran C. mem_info[num_mem].start = 0;
4199417fa9eSJayachandran C. mem_info[num_mem].end = 0;
4209417fa9eSJayachandran C. mem_info[num_mem].domain = cpus[i].domain;
4219417fa9eSJayachandran C. num_mem++;
4229417fa9eSJayachandran C. }
4239417fa9eSJayachandran C. }
4249417fa9eSJayachandran C. return (0);
4259417fa9eSJayachandran C. }
4269417fa9eSJayachandran C.
4279417fa9eSJayachandran C. /*
4289417fa9eSJayachandran C. * Check that the SRAT memory regions cover all of the regions in
4299417fa9eSJayachandran C. * phys_avail[].
4309417fa9eSJayachandran C. */
4319417fa9eSJayachandran C. static int
check_phys_avail(void)4329417fa9eSJayachandran C. check_phys_avail(void)
4339417fa9eSJayachandran C. {
4349417fa9eSJayachandran C. vm_paddr_t address;
4359417fa9eSJayachandran C. int i, j;
4369417fa9eSJayachandran C.
4379417fa9eSJayachandran C. /* j is the current offset into phys_avail[]. */
4389417fa9eSJayachandran C. address = phys_avail[0];
4399417fa9eSJayachandran C. j = 0;
4409417fa9eSJayachandran C. for (i = 0; i < num_mem; i++) {
4419417fa9eSJayachandran C. /*
4429417fa9eSJayachandran C. * Consume as many phys_avail[] entries as fit in this
4439417fa9eSJayachandran C. * region.
4449417fa9eSJayachandran C. */
4459417fa9eSJayachandran C. while (address >= mem_info[i].start &&
4469417fa9eSJayachandran C. address <= mem_info[i].end) {
4479417fa9eSJayachandran C. /*
4489417fa9eSJayachandran C. * If we cover the rest of this phys_avail[] entry,
4499417fa9eSJayachandran C. * advance to the next entry.
4509417fa9eSJayachandran C. */
4519417fa9eSJayachandran C. if (phys_avail[j + 1] <= mem_info[i].end) {
4529417fa9eSJayachandran C. j += 2;
4539417fa9eSJayachandran C. if (phys_avail[j] == 0 &&
4549417fa9eSJayachandran C. phys_avail[j + 1] == 0) {
4559417fa9eSJayachandran C. return (0);
4569417fa9eSJayachandran C. }
4579417fa9eSJayachandran C. address = phys_avail[j];
4589417fa9eSJayachandran C. } else
4599417fa9eSJayachandran C. address = mem_info[i].end + 1;
4609417fa9eSJayachandran C. }
4619417fa9eSJayachandran C. }
4629417fa9eSJayachandran C. printf("SRAT: No memory region found for 0x%jx - 0x%jx\n",
4639417fa9eSJayachandran C. (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]);
4649417fa9eSJayachandran C. return (ENXIO);
4659417fa9eSJayachandran C. }
4669417fa9eSJayachandran C.
4679417fa9eSJayachandran C. /*
4689417fa9eSJayachandran C. * Renumber the memory domains to be compact and zero-based if not
4699417fa9eSJayachandran C. * already. Returns an error if there are too many domains.
4709417fa9eSJayachandran C. */
4719417fa9eSJayachandran C. static int
renumber_domains(void)4729417fa9eSJayachandran C. renumber_domains(void)
4739417fa9eSJayachandran C. {
4749417fa9eSJayachandran C. int i, j, slot;
4759417fa9eSJayachandran C.
4769417fa9eSJayachandran C. /* Enumerate all the domains. */
4779417fa9eSJayachandran C. ndomain = 0;
4789417fa9eSJayachandran C. for (i = 0; i < num_mem; i++) {
4799417fa9eSJayachandran C. /* See if this domain is already known. */
4809417fa9eSJayachandran C. for (j = 0; j < ndomain; j++) {
4819417fa9eSJayachandran C. if (domain_pxm[j] >= mem_info[i].domain)
4829417fa9eSJayachandran C. break;
4839417fa9eSJayachandran C. }
4849417fa9eSJayachandran C. if (j < ndomain && domain_pxm[j] == mem_info[i].domain)
4859417fa9eSJayachandran C. continue;
4869417fa9eSJayachandran C.
4879417fa9eSJayachandran C. if (ndomain >= MAXMEMDOM) {
4889417fa9eSJayachandran C. ndomain = 1;
4899417fa9eSJayachandran C. printf("SRAT: Too many memory domains\n");
4909417fa9eSJayachandran C. return (EFBIG);
4919417fa9eSJayachandran C. }
4929417fa9eSJayachandran C.
4939417fa9eSJayachandran C. /* Insert the new domain at slot 'j'. */
4949417fa9eSJayachandran C. slot = j;
4959417fa9eSJayachandran C. for (j = ndomain; j > slot; j--)
4969417fa9eSJayachandran C. domain_pxm[j] = domain_pxm[j - 1];
4979417fa9eSJayachandran C. domain_pxm[slot] = mem_info[i].domain;
4989417fa9eSJayachandran C. ndomain++;
4999417fa9eSJayachandran C. }
5009417fa9eSJayachandran C.
5019417fa9eSJayachandran C. /* Renumber each domain to its index in the sorted 'domain_pxm' list. */
5029417fa9eSJayachandran C. for (i = 0; i < ndomain; i++) {
5039417fa9eSJayachandran C. /*
5049417fa9eSJayachandran C. * If the domain is already the right value, no need
5059417fa9eSJayachandran C. * to renumber.
5069417fa9eSJayachandran C. */
5079417fa9eSJayachandran C. if (domain_pxm[i] == i)
5089417fa9eSJayachandran C. continue;
5099417fa9eSJayachandran C.
5109417fa9eSJayachandran C. /* Walk the cpu[] and mem_info[] arrays to renumber. */
5119417fa9eSJayachandran C. for (j = 0; j < num_mem; j++)
5129417fa9eSJayachandran C. if (mem_info[j].domain == domain_pxm[i])
5139417fa9eSJayachandran C. mem_info[j].domain = i;
5149417fa9eSJayachandran C. for (j = 0; j <= last_cpu; j++)
5159417fa9eSJayachandran C. if (cpus[j].enabled && cpus[j].domain == domain_pxm[i])
5169417fa9eSJayachandran C. cpus[j].domain = i;
5179417fa9eSJayachandran C. }
5189417fa9eSJayachandran C.
5199417fa9eSJayachandran C. return (0);
5209417fa9eSJayachandran C. }
5219417fa9eSJayachandran C.
5229417fa9eSJayachandran C. /*
5239417fa9eSJayachandran C. * Look for an ACPI System Resource Affinity Table ("SRAT"),
5249417fa9eSJayachandran C. * allocate space for cpu information, and initialize globals.
5259417fa9eSJayachandran C. */
5269417fa9eSJayachandran C. int
acpi_pxm_init(int ncpus,vm_paddr_t maxphys)5279417fa9eSJayachandran C. acpi_pxm_init(int ncpus, vm_paddr_t maxphys)
5289417fa9eSJayachandran C. {
5299417fa9eSJayachandran C. unsigned int idx, size;
5309417fa9eSJayachandran C. vm_paddr_t addr;
5319417fa9eSJayachandran C.
5329417fa9eSJayachandran C. if (resource_disabled("srat", 0))
5339417fa9eSJayachandran C. return (-1);
5349417fa9eSJayachandran C.
5359417fa9eSJayachandran C. max_cpus = ncpus;
5369417fa9eSJayachandran C. last_cpu = -1;
5379417fa9eSJayachandran C. maxphyaddr = maxphys;
5389417fa9eSJayachandran C. srat_physaddr = acpi_find_table(ACPI_SIG_SRAT);
5399417fa9eSJayachandran C. if (srat_physaddr == 0)
5409417fa9eSJayachandran C. return (-1);
5419417fa9eSJayachandran C.
5429417fa9eSJayachandran C. /*
5439417fa9eSJayachandran C. * Allocate data structure:
5449417fa9eSJayachandran C. *
5459417fa9eSJayachandran C. * Find the last physical memory region and steal some memory from
5469417fa9eSJayachandran C. * it. This is done because at this point in the boot process
5479417fa9eSJayachandran C. * malloc is still not usable.
5489417fa9eSJayachandran C. */
5499417fa9eSJayachandran C. for (idx = 0; phys_avail[idx + 1] != 0; idx += 2);
5509417fa9eSJayachandran C. KASSERT(idx != 0, ("phys_avail is empty!"));
5519417fa9eSJayachandran C. idx -= 2;
5529417fa9eSJayachandran C.
5539417fa9eSJayachandran C. size = sizeof(*cpus) * max_cpus;
5549417fa9eSJayachandran C. addr = trunc_page(phys_avail[idx + 1] - size);
5559417fa9eSJayachandran C. KASSERT(addr >= phys_avail[idx],
5569417fa9eSJayachandran C. ("Not enough memory for SRAT table items"));
5579417fa9eSJayachandran C. phys_avail[idx + 1] = addr - 1;
5589417fa9eSJayachandran C.
5599417fa9eSJayachandran C. /*
5609417fa9eSJayachandran C. * We cannot rely on PHYS_TO_DMAP because this code is also used in
5619417fa9eSJayachandran C. * i386, so use pmap_mapbios to map the memory, this will end up using
5629417fa9eSJayachandran C. * the default memory attribute (WB), and the DMAP when available.
5639417fa9eSJayachandran C. */
5649417fa9eSJayachandran C. cpus = (struct cpu_info *)pmap_mapbios(addr, size);
5659417fa9eSJayachandran C. bzero(cpus, size);
5669417fa9eSJayachandran C. return (0);
5679417fa9eSJayachandran C. }
5689417fa9eSJayachandran C.
5699417fa9eSJayachandran C. static int
parse_srat(void)5709417fa9eSJayachandran C. parse_srat(void)
5719417fa9eSJayachandran C. {
5729417fa9eSJayachandran C. int error;
5739417fa9eSJayachandran C.
5749417fa9eSJayachandran C. /*
5759417fa9eSJayachandran C. * Make a pass over the table to populate the cpus[] and
5769417fa9eSJayachandran C. * mem_info[] tables.
5779417fa9eSJayachandran C. */
5789417fa9eSJayachandran C. srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT);
5799417fa9eSJayachandran C. error = 0;
5809417fa9eSJayachandran C. srat_walk_table(srat_parse_entry, &error);
5819417fa9eSJayachandran C. acpi_unmap_table(srat);
5829417fa9eSJayachandran C. srat = NULL;
5839417fa9eSJayachandran C. if (error || check_domains() != 0 || check_phys_avail() != 0 ||
5849417fa9eSJayachandran C. renumber_domains() != 0) {
5859417fa9eSJayachandran C. srat_physaddr = 0;
5869417fa9eSJayachandran C. return (-1);
5879417fa9eSJayachandran C. }
5889417fa9eSJayachandran C.
5899417fa9eSJayachandran C. return (0);
5909417fa9eSJayachandran C. }
5919417fa9eSJayachandran C.
5929417fa9eSJayachandran C. static void
init_mem_locality(void)5939417fa9eSJayachandran C. init_mem_locality(void)
5949417fa9eSJayachandran C. {
5959417fa9eSJayachandran C. int i;
5969417fa9eSJayachandran C.
5979417fa9eSJayachandran C. /*
5989417fa9eSJayachandran C. * For now, assume -1 == "no locality information for
5999417fa9eSJayachandran C. * this pairing.
6009417fa9eSJayachandran C. */
6019417fa9eSJayachandran C. for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++)
6029417fa9eSJayachandran C. vm_locality_table[i] = -1;
6039417fa9eSJayachandran C. }
6049417fa9eSJayachandran C.
6059417fa9eSJayachandran C. /*
6069417fa9eSJayachandran C. * Parse SRAT and SLIT to save proximity info. Don't do
6079417fa9eSJayachandran C. * anything if SRAT is not available.
6089417fa9eSJayachandran C. */
6099417fa9eSJayachandran C. void
acpi_pxm_parse_tables(void)6109417fa9eSJayachandran C. acpi_pxm_parse_tables(void)
6119417fa9eSJayachandran C. {
6129417fa9eSJayachandran C.
6139417fa9eSJayachandran C. if (srat_physaddr == 0)
6149417fa9eSJayachandran C. return;
6159417fa9eSJayachandran C. if (parse_srat() < 0)
6169417fa9eSJayachandran C. return;
6179417fa9eSJayachandran C. init_mem_locality();
6189417fa9eSJayachandran C. (void)parse_slit();
6199417fa9eSJayachandran C. }
6209417fa9eSJayachandran C.
6219417fa9eSJayachandran C. /*
6229417fa9eSJayachandran C. * Use saved data from SRAT/SLIT to update memory locality.
6239417fa9eSJayachandran C. */
6249417fa9eSJayachandran C. void
acpi_pxm_set_mem_locality(void)6259417fa9eSJayachandran C. acpi_pxm_set_mem_locality(void)
6269417fa9eSJayachandran C. {
6279417fa9eSJayachandran C.
6289417fa9eSJayachandran C. if (srat_physaddr == 0)
6299417fa9eSJayachandran C. return;
6309417fa9eSJayachandran C. vm_phys_register_domains(ndomain, mem_info, vm_locality_table);
6319417fa9eSJayachandran C. }
6329417fa9eSJayachandran C.
6339417fa9eSJayachandran C. static void
srat_walk_table(acpi_subtable_handler * handler,void * arg)6349417fa9eSJayachandran C. srat_walk_table(acpi_subtable_handler *handler, void *arg)
6359417fa9eSJayachandran C. {
6369417fa9eSJayachandran C.
6379417fa9eSJayachandran C. acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length,
6389417fa9eSJayachandran C. handler, arg);
6399417fa9eSJayachandran C. }
6409417fa9eSJayachandran C.
6419417fa9eSJayachandran C. /*
642e76aab6aSMark Johnston * Set up per-CPU domain IDs from information saved in 'cpus' and tear down data
643e76aab6aSMark Johnston * structures allocated by acpi_pxm_init().
6449417fa9eSJayachandran C. */
6459417fa9eSJayachandran C. void
acpi_pxm_set_cpu_locality(void)6469417fa9eSJayachandran C. acpi_pxm_set_cpu_locality(void)
6479417fa9eSJayachandran C. {
6489417fa9eSJayachandran C. struct cpu_info *cpu;
6499417fa9eSJayachandran C. struct pcpu *pc;
6509417fa9eSJayachandran C. u_int i;
6519417fa9eSJayachandran C.
6529417fa9eSJayachandran C. if (srat_physaddr == 0)
6539417fa9eSJayachandran C. return;
6549417fa9eSJayachandran C. for (i = 0; i < MAXCPU; i++) {
6559417fa9eSJayachandran C. if (CPU_ABSENT(i))
6569417fa9eSJayachandran C. continue;
6579417fa9eSJayachandran C. pc = pcpu_find(i);
6589417fa9eSJayachandran C. KASSERT(pc != NULL, ("no pcpu data for CPU %u", i));
6599417fa9eSJayachandran C. cpu = cpu_get_info(pc);
6609417fa9eSJayachandran C. pc->pc_domain = vm_ndomains > 1 ? cpu->domain : 0;
6619417fa9eSJayachandran C. CPU_SET(i, &cpuset_domain[pc->pc_domain]);
6629417fa9eSJayachandran C. if (bootverbose)
6639417fa9eSJayachandran C. printf("SRAT: CPU %u has memory domain %d\n", i,
6649417fa9eSJayachandran C. pc->pc_domain);
6659417fa9eSJayachandran C. }
666e76aab6aSMark Johnston /* XXXMJ the page is leaked. */
6677ae99f80SJohn Baldwin pmap_unmapbios(cpus, sizeof(*cpus) * max_cpus);
668e76aab6aSMark Johnston srat_physaddr = 0;
669e76aab6aSMark Johnston cpus = NULL;
6709417fa9eSJayachandran C. }
6719417fa9eSJayachandran C.
672a5e5548cSJeff Roberson int
acpi_pxm_get_cpu_locality(int apic_id)673a5e5548cSJeff Roberson acpi_pxm_get_cpu_locality(int apic_id)
674a5e5548cSJeff Roberson {
675a5e5548cSJeff Roberson struct cpu_info *cpu;
676a5e5548cSJeff Roberson
677a5e5548cSJeff Roberson cpu = cpu_find(apic_id);
678a5e5548cSJeff Roberson if (cpu == NULL)
679a5e5548cSJeff Roberson panic("SRAT: CPU with ID %u is not known", apic_id);
680a5e5548cSJeff Roberson return (cpu->domain);
681a5e5548cSJeff Roberson }
682a5e5548cSJeff Roberson
6839417fa9eSJayachandran C. /*
6849417fa9eSJayachandran C. * Map a _PXM value to a VM domain ID.
6859417fa9eSJayachandran C. *
6869417fa9eSJayachandran C. * Returns the domain ID, or -1 if no domain ID was found.
6879417fa9eSJayachandran C. */
6889417fa9eSJayachandran C. int
acpi_map_pxm_to_vm_domainid(int pxm)6899417fa9eSJayachandran C. acpi_map_pxm_to_vm_domainid(int pxm)
6909417fa9eSJayachandran C. {
6919417fa9eSJayachandran C. int i;
6929417fa9eSJayachandran C.
6939417fa9eSJayachandran C. for (i = 0; i < ndomain; i++) {
6949417fa9eSJayachandran C. if (domain_pxm[i] == pxm)
6959417fa9eSJayachandran C. return (vm_ndomains > 1 ? i : 0);
6969417fa9eSJayachandran C. }
6979417fa9eSJayachandran C.
6989417fa9eSJayachandran C. return (-1);
6999417fa9eSJayachandran C. }
7009417fa9eSJayachandran C.
#else /* MAXMEMDOM == 1 */

/*
 * Stub used when only a single memory domain is supported: no _PXM
 * value maps to a VM domain, so the lookup always fails with -1.
 */
int
acpi_map_pxm_to_vm_domainid(int pxm)
{

	return (-1);
}

#endif /* MAXMEMDOM > 1 */