1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2010 Hudson River Trading LLC
5 * Written by: John H. Baldwin <jhb@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 #include "opt_vm.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/bus.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/smp.h>
40 #include <sys/vmmeter.h>
41 #include <vm/vm.h>
42 #include <vm/pmap.h>
43 #include <vm/vm_param.h>
44 #include <vm/vm_page.h>
45 #include <vm/vm_phys.h>
46
47 #include <contrib/dev/acpica/include/acpi.h>
48 #include <contrib/dev/acpica/include/aclocal.h>
49 #include <contrib/dev/acpica/include/actables.h>
50
51 #include <machine/md_var.h>
52
53 #include <dev/acpica/acpivar.h>
54
55 #if MAXMEMDOM > 1
/*
 * Per-CPU affinity record filled in from the SRAT CPU entries.
 */
static struct cpu_info {
	bool enabled:1;		/* set by cpu_add() when the slot is used */
	bool has_memory:1;	/* set by check_domains() when a memory range shares the domain */
	int domain;		/* proximity domain; rewritten by renumber_domains() */
	int id;			/* processor ID that was passed to cpu_add() */
} *cpus;

/* Capacity of cpus[], taken from the 'ncpus' argument of acpi_pxm_init(). */
static int max_cpus;
/* Highest index of cpus[] in use; acpi_pxm_init() resets it to -1. */
static int last_cpu;

/* Memory ranges recorded from SRAT memory affinity entries, and their count. */
struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1];
int num_mem;

/* SRAT mapping (valid only while parsing) and the table's physical address. */
static ACPI_TABLE_SRAT *srat;
static vm_paddr_t srat_physaddr;

/* Sorted list of proximity domains in use; the index is the renumbered ID. */
static int domain_pxm[MAXMEMDOM];
static int ndomain;
/* Memory entries whose base is at or above this address are ignored. */
static vm_paddr_t maxphyaddr;

/* SLIT mapping (valid only while parsing) and the table's physical address. */
static ACPI_TABLE_SLIT *slit;
static vm_paddr_t slit_physaddr;
/* Packed locality matrix; -1 means "no locality information". */
static int vm_locality_table[MAXMEMDOM * MAXMEMDOM];

static void srat_walk_table(acpi_subtable_handler *handler, void *arg);
81
82 /*
83 * SLIT parsing.
84 */
85
86 static void
slit_parse_table(ACPI_TABLE_SLIT * s)87 slit_parse_table(ACPI_TABLE_SLIT *s)
88 {
89 int i, j;
90 int i_domain, j_domain;
91 int offset = 0;
92 uint8_t e;
93
94 /*
95 * This maps the SLIT data into the VM-domain centric view.
96 * There may be sparse entries in the PXM namespace, so
97 * remap them to a VM-domain ID and if it doesn't exist,
98 * skip it.
99 *
100 * It should result in a packed 2d array of VM-domain
101 * locality information entries.
102 */
103
104 if (bootverbose)
105 printf("SLIT.Localities: %d\n", (int) s->LocalityCount);
106 for (i = 0; i < s->LocalityCount; i++) {
107 i_domain = acpi_map_pxm_to_vm_domainid(i);
108 if (i_domain < 0)
109 continue;
110
111 if (bootverbose)
112 printf("%d: ", i);
113 for (j = 0; j < s->LocalityCount; j++) {
114 j_domain = acpi_map_pxm_to_vm_domainid(j);
115 if (j_domain < 0)
116 continue;
117 e = s->Entry[i * s->LocalityCount + j];
118 if (bootverbose)
119 printf("%d ", (int) e);
120 /* 255 == "no locality information" */
121 if (e == 255)
122 vm_locality_table[offset] = -1;
123 else
124 vm_locality_table[offset] = e;
125 offset++;
126 }
127 if (bootverbose)
128 printf("\n");
129 }
130 }
131
132 /*
133 * Look for an ACPI System Locality Distance Information Table ("SLIT")
134 */
135 static int
parse_slit(void)136 parse_slit(void)
137 {
138
139 if (resource_disabled("slit", 0)) {
140 return (-1);
141 }
142
143 slit_physaddr = acpi_find_table(ACPI_SIG_SLIT);
144 if (slit_physaddr == 0) {
145 return (-1);
146 }
147
148 /*
149 * Make a pass over the table to populate the cpus[] and
150 * mem_info[] tables.
151 */
152 slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT);
153 slit_parse_table(slit);
154 acpi_unmap_table(slit);
155 slit = NULL;
156
157 return (0);
158 }
159
160 /*
161 * SRAT parsing.
162 */
163
164 /*
165 * Returns true if a memory range overlaps with at least one range in
166 * phys_avail[].
167 */
168 static int
overlaps_phys_avail(vm_paddr_t start,vm_paddr_t end)169 overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end)
170 {
171 int i;
172
173 for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) {
174 if (phys_avail[i + 1] <= start)
175 continue;
176 if (phys_avail[i] < end)
177 return (1);
178 break;
179 }
180 return (0);
181 }
182
/*
 * Selects how cpus[] is addressed.  On x86 the cpuid can be used
 * directly as an index into the array.  On arm64 the ACPI Processor UID
 * has a larger range, so entries are stored in arrival order and looked
 * up by searching instead.
 *
 * Non-zero means cpus[] is indexed by processor ID.
 */
#ifndef __aarch64__
static const int cpus_use_indexing = 1;
#else
static const int cpus_use_indexing = 0;
#endif
194
195 /*
196 * Find CPU by processor ID (APIC ID on x86, Processor UID on arm64)
197 */
198 static struct cpu_info *
cpu_find(int cpuid)199 cpu_find(int cpuid)
200 {
201 int i;
202
203 if (cpus_use_indexing) {
204 if (cpuid <= last_cpu && cpus[cpuid].enabled)
205 return (&cpus[cpuid]);
206 } else {
207 for (i = 0; i <= last_cpu; i++)
208 if (cpus[i].id == cpuid)
209 return (&cpus[i]);
210 }
211 return (NULL);
212 }
213
214 /*
215 * Find CPU by pcpu pointer.
216 */
217 static struct cpu_info *
cpu_get_info(struct pcpu * pc)218 cpu_get_info(struct pcpu *pc)
219 {
220 struct cpu_info *cpup;
221 int id;
222
223 #ifdef __aarch64__
224 id = pc->pc_acpi_id;
225 #else
226 id = pc->pc_apic_id;
227 #endif
228 cpup = cpu_find(id);
229 if (cpup == NULL)
230 panic("SRAT: CPU with ID %u is not known", id);
231 return (cpup);
232 }
233
234 /*
235 * Add proximity information for a new CPU.
236 */
237 static struct cpu_info *
cpu_add(int cpuid,int domain)238 cpu_add(int cpuid, int domain)
239 {
240 struct cpu_info *cpup;
241
242 if (cpus_use_indexing) {
243 if (cpuid >= max_cpus)
244 return (NULL);
245 last_cpu = imax(last_cpu, cpuid);
246 cpup = &cpus[cpuid];
247 } else {
248 if (last_cpu >= max_cpus - 1)
249 return (NULL);
250 cpup = &cpus[++last_cpu];
251 }
252 cpup->domain = domain;
253 cpup->id = cpuid;
254 cpup->enabled = 1;
255 return (cpup);
256 }
257
258 static void
srat_parse_entry(ACPI_SUBTABLE_HEADER * entry,void * arg)259 srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg)
260 {
261 ACPI_SRAT_CPU_AFFINITY *cpu;
262 ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic;
263 ACPI_SRAT_MEM_AFFINITY *mem;
264 ACPI_SRAT_GICC_AFFINITY *gicc;
265 static struct cpu_info *cpup;
266 uint64_t base, length;
267 int domain, i, slot;
268
269 switch (entry->Type) {
270 case ACPI_SRAT_TYPE_CPU_AFFINITY:
271 cpu = (ACPI_SRAT_CPU_AFFINITY *)entry;
272 domain = cpu->ProximityDomainLo |
273 cpu->ProximityDomainHi[0] << 8 |
274 cpu->ProximityDomainHi[1] << 16 |
275 cpu->ProximityDomainHi[2] << 24;
276 if (bootverbose)
277 printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
278 cpu->ApicId, domain,
279 (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ?
280 "enabled" : "disabled");
281 if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED))
282 break;
283 cpup = cpu_find(cpu->ApicId);
284 if (cpup != NULL) {
285 printf("SRAT: Duplicate local APIC ID %u\n",
286 cpu->ApicId);
287 *(int *)arg = ENXIO;
288 break;
289 }
290 cpup = cpu_add(cpu->ApicId, domain);
291 if (cpup == NULL)
292 printf("SRAT: Ignoring local APIC ID %u (too high)\n",
293 cpu->ApicId);
294 break;
295 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
296 x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry;
297 if (bootverbose)
298 printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
299 x2apic->ApicId, x2apic->ProximityDomain,
300 (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ?
301 "enabled" : "disabled");
302 if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED))
303 break;
304 KASSERT(cpu_find(x2apic->ApicId) == NULL,
305 ("Duplicate local APIC ID %u", x2apic->ApicId));
306 cpup = cpu_add(x2apic->ApicId, x2apic->ProximityDomain);
307 if (cpup == NULL)
308 printf("SRAT: Ignoring local APIC ID %u (too high)\n",
309 x2apic->ApicId);
310 break;
311 case ACPI_SRAT_TYPE_GICC_AFFINITY:
312 gicc = (ACPI_SRAT_GICC_AFFINITY *)entry;
313 if (bootverbose)
314 printf("SRAT: Found CPU UID %u domain %d: %s\n",
315 gicc->AcpiProcessorUid, gicc->ProximityDomain,
316 (gicc->Flags & ACPI_SRAT_GICC_ENABLED) ?
317 "enabled" : "disabled");
318 if (!(gicc->Flags & ACPI_SRAT_GICC_ENABLED))
319 break;
320 KASSERT(cpu_find(gicc->AcpiProcessorUid) == NULL,
321 ("Duplicate CPU UID %u", gicc->AcpiProcessorUid));
322 cpup = cpu_add(gicc->AcpiProcessorUid, gicc->ProximityDomain);
323 if (cpup == NULL)
324 printf("SRAT: Ignoring CPU UID %u (too high)\n",
325 gicc->AcpiProcessorUid);
326 break;
327 case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
328 mem = (ACPI_SRAT_MEM_AFFINITY *)entry;
329 base = mem->BaseAddress;
330 length = mem->Length;
331 domain = mem->ProximityDomain;
332
333 if (bootverbose)
334 printf(
335 "SRAT: Found memory domain %d addr 0x%jx len 0x%jx: %s\n",
336 domain, (uintmax_t)base, (uintmax_t)length,
337 (mem->Flags & ACPI_SRAT_MEM_ENABLED) ?
338 "enabled" : "disabled");
339 if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED))
340 break;
341 if (base >= maxphyaddr ||
342 !overlaps_phys_avail(base, base + length)) {
343 printf("SRAT: Ignoring memory at addr 0x%jx\n",
344 (uintmax_t)base);
345 break;
346 }
347 if (num_mem == VM_PHYSSEG_MAX) {
348 printf("SRAT: Too many memory regions\n");
349 *(int *)arg = ENXIO;
350 break;
351 }
352 slot = num_mem;
353 for (i = 0; i < num_mem; i++) {
354 if (mem_info[i].domain == domain) {
355 /* Try to extend an existing segment. */
356 if (base == mem_info[i].end) {
357 mem_info[i].end += length;
358 return;
359 }
360 if (base + length == mem_info[i].start) {
361 mem_info[i].start -= length;
362 return;
363 }
364 }
365 if (mem_info[i].end <= base)
366 continue;
367 if (mem_info[i].start < base + length) {
368 printf("SRAT: Overlapping memory entries\n");
369 *(int *)arg = ENXIO;
370 return;
371 }
372 slot = i;
373 }
374 for (i = num_mem; i > slot; i--)
375 mem_info[i] = mem_info[i - 1];
376 mem_info[slot].start = base;
377 mem_info[slot].end = base + length;
378 mem_info[slot].domain = domain;
379 num_mem++;
380 break;
381 }
382 }
383
384 /*
385 * Ensure each memory domain has at least one CPU and that each CPU
386 * has at least one memory domain.
387 */
388 static int
check_domains(void)389 check_domains(void)
390 {
391 int found, i, j;
392
393 for (i = 0; i < num_mem; i++) {
394 found = 0;
395 for (j = 0; j <= last_cpu; j++)
396 if (cpus[j].enabled &&
397 cpus[j].domain == mem_info[i].domain) {
398 cpus[j].has_memory = 1;
399 found++;
400 }
401 if (!found) {
402 printf("SRAT: No CPU found for memory domain %d\n",
403 mem_info[i].domain);
404 return (ENXIO);
405 }
406 }
407 for (i = 0; i <= last_cpu; i++)
408 if (cpus[i].enabled && !cpus[i].has_memory) {
409 found = 0;
410 for (j = 0; j < num_mem && !found; j++) {
411 if (mem_info[j].domain == cpus[i].domain)
412 found = 1;
413 }
414 if (!found) {
415 if (bootverbose)
416 printf("SRAT: mem dom %d is empty\n",
417 cpus[i].domain);
418 mem_info[num_mem].start = 0;
419 mem_info[num_mem].end = 0;
420 mem_info[num_mem].domain = cpus[i].domain;
421 num_mem++;
422 }
423 }
424 return (0);
425 }
426
427 /*
428 * Check that the SRAT memory regions cover all of the regions in
429 * phys_avail[].
430 */
431 static int
check_phys_avail(void)432 check_phys_avail(void)
433 {
434 vm_paddr_t address;
435 int i, j;
436
437 /* j is the current offset into phys_avail[]. */
438 address = phys_avail[0];
439 j = 0;
440 for (i = 0; i < num_mem; i++) {
441 /*
442 * Consume as many phys_avail[] entries as fit in this
443 * region.
444 */
445 while (address >= mem_info[i].start &&
446 address <= mem_info[i].end) {
447 /*
448 * If we cover the rest of this phys_avail[] entry,
449 * advance to the next entry.
450 */
451 if (phys_avail[j + 1] <= mem_info[i].end) {
452 j += 2;
453 if (phys_avail[j] == 0 &&
454 phys_avail[j + 1] == 0) {
455 return (0);
456 }
457 address = phys_avail[j];
458 } else
459 address = mem_info[i].end + 1;
460 }
461 }
462 printf("SRAT: No memory region found for 0x%jx - 0x%jx\n",
463 (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]);
464 return (ENXIO);
465 }
466
467 /*
468 * Renumber the memory domains to be compact and zero-based if not
469 * already. Returns an error if there are too many domains.
470 */
471 static int
renumber_domains(void)472 renumber_domains(void)
473 {
474 int i, j, slot;
475
476 /* Enumerate all the domains. */
477 ndomain = 0;
478 for (i = 0; i < num_mem; i++) {
479 /* See if this domain is already known. */
480 for (j = 0; j < ndomain; j++) {
481 if (domain_pxm[j] >= mem_info[i].domain)
482 break;
483 }
484 if (j < ndomain && domain_pxm[j] == mem_info[i].domain)
485 continue;
486
487 if (ndomain >= MAXMEMDOM) {
488 ndomain = 1;
489 printf("SRAT: Too many memory domains\n");
490 return (EFBIG);
491 }
492
493 /* Insert the new domain at slot 'j'. */
494 slot = j;
495 for (j = ndomain; j > slot; j--)
496 domain_pxm[j] = domain_pxm[j - 1];
497 domain_pxm[slot] = mem_info[i].domain;
498 ndomain++;
499 }
500
501 /* Renumber each domain to its index in the sorted 'domain_pxm' list. */
502 for (i = 0; i < ndomain; i++) {
503 /*
504 * If the domain is already the right value, no need
505 * to renumber.
506 */
507 if (domain_pxm[i] == i)
508 continue;
509
510 /* Walk the cpu[] and mem_info[] arrays to renumber. */
511 for (j = 0; j < num_mem; j++)
512 if (mem_info[j].domain == domain_pxm[i])
513 mem_info[j].domain = i;
514 for (j = 0; j <= last_cpu; j++)
515 if (cpus[j].enabled && cpus[j].domain == domain_pxm[i])
516 cpus[j].domain = i;
517 }
518
519 return (0);
520 }
521
522 /*
523 * Look for an ACPI System Resource Affinity Table ("SRAT"),
524 * allocate space for cpu information, and initialize globals.
525 */
526 int
acpi_pxm_init(int ncpus,vm_paddr_t maxphys)527 acpi_pxm_init(int ncpus, vm_paddr_t maxphys)
528 {
529 unsigned int idx, size;
530 vm_paddr_t addr;
531
532 if (resource_disabled("srat", 0))
533 return (-1);
534
535 max_cpus = ncpus;
536 last_cpu = -1;
537 maxphyaddr = maxphys;
538 srat_physaddr = acpi_find_table(ACPI_SIG_SRAT);
539 if (srat_physaddr == 0)
540 return (-1);
541
542 /*
543 * Allocate data structure:
544 *
545 * Find the last physical memory region and steal some memory from
546 * it. This is done because at this point in the boot process
547 * malloc is still not usable.
548 */
549 for (idx = 0; phys_avail[idx + 1] != 0; idx += 2);
550 KASSERT(idx != 0, ("phys_avail is empty!"));
551 idx -= 2;
552
553 size = sizeof(*cpus) * max_cpus;
554 addr = trunc_page(phys_avail[idx + 1] - size);
555 KASSERT(addr >= phys_avail[idx],
556 ("Not enough memory for SRAT table items"));
557 phys_avail[idx + 1] = addr - 1;
558
559 /*
560 * We cannot rely on PHYS_TO_DMAP because this code is also used in
561 * i386, so use pmap_mapbios to map the memory, this will end up using
562 * the default memory attribute (WB), and the DMAP when available.
563 */
564 cpus = (struct cpu_info *)pmap_mapbios(addr, size);
565 bzero(cpus, size);
566 return (0);
567 }
568
569 static int
parse_srat(void)570 parse_srat(void)
571 {
572 int error;
573
574 /*
575 * Make a pass over the table to populate the cpus[] and
576 * mem_info[] tables.
577 */
578 srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT);
579 error = 0;
580 srat_walk_table(srat_parse_entry, &error);
581 acpi_unmap_table(srat);
582 srat = NULL;
583 if (error || check_domains() != 0 || check_phys_avail() != 0 ||
584 renumber_domains() != 0) {
585 srat_physaddr = 0;
586 return (-1);
587 }
588
589 return (0);
590 }
591
592 static void
init_mem_locality(void)593 init_mem_locality(void)
594 {
595 int i;
596
597 /*
598 * For now, assume -1 == "no locality information for
599 * this pairing.
600 */
601 for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++)
602 vm_locality_table[i] = -1;
603 }
604
605 /*
606 * Parse SRAT and SLIT to save proximity info. Don't do
607 * anything if SRAT is not available.
608 */
609 void
acpi_pxm_parse_tables(void)610 acpi_pxm_parse_tables(void)
611 {
612
613 if (srat_physaddr == 0)
614 return;
615 if (parse_srat() < 0)
616 return;
617 init_mem_locality();
618 (void)parse_slit();
619 }
620
621 /*
622 * Use saved data from SRAT/SLIT to update memory locality.
623 */
624 void
acpi_pxm_set_mem_locality(void)625 acpi_pxm_set_mem_locality(void)
626 {
627
628 if (srat_physaddr == 0)
629 return;
630 vm_phys_register_domains(ndomain, mem_info, vm_locality_table);
631 }
632
633 static void
srat_walk_table(acpi_subtable_handler * handler,void * arg)634 srat_walk_table(acpi_subtable_handler *handler, void *arg)
635 {
636
637 acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length,
638 handler, arg);
639 }
640
641 /*
642 * Set up per-CPU domain IDs from information saved in 'cpus' and tear down data
643 * structures allocated by acpi_pxm_init().
644 */
645 void
acpi_pxm_set_cpu_locality(void)646 acpi_pxm_set_cpu_locality(void)
647 {
648 struct cpu_info *cpu;
649 struct pcpu *pc;
650 u_int i;
651
652 if (srat_physaddr == 0)
653 return;
654 for (i = 0; i < MAXCPU; i++) {
655 if (CPU_ABSENT(i))
656 continue;
657 pc = pcpu_find(i);
658 KASSERT(pc != NULL, ("no pcpu data for CPU %u", i));
659 cpu = cpu_get_info(pc);
660 pc->pc_domain = vm_ndomains > 1 ? cpu->domain : 0;
661 CPU_SET(i, &cpuset_domain[pc->pc_domain]);
662 if (bootverbose)
663 printf("SRAT: CPU %u has memory domain %d\n", i,
664 pc->pc_domain);
665 }
666 /* XXXMJ the page is leaked. */
667 pmap_unmapbios(cpus, sizeof(*cpus) * max_cpus);
668 srat_physaddr = 0;
669 cpus = NULL;
670 }
671
672 int
acpi_pxm_get_cpu_locality(int apic_id)673 acpi_pxm_get_cpu_locality(int apic_id)
674 {
675 struct cpu_info *cpu;
676
677 cpu = cpu_find(apic_id);
678 if (cpu == NULL)
679 panic("SRAT: CPU with ID %u is not known", apic_id);
680 return (cpu->domain);
681 }
682
683 /*
684 * Map a _PXM value to a VM domain ID.
685 *
686 * Returns the domain ID, or -1 if no domain ID was found.
687 */
688 int
acpi_map_pxm_to_vm_domainid(int pxm)689 acpi_map_pxm_to_vm_domainid(int pxm)
690 {
691 int i;
692
693 for (i = 0; i < ndomain; i++) {
694 if (domain_pxm[i] == pxm)
695 return (vm_ndomains > 1 ? i : 0);
696 }
697
698 return (-1);
699 }
700
701 #else /* MAXMEMDOM == 1 */
702
/*
 * Variant used when MAXMEMDOM is not greater than 1: no _PXM value maps
 * to a VM domain, so -1 is always returned.
 */
int
acpi_map_pxm_to_vm_domainid(int pxm)
{

	(void)pxm;
	return (-1);
}
709
710 #endif /* MAXMEMDOM > 1 */
711