1 /*-
2 * Copyright (c) 2014 Andrew Turner
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28 #include "opt_acpi.h"
29 #include "opt_kstack_pages.h"
30 #include "opt_platform.h"
31 #include "opt_ddb.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/asan.h>
36 #include <sys/buf.h>
37 #include <sys/bus.h>
38 #include <sys/cons.h>
39 #include <sys/cpu.h>
40 #include <sys/csan.h>
41 #include <sys/efi.h>
42 #include <sys/efi_map.h>
43 #include <sys/exec.h>
44 #include <sys/imgact.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/ktr.h>
48 #include <sys/limits.h>
49 #include <sys/linker.h>
50 #include <sys/msan.h>
51 #include <sys/msgbuf.h>
52 #include <sys/pcpu.h>
53 #include <sys/physmem.h>
54 #include <sys/proc.h>
55 #include <sys/ptrace.h>
56 #include <sys/reboot.h>
57 #include <sys/reg.h>
58 #include <sys/rwlock.h>
59 #include <sys/sched.h>
60 #include <sys/signalvar.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysent.h>
63 #include <sys/sysproto.h>
64 #include <sys/ucontext.h>
65 #include <sys/vdso.h>
66 #include <sys/vmmeter.h>
67
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_phys.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_pager.h>
77
78 #include <machine/armreg.h>
79 #include <machine/cpu.h>
80 #include <machine/cpu_feat.h>
81 #include <machine/debug_monitor.h>
82 #include <machine/hypervisor.h>
83 #include <machine/ifunc.h>
84 #include <machine/kdb.h>
85 #include <machine/machdep.h>
86 #include <machine/metadata.h>
87 #include <machine/md_var.h>
88 #include <machine/pcb.h>
89 #include <machine/rsi.h>
90 #include <machine/undefined.h>
91 #include <machine/vmparam.h>
92
93 #ifdef VFP
94 #include <machine/vfp.h>
95 #endif
96
97 #ifdef DEV_ACPI
98 #include <contrib/dev/acpica/include/acpi.h>
99 #include <machine/acpica_machdep.h>
100 #endif
101
102 #ifdef FDT
103 #include <dev/fdt/fdt_common.h>
104 #include <dev/ofw/openfirm.h>
105 #endif
106
107 #include <dev/psci/psci.h>
108 #include <dev/smbios/smbios.h>
109
/*
 * Compile-time checks on the layout of struct pcb: its total size and the
 * offsets of pcb_fpusaved and pcb_fpustate must match the values below.
 */
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");

/* Bus enumeration method in use; selected by bus_probe(). */
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
 * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

/* Trapframe assigned to thread0 by init_proc0(). */
static struct trapframe proc0_tf;

/* Cleared at the end of initarm(). */
int early_boot = 1;
int cold = 1;
/* Exception level reported in the boot parameters; see has_hyp(). */
static int boot_el;

/* Handed to vm_ksubmap_init() by cpu_startup(). */
struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
int has_pan;			/* Set to 1 by pan_enable() */

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared we need to ensure it's preserved. To do this use
 * __read_mostly as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
/*
 * EFI memory map header found in the preload metadata by initarm(); NULL
 * when the loader did not pass one.
 */
static struct efi_map_header *efihdr;

/*
 * NOTE(review): presumably a hook for an APEI NMI handler; it is only
 * declared here and never called in this file -- confirm against its users.
 */
int (*apei_nmi)(void);
161
#if defined(PERTHREAD_SSP_WARNING)
/*
 * Tell the user that per-thread SSP was requested but cannot be provided by
 * this compiler. Registered twice so the message is printed both at
 * SI_SUB_COPYRIGHT and at SI_SUB_LAST.
 */
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is "
	    "too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
171
172 static cpu_feat_en
pan_check(const struct cpu_feat * feat __unused,u_int midr __unused)173 pan_check(const struct cpu_feat *feat __unused, u_int midr __unused)
174 {
175 uint64_t id_aa64mfr1;
176
177 get_kernel_reg(ID_AA64MMFR1_EL1, &id_aa64mfr1);
178 if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) == ID_AA64MMFR1_PAN_NONE)
179 return (FEAT_ALWAYS_DISABLE);
180
181 return (FEAT_DEFAULT_ENABLE);
182 }
183
184 static bool
pan_enable(const struct cpu_feat * feat __unused,cpu_feat_errata errata_status __unused,u_int * errata_list __unused,u_int errata_count __unused)185 pan_enable(const struct cpu_feat *feat __unused,
186 cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
187 u_int errata_count __unused)
188 {
189 has_pan = 1;
190
191 /*
192 * This sets the PAN bit, stopping the kernel from accessing
193 * memory when userspace can also access it unless the kernel
194 * uses the userspace load/store instructions.
195 */
196 WRITE_SPECIALREG(sctlr_el1,
197 READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
198 __asm __volatile(
199 ".arch_extension pan \n"
200 "msr pan, #1 \n"
201 ".arch_extension nopan \n");
202
203 return (true);
204 }
205
206 static void
pan_disabled(const struct cpu_feat * feat __unused)207 pan_disabled(const struct cpu_feat *feat __unused)
208 {
209 if (PCPU_GET(cpuid) == 0)
210 update_special_reg(ID_AA64MMFR1_EL1, ID_AA64MMFR1_PAN_MASK, 0);
211 }
212
213 CPU_FEAT(feat_pan, "Privileged access never",
214 pan_check, NULL, pan_enable, pan_disabled,
215 CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
216
217 static cpu_feat_en
mops_check(const struct cpu_feat * feat __unused,u_int midr __unused)218 mops_check(const struct cpu_feat *feat __unused, u_int midr __unused)
219 {
220 uint64_t id_aa64isar2;
221
222 get_kernel_reg(ID_AA64ISAR2_EL1, &id_aa64isar2);
223 if (ID_AA64ISAR2_MOPS_VAL(id_aa64isar2) == ID_AA64ISAR2_MOPS_NONE)
224 return (FEAT_ALWAYS_DISABLE);
225
226 return (FEAT_DEFAULT_ENABLE);
227 }
228
229 static bool
mops_enable(const struct cpu_feat * feat __unused,cpu_feat_errata errata_status __unused,u_int * errata_list __unused,u_int errata_count __unused)230 mops_enable(const struct cpu_feat *feat __unused,
231 cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
232 u_int errata_count __unused)
233 {
234 WRITE_SPECIALREG(sctlr_el1, READ_SPECIALREG(sctlr_el1) | SCTLR_MSCEn);
235 isb();
236
237 return (true);
238 }
239
240 static void
mops_disabled(const struct cpu_feat * feat __unused)241 mops_disabled(const struct cpu_feat *feat __unused)
242 {
243 WRITE_SPECIALREG(sctlr_el1, READ_SPECIALREG(sctlr_el1) & ~SCTLR_MSCEn);
244 isb();
245 }
246
247 CPU_FEAT(feat_mops, "MOPS",
248 mops_check, NULL, mops_enable, mops_disabled,
249 CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
250
251 bool
has_hyp(void)252 has_hyp(void)
253 {
254 return (boot_el == CURRENTEL_EL_EL2);
255 }
256
257 bool
in_vhe(void)258 in_vhe(void)
259 {
260 /* If we are currently in EL2 then must be in VHE */
261 return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
262 CURRENTEL_EL_EL2);
263 }
264
265 static void
cpu_startup(void * dummy)266 cpu_startup(void *dummy)
267 {
268 vm_paddr_t size;
269 int i;
270
271 printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
272 ptoa((uintmax_t)realmem) / 1024 / 1024);
273
274 if (bootverbose) {
275 printf("Physical memory chunk(s):\n");
276 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
277 size = phys_avail[i + 1] - phys_avail[i];
278 printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
279 (uintmax_t)phys_avail[i],
280 (uintmax_t)phys_avail[i + 1] - 1,
281 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
282 }
283 }
284
285 printf("avail memory = %ju (%ju MB)\n",
286 ptoa((uintmax_t)vm_free_count()),
287 ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);
288
289 undef_init();
290 install_cpu_errata();
291
292 vm_ksubmap_init(&kmi);
293 bufinit();
294 vm_pager_bufferinit();
295 }
296
297 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
298
/*
 * SYSINIT hook that calls link_elf_late_ireloc(). It is scheduled at
 * SI_SUB_CONFIGURE so that it runs after cpu_feat has completed.
 */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
/* Late enough for cpu_feat to have completed */
SYSINIT(late_ifunc_resolve, SI_SUB_CONFIGURE, SI_ORDER_ANY,
    late_ifunc_resolve, NULL);
307
/*
 * Idle wakeup hook. This implementation takes no action for the given CPU
 * and always returns 0.
 */
int
cpu_idle_wakeup(int cpu)
{

	return (0);
}
314
315 void
cpu_idle(int busy)316 cpu_idle(int busy)
317 {
318
319 spinlock_enter();
320 if (!busy)
321 cpu_idleclock();
322 if (!sched_runnable())
323 __asm __volatile(
324 "dsb sy \n"
325 "wfi \n");
326 if (!busy)
327 cpu_activeclock();
328 spinlock_exit();
329 }
330
/*
 * Park the CPU forever: disable interrupts and loop on wfi. Never returns.
 */
void
cpu_halt(void)
{

	/* The system should already be down; if not, sleep in low power. */
	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
341
/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 *
 * Currently a stub: the body is empty and both arguments are ignored.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD */
}
352
353 /* Get current clock frequency for the given CPU ID. */
354 int
cpu_est_clockrate(int cpu_id,uint64_t * rate)355 cpu_est_clockrate(int cpu_id, uint64_t *rate)
356 {
357 struct pcpu *pc;
358
359 pc = pcpu_find(cpu_id);
360 if (pc == NULL || rate == NULL)
361 return (EINVAL);
362
363 if (pc->pc_clock == 0)
364 return (EOPNOTSUPP);
365
366 *rate = pc->pc_clock;
367 return (0);
368 }
369
370 void
cpu_pcpu_init(struct pcpu * pcpu,int cpuid,size_t size)371 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
372 {
373
374 pcpu->pc_acpi_id = 0xffffffff;
375 pcpu->pc_mpidr = UINT64_MAX;
376 }
377
378 void
spinlock_enter(void)379 spinlock_enter(void)
380 {
381 struct thread *td;
382 register_t daif;
383
384 td = curthread;
385 if (td->td_md.md_spinlock_count == 0) {
386 daif = intr_disable();
387 td->td_md.md_spinlock_count = 1;
388 td->td_md.md_saved_daif = daif;
389 critical_enter();
390 } else
391 td->td_md.md_spinlock_count++;
392 }
393
394 void
spinlock_exit(void)395 spinlock_exit(void)
396 {
397 struct thread *td;
398 register_t daif;
399
400 td = curthread;
401 daif = td->td_md.md_saved_daif;
402 td->td_md.md_spinlock_count--;
403 if (td->td_md.md_spinlock_count == 0) {
404 critical_exit();
405 intr_restore(daif);
406 }
407 }
408
409 /*
410 * Construct a PCB from a trapframe. This is called from kdb_trap() where
411 * we want to start a backtrace from the function that caused us to enter
412 * the debugger. We have the context in the trapframe, but base the trace
413 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
414 * enough for a backtrace.
415 */
416 void
makectx(struct trapframe * tf,struct pcb * pcb)417 makectx(struct trapframe *tf, struct pcb *pcb)
418 {
419 int i;
420
421 /* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
422 for (i = 0; i < nitems(pcb->pcb_x); i++) {
423 if (i == PCB_LR)
424 pcb->pcb_x[i] = tf->tf_elr;
425 else
426 pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
427 }
428
429 pcb->pcb_sp = tf->tf_sp;
430 }
431
432 static void
init_proc0(void * kstack)433 init_proc0(void *kstack)
434 {
435 struct pcpu *pcpup;
436
437 pcpup = cpuid_to_pcpu[0];
438 MPASS(pcpup != NULL);
439
440 proc_linkup0(&proc0, &thread0);
441 thread0.td_kstack = kstack;
442 thread0.td_kstack_pages = KSTACK_PAGES;
443 #if defined(PERTHREAD_SSP)
444 thread0.td_md.md_canary = boot_canary;
445 #endif
446 thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
447 thread0.td_kstack_pages * PAGE_SIZE) - 1;
448 thread0.td_pcb->pcb_flags = 0;
449 thread0.td_pcb->pcb_fpflags = 0;
450 thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
451 thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
452 thread0.td_frame = &proc0_tf;
453 ptrauth_thread0(&thread0);
454 pcpup->pc_curpcb = thread0.td_pcb;
455
456 /*
457 * Unmask SError exceptions. They are used to signal a RAS failure,
458 * or other hardware error.
459 */
460 serror_enable();
461 }
462
463 /*
464 * Get an address to be used to write to kernel data that may be mapped
465 * read-only, e.g. to patch kernel code.
466 */
467 bool
arm64_get_writable_addr(void * addr,void ** out)468 arm64_get_writable_addr(void *addr, void **out)
469 {
470 vm_paddr_t pa;
471
472 /* Check if the page is writable */
473 if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
474 *out = addr;
475 return (true);
476 }
477
478 /*
479 * Find the physical address of the given page.
480 */
481 if (!pmap_klookup((vm_offset_t)addr, &pa)) {
482 return (false);
483 }
484
485 /*
486 * If it is within the DMAP region and is writable use that.
487 */
488 if (PHYS_IN_DMAP_RANGE(pa)) {
489 addr = PHYS_TO_DMAP(pa);
490 if (PAR_SUCCESS(arm64_address_translate_s1e1w(
491 (vm_offset_t)addr))) {
492 *out = addr;
493 return (true);
494 }
495 }
496
497 return (false);
498 }
499
/*
 * Map the passed in VA in EFI space to a void * using the efi memory table to
 * find the PA and return it in the DMAP, if it exists. We're used between the
 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
 * tables. We assume that either the entry you are mapping fits within its
 * page, or if it spills to the next page, that's contiguous in PA and in the
 * DMAP. All observed tables obey the first part of this precondition.
 *
 * struct early_map_data carries one lookup through the memory map walk.
 */
struct early_map_data
{
	vm_offset_t va;		/* EFI virtual address being looked up */
	vm_offset_t pa;		/* Matching physical address, 0 until found */
};
513
514 static void
efi_early_map_entry(struct efi_md * p,void * argp)515 efi_early_map_entry(struct efi_md *p, void *argp)
516 {
517 struct early_map_data *emdp = argp;
518 vm_offset_t s, e;
519
520 if (emdp->pa != 0)
521 return;
522 if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
523 return;
524 s = p->md_virt;
525 e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
526 if (emdp->va < s || emdp->va >= e)
527 return;
528 emdp->pa = p->md_phys + (emdp->va - p->md_virt);
529 }
530
531 static void *
efi_early_map(vm_offset_t va)532 efi_early_map(vm_offset_t va)
533 {
534 struct early_map_data emd = { .va = va };
535
536 efi_map_foreach_entry(efihdr, efi_early_map_entry, &emd);
537 if (emd.pa == 0)
538 return NULL;
539 return PHYS_TO_DMAP(emd.pa);
540 }
541
542
543 /*
544 * When booted via kexec from Linux, the prior kernel will pass in reserved
545 * memory areas in an EFI config table. We need to find that table and walk
546 * through it excluding the memory ranges in it. btw, this is called too early
547 * for the printf to do anything (unless EARLY_PRINTF is defined) since msgbufp
548 * isn't initialized, let alone a console, but breakpoints in printf help
549 * diagnose rare failures.
550 */
551 static void
exclude_efi_memreserve(vm_paddr_t efi_systbl_phys)552 exclude_efi_memreserve(vm_paddr_t efi_systbl_phys)
553 {
554 struct efi_systbl *systbl;
555 efi_guid_t efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
556
557 systbl = PHYS_TO_DMAP(efi_systbl_phys);
558 if (systbl == NULL) {
559 printf("can't map systbl\n");
560 return;
561 }
562 if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
563 printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
564 return;
565 }
566
567 /*
568 * We don't yet have the pmap system booted enough to create a pmap for
569 * the efi firmware's preferred address space from the GetMemoryMap()
570 * table. The st_cfgtbl is a VA in this space, so we need to do the
571 * mapping ourselves to a kernel VA with efi_early_map. We assume that
572 * the cfgtbl entries don't span a page. Other pointers are PAs, as
573 * noted below.
574 */
575 if (systbl->st_cfgtbl == 0) /* Failsafe st_entries should == 0 in this case */
576 return;
577 for (int i = 0; i < systbl->st_entries; i++) {
578 struct efi_cfgtbl *cfgtbl;
579 struct linux_efi_memreserve *mr;
580
581 cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
582 if (cfgtbl == NULL)
583 panic("Can't map the config table entry %d\n", i);
584 if (memcmp(&cfgtbl->ct_guid, &efi_memreserve, sizeof(efi_guid_t)) != 0)
585 continue;
586
587 /*
588 * cfgtbl points are either VA or PA, depending on the GUID of
589 * the table. memreserve GUID pointers are PA and not converted
590 * after a SetVirtualAddressMap(). The list's mr_next pointer
591 * is also a PA.
592 */
593 mr = PHYS_TO_DMAP((vm_offset_t)cfgtbl->ct_data);
594 while (true) {
595 for (int j = 0; j < mr->mr_count; j++) {
596 struct linux_efi_memreserve_entry *mre;
597
598 mre = &mr->mr_entry[j];
599 physmem_exclude_region(mre->mre_base, mre->mre_size,
600 EXFLAG_NODUMP | EXFLAG_NOALLOC);
601 }
602 if (mr->mr_next == 0)
603 break;
604 mr = PHYS_TO_DMAP(mr->mr_next);
605 };
606 }
607
608 }
609
#ifdef FDT
/*
 * Locate the device tree blob (from the preload metadata or, with
 * FDT_DTB_STATIC, the blob built into the kernel), install the FDT
 * Open Firmware backend, initialise it with the blob and parse the FDT
 * boot arguments. Returns quietly when no blob exists; panics when the
 * backend cannot be installed or initialised.
 */
static void
try_load_dtb(void)
{
	vm_offset_t blob;

	blob = MD_FETCH(preload_kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/* Nothing in the metadata: fall back to the embedded blob. */
	if (blob == 0)
		blob = (vm_offset_t)&fdt_static_dtb;
#endif

	if (blob == 0) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (!OF_install(OFW_FDT, 0))
		panic("Cannot install FDT");

	if (OF_init((void *)blob) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif
642
643 static bool
bus_probe(void)644 bus_probe(void)
645 {
646 bool has_acpi, has_fdt;
647 char *order, *env;
648
649 has_acpi = has_fdt = false;
650
651 #ifdef FDT
652 has_fdt = (OF_peer(0) != 0);
653 #endif
654 #ifdef DEV_ACPI
655 has_acpi = (AcpiOsGetRootPointer() != 0);
656 #endif
657
658 env = kern_getenv("kern.cfg.order");
659 if (env != NULL) {
660 order = env;
661 while (order != NULL) {
662 if (has_acpi &&
663 strncmp(order, "acpi", 4) == 0 &&
664 (order[4] == ',' || order[4] == '\0')) {
665 arm64_bus_method = ARM64_BUS_ACPI;
666 break;
667 }
668 if (has_fdt &&
669 strncmp(order, "fdt", 3) == 0 &&
670 (order[3] == ',' || order[3] == '\0')) {
671 arm64_bus_method = ARM64_BUS_FDT;
672 break;
673 }
674 order = strchr(order, ',');
675 if (order != NULL)
676 order++; /* Skip comma */
677 }
678 freeenv(env);
679
680 /* If we set the bus method it is valid */
681 if (arm64_bus_method != ARM64_BUS_NONE)
682 return (true);
683 }
684 /* If no order or an invalid order was set use the default */
685 if (arm64_bus_method == ARM64_BUS_NONE) {
686 if (has_acpi)
687 arm64_bus_method = ARM64_BUS_ACPI;
688 else if (has_fdt)
689 arm64_bus_method = ARM64_BUS_FDT;
690 }
691
692 /*
693 * If no option was set the default is valid, otherwise we are
694 * setting one to get cninit() working, then calling panic to tell
695 * the user about the invalid bus setup.
696 */
697 return (env == NULL);
698 }
699
700 static void
cache_setup(void)701 cache_setup(void)
702 {
703 int dczva_line_shift;
704 uint32_t dczid_el0;
705
706 identify_cache(READ_SPECIALREG(ctr_el0));
707
708 dczid_el0 = READ_SPECIALREG(dczid_el0);
709
710 /* Check if dc zva is not prohibited */
711 if (dczid_el0 & DCZID_DZP)
712 dczva_line_size = 0;
713 else {
714 /* Same as with above calculations */
715 dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
716 dczva_line_size = sizeof(int) << dczva_line_shift;
717 }
718 }
719
720 int
memory_mapping_mode(vm_paddr_t pa)721 memory_mapping_mode(vm_paddr_t pa)
722 {
723 struct efi_md *map, *p;
724 size_t efisz;
725 int ndesc, i;
726
727 if (efihdr == NULL)
728 return (VM_MEMATTR_WRITE_BACK);
729
730 /*
731 * Memory map data provided by UEFI via the GetMemoryMap
732 * Boot Services API.
733 */
734 efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
735 map = (struct efi_md *)((uint8_t *)efihdr + efisz);
736
737 if (efihdr->descriptor_size == 0)
738 return (VM_MEMATTR_WRITE_BACK);
739 ndesc = efihdr->memory_size / efihdr->descriptor_size;
740
741 for (i = 0, p = map; i < ndesc; i++,
742 p = efi_next_descriptor(p, efihdr->descriptor_size)) {
743 if (pa < p->md_phys ||
744 pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
745 continue;
746 if (p->md_type == EFI_MD_TYPE_IOMEM ||
747 p->md_type == EFI_MD_TYPE_IOPORT)
748 return (VM_MEMATTR_DEVICE);
749 else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
750 p->md_type == EFI_MD_TYPE_RECLAIM)
751 return (VM_MEMATTR_WRITE_BACK);
752 else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
753 return (VM_MEMATTR_WRITE_THROUGH);
754 else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
755 return (VM_MEMATTR_WRITE_COMBINING);
756 break;
757 }
758
759 return (VM_MEMATTR_DEVICE);
760 }
761
#ifdef FDT
/*
 * fdt_foreach_mem_region() callback: register the region with physmem as
 * hardware memory.
 */
static void
fdt_physmem_hardware_region_cb(const struct mem_region *mr, void *arg __unused)
{
	physmem_hardware_region(mr->mr_start, mr->mr_size);
}

/*
 * fdt_foreach_reserved_mem() callback: exclude the region from physmem with
 * both the NODUMP and NOALLOC flags.
 */
static void
fdt_physmem_exclude_region_cb(const struct mem_region *mr, void *arg __unused)
{
	physmem_exclude_region(mr->mr_start, mr->mr_size,
	    EXFLAG_NODUMP | EXFLAG_NOALLOC);
}
#endif
776
777 void
initarm(struct arm64_bootparams * abp)778 initarm(struct arm64_bootparams *abp)
779 {
780 struct efi_fb *efifb;
781 struct pcpu *pcpup;
782 char *env;
783 #ifdef FDT
784 phandle_t root;
785 char dts_version[255];
786 #endif
787 vm_offset_t lastaddr;
788 bool valid;
789
790 TSRAW(&thread0, TS_ENTER, __func__, NULL);
791
792 boot_el = abp->boot_el;
793
794 /* Parse loader or FDT boot parameters. Determine last used address. */
795 lastaddr = parse_boot_param(abp);
796
797 identify_cpu(0);
798 identify_hypervisor_smbios();
799
800 update_special_regs(0);
801
802 sched_instance_select();
803 link_elf_ireloc();
804
805 /* Set the pcpu data, this is needed by pmap_bootstrap */
806 pcpup = &pcpu0;
807 pcpu_init(pcpup, 0, sizeof(struct pcpu));
808
809 /*
810 * Set the pcpu pointer with a backup in tpidr_el1 to be
811 * loaded when entering the kernel from userland.
812 */
813 __asm __volatile(
814 "mov x18, %0 \n"
815 "msr tpidr_el1, %0" :: "r"(pcpup));
816
817 /* locore.S sets sp_el0 to &thread0 so no need to set it here. */
818 PCPU_SET(curthread, &thread0);
819 PCPU_SET(midr, get_midr());
820
821 #ifdef FDT
822 try_load_dtb();
823 #endif
824
825 efi_systbl_phys = MD_FETCH(preload_kmdp, MODINFOMD_FW_HANDLE,
826 vm_paddr_t);
827
828 /* Load the physical memory ranges */
829 efihdr = (struct efi_map_header *)preload_search_info(preload_kmdp,
830 MODINFO_METADATA | MODINFOMD_EFI_MAP);
831 if (efihdr != NULL)
832 efi_map_add_entries(efihdr);
833 #ifdef FDT
834 else {
835 /* Grab physical memory regions information from device tree. */
836 if (fdt_foreach_mem_region(fdt_physmem_hardware_region_cb,
837 NULL) != 0)
838 panic("Cannot get physical memory regions");
839 }
840 fdt_foreach_reserved_mem(fdt_physmem_exclude_region_cb, NULL);
841 #endif
842
843 /* Exclude the EFI framebuffer from our view of physical memory. */
844 efifb = (struct efi_fb *)preload_search_info(preload_kmdp,
845 MODINFO_METADATA | MODINFOMD_EFI_FB);
846 if (efifb != NULL)
847 physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
848 EXFLAG_NOALLOC);
849
850 /* Do basic tuning, hz etc */
851 init_param1();
852
853 cache_setup();
854
855 /*
856 * Perform a staged bootstrap of virtual memory.
857 *
858 * - First we create the DMAP region. This allows it to be used in
859 * later bootstrapping.
860 * - Next exclude memory that is needed in the DMAP region, but must
861 * not be used by FreeBSD.
862 * - Lastly complete the bootstrapping. It may use the physical
863 * memory map so any excluded memory must be marked as such before
864 * pmap_bootstrap() is called.
865 */
866 pmap_bootstrap_dmap(lastaddr - KERNBASE);
867 /*
868 * Exclude EFI entries needed in the DMAP, e.g. EFI_MD_TYPE_RECLAIM
869 * may contain the ACPI tables but shouldn't be used by the kernel
870 */
871 if (efihdr != NULL)
872 efi_map_exclude_entries(efihdr);
873 /* Do the same for reserve entries in the EFI MEMRESERVE table */
874 if (efi_systbl_phys != 0)
875 exclude_efi_memreserve(efi_systbl_phys);
876 /* Continue bootstrapping pmap */
877 pmap_bootstrap();
878
879 /*
880 * We carefully bootstrap the sanitizer map after we've excluded
881 * absolutely everything else that could impact phys_avail. There's not
882 * always enough room for the initial shadow map after the kernel, so
883 * we'll end up searching for segments that we can safely use. Those
884 * segments also get excluded from phys_avail.
885 */
886 #if defined(KASAN) || defined(KMSAN)
887 pmap_bootstrap_san();
888 #endif
889
890 physmem_init_kernel_globals();
891
892 valid = bus_probe();
893
894 psci_init(NULL);
895 arm64_rsi_setup_memory();
896
897 cninit();
898 set_ttbr0(abp->kern_ttbr0);
899 pmap_s1_invalidate_all_kernel();
900
901 if (!valid)
902 panic("Invalid bus configuration: %s",
903 kern_getenv("kern.cfg.order"));
904
905 /* Detect early CPU feature support */
906 enable_cpu_feat(CPU_FEAT_EARLY_BOOT);
907
908 /*
909 * Dump the boot metadata. We have to wait for cninit() since console
910 * output is required. If it's grossly incorrect the kernel will never
911 * make it this far.
912 */
913 if (getenv_is_true("debug.dump_modinfo_at_boot"))
914 preload_dump();
915
916 init_proc0(abp->kern_stack);
917 msgbufinit(msgbufp, msgbufsize);
918 mutex_init();
919 init_param2(physmem);
920
921 dbg_init();
922 kdb_init();
923 #ifdef KDB
924 if ((boothowto & RB_KDB) != 0)
925 kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
926 #endif
927
928 kcsan_cpu_init(0);
929 kasan_init();
930 kmsan_init();
931
932 env = kern_getenv("kernelname");
933 if (env != NULL)
934 strlcpy(kernelname, env, sizeof(kernelname));
935
936 #ifdef FDT
937 if (arm64_bus_method == ARM64_BUS_FDT) {
938 root = OF_finddevice("/");
939 if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
940 if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
941 printf("WARNING: DTB version is %s while kernel expects %s, "
942 "please update the DTB in the ESP\n",
943 dts_version,
944 LINUX_DTS_VERSION);
945 } else {
946 printf("WARNING: Cannot find freebsd,dts-version property, "
947 "cannot check DTB compliance\n");
948 }
949 }
950 #endif
951
952 if (boothowto & RB_VERBOSE) {
953 if (efihdr != NULL)
954 efi_map_print_entries(efihdr);
955 physmem_print_tables();
956 }
957
958 early_boot = 0;
959
960 if (bootverbose && kstack_pages != KSTACK_PAGES)
961 printf("kern.kstack_pages = %d ignored for thread0\n",
962 kstack_pages);
963
964 TSEXIT();
965 }
966
/*
 * Early debug setup: write 0 to oslar_el1 to clear the OS lock, then
 * initialise the debug monitor.
 */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}
979
980 #ifdef DDB
981 #include <ddb/ddb.h>
982
/*
 * DDB "show specialregs": print the current value of each listed system
 * register, one per line, as "<name> = <hex value>".
 */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
/* Read one register by name and print it alongside that name. */
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}
1057
DB_SHOW_COMMAND(vtop,db_show_vtop)1058 DB_SHOW_COMMAND(vtop, db_show_vtop)
1059 {
1060 uint64_t phys;
1061
1062 if (have_addr) {
1063 phys = arm64_address_translate_s1e1r(addr);
1064 db_printf("EL1 physical address reg (read): 0x%016lx\n", phys);
1065 phys = arm64_address_translate_s1e1w(addr);
1066 db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
1067 phys = arm64_address_translate_s1e0r(addr);
1068 db_printf("EL0 physical address reg (read): 0x%016lx\n", phys);
1069 phys = arm64_address_translate_s1e0w(addr);
1070 db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
1071 } else
1072 db_printf("show vtop <virt_addr>\n");
1073 }
1074 #endif
1075
/*
 * Drop any macro definitions of memset/memmove/memcpy so that the plain
 * names can be defined as ifuncs below.
 */
#undef memset
#undef memmove
#undef memcpy

/*
 * Candidate implementations. The ifunc resolvers below return the _mops
 * variant when HWCAP2_MOPS is set in elf_hwcap2 and the _std variant
 * otherwise.
 */
void	*memset_std(void *buf, int c, size_t len);
void	*memset_mops(void *buf, int c, size_t len);
void	*memmove_std(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memmove_mops(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
void	*memcpy_mops(void * _Nonnull dst, const void * _Nonnull src,
	    size_t len);
1090
1091 DEFINE_IFUNC(, void *, memset, (void *, int, size_t))
1092 {
1093 return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memset_mops : memset_std);
1094 }
1095
1096 DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
1097 size_t))
1098 {
1099 return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memmove_mops : memmove_std);
1100 }
1101
1102 DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,
1103 size_t))
1104 {
1105 return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memcpy_mops : memcpy_std);
1106 }
1107