1 /*-
2 * Copyright (c) 2014 Andrew Turner
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28 #include "opt_acpi.h"
29 #include "opt_kstack_pages.h"
30 #include "opt_platform.h"
31 #include "opt_ddb.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/asan.h>
36 #include <sys/buf.h>
37 #include <sys/bus.h>
38 #include <sys/cons.h>
39 #include <sys/cpu.h>
40 #include <sys/csan.h>
41 #include <sys/efi.h>
42 #include <sys/efi_map.h>
43 #include <sys/exec.h>
44 #include <sys/imgact.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/ktr.h>
48 #include <sys/limits.h>
49 #include <sys/linker.h>
50 #include <sys/msan.h>
51 #include <sys/msgbuf.h>
52 #include <sys/pcpu.h>
53 #include <sys/physmem.h>
54 #include <sys/proc.h>
55 #include <sys/ptrace.h>
56 #include <sys/reboot.h>
57 #include <sys/reg.h>
58 #include <sys/rwlock.h>
59 #include <sys/sched.h>
60 #include <sys/signalvar.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysent.h>
63 #include <sys/sysproto.h>
64 #include <sys/ucontext.h>
65 #include <sys/vdso.h>
66 #include <sys/vmmeter.h>
67
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_phys.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_pager.h>
77
78 #include <machine/armreg.h>
79 #include <machine/cpu.h>
80 #include <machine/cpu_feat.h>
81 #include <machine/debug_monitor.h>
82 #include <machine/hypervisor.h>
83 #include <machine/kdb.h>
84 #include <machine/machdep.h>
85 #include <machine/metadata.h>
86 #include <machine/md_var.h>
87 #include <machine/pcb.h>
88 #include <machine/undefined.h>
89 #include <machine/vmparam.h>
90
91 #ifdef VFP
92 #include <machine/vfp.h>
93 #endif
94
95 #ifdef DEV_ACPI
96 #include <contrib/dev/acpica/include/acpi.h>
97 #include <machine/acpica_machdep.h>
98 #endif
99
100 #ifdef FDT
101 #include <dev/fdt/fdt_common.h>
102 #include <dev/ofw/openfirm.h>
103 #endif
104
105 #include <dev/smbios/smbios.h>
106
/*
 * Compile-time checks pinning the size of struct pcb and the offsets of its
 * pcb_fpusaved and pcb_fpustate members.
 * NOTE(review): presumably other code (e.g. assembly) relies on this layout;
 * confirm before changing the constants.
 */
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");

/* Bus enumeration method; selected by bus_probe(). */
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
 * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

/* Trapframe handed to thread0 by init_proc0(). */
static struct trapframe proc0_tf;

/* Cleared at the end of initarm(). */
int early_boot = 1;
int cold = 1;
/* Boot exception level, copied from the boot parameters in initarm(). */
static int boot_el;

/* Passed to vm_ksubmap_init() from cpu_startup(). */
struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
/* Set to 1 by pan_enable(). */
int has_pan;

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared we need to ensure it's preserved. To do this use
 * __read_mostly as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
/* EFI memory map header found in the preload metadata by initarm(). */
static struct efi_map_header *efihdr;

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/*
 * pagezero_simple is default pagezero; cache_setup() switches this to
 * pagezero_cache when DC ZVA is not prohibited.
 */
void (*pagezero)(void *p) = pagezero_simple;

/*
 * NOTE(review): neither assigned nor called in this file; presumably a hook
 * installed by the APEI code -- confirm.
 */
int (*apei_nmi)(void);
165
166 #if defined(PERTHREAD_SSP_WARNING)
167 static void
print_ssp_warning(void * data __unused)168 print_ssp_warning(void *data __unused)
169 {
170 printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
171 }
172 SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
173 SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
174 #endif
175
176 static cpu_feat_en
pan_check(const struct cpu_feat * feat __unused,u_int midr __unused)177 pan_check(const struct cpu_feat *feat __unused, u_int midr __unused)
178 {
179 uint64_t id_aa64mfr1;
180
181 if (!get_kernel_reg(ID_AA64MMFR1_EL1, &id_aa64mfr1))
182 return (FEAT_ALWAYS_DISABLE);
183 if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) == ID_AA64MMFR1_PAN_NONE)
184 return (FEAT_ALWAYS_DISABLE);
185
186 return (FEAT_DEFAULT_ENABLE);
187 }
188
189 static bool
pan_enable(const struct cpu_feat * feat __unused,cpu_feat_errata errata_status __unused,u_int * errata_list __unused,u_int errata_count __unused)190 pan_enable(const struct cpu_feat *feat __unused,
191 cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
192 u_int errata_count __unused)
193 {
194 has_pan = 1;
195
196 /*
197 * This sets the PAN bit, stopping the kernel from accessing
198 * memory when userspace can also access it unless the kernel
199 * uses the userspace load/store instructions.
200 */
201 WRITE_SPECIALREG(sctlr_el1,
202 READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
203 __asm __volatile(
204 ".arch_extension pan \n"
205 "msr pan, #1 \n"
206 ".arch_extension nopan \n");
207
208 return (true);
209 }
210
211 static void
pan_disabled(const struct cpu_feat * feat __unused)212 pan_disabled(const struct cpu_feat *feat __unused)
213 {
214 if (PCPU_GET(cpuid) == 0)
215 update_special_reg(ID_AA64MMFR1_EL1, ID_AA64MMFR1_PAN_MASK, 0);
216 }
217
/*
 * Register "Privileged access never" with the cpu_feat framework using the
 * pan_check, pan_enable and pan_disabled callbacks; the slot between check
 * and enable is left NULL. The feature is flagged CPU_FEAT_AFTER_DEV and
 * CPU_FEAT_PER_CPU.
 */
CPU_FEAT(feat_pan, "Privileged access never",
    pan_check, NULL, pan_enable, pan_disabled,
    CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
221
222 bool
has_hyp(void)223 has_hyp(void)
224 {
225 return (boot_el == CURRENTEL_EL_EL2);
226 }
227
228 bool
in_vhe(void)229 in_vhe(void)
230 {
231 /* If we are currently in EL2 then must be in VHE */
232 return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
233 CURRENTEL_EL_EL2);
234 }
235
236 static void
cpu_startup(void * dummy)237 cpu_startup(void *dummy)
238 {
239 vm_paddr_t size;
240 int i;
241
242 printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
243 ptoa((uintmax_t)realmem) / 1024 / 1024);
244
245 if (bootverbose) {
246 printf("Physical memory chunk(s):\n");
247 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
248 size = phys_avail[i + 1] - phys_avail[i];
249 printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
250 (uintmax_t)phys_avail[i],
251 (uintmax_t)phys_avail[i + 1] - 1,
252 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
253 }
254 }
255
256 printf("avail memory = %ju (%ju MB)\n",
257 ptoa((uintmax_t)vm_free_count()),
258 ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);
259
260 undef_init();
261 install_cpu_errata();
262
263 vm_ksubmap_init(&kmi);
264 bufinit();
265 vm_pager_bufferinit();
266 }
267
268 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
269
/*
 * SYSINIT hook that calls link_elf_late_ireloc(). The argument is unused.
 */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
/* Late enough for cpu_feat to have completed */
SYSINIT(late_ifunc_resolve, SI_SUB_CONFIGURE, SI_ORDER_ANY,
    late_ifunc_resolve, NULL);
278
/*
 * Idle-wakeup hook. The cpu argument is ignored and 0 is always returned.
 * NOTE(review): presumably 0 tells the caller that no wakeup was performed
 * here -- confirm against the scheduler's use of this hook.
 */
int
cpu_idle_wakeup(int cpu)
{
	(void)cpu;

	return (0);
}
285
/*
 * Idle the current CPU.
 *
 * The whole sequence runs inside a spinlock section. When busy is zero the
 * idle clock hooks bracket the wait. The CPU only executes "dsb sy; wfi"
 * when sched_runnable() reports nothing to run.
 */
void
cpu_idle(int busy)
{
	spinlock_enter();

	if (busy == 0)
		cpu_idleclock();

	if (!sched_runnable()) {
		__asm __volatile(
		    "dsb sy \n"
		    "wfi \n");
	}

	if (busy == 0)
		cpu_activeclock();

	spinlock_exit();
}
301
/*
 * Halt the CPU: disable interrupts and wait in "wfi" forever. Does not
 * return.
 */
void
cpu_halt(void)
{
	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
312
/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 *
 * Currently a no-op on arm64: both ptr and len are ignored.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD */
}
323
324 /* Get current clock frequency for the given CPU ID. */
325 int
cpu_est_clockrate(int cpu_id,uint64_t * rate)326 cpu_est_clockrate(int cpu_id, uint64_t *rate)
327 {
328 struct pcpu *pc;
329
330 pc = pcpu_find(cpu_id);
331 if (pc == NULL || rate == NULL)
332 return (EINVAL);
333
334 if (pc->pc_clock == 0)
335 return (EOPNOTSUPP);
336
337 *rate = pc->pc_clock;
338 return (0);
339 }
340
/*
 * Machine-dependent pcpu initialisation: set pc_acpi_id to 0xffffffff and
 * pc_mpidr to UINT64_MAX. cpuid and size are not used.
 * NOTE(review): these look like "not yet known" sentinels -- confirm.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}
348
349 void
spinlock_enter(void)350 spinlock_enter(void)
351 {
352 struct thread *td;
353 register_t daif;
354
355 td = curthread;
356 if (td->td_md.md_spinlock_count == 0) {
357 daif = intr_disable();
358 td->td_md.md_spinlock_count = 1;
359 td->td_md.md_saved_daif = daif;
360 critical_enter();
361 } else
362 td->td_md.md_spinlock_count++;
363 }
364
365 void
spinlock_exit(void)366 spinlock_exit(void)
367 {
368 struct thread *td;
369 register_t daif;
370
371 td = curthread;
372 daif = td->td_md.md_saved_daif;
373 td->td_md.md_spinlock_count--;
374 if (td->td_md.md_spinlock_count == 0) {
375 critical_exit();
376 intr_restore(daif);
377 }
378 }
379
380 /*
381 * Construct a PCB from a trapframe. This is called from kdb_trap() where
382 * we want to start a backtrace from the function that caused us to enter
383 * the debugger. We have the context in the trapframe, but base the trace
384 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
385 * enough for a backtrace.
386 */
387 void
makectx(struct trapframe * tf,struct pcb * pcb)388 makectx(struct trapframe *tf, struct pcb *pcb)
389 {
390 int i;
391
392 /* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
393 for (i = 0; i < nitems(pcb->pcb_x); i++) {
394 if (i == PCB_LR)
395 pcb->pcb_x[i] = tf->tf_elr;
396 else
397 pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
398 }
399
400 pcb->pcb_sp = tf->tf_sp;
401 }
402
403 static void
init_proc0(vm_offset_t kstack)404 init_proc0(vm_offset_t kstack)
405 {
406 struct pcpu *pcpup;
407
408 pcpup = cpuid_to_pcpu[0];
409 MPASS(pcpup != NULL);
410
411 proc_linkup0(&proc0, &thread0);
412 thread0.td_kstack = kstack;
413 thread0.td_kstack_pages = KSTACK_PAGES;
414 #if defined(PERTHREAD_SSP)
415 thread0.td_md.md_canary = boot_canary;
416 #endif
417 thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
418 thread0.td_kstack_pages * PAGE_SIZE) - 1;
419 thread0.td_pcb->pcb_flags = 0;
420 thread0.td_pcb->pcb_fpflags = 0;
421 thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
422 thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
423 thread0.td_frame = &proc0_tf;
424 ptrauth_thread0(&thread0);
425 pcpup->pc_curpcb = thread0.td_pcb;
426
427 /*
428 * Unmask SError exceptions. They are used to signal a RAS failure,
429 * or other hardware error.
430 */
431 serror_enable();
432 }
433
434 /*
435 * Get an address to be used to write to kernel data that may be mapped
436 * read-only, e.g. to patch kernel code.
437 */
438 bool
arm64_get_writable_addr(void * addr,void ** out)439 arm64_get_writable_addr(void *addr, void **out)
440 {
441 vm_paddr_t pa;
442
443 /* Check if the page is writable */
444 if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
445 *out = addr;
446 return (true);
447 }
448
449 /*
450 * Find the physical address of the given page.
451 */
452 if (!pmap_klookup((vm_offset_t)addr, &pa)) {
453 return (false);
454 }
455
456 /*
457 * If it is within the DMAP region and is writable use that.
458 */
459 if (PHYS_IN_DMAP_RANGE(pa)) {
460 addr = (void *)PHYS_TO_DMAP(pa);
461 if (PAR_SUCCESS(arm64_address_translate_s1e1w(
462 (vm_offset_t)addr))) {
463 *out = addr;
464 return (true);
465 }
466 }
467
468 return (false);
469 }
470
/*
 * Map the passed in VA in EFI space to a void * using the EFI memory table to
 * find the PA and return it in the DMAP, if it exists. This is used between
 * the calls to pmap_bootstrap_dmap() and pmap_bootstrap() to parse CFG
 * tables. We assume that either the entry you are mapping fits within its
 * page, or if it spills to the next page, that's contiguous in PA and in the
 * DMAP. All observed tables obey the first part of this precondition.
 */
/*
 * Lookup state shared between efi_early_map() and efi_early_map_entry().
 */
struct early_map_data
{
	vm_offset_t va;		/* EFI virtual address being looked up */
	vm_offset_t pa;		/* matching physical address; 0 = not found */
};
484
485 static void
efi_early_map_entry(struct efi_md * p,void * argp)486 efi_early_map_entry(struct efi_md *p, void *argp)
487 {
488 struct early_map_data *emdp = argp;
489 vm_offset_t s, e;
490
491 if (emdp->pa != 0)
492 return;
493 if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
494 return;
495 s = p->md_virt;
496 e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
497 if (emdp->va < s || emdp->va >= e)
498 return;
499 emdp->pa = p->md_phys + (emdp->va - p->md_virt);
500 }
501
502 static void *
efi_early_map(vm_offset_t va)503 efi_early_map(vm_offset_t va)
504 {
505 struct early_map_data emd = { .va = va };
506
507 efi_map_foreach_entry(efihdr, efi_early_map_entry, &emd);
508 if (emd.pa == 0)
509 return NULL;
510 return (void *)PHYS_TO_DMAP(emd.pa);
511 }
512
513
514 /*
515 * When booted via kexec from Linux, the prior kernel will pass in reserved
516 * memory areas in an EFI config table. We need to find that table and walk
517 * through it excluding the memory ranges in it. btw, this is called too early
518 * for the printf to do anything (unless EARLY_PRINTF is defined) since msgbufp
519 * isn't initialized, let alone a console, but breakpoints in printf help
520 * diagnose rare failures.
521 */
522 static void
exclude_efi_memreserve(vm_paddr_t efi_systbl_phys)523 exclude_efi_memreserve(vm_paddr_t efi_systbl_phys)
524 {
525 struct efi_systbl *systbl;
526 efi_guid_t efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
527
528 systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
529 if (systbl == NULL) {
530 printf("can't map systbl\n");
531 return;
532 }
533 if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
534 printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
535 return;
536 }
537
538 /*
539 * We don't yet have the pmap system booted enough to create a pmap for
540 * the efi firmware's preferred address space from the GetMemoryMap()
541 * table. The st_cfgtbl is a VA in this space, so we need to do the
542 * mapping ourselves to a kernel VA with efi_early_map. We assume that
543 * the cfgtbl entries don't span a page. Other pointers are PAs, as
544 * noted below.
545 */
546 if (systbl->st_cfgtbl == 0) /* Failsafe st_entries should == 0 in this case */
547 return;
548 for (int i = 0; i < systbl->st_entries; i++) {
549 struct efi_cfgtbl *cfgtbl;
550 struct linux_efi_memreserve *mr;
551
552 cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
553 if (cfgtbl == NULL)
554 panic("Can't map the config table entry %d\n", i);
555 if (memcmp(&cfgtbl->ct_guid, &efi_memreserve, sizeof(efi_guid_t)) != 0)
556 continue;
557
558 /*
559 * cfgtbl points are either VA or PA, depending on the GUID of
560 * the table. memreserve GUID pointers are PA and not converted
561 * after a SetVirtualAddressMap(). The list's mr_next pointer
562 * is also a PA.
563 */
564 mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
565 (vm_offset_t)cfgtbl->ct_data);
566 while (true) {
567 for (int j = 0; j < mr->mr_count; j++) {
568 struct linux_efi_memreserve_entry *mre;
569
570 mre = &mr->mr_entry[j];
571 physmem_exclude_region(mre->mre_base, mre->mre_size,
572 EXFLAG_NODUMP | EXFLAG_NOALLOC);
573 }
574 if (mr->mr_next == 0)
575 break;
576 mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
577 };
578 }
579
580 }
581
582 #ifdef FDT
583 static void
try_load_dtb(void)584 try_load_dtb(void)
585 {
586 vm_offset_t dtbp;
587
588 dtbp = MD_FETCH(preload_kmdp, MODINFOMD_DTBP, vm_offset_t);
589 #if defined(FDT_DTB_STATIC)
590 /*
591 * In case the device tree blob was not retrieved (from metadata) try
592 * to use the statically embedded one.
593 */
594 if (dtbp == 0)
595 dtbp = (vm_offset_t)&fdt_static_dtb;
596 #endif
597
598 if (dtbp == (vm_offset_t)NULL) {
599 #ifndef TSLOG
600 printf("ERROR loading DTB\n");
601 #endif
602 return;
603 }
604
605 if (!OF_install(OFW_FDT, 0))
606 panic("Cannot install FDT");
607
608 if (OF_init((void *)dtbp) != 0)
609 panic("OF_init failed with the found device tree");
610
611 parse_fdt_bootargs();
612 }
613 #endif
614
615 static bool
bus_probe(void)616 bus_probe(void)
617 {
618 bool has_acpi, has_fdt;
619 char *order, *env;
620
621 has_acpi = has_fdt = false;
622
623 #ifdef FDT
624 has_fdt = (OF_peer(0) != 0);
625 #endif
626 #ifdef DEV_ACPI
627 has_acpi = (AcpiOsGetRootPointer() != 0);
628 #endif
629
630 env = kern_getenv("kern.cfg.order");
631 if (env != NULL) {
632 order = env;
633 while (order != NULL) {
634 if (has_acpi &&
635 strncmp(order, "acpi", 4) == 0 &&
636 (order[4] == ',' || order[4] == '\0')) {
637 arm64_bus_method = ARM64_BUS_ACPI;
638 break;
639 }
640 if (has_fdt &&
641 strncmp(order, "fdt", 3) == 0 &&
642 (order[3] == ',' || order[3] == '\0')) {
643 arm64_bus_method = ARM64_BUS_FDT;
644 break;
645 }
646 order = strchr(order, ',');
647 if (order != NULL)
648 order++; /* Skip comma */
649 }
650 freeenv(env);
651
652 /* If we set the bus method it is valid */
653 if (arm64_bus_method != ARM64_BUS_NONE)
654 return (true);
655 }
656 /* If no order or an invalid order was set use the default */
657 if (arm64_bus_method == ARM64_BUS_NONE) {
658 if (has_acpi)
659 arm64_bus_method = ARM64_BUS_ACPI;
660 else if (has_fdt)
661 arm64_bus_method = ARM64_BUS_FDT;
662 }
663
664 /*
665 * If no option was set the default is valid, otherwise we are
666 * setting one to get cninit() working, then calling panic to tell
667 * the user about the invalid bus setup.
668 */
669 return (env == NULL);
670 }
671
672 static void
cache_setup(void)673 cache_setup(void)
674 {
675 int dczva_line_shift;
676 uint32_t dczid_el0;
677
678 identify_cache(READ_SPECIALREG(ctr_el0));
679
680 dczid_el0 = READ_SPECIALREG(dczid_el0);
681
682 /* Check if dc zva is not prohibited */
683 if (dczid_el0 & DCZID_DZP)
684 dczva_line_size = 0;
685 else {
686 /* Same as with above calculations */
687 dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
688 dczva_line_size = sizeof(int) << dczva_line_shift;
689
690 /* Change pagezero function */
691 pagezero = pagezero_cache;
692 }
693 }
694
695 int
memory_mapping_mode(vm_paddr_t pa)696 memory_mapping_mode(vm_paddr_t pa)
697 {
698 struct efi_md *map, *p;
699 size_t efisz;
700 int ndesc, i;
701
702 if (efihdr == NULL)
703 return (VM_MEMATTR_WRITE_BACK);
704
705 /*
706 * Memory map data provided by UEFI via the GetMemoryMap
707 * Boot Services API.
708 */
709 efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
710 map = (struct efi_md *)((uint8_t *)efihdr + efisz);
711
712 if (efihdr->descriptor_size == 0)
713 return (VM_MEMATTR_WRITE_BACK);
714 ndesc = efihdr->memory_size / efihdr->descriptor_size;
715
716 for (i = 0, p = map; i < ndesc; i++,
717 p = efi_next_descriptor(p, efihdr->descriptor_size)) {
718 if (pa < p->md_phys ||
719 pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
720 continue;
721 if (p->md_type == EFI_MD_TYPE_IOMEM ||
722 p->md_type == EFI_MD_TYPE_IOPORT)
723 return (VM_MEMATTR_DEVICE);
724 else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
725 p->md_type == EFI_MD_TYPE_RECLAIM)
726 return (VM_MEMATTR_WRITE_BACK);
727 else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
728 return (VM_MEMATTR_WRITE_THROUGH);
729 else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
730 return (VM_MEMATTR_WRITE_COMBINING);
731 break;
732 }
733
734 return (VM_MEMATTR_DEVICE);
735 }
736
737 #ifdef FDT
/*
 * fdt_foreach_mem_region() callback: register the region described by mr
 * with physmem_hardware_region(). arg is unused.
 */
static void
fdt_physmem_hardware_region_cb(const struct mem_region *mr, void *arg __unused)
{
	physmem_hardware_region(mr->mr_start, mr->mr_size);
}
743
/*
 * fdt_foreach_reserved_mem() callback: exclude the region described by mr
 * from both dumps and allocation. arg is unused.
 */
static void
fdt_physmem_exclude_region_cb(const struct mem_region *mr, void *arg __unused)
{
	physmem_exclude_region(mr->mr_start, mr->mr_size,
	    EXFLAG_NODUMP | EXFLAG_NOALLOC);
}
750 #endif
751
/*
 * Machine-dependent early kernel initialisation for the boot CPU.
 *
 * abp carries the boot parameters; this function reads its boot_el,
 * kern_ttbr0 and kern_stack members and passes the whole structure to
 * parse_boot_param().
 *
 * In order, the function: parses the boot parameters, identifies the CPU,
 * sets up pcpu0, loads the device tree (FDT kernels), collects the physical
 * memory ranges from the EFI map or the device tree, bootstraps the pmap in
 * stages while excluding reserved memory, picks the bus method, starts the
 * console, initialises proc0/thread0, the message buffer and the debugger,
 * and finally clears early_boot. The ordering of these steps matters; see
 * the comments on the individual steps.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	/* Remembered for has_hyp(). */
	boot_el = abp->boot_el;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	link_elf_ireloc();
#ifdef FDT
	try_load_dtb();
#endif

	efi_systbl_phys = MD_FETCH(preload_kmdp, MODINFOMD_FW_HANDLE,
	    vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(preload_kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		efi_map_add_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_foreach_mem_region(fdt_physmem_hardware_region_cb,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
	}
	/* Reserved-memory regions are excluded whether or not an EFI map exists. */
	fdt_foreach_reserved_mem(fdt_physmem_exclude_region_cb, NULL);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(preload_kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();

	/*
	 * Perform a staged bootstrap of virtual memory.
	 *
	 * - First we create the DMAP region. This allows it to be used in
	 *   later bootstrapping.
	 * - Next exclude memory that is needed in the DMAP region, but must
	 *   not be used by FreeBSD.
	 * - Lastly complete the bootstrapping. It may use the physical
	 *   memory map so any excluded memory must be marked as such before
	 *   pmap_bootstrap() is called.
	 */
	pmap_bootstrap_dmap(lastaddr - KERNBASE);
	/*
	 * Exclude EFI entries needed in the DMAP, e.g. EFI_MD_TYPE_RECLAIM
	 * may contain the ACPI tables but shouldn't be used by the kernel
	 */
	if (efihdr != NULL)
		efi_map_exclude_entries(efihdr);
	/* Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);
	/* Continue bootstrapping pmap */
	pmap_bootstrap();

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail. There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use. Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	/*
	 * Pick the bus method before the console comes up; an invalid
	 * configuration is only reported (by panic) once cninit() has run.
	 */
	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	pmap_s1_invalidate_all_kernel();

	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/* Detect early CPU feature support */
	enable_cpu_feat(CPU_FEAT_EARLY_BOOT);

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	/* Warn when the DTB's version property does not match the kernel's. */
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			efi_map_print_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	/* thread0's stack was sized with KSTACK_PAGES in init_proc0(). */
	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}
936
/*
 * Initialise debug support: write 0 to oslar_el1 to clear the OS lock, then
 * initialise the debug monitor.
 */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}
949
950 #ifdef DDB
951 #include <ddb/ddb.h>
952
/*
 * DDB "show specialregs": print the current value of each listed system
 * register, one per line, in "name = value" form.
 */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
/* Read a register by name and print it; local to this command. */
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}
1027
DB_SHOW_COMMAND(vtop,db_show_vtop)1028 DB_SHOW_COMMAND(vtop, db_show_vtop)
1029 {
1030 uint64_t phys;
1031
1032 if (have_addr) {
1033 phys = arm64_address_translate_s1e1r(addr);
1034 db_printf("EL1 physical address reg (read): 0x%016lx\n", phys);
1035 phys = arm64_address_translate_s1e1w(addr);
1036 db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
1037 phys = arm64_address_translate_s1e0r(addr);
1038 db_printf("EL0 physical address reg (read): 0x%016lx\n", phys);
1039 phys = arm64_address_translate_s1e0w(addr);
1040 db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
1041 } else
1042 db_printf("show vtop <virt_addr>\n");
1043 }
1044 #endif
1045