1 /*-
2 * Copyright (c) 2014 Andrew Turner
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28 #include "opt_acpi.h"
29 #include "opt_kstack_pages.h"
30 #include "opt_platform.h"
31 #include "opt_ddb.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/asan.h>
36 #include <sys/buf.h>
37 #include <sys/bus.h>
38 #include <sys/cons.h>
39 #include <sys/cpu.h>
40 #include <sys/csan.h>
41 #include <sys/devmap.h>
42 #include <sys/efi.h>
43 #include <sys/exec.h>
44 #include <sys/imgact.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/ktr.h>
48 #include <sys/limits.h>
49 #include <sys/linker.h>
50 #include <sys/msan.h>
51 #include <sys/msgbuf.h>
52 #include <sys/pcpu.h>
53 #include <sys/physmem.h>
54 #include <sys/proc.h>
55 #include <sys/ptrace.h>
56 #include <sys/reboot.h>
57 #include <sys/reg.h>
58 #include <sys/rwlock.h>
59 #include <sys/sched.h>
60 #include <sys/signalvar.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysent.h>
63 #include <sys/sysproto.h>
64 #include <sys/ucontext.h>
65 #include <sys/vdso.h>
66 #include <sys/vmmeter.h>
67
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_phys.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_pager.h>
77
78 #include <machine/armreg.h>
79 #include <machine/cpu.h>
80 #include <machine/debug_monitor.h>
81 #include <machine/hypervisor.h>
82 #include <machine/kdb.h>
83 #include <machine/machdep.h>
84 #include <machine/metadata.h>
85 #include <machine/md_var.h>
86 #include <machine/pcb.h>
87 #include <machine/undefined.h>
88 #include <machine/vmparam.h>
89
90 #ifdef VFP
91 #include <machine/vfp.h>
92 #endif
93
94 #ifdef DEV_ACPI
95 #include <contrib/dev/acpica/include/acpi.h>
96 #include <machine/acpica_machdep.h>
97 #endif
98
99 #ifdef FDT
100 #include <dev/fdt/fdt_common.h>
101 #include <dev/ofw/openfirm.h>
102 #endif
103
104 #include <dev/smbios/smbios.h>
105
/*
 * Compile-time guards against struct pcb layout changes: the asserted
 * size/offsets are relied on outside this file (presumably by assembly
 * offset constants — confirm before changing them together).
 */
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");

/* Which bus enumeration method (ACPI/FDT) won; chosen in bus_probe(). */
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
 * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

/* Initial trapframe for thread0; installed in init_proc0(). */
static struct trapframe proc0_tf;

int early_boot = 1;
int cold = 1;
/* Exception level the kernel was entered at; saved in initarm(). */
static int boot_el;

struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
int has_pan;			/* Set by pan_setup() if PAN is available. */

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared we need to ensure it's preserved. To do this use
 * __read_mostly as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
/* EFI memory map from the loader metadata; NULL when booted without one. */
static struct efi_map_header *efihdr;

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/* pagezero_simple is default pagezero; cache_setup() may switch it. */
void (*pagezero)(void *p) = pagezero_simple;

int (*apei_nmi)(void);
164
#if defined(PERTHREAD_SSP_WARNING)
/*
 * Warn that per-thread SSP was configured but the toolchain cannot emit
 * it.  Registered twice so the message appears both right after the
 * copyright banner and again at the very end of boot.
 */
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
174
/*
 * Probe ID_AA64MMFR1_EL1 for Privileged Access Never (PAN) support and
 * record the result in has_pan so pan_enable() can act on it later.
 */
static void
pan_setup(void)
{
	uint64_t id_aa64mfr1;

	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
		has_pan = 1;
}
184
/*
 * Turn on PAN if pan_setup() found it.  Clears SCTLR_EL1.SPAN first,
 * then sets the PSTATE.PAN bit directly.
 */
void
pan_enable(void)
{

	/*
	 * The LLVM integrated assembler doesn't understand the PAN
	 * PSTATE field. Because of this we need to manually create
	 * the instruction in an asm block. This is equivalent to:
	 * msr pan, #1
	 *
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	if (has_pan) {
		WRITE_SPECIALREG(sctlr_el1,
		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
	}
}
205
/* True when the kernel was entered at EL2 (see boot_el, set in initarm()). */
bool
has_hyp(void)
{
	return (boot_el == CURRENTEL_EL_EL2);
}
211
/* True when the kernel is currently running at EL2, i.e. under VHE. */
bool
in_vhe(void)
{
	/* If we are currently in EL2 then must be in VHE */
	return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
	    CURRENTEL_EL_EL2);
}
219
/*
 * Early SYSINIT: report real/available memory, apply CPU errata
 * workarounds, and bring up the kernel VM submaps and buffer cache.
 */
static void
cpu_startup(void *dummy)
{
	vm_paddr_t size;
	int i;

	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
	    ptoa((uintmax_t)realmem) / 1024 / 1024);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		/* phys_avail[] is a 0-terminated list of (start, end) pairs. */
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[i],
			    (uintmax_t)phys_avail[i + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);

	undef_init();
	install_cpu_errata();

	vm_ksubmap_init(&kmi);
	bufinit();
	vm_pager_bufferinit();
}

SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
253
/* Re-resolve kernel ifuncs once the full CPU feature set is known. */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
260
/*
 * Attempt to wake an idle CPU.  The "wfi" wait used by cpu_idle() is
 * exited by any interrupt, so there is nothing extra to do here;
 * report that no wakeup action was taken.
 */
int
cpu_idle_wakeup(int cpu)
{
	return (0);
}
267
/*
 * Idle the CPU: if nothing is runnable, wait for an interrupt with
 * "wfi" (preceded by "dsb sy" to complete outstanding memory accesses).
 * The event timer is stopped/restarted around the wait when not busy.
 */
void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	if (!sched_runnable())
		__asm __volatile(
		    "dsb sy \n"
		    "wfi    \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}
283
/*
 * Halt the machine: disable interrupts and spin in the low-power
 * "wfi" wait forever.  Only reached if shutdown did not happen.
 */
void
cpu_halt(void)
{

	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
294
/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD -- intentionally a no-op for now. */
}
305
306 /* Get current clock frequency for the given CPU ID. */
307 int
cpu_est_clockrate(int cpu_id,uint64_t * rate)308 cpu_est_clockrate(int cpu_id, uint64_t *rate)
309 {
310 struct pcpu *pc;
311
312 pc = pcpu_find(cpu_id);
313 if (pc == NULL || rate == NULL)
314 return (EINVAL);
315
316 if (pc->pc_clock == 0)
317 return (EOPNOTSUPP);
318
319 *rate = pc->pc_clock;
320 return (0);
321 }
322
/*
 * Initialize the MD fields of a pcpu.  Both fields start as sentinel
 * "unknown" values; presumably filled in later by CPU enumeration —
 * confirm against the ACPI/FDT MP startup code.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}
330
/*
 * Enter a spinlock section: on first entry disable interrupts (saving
 * the DAIF state for spinlock_exit()) and enter a critical section;
 * nested entries only bump the count.  The order here matters — the
 * count must be set while interrupts are already off.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}
346
/*
 * Leave a spinlock section: decrement the nesting count and, on the
 * outermost exit, leave the critical section and restore the DAIF
 * state saved by spinlock_enter().
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	/* Read the saved state before dropping the count. */
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}
361
/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{
	int i;

	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
	for (i = 0; i < nitems(pcb->pcb_x); i++) {
		if (i == PCB_LR)
			/* Use the ELR (faulting PC) in the LR slot. */
			pcb->pcb_x[i] = tf->tf_elr;
		else
			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
	}

	pcb->pcb_sp = tf->tf_sp;
}
384
/*
 * Finish bootstrapping proc0/thread0: link them together, hang the pcb
 * off the top of the boot kernel stack, point the boot CPU's curpcb at
 * it, and unmask SError exceptions.
 */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The pcb lives at the very top of the kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}
415
/*
 * Get an address to be used to write to kernel data that may be mapped
 * read-only, e.g. to patch kernel code.  On success *out holds a
 * writable alias of addr and true is returned; on failure *out is left
 * untouched and false is returned.
 */
bool
arm64_get_writable_addr(void *addr, void **out)
{
	vm_paddr_t pa;

	/* Check if the page is writable as-is using an AT instruction. */
	if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
		*out = addr;
		return (true);
	}

	/*
	 * Find the physical address of the given page.
	 */
	if (!pmap_klookup((vm_offset_t)addr, &pa)) {
		return (false);
	}

	/*
	 * If it is within the DMAP region and is writable use that.
	 */
	if (PHYS_IN_DMAP_RANGE(pa)) {
		addr = (void *)PHYS_TO_DMAP(pa);
		if (PAR_SUCCESS(arm64_address_translate_s1e1w(
		    (vm_offset_t)addr))) {
			*out = addr;
			return (true);
		}
	}

	return (false);
}
452
/* Callback invoked for each EFI memory map descriptor. */
typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);

/*
 * Walk every descriptor in the EFI memory map, invoking cb(desc, argp)
 * on each.  Descriptors are advanced by efihdr->descriptor_size since
 * firmware may use a larger stride than sizeof(struct efi_md).
 */
static void
foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	/* Guard against division by zero on a malformed header. */
	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		cb(p, argp);
	}
}
478
/*
 * Handle the EFI memory map list.
 *
 * We will make two passes at this, the first (exclude == false) to populate
 * physmem with valid physical memory ranges from recognized map entry types.
 * In the second pass we will exclude memory ranges from physmem which must not
 * be used for general allocations, either because they are used by runtime
 * firmware or otherwise reserved.
 *
 * Adding the runtime-reserved memory ranges to physmem and excluding them
 * later ensures that they are included in the DMAP, but excluded from
 * phys_avail[].
 *
 * Entry types not explicitly listed here are ignored and not mapped.
 */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP, but will
		 * be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH: on the first pass these are added as RAM. */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}
540
541 static void
add_efi_map_entries(struct efi_map_header * efihdr)542 add_efi_map_entries(struct efi_map_header *efihdr)
543 {
544 bool exclude = false;
545 foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
546 }
547
548 static void
exclude_efi_map_entries(struct efi_map_header * efihdr)549 exclude_efi_map_entries(struct efi_map_header *efihdr)
550 {
551 bool exclude = true;
552 foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
553 }
554
555 static void
print_efi_map_entry(struct efi_md * p,void * argp __unused)556 print_efi_map_entry(struct efi_md *p, void *argp __unused)
557 {
558 const char *type;
559 static const char *types[] = {
560 "Reserved",
561 "LoaderCode",
562 "LoaderData",
563 "BootServicesCode",
564 "BootServicesData",
565 "RuntimeServicesCode",
566 "RuntimeServicesData",
567 "ConventionalMemory",
568 "UnusableMemory",
569 "ACPIReclaimMemory",
570 "ACPIMemoryNVS",
571 "MemoryMappedIO",
572 "MemoryMappedIOPortSpace",
573 "PalCode",
574 "PersistentMemory"
575 };
576
577 if (p->md_type < nitems(types))
578 type = types[p->md_type];
579 else
580 type = "<INVALID>";
581 printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
582 p->md_virt, p->md_pages);
583 if (p->md_attr & EFI_MD_ATTR_UC)
584 printf("UC ");
585 if (p->md_attr & EFI_MD_ATTR_WC)
586 printf("WC ");
587 if (p->md_attr & EFI_MD_ATTR_WT)
588 printf("WT ");
589 if (p->md_attr & EFI_MD_ATTR_WB)
590 printf("WB ");
591 if (p->md_attr & EFI_MD_ATTR_UCE)
592 printf("UCE ");
593 if (p->md_attr & EFI_MD_ATTR_WP)
594 printf("WP ");
595 if (p->md_attr & EFI_MD_ATTR_RP)
596 printf("RP ");
597 if (p->md_attr & EFI_MD_ATTR_XP)
598 printf("XP ");
599 if (p->md_attr & EFI_MD_ATTR_NV)
600 printf("NV ");
601 if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
602 printf("MORE_RELIABLE ");
603 if (p->md_attr & EFI_MD_ATTR_RO)
604 printf("RO ");
605 if (p->md_attr & EFI_MD_ATTR_RT)
606 printf("RUNTIME");
607 printf("\n");
608 }
609
/* Print a column header, then every EFI map descriptor (bootverbose). */
static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}
618
/*
 * Map the passed in VA in EFI space to a void * using the efi memory table to
 * find the PA and return it in the DMAP, if it exists. We're used between the
 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
 * tables We assume that either the entry you are mapping fits within its page,
 * or if it spills to the next page, that's contiguous in PA and in the DMAP.
 * All observed tables obey the first part of this precondition.
 */
struct early_map_data
{
	vm_offset_t va;		/* In: EFI runtime VA to translate. */
	vm_offset_t pa;		/* Out: matching PA; 0 while unresolved. */
};
632
/*
 * foreach_efi_map_entry() callback: if this runtime descriptor's virtual
 * range covers emdp->va, record the corresponding PA.  A prior match
 * (pa != 0) short-circuits further work.
 */
static void
efi_early_map_entry(struct efi_md *p, void *argp)
{
	struct early_map_data *emdp = argp;
	vm_offset_t s, e;

	if (emdp->pa != 0)
		return;
	/* Only runtime entries carry a valid virtual mapping. */
	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
		return;
	s = p->md_virt;
	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
	if (emdp->va < s  || emdp->va >= e)
		return;
	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
}
649
650 static void *
efi_early_map(vm_offset_t va)651 efi_early_map(vm_offset_t va)
652 {
653 struct early_map_data emd = { .va = va };
654
655 foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
656 if (emd.pa == 0)
657 return NULL;
658 return (void *)PHYS_TO_DMAP(emd.pa);
659 }
660
661
/*
 * When booted via kboot, the prior kernel will pass in reserved memory areas in
 * a EFI config table. We need to find that table and walk through it excluding
 * the memory ranges in it. btw, this is called too early for the printf to do
 * anything since msgbufp isn't initialized, let alone a console...
 */
static void
exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
{
	struct efi_systbl *systbl;
	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;

	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
	/*
	 * NOTE(review): PHYS_TO_DMAP() of a valid PA should never yield
	 * NULL, so this check looks effectively dead; the signature check
	 * below is the real validation.  Confirm before removing.
	 */
	if (systbl == NULL) {
		printf("can't map systbl\n");
		return;
	}
	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
		return;
	}

	/*
	 * We don't yet have the pmap system booted enough to create a pmap for
	 * the efi firmware's preferred address space from the GetMemoryMap()
	 * table. The st_cfgtbl is a VA in this space, so we need to do the
	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
	 * noted below.
	 */
	if (systbl->st_cfgtbl == 0)	/* Failsafe st_entries should == 0 in this case */
		return;
	for (int i = 0; i < systbl->st_entries; i++) {
		struct efi_cfgtbl *cfgtbl;
		struct linux_efi_memreserve *mr;

		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
		if (cfgtbl == NULL)
			panic("Can't map the config table entry %d\n", i);
		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
			continue;

		/*
		 * cfgtbl points are either VA or PA, depending on the GUID of
		 * the table. memreserve GUID pointers are PA and not converted
		 * after a SetVirtualAddressMap(). The list's mr_next pointer
		 * is also a PA.
		 */
		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
			(vm_offset_t)cfgtbl->ct_data);
		/* Walk the singly-linked (by PA) list of reservation chunks. */
		while (true) {
			for (int j = 0; j < mr->mr_count; j++) {
				struct linux_efi_memreserve_entry *mre;

				mre = &mr->mr_entry[j];
				physmem_exclude_region(mre->mre_base, mre->mre_size,
				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
			}
			if (mr->mr_next == 0)
				break;
			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
		};
	}

}
727
#ifdef FDT
/*
 * Locate the device tree blob (from loader metadata, falling back to a
 * statically embedded DTB if built in) and initialize openfirmware
 * with it, then parse any bootargs it carries.
 */
static void
try_load_dtb(caddr_t kmdp)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (!OF_install(OFW_FDT, 0))
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif
760
/*
 * Decide which firmware enumeration method (ACPI or FDT) to use, honoring
 * the comma-separated "kern.cfg.order" tunable when set.  Returns true
 * when the resulting configuration is valid; false signals initarm() to
 * panic after the console is up.
 */
static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		order = env;
		/* Walk the list, taking the first entry that is available. */
		while (order != NULL) {
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
		else if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
	}

	/*
	 * If no option was set the default is valid, otherwise we are
	 * setting one to get cninit() working, then calling panic to tell
	 * the user about the invalid bus setup.
	 */
	return (env == NULL);
}
817
/*
 * Identify the cache geometry and configure "dc zva" usage: when the
 * instruction is permitted, record its zeroing-block size and switch
 * pagezero to the cache-assisted implementation.
 */
static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* Same as with above calculations */
		/* Block size is encoded as words (4 bytes) << BS field. */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}
840
/*
 * Derive the memory attribute to map a physical address with, based on
 * the EFI memory map descriptor covering it.  Without an EFI map all
 * memory is treated as write-back; addresses not covered by any
 * descriptor fall through to VM_MEMATTR_DEVICE.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}
882
/*
 * Machine-dependent kernel initialization, entered from locore.S with
 * the boot parameters assembled by the loader.  Order is significant
 * throughout: pcpu/curthread must exist before pmap_bootstrap(), the
 * physmem exclusions must all land before physmem_init_kernel_globals(),
 * and console output is only possible after cninit().
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	caddr_t kmdp;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	/* Remember the EL we were entered at; consumed by has_hyp(). */
	boot_el = abp->boot_el;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	/* Find the kernel address */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	link_elf_ireloc(kmdp);
#ifdef FDT
	try_load_dtb(kmdp);
#endif

	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();
	pan_setup();

	/* Bootstrap enough of pmap to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/* Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail. There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use. Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap();

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	/* Deferred from bus_probe() so the panic reaches the console. */
	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/*
	 * Check if pointer authentication is available on this system, and
	 * if so enable its use. This needs to be called before init_proc0
	 * as that will configure the thread0 pointer authentication keys.
	 */
	ptrauth_init();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
	pan_enable();

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}
1070
/*
 * Initialize the hardware debug facilities: release the OS lock so the
 * debug registers are writable, then set up the debug monitor for DDB.
 */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}
1083
#ifdef DDB
#include <ddb/ddb.h>

/* DDB "show specialregs": dump the EL0/EL1 system registers. */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}
1161
/*
 * DDB "show vtop <virt_addr>": run the four stage-1 address translation
 * instructions (EL0/EL1 x read/write) on the given VA and print the
 * resulting PAR values.
 */
DB_SHOW_COMMAND(vtop, db_show_vtop)
{
	uint64_t phys;

	if (have_addr) {
		phys = arm64_address_translate_s1e1r(addr);
		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e1w(addr);
		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0r(addr);
		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0w(addr);
		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
	} else
		db_printf("show vtop <virt_addr>\n");
}
#endif
1179