/*-
 * Copyright (c) 2014 Andrew Turner
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include "opt_acpi.h"
#include "opt_kstack_pages.h"
#include "opt_platform.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/cons.h>
#include <sys/cpu.h>
#include <sys/csan.h>
#include <sys/devmap.h>
#include <sys/efi.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/msan.h>
#include <sys/msgbuf.h>
#include <sys/pcpu.h>
#include <sys/physmem.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/reboot.h>
#include <sys/reg.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/ucontext.h>
#include <sys/vdso.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/cpu_feat.h>
#include <machine/debug_monitor.h>
#include <machine/hypervisor.h>
#include <machine/kdb.h>
#include <machine/machdep.h>
#include <machine/metadata.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/undefined.h>
#include <machine/vmparam.h>

#ifdef VFP
#include <machine/vfp.h>
#endif

#ifdef DEV_ACPI
#include <contrib/dev/acpica/include/acpi.h>
#include <machine/acpica_machdep.h>
#endif

#ifdef FDT
#include <dev/fdt/fdt_common.h>
#include <dev/ofw/openfirm.h>
#endif

#include <dev/smbios/smbios.h>

_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");

enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not, we
 * could relocate this, but would need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

static struct trapframe proc0_tf;

int early_boot = 1;
int cold = 1;
static int boot_el;

struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
int has_pan;

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared, we need to ensure it's preserved. To do this, use
 * __read_mostly, as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
static struct efi_map_header *efihdr;

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/* pagezero_simple is default pagezero */
void (*pagezero)(void *p) = pagezero_simple;

int (*apei_nmi)(void);

#if defined(PERTHREAD_SSP_WARNING)
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif

static bool
pan_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
	uint64_t id_aa64mfr1;

	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
	return (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE);
}

static void
pan_enable(const struct cpu_feat *feat __unused,
    cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
    u_int errata_count __unused)
{
	has_pan = 1;

	/*
	 * This sets the PAN bit, preventing the kernel from accessing
	 * memory that userspace can also access, unless it uses the
	 * unprivileged (userspace) load/store instructions.
	 */
	WRITE_SPECIALREG(sctlr_el1,
	    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
	__asm __volatile(
	    ".arch_extension pan	\n"
	    "msr pan, #1		\n"
	    ".arch_extension nopan	\n");
}

static struct cpu_feat feat_pan = {
	.feat_name = "FEAT_PAN",
	.feat_check = pan_check,
	.feat_enable = pan_enable,
	.feat_flags = CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU,
};
DATA_SET(cpu_feat_set, feat_pan);
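
/*
 * Illustrative sketch (not from the original source): with PAN enabled the
 * kernel cannot casually dereference user pointers; accesses to user memory
 * go through the copyin(9)/copyout(9) family, which performs the required
 * user-access handling:
 *
 *	int val;
 *
 *	// a direct *(int *)user_ptr would fault with PAN enabled
 *	if (copyin(user_ptr, &val, sizeof(val)) != 0)
 *		return (EFAULT);
 */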

bool
has_hyp(void)
{
	return (boot_el == CURRENTEL_EL_EL2);
}

bool
in_vhe(void)
{
	/* If we are currently in EL2 then we must be in VHE */
	return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
	    CURRENTEL_EL_EL2);
}

static void
cpu_startup(void *dummy)
{
	vm_paddr_t size;
	int i;

	printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
	    ptoa((uintmax_t)realmem) / 1024 / 1024);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[i],
			    (uintmax_t)phys_avail[i + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);

	undef_init();
	install_cpu_errata();

	vm_ksubmap_init(&kmi);
	bufinit();
	vm_pager_bufferinit();
}

SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);

static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);

int
cpu_idle_wakeup(int cpu)
{

	return (0);
}

void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	if (!sched_runnable())
		__asm __volatile(
		    "dsb sy \n"
		    "wfi    \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}

void
cpu_halt(void)
{

	/* We should have shut down by now; if not, enter a low-power sleep */
	intr_disable();
	while (1) {
		__asm __volatile("wfi");
	}
}

/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD */
}

/* Get current clock frequency for the given CPU ID. */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
	struct pcpu *pc;

	pc = pcpu_find(cpu_id);
	if (pc == NULL || rate == NULL)
		return (EINVAL);

	if (pc->pc_clock == 0)
		return (EOPNOTSUPP);

	*rate = pc->pc_clock;
	return (0);
}

void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}

void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}

void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}
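
/*
 * Illustrative sketch (not from the original source): callers bracket code
 * that must not be interrupted; the calls nest, and the DAIF state saved by
 * the outermost spinlock_enter() is restored by the matching spinlock_exit():
 *
 *	spinlock_enter();
 *	// touch state shared with interrupt context
 *	spinlock_exit();
 */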

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{
	int i;

	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
	for (i = 0; i < nitems(pcb->pcb_x); i++) {
		if (i == PCB_LR)
			pcb->pcb_x[i] = tf->tf_elr;
		else
			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
	}

	pcb->pcb_sp = tf->tf_sp;
}

static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure
	 * or other hardware error.
	 */
	serror_enable();
}

/*
 * Get an address to be used to write to kernel data that may be mapped
 * read-only, e.g. to patch kernel code.
 */
bool
arm64_get_writable_addr(void *addr, void **out)
{
	vm_paddr_t pa;

	/* Check if the page is writable */
	if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
		*out = addr;
		return (true);
	}

	/*
	 * Find the physical address of the given page.
	 */
	if (!pmap_klookup((vm_offset_t)addr, &pa)) {
		return (false);
	}

	/*
	 * If it is within the DMAP region and is writable, use that.
	 */
	if (PHYS_IN_DMAP_RANGE(pa)) {
		addr = (void *)PHYS_TO_DMAP(pa);
		if (PAR_SUCCESS(arm64_address_translate_s1e1w(
		    (vm_offset_t)addr))) {
			*out = addr;
			return (true);
		}
	}

	return (false);
}
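
/*
 * Illustrative sketch (not from the original source): a hypothetical caller
 * patching one instruction through the writable alias.
 *
 *	uint32_t *insn;	// address in (possibly read-only) kernel text
 *	void *wr;
 *
 *	if (!arm64_get_writable_addr(insn, &wr))
 *		panic("no writable alias for %p", insn);
 *	*(uint32_t *)wr = new_insn;
 *	// followed by whatever cache maintenance the change requires
 */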

typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);

static void
foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		cb(p, argp);
	}
}
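
/*
 * Illustrative sketch (not from the original source): the same walker can
 * drive any small callback, e.g. counting descriptors:
 *
 *	static void
 *	count_efi_map_entry(struct efi_md *p __unused, void *argp)
 *	{
 *		(*(int *)argp)++;
 *	}
 *
 *	int n = 0;
 *	foreach_efi_map_entry(efihdr, count_efi_map_entry, &n);
 */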

/*
 * Handle the EFI memory map list.
 *
 * We will make two passes at this, the first (exclude == false) to populate
 * physmem with valid physical memory ranges from recognized map entry types.
 * In the second pass we will exclude memory ranges from physmem which must not
 * be used for general allocations, either because they are used by runtime
 * firmware or otherwise reserved.
 *
 * Adding the runtime-reserved memory ranges to physmem and excluding them
 * later ensures that they are included in the DMAP, but excluded from
 * phys_avail[].
 *
 * Entry types not explicitly listed here are ignored and not mapped.
 */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP; it will
		 * be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}

static void
add_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = false;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}

static void
exclude_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = true;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}

static void
print_efi_map_entry(struct efi_md *p, void *argp __unused)
{
	const char *type;
	static const char *types[] = {
		"Reserved",
		"LoaderCode",
		"LoaderData",
		"BootServicesCode",
		"BootServicesData",
		"RuntimeServicesCode",
		"RuntimeServicesData",
		"ConventionalMemory",
		"UnusableMemory",
		"ACPIReclaimMemory",
		"ACPIMemoryNVS",
		"MemoryMappedIO",
		"MemoryMappedIOPortSpace",
		"PalCode",
		"PersistentMemory"
	};

	if (p->md_type < nitems(types))
		type = types[p->md_type];
	else
		type = "<INVALID>";
	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
	    p->md_virt, p->md_pages);
	if (p->md_attr & EFI_MD_ATTR_UC)
		printf("UC ");
	if (p->md_attr & EFI_MD_ATTR_WC)
		printf("WC ");
	if (p->md_attr & EFI_MD_ATTR_WT)
		printf("WT ");
	if (p->md_attr & EFI_MD_ATTR_WB)
		printf("WB ");
	if (p->md_attr & EFI_MD_ATTR_UCE)
		printf("UCE ");
	if (p->md_attr & EFI_MD_ATTR_WP)
		printf("WP ");
	if (p->md_attr & EFI_MD_ATTR_RP)
		printf("RP ");
	if (p->md_attr & EFI_MD_ATTR_XP)
		printf("XP ");
	if (p->md_attr & EFI_MD_ATTR_NV)
		printf("NV ");
	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
		printf("MORE_RELIABLE ");
	if (p->md_attr & EFI_MD_ATTR_RO)
		printf("RO ");
	if (p->md_attr & EFI_MD_ATTR_RT)
		printf("RUNTIME");
	printf("\n");
}

static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}

/*
 * Map the passed-in VA in EFI space to a void * using the EFI memory table to
 * find the PA and return it in the DMAP, if it exists. We're used between the
 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
 * tables. We assume that either the entry you are mapping fits within its
 * page, or if it spills to the next page, that's contiguous in PA and in the
 * DMAP. All observed tables obey the first part of this precondition.
 */
struct early_map_data
{
	vm_offset_t va;
	vm_offset_t pa;
};

static void
efi_early_map_entry(struct efi_md *p, void *argp)
{
	struct early_map_data *emdp = argp;
	vm_offset_t s, e;

	if (emdp->pa != 0)
		return;
	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
		return;
	s = p->md_virt;
	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
	if (emdp->va < s || emdp->va >= e)
		return;
	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
}

static void *
efi_early_map(vm_offset_t va)
{
	struct early_map_data emd = { .va = va };

	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
	if (emd.pa == 0)
		return NULL;
	return (void *)PHYS_TO_DMAP(emd.pa);
}

/*
 * When booted via kboot, the prior kernel will pass in reserved memory areas
 * in an EFI config table. We need to find that table and walk through it,
 * excluding the memory ranges in it. Note that this is called too early for
 * printf to do anything, since msgbufp isn't initialized, let alone a
 * console...
 */
static void
exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
{
	struct efi_systbl *systbl;
	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;

	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
	if (systbl == NULL) {
		printf("can't map systbl\n");
		return;
	}
	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
		return;
	}

	/*
	 * We don't yet have the pmap system booted enough to create a pmap for
	 * the efi firmware's preferred address space from the GetMemoryMap()
	 * table. The st_cfgtbl is a VA in this space, so we need to do the
	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
	 * noted below.
	 */
	if (systbl->st_cfgtbl == 0)	/* Failsafe: st_entries should be 0 in this case */
		return;
	for (int i = 0; i < systbl->st_entries; i++) {
		struct efi_cfgtbl *cfgtbl;
		struct linux_efi_memreserve *mr;

		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
		if (cfgtbl == NULL)
			panic("Can't map the config table entry %d\n", i);
		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
			continue;

		/*
		 * cfgtbl pointers are either VA or PA, depending on the GUID
		 * of the table. memreserve GUID pointers are PA and not
		 * converted after a SetVirtualAddressMap(). The list's
		 * mr_next pointer is also a PA.
		 */
		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
		    (vm_offset_t)cfgtbl->ct_data);
		while (true) {
			for (int j = 0; j < mr->mr_count; j++) {
				struct linux_efi_memreserve_entry *mre;

				mre = &mr->mr_entry[j];
				physmem_exclude_region(mre->mre_base, mre->mre_size,
				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
			}
			if (mr->mr_next == 0)
				break;
			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
		}
	}
}

#ifdef FDT
static void
try_load_dtb(void)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(preload_kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata), try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (!OF_install(OFW_FDT, 0))
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif

static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		order = env;
		while (order != NULL) {
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method, it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set, use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
		else if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
	}

	/*
	 * If no option was set, the default is valid. Otherwise we set one
	 * anyway so cninit() works, then panic to tell the user about the
	 * invalid bus setup.
	 */
	return (env == NULL);
}
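
/*
 * Illustrative note (not from the original source): the kern.cfg.order
 * tunable parsed above can be set from the loader to force the probe order,
 * e.g. in /boot/loader.conf:
 *
 *	kern.cfg.order="acpi,fdt"
 */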

static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* DCZID_EL0.BS is log2 of the block size in 4-byte words */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}
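
/*
 * Worked example (illustrative, not from the original source): a common
 * encoding is DCZID_EL0.BS = 4, i.e. a block of 2^4 = 16 four-byte words,
 * giving dczva_line_size = sizeof(int) << 4 = 64 bytes per "dc zva".
 */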

int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}

void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	boot_el = abp->boot_el;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	link_elf_ireloc();
#ifdef FDT
	try_load_dtb();
#endif

	efi_systbl_phys = MD_FETCH(preload_kmdp, MODINFOMD_FW_HANDLE,
	    vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(preload_kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(preload_kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Do basic tuning, hz etc. */
	init_param1();

	cache_setup();

	/* Bootstrap enough of pmap to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/* Do the same for reserved entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail. There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use. Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap();

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/* Detect early CPU feature support */
	enable_cpu_feat(CPU_FEAT_EARLY_BOOT);

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version,
		    sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version, LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}

void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}

#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
	db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}

DB_SHOW_COMMAND(vtop, db_show_vtop)
{
	uint64_t phys;

	if (have_addr) {
		phys = arm64_address_translate_s1e1r(addr);
		db_printf("EL1 physical address reg (read): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e1w(addr);
		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0r(addr);
		db_printf("EL0 physical address reg (read): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0w(addr);
		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
	} else
		db_printf("show vtop <virt_addr>\n");
}
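
/*
 * Illustrative usage (not from the original source), from the DDB prompt:
 *
 *	db> show vtop <virt_addr>
 *	EL1 physical address reg (read): ...
 */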
#endif