xref: /freebsd/sys/arm64/arm64/machdep.c (revision 33f2cf4ad460ca46bdc77ae50914c76e76fe0e0f)
1 /*-
2  * Copyright (c) 2014 Andrew Turner
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include "opt_acpi.h"
29 #include "opt_kstack_pages.h"
30 #include "opt_platform.h"
31 #include "opt_ddb.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/asan.h>
36 #include <sys/buf.h>
37 #include <sys/bus.h>
38 #include <sys/cons.h>
39 #include <sys/cpu.h>
40 #include <sys/csan.h>
41 #include <sys/devmap.h>
42 #include <sys/efi.h>
43 #include <sys/exec.h>
44 #include <sys/imgact.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/ktr.h>
48 #include <sys/limits.h>
49 #include <sys/linker.h>
50 #include <sys/msan.h>
51 #include <sys/msgbuf.h>
52 #include <sys/pcpu.h>
53 #include <sys/physmem.h>
54 #include <sys/proc.h>
55 #include <sys/ptrace.h>
56 #include <sys/reboot.h>
57 #include <sys/reg.h>
58 #include <sys/rwlock.h>
59 #include <sys/sched.h>
60 #include <sys/signalvar.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysent.h>
63 #include <sys/sysproto.h>
64 #include <sys/ucontext.h>
65 #include <sys/vdso.h>
66 #include <sys/vmmeter.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_phys.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_pager.h>
77 
78 #include <machine/armreg.h>
79 #include <machine/cpu.h>
80 #include <machine/debug_monitor.h>
81 #include <machine/hypervisor.h>
82 #include <machine/kdb.h>
83 #include <machine/machdep.h>
84 #include <machine/metadata.h>
85 #include <machine/md_var.h>
86 #include <machine/pcb.h>
87 #include <machine/undefined.h>
88 #include <machine/vmparam.h>
89 
90 #ifdef VFP
91 #include <machine/vfp.h>
92 #endif
93 
94 #ifdef DEV_ACPI
95 #include <contrib/dev/acpica/include/acpi.h>
96 #include <machine/acpica_machdep.h>
97 #endif
98 
99 #ifdef FDT
100 #include <dev/fdt/fdt_common.h>
101 #include <dev/ofw/openfirm.h>
102 #endif
103 
104 #include <dev/smbios/smbios.h>
105 
/*
 * Pin the pcb layout at compile time. The size and the offsets of the
 * FP-state members are depended on outside this translation unit —
 * NOTE(review): presumably by assembly via genassym; confirm the
 * consumers before changing these values.
 */
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");
111 
/* Bus enumeration method (ACPI or FDT); chosen in bus_probe(). */
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
 * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

/* Boot-time trapframe for thread0; installed in init_proc0(). */
static struct trapframe proc0_tf;

int early_boot = 1;	/* Cleared at the end of initarm(). */
int cold = 1;		/* Cold-boot flag. */
static int boot_el;	/* Exception level the kernel was entered at. */

/* Kernel submap information, filled in by vm_ksubmap_init(). */
struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
int has_pan;			/* Non-zero when the CPU implements PAN. */

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared we need to ensure it's preserved. To do this use
 * __read_mostly as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
/* EFI memory map from the loader; NULL when none was supplied. */
static struct efi_map_header *efihdr;

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/* pagezero_simple is default pagezero */
void (*pagezero)(void *p) = pagezero_simple;

/* APEI NMI handler hook; NULL (from .bss) until registered elsewhere. */
int (*apei_nmi)(void);
164 
#if defined(PERTHREAD_SSP_WARNING)
/* Warn that per-thread SSP was requested but the compiler can't honour it. */
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
/* Print once early (with the copyright) and again at the very end of boot. */
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
174 
/*
 * Probe ID_AA64MMFR1_EL1 for the Privileged Access Never (PAN)
 * extension and record the result in has_pan for pan_enable().
 */
static void
pan_setup(void)
{
	uint64_t id_aa64mfr1;

	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
		has_pan = 1;
}
184 
/*
 * Enable PAN on this CPU when pan_setup() found it is implemented.
 */
void
pan_enable(void)
{

	/*
	 * The LLVM integrated assembler doesn't understand the PAN
	 * PSTATE field. Because of this we need to manually create
	 * the instruction in an asm block. This is equivalent to:
	 * msr pan, #1
	 *
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	if (has_pan) {
		/* Clear SCTLR_EL1.SPAN so exceptions also set PSTATE.PAN. */
		WRITE_SPECIALREG(sctlr_el1,
		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
	}
}
205 
/* True when the kernel was entered at EL2 (recorded in initarm()). */
bool
has_hyp(void)
{
	return (boot_el == CURRENTEL_EL_EL2);
}
211 
/* True when the kernel is currently executing at EL2 (i.e. under VHE). */
bool
in_vhe(void)
{
	/* If we are currently in EL2 then must be in VHE */
	return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
	    CURRENTEL_EL_EL2);
}
219 
/*
 * SI_SUB_CPU startup: report real/available memory, install the
 * undefined-instruction handlers and CPU errata workarounds, then set
 * up the kernel submaps and the buffer cache.
 */
static void
cpu_startup(void *dummy)
{
	vm_paddr_t size;
	int i;

	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
	    ptoa((uintmax_t)realmem) / 1024 / 1024);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		/* phys_avail is a zero-terminated list of (start, end) pairs. */
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[i],
			    (uintmax_t)phys_avail[i + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);

	undef_init();
	install_cpu_errata();

	vm_ksubmap_init(&kmi);
	bufinit();
	vm_pager_bufferinit();
}

SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
253 
/*
 * Re-run ELF ifunc relocations late in SI_SUB_CPU, after the rest of
 * the CPU initializers have run.
 */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
260 
/*
 * MD hook to wake an idle CPU. Always reports that nothing was done
 * here; NOTE(review): presumably the caller falls back to an IPI,
 * which ends the WFI in cpu_idle() — confirm against the MI scheduler.
 */
int
cpu_idle_wakeup(int cpu)
{

	return (0);
}
267 
/*
 * Idle loop body: if nothing is runnable, wait for an interrupt with
 * WFI (preceded by a full barrier). Unless "busy", the idle clock is
 * stopped across the wait. WFI completes once an interrupt becomes
 * pending even though interrupts are masked by spinlock_enter(); the
 * handler itself runs after spinlock_exit() restores DAIF.
 */
void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	if (!sched_runnable())
		__asm __volatile(
		    "dsb sy \n"
		    "wfi    \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}
283 
/*
 * Halt the CPU: disable interrupts and spin in WFI forever.
 * Does not return.
 */
void
cpu_halt(void)
{

	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	while (1) {
		__asm __volatile("wfi");
	}
}
294 
295 /*
296  * Flush the D-cache for non-DMA I/O so that the I-cache can
297  * be made coherent later.
298  */
/* Currently a no-op stub; see the comment above and the TODO below. */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD */
}
305 
306 /* Get current clock frequency for the given CPU ID. */
307 int
cpu_est_clockrate(int cpu_id,uint64_t * rate)308 cpu_est_clockrate(int cpu_id, uint64_t *rate)
309 {
310 	struct pcpu *pc;
311 
312 	pc = pcpu_find(cpu_id);
313 	if (pc == NULL || rate == NULL)
314 		return (EINVAL);
315 
316 	if (pc->pc_clock == 0)
317 		return (EOPNOTSUPP);
318 
319 	*rate = pc->pc_clock;
320 	return (0);
321 }
322 
/*
 * MD per-CPU data initialisation: set the ACPI id and MPIDR to
 * all-ones sentinels until real values are assigned elsewhere.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}
330 
/*
 * Enter a spinlock section: on the outermost entry disable interrupts
 * (stashing the DAIF state for spinlock_exit() to restore) and enter
 * a critical section; nested entries just bump the count.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}
346 
/*
 * Leave a spinlock section: when the outermost section ends, exit the
 * critical section and restore the DAIF state saved by the matching
 * spinlock_enter().
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}
361 
362 /*
363  * Construct a PCB from a trapframe. This is called from kdb_trap() where
364  * we want to start a backtrace from the function that caused us to enter
365  * the debugger. We have the context in the trapframe, but base the trace
366  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
367  * enough for a backtrace.
368  */
369 void
makectx(struct trapframe * tf,struct pcb * pcb)370 makectx(struct trapframe *tf, struct pcb *pcb)
371 {
372 	int i;
373 
374 	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
375 	for (i = 0; i < nitems(pcb->pcb_x); i++) {
376 		if (i == PCB_LR)
377 			pcb->pcb_x[i] = tf->tf_elr;
378 		else
379 			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
380 	}
381 
382 	pcb->pcb_sp = tf->tf_sp;
383 }
384 
/*
 * Finish setting up proc0/thread0: link them together, attach the boot
 * kernel stack, carve the pcb out of the top of that stack, and make
 * thread0 the boot CPU's current thread.
 */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The pcb lives at the very top of the kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}
415 
416 /*
417  * Get an address to be used to write to kernel data that may be mapped
418  * read-only, e.g. to patch kernel code.
419  */
/*
 * On success *out holds a writable alias of addr and true is returned;
 * otherwise false. Writability is probed with an AT S1E1W address
 * translation and checked via the PAR result.
 */
bool
arm64_get_writable_addr(void *addr, void **out)
{
	vm_paddr_t pa;

	/* Check if the page is writable */
	if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
		*out = addr;
		return (true);
	}

	/*
	 * Find the physical address of the given page.
	 */
	if (!pmap_klookup((vm_offset_t)addr, &pa)) {
		/* Not mapped at all; there is nothing we can alias. */
		return (false);
	}

	/*
	 * If it is within the DMAP region and is writable use that.
	 */
	if (PHYS_IN_DMAP_RANGE(pa)) {
		addr = (void *)PHYS_TO_DMAP(pa);
		if (PAR_SUCCESS(arm64_address_translate_s1e1w(
		    (vm_offset_t)addr))) {
			*out = addr;
			return (true);
		}
	}

	return (false);
}
452 
453 typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);
454 
455 static void
foreach_efi_map_entry(struct efi_map_header * efihdr,efi_map_entry_cb cb,void * argp)456 foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
457 {
458 	struct efi_md *map, *p;
459 	size_t efisz;
460 	int ndesc, i;
461 
462 	/*
463 	 * Memory map data provided by UEFI via the GetMemoryMap
464 	 * Boot Services API.
465 	 */
466 	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
467 	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
468 
469 	if (efihdr->descriptor_size == 0)
470 		return;
471 	ndesc = efihdr->memory_size / efihdr->descriptor_size;
472 
473 	for (i = 0, p = map; i < ndesc; i++,
474 	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
475 		cb(p, argp);
476 	}
477 }
478 
479 /*
480  * Handle the EFI memory map list.
481  *
482  * We will make two passes at this, the first (exclude == false) to populate
483  * physmem with valid physical memory ranges from recognized map entry types.
484  * In the second pass we will exclude memory ranges from physmem which must not
485  * be used for general allocations, either because they are used by runtime
486  * firmware or otherwise reserved.
487  *
488  * Adding the runtime-reserved memory ranges to physmem and excluding them
489  * later ensures that they are included in the DMAP, but excluded from
490  * phys_avail[].
491  *
492  * Entry types not explicitly listed here are ignored and not mapped.
493  */
/*
 * Per-entry callback for both physmem passes described above; *argp
 * selects the pass (false = add hardware regions, true = exclude the
 * runtime/reserved ones).
 */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP, but will
		 * be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}
540 
541 static void
add_efi_map_entries(struct efi_map_header * efihdr)542 add_efi_map_entries(struct efi_map_header *efihdr)
543 {
544 	bool exclude = false;
545 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
546 }
547 
548 static void
exclude_efi_map_entries(struct efi_map_header * efihdr)549 exclude_efi_map_entries(struct efi_map_header *efihdr)
550 {
551 	bool exclude = true;
552 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
553 }
554 
/*
 * Print one EFI memory map descriptor: type name, physical and
 * virtual addresses, page count and attribute flags. Used via
 * print_efi_map_entries() on verbose boots.
 */
static void
print_efi_map_entry(struct efi_md *p, void *argp __unused)
{
	const char *type;
	static const char *types[] = {
		"Reserved",
		"LoaderCode",
		"LoaderData",
		"BootServicesCode",
		"BootServicesData",
		"RuntimeServicesCode",
		"RuntimeServicesData",
		"ConventionalMemory",
		"UnusableMemory",
		"ACPIReclaimMemory",
		"ACPIMemoryNVS",
		"MemoryMappedIO",
		"MemoryMappedIOPortSpace",
		"PalCode",
		"PersistentMemory"
	};

	/* Types beyond the table are unknown to this kernel. */
	if (p->md_type < nitems(types))
		type = types[p->md_type];
	else
		type = "<INVALID>";
	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
	    p->md_virt, p->md_pages);
	if (p->md_attr & EFI_MD_ATTR_UC)
		printf("UC ");
	if (p->md_attr & EFI_MD_ATTR_WC)
		printf("WC ");
	if (p->md_attr & EFI_MD_ATTR_WT)
		printf("WT ");
	if (p->md_attr & EFI_MD_ATTR_WB)
		printf("WB ");
	if (p->md_attr & EFI_MD_ATTR_UCE)
		printf("UCE ");
	if (p->md_attr & EFI_MD_ATTR_WP)
		printf("WP ");
	if (p->md_attr & EFI_MD_ATTR_RP)
		printf("RP ");
	if (p->md_attr & EFI_MD_ATTR_XP)
		printf("XP ");
	if (p->md_attr & EFI_MD_ATTR_NV)
		printf("NV ");
	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
		printf("MORE_RELIABLE ");
	if (p->md_attr & EFI_MD_ATTR_RO)
		printf("RO ");
	if (p->md_attr & EFI_MD_ATTR_RT)
		printf("RUNTIME");
	printf("\n");
}
609 
/* Dump the whole EFI memory map, one print_efi_map_entry() line each. */
static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	/* Column headers matching the per-entry format above. */
	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}
618 
619 /*
620  * Map the passed in VA in EFI space to a void * using the efi memory table to
621  * find the PA and return it in the DMAP, if it exists. We're used between the
622  * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
623  * tables We assume that either the entry you are mapping fits within its page,
624  * or if it spills to the next page, that's contiguous in PA and in the DMAP.
625  * All observed tables obey the first part of this precondition.
626  */
/* Argument/result bundle for efi_early_map_entry(). */
struct early_map_data
{
	vm_offset_t va;	/* in: EFI virtual address to translate */
	vm_offset_t pa;	/* out: physical address; 0 while unresolved */
};
632 
633 static void
efi_early_map_entry(struct efi_md * p,void * argp)634 efi_early_map_entry(struct efi_md *p, void *argp)
635 {
636 	struct early_map_data *emdp = argp;
637 	vm_offset_t s, e;
638 
639 	if (emdp->pa != 0)
640 		return;
641 	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
642 		return;
643 	s = p->md_virt;
644 	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
645 	if (emdp->va < s  || emdp->va >= e)
646 		return;
647 	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
648 }
649 
650 static void *
efi_early_map(vm_offset_t va)651 efi_early_map(vm_offset_t va)
652 {
653 	struct early_map_data emd = { .va = va };
654 
655 	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
656 	if (emd.pa == 0)
657 		return NULL;
658 	return (void *)PHYS_TO_DMAP(emd.pa);
659 }
660 
661 
662 /*
663  * When booted via kboot, the prior kernel will pass in reserved memory areas in
664  * a EFI config table. We need to find that table and walk through it excluding
665  * the memory ranges in it. btw, this is called too early for the printf to do
666  * anything since msgbufp isn't initialized, let alone a console...
667  */
668 static void
exclude_efi_memreserve(vm_offset_t efi_systbl_phys)669 exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
670 {
671 	struct efi_systbl *systbl;
672 	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
673 
674 	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
675 	if (systbl == NULL) {
676 		printf("can't map systbl\n");
677 		return;
678 	}
679 	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
680 		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
681 		return;
682 	}
683 
684 	/*
685 	 * We don't yet have the pmap system booted enough to create a pmap for
686 	 * the efi firmware's preferred address space from the GetMemoryMap()
687 	 * table. The st_cfgtbl is a VA in this space, so we need to do the
688 	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
689 	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
690 	 * noted below.
691 	 */
692 	if (systbl->st_cfgtbl == 0)	/* Failsafe st_entries should == 0 in this case */
693 		return;
694 	for (int i = 0; i < systbl->st_entries; i++) {
695 		struct efi_cfgtbl *cfgtbl;
696 		struct linux_efi_memreserve *mr;
697 
698 		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
699 		if (cfgtbl == NULL)
700 			panic("Can't map the config table entry %d\n", i);
701 		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
702 			continue;
703 
704 		/*
705 		 * cfgtbl points are either VA or PA, depending on the GUID of
706 		 * the table. memreserve GUID pointers are PA and not converted
707 		 * after a SetVirtualAddressMap(). The list's mr_next pointer
708 		 * is also a PA.
709 		 */
710 		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
711 			(vm_offset_t)cfgtbl->ct_data);
712 		while (true) {
713 			for (int j = 0; j < mr->mr_count; j++) {
714 				struct linux_efi_memreserve_entry *mre;
715 
716 				mre = &mr->mr_entry[j];
717 				physmem_exclude_region(mre->mre_base, mre->mre_size,
718 				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
719 			}
720 			if (mr->mr_next == 0)
721 				break;
722 			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
723 		};
724 	}
725 
726 }
727 
#ifdef FDT
/*
 * Locate the device tree blob — from loader metadata, or the
 * statically embedded copy when FDT_DTB_STATIC is set — install the
 * OFW FDT backend on it, and pick up any bootargs it carries.
 */
static void
try_load_dtb(caddr_t kmdp)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (!OF_install(OFW_FDT, 0))
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif
760 
/*
 * Decide between ACPI and FDT bus enumeration. The comma-separated
 * "kern.cfg.order" environment variable expresses a preference among
 * the methods actually present; with no (valid) preference, ACPI wins
 * when a root pointer exists, else FDT. Returns false when an order
 * was supplied but selected nothing usable, so the caller can panic
 * after the console is up.
 */
static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		order = env;
		/* Scan the list; stop at the first available method named. */
		while (order != NULL) {
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
		else if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
	}

	/*
	 * If no option was set the default is valid, otherwise we are
	 * setting one to get cninit() working, then calling panic to tell
	 * the user about the invalid bus setup.
	 */
	return (env == NULL);
}
817 
/*
 * Record the cache geometry and decide whether "dc zva" may be used to
 * zero pages: when DCZID_EL0 permits it, remember the zeroing block
 * size and switch to the cache-assisted pagezero implementation.
 */
static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* BS encodes log2 of the block size in 4-byte words. */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}
840 
/*
 * Choose a memory attribute for the given physical address from the
 * EFI memory map. Without a map (or with a corrupt one) everything is
 * write-back; addresses not covered by any descriptor, and descriptors
 * whose attributes match none of the cases below, are treated as
 * device memory.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}
882 
/*
 * Early machine-dependent initialisation, entered from locore with the
 * MMU on. Parses the loader/FDT metadata, sets up the boot CPU's pcpu
 * and pointer, loads and filters the physical memory map, bootstraps
 * the pmap and console, and prepares proc0/thread0. The ordering of
 * the steps below matters; reorder with care.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	caddr_t kmdp;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	/* Record the boot exception level for has_hyp(). */
	boot_el = abp->boot_el;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	/* Find the kernel address */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	link_elf_ireloc(kmdp);
#ifdef FDT
	try_load_dtb(kmdp);
#endif

	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();
	pan_setup();

	/* Bootstrap enough of pmap  to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/* Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail.  There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use.  Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap();

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	/* Deferred from bus_probe() so the panic reaches the console. */
	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/*
	 * Check if pointer authentication is available on this system, and
	 * if so enable its use. This needs to be called before init_proc0
	 * as that will configure the thread0 pointer authentication keys.
	 */
	ptrauth_init();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
	pan_enable();

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}
1070 
/*
 * Unlock and initialize the self-hosted debug hardware on the boot
 * CPU so that DDB watchpoints work.
 */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}
1083 
1084 #ifdef DDB
1085 #include <ddb/ddb.h>
1086 
/* DDB "show specialregs": dump the EL0/EL1 system registers. */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}
1161 
/*
 * DDB "show vtop <virt_addr>": run the four AT address-translation
 * variants (EL1/EL0, read/write) on the address and print the raw PAR
 * results.
 */
DB_SHOW_COMMAND(vtop, db_show_vtop)
{
	uint64_t phys;

	if (have_addr) {
		phys = arm64_address_translate_s1e1r(addr);
		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e1w(addr);
		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0r(addr);
		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0w(addr);
		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
	} else
		db_printf("show vtop <virt_addr>\n");
}
1178 #endif
1179