xref: /freebsd/sys/arm64/arm64/machdep.c (revision b72ae900d4348118829fe04abdc11b620930c30f)
/*-
 * Copyright (c) 2014 Andrew Turner
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include "opt_acpi.h"
#include "opt_kstack_pages.h"
#include "opt_platform.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/cons.h>
#include <sys/cpu.h>
#include <sys/csan.h>
#include <sys/devmap.h>
#include <sys/efi.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/msan.h>
#include <sys/msgbuf.h>
#include <sys/pcpu.h>
#include <sys/physmem.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/reboot.h>
#include <sys/reg.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/ucontext.h>
#include <sys/vdso.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/cpu_feat.h>
#include <machine/debug_monitor.h>
#include <machine/hypervisor.h>
#include <machine/kdb.h>
#include <machine/machdep.h>
#include <machine/metadata.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/undefined.h>
#include <machine/vmparam.h>

#ifdef VFP
#include <machine/vfp.h>
#endif

#ifdef DEV_ACPI
#include <contrib/dev/acpica/include/acpi.h>
#include <machine/acpica_machdep.h>
#endif

#ifdef FDT
#include <dev/fdt/fdt_common.h>
#include <dev/ofw/openfirm.h>
#endif

#include <dev/smbios/smbios.h>

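/*
 * Compile-time checks on the layout of struct pcb: assembly and other code
 * depend on these offsets (e.g. via generated assym offsets), so any change
 * to the structure must be made deliberately and these constants updated.
 */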
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");

enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not, we
 * could relocate it, but we would need to keep the same virtual address,
 * as it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

static struct trapframe proc0_tf;

int early_boot = 1;
int cold = 1;
static int boot_el;

struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of the cache line that dc zva zeroes */
int has_pan;

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * the .bss is cleared, we need to ensure it's preserved. To do this, use
 * __read_mostly, as it's only ever set once but is read in the putc
 * functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
static struct efi_map_header *efihdr;

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/* pagezero_simple is default pagezero */
void (*pagezero)(void *p) = pagezero_simple;

int (*apei_nmi)(void);

#if defined(PERTHREAD_SSP_WARNING)
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
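/* Register the warning both early and late so it is hard to miss in dmesg. */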
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif

static bool
pan_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
	uint64_t id_aa64mmfr1;

	id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
	return (ID_AA64MMFR1_PAN_VAL(id_aa64mmfr1) != ID_AA64MMFR1_PAN_NONE);
}

static void
pan_enable(const struct cpu_feat *feat __unused,
    cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
    u_int errata_count __unused)
{
	has_pan = 1;

	/*
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it, unless the kernel
	 * uses the userspace load/store instructions.
	 */
	WRITE_SPECIALREG(sctlr_el1,
	    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
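	/*
	 * The .arch_extension directives let the assembler accept the
	 * "msr pan" encoding even when the kernel is not built for a CPU
	 * that implements FEAT_PAN; support was checked in pan_check().
	 */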
	__asm __volatile(
	    ".arch_extension pan	\n"
	    "msr pan, #1		\n"
	    ".arch_extension nopan	\n");
}

static struct cpu_feat feat_pan = {
	.feat_name		= "FEAT_PAN",
	.feat_check		= pan_check,
	.feat_enable		= pan_enable,
	.feat_flags		= CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU,
};
DATA_SET(cpu_feat_set, feat_pan);

bool
has_hyp(void)
{
	return (boot_el == CURRENTEL_EL_EL2);
}

bool
in_vhe(void)
{
	/* If we are currently in EL2 then we must be in VHE */
	return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
	    CURRENTEL_EL_EL2);
}

static void
cpu_startup(void *dummy)
{
	vm_paddr_t size;
	int i;

	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
	    ptoa((uintmax_t)realmem) / 1024 / 1024);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[i],
			    (uintmax_t)phys_avail[i + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);

	undef_init();
	install_cpu_errata();

	vm_ksubmap_init(&kmi);
	bufinit();
	vm_pager_bufferinit();
}

SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);

static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);

int
cpu_idle_wakeup(int cpu)
{

	return (0);
}

void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
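	/*
	 * If nothing is runnable, wait for an interrupt. The dsb ensures
	 * that all prior memory accesses have completed before wfi drops
	 * the core into a low-power state.
	 */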
	if (!sched_runnable())
		__asm __volatile(
		    "dsb sy \n"
		    "wfi    \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}

void
cpu_halt(void)
{

	/* We should have shut down by now; if not, enter a low power sleep */
	intr_disable();
	while (1) {
		__asm __volatile("wfi");
	}
}

/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD */
}

/* Get current clock frequency for the given CPU ID. */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
	struct pcpu *pc;

	pc = pcpu_find(cpu_id);
	if (pc == NULL || rate == NULL)
		return (EINVAL);

	if (pc->pc_clock == 0)
		return (EOPNOTSUPP);

	*rate = pc->pc_clock;
	return (0);
}

void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}

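/*
 * Spinlock sections run with interrupts disabled. The DAIF state is saved
 * on the outermost spinlock_enter() and restored only by the matching
 * outermost spinlock_exit(), so the sections nest safely.
 */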
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}

void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{
	int i;

	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
	for (i = 0; i < nitems(pcb->pcb_x); i++) {
		if (i == PCB_LR)
			pcb->pcb_x[i] = tf->tf_elr;
		else
			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
	}

	pcb->pcb_sp = tf->tf_sp;
}

static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
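	/* The PCB is carved out of the top of thread0's kernel stack. */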
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}

/*
 * Get an address to be used to write to kernel data that may be mapped
 * read-only, e.g. to patch kernel code.
 */
bool
arm64_get_writable_addr(void *addr, void **out)
{
	vm_paddr_t pa;

	/* Check if the page is writable */
	if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
		*out = addr;
		return (true);
	}

	/*
	 * Find the physical address of the given page.
	 */
	if (!pmap_klookup((vm_offset_t)addr, &pa)) {
		return (false);
	}

	/*
	 * If it is within the DMAP region and is writable use that.
	 */
	if (PHYS_IN_DMAP_RANGE(pa)) {
		addr = (void *)PHYS_TO_DMAP(pa);
		if (PAR_SUCCESS(arm64_address_translate_s1e1w(
		    (vm_offset_t)addr))) {
			*out = addr;
			return (true);
		}
	}

	return (false);
}
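
/*
 * A minimal usage sketch (e.g. ddb's db_write_bytes() patches otherwise
 * read-only kernel memory along these lines):
 *
 *	void *waddr;
 *
 *	if (arm64_get_writable_addr(addr, &waddr))
 *		*(uint8_t *)waddr = data;
 */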

typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);

static void
foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
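	/* The header is padded to a 16-byte boundary; descriptors follow it. */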
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

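	/* A zero descriptor size would mean a malformed map; bail out. */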
	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		cb(p, argp);
	}
}

/*
 * Handle the EFI memory map list.
 *
 * We will make two passes at this, the first (exclude == false) to populate
 * physmem with valid physical memory ranges from recognized map entry types.
 * In the second pass we will exclude memory ranges from physmem which must not
 * be used for general allocations, either because they are used by runtime
 * firmware or otherwise reserved.
 *
 * Adding the runtime-reserved memory ranges to physmem and excluding them
 * later ensures that they are included in the DMAP, but excluded from
 * phys_avail[].
 *
 * Entry types not explicitly listed here are ignored and not mapped.
 */
static void
handle_efi_map_entry(struct efi_md *p, void *argp)
{
	bool exclude = *(bool *)argp;

	switch (p->md_type) {
	case EFI_MD_TYPE_RECLAIM:
		/*
		 * The recommended location for ACPI tables. Map into the
		 * DMAP so we can access them from userspace via /dev/mem.
		 */
	case EFI_MD_TYPE_RT_CODE:
		/*
		 * Some UEFI implementations put the system table in the
		 * runtime code section. Include it in the DMAP, but it
		 * will be excluded from phys_avail.
		 */
	case EFI_MD_TYPE_RT_DATA:
		/*
		 * Runtime data will be excluded after the DMAP
		 * region is created to stop it from being added
		 * to phys_avail.
		 */
		if (exclude) {
			physmem_exclude_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
			break;
		}
		/* FALLTHROUGH */
	case EFI_MD_TYPE_CODE:
	case EFI_MD_TYPE_DATA:
	case EFI_MD_TYPE_BS_CODE:
	case EFI_MD_TYPE_BS_DATA:
	case EFI_MD_TYPE_FREE:
		/*
		 * We're allowed to use any entry with these types.
		 */
		if (!exclude)
			physmem_hardware_region(p->md_phys,
			    p->md_pages * EFI_PAGE_SIZE);
		break;
	default:
		/* Other types shall not be handled by physmem. */
		break;
	}
}

static void
add_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = false;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}

static void
exclude_efi_map_entries(struct efi_map_header *efihdr)
{
	bool exclude = true;
	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
}

static void
print_efi_map_entry(struct efi_md *p, void *argp __unused)
{
	const char *type;
	static const char *types[] = {
		"Reserved",
		"LoaderCode",
		"LoaderData",
		"BootServicesCode",
		"BootServicesData",
		"RuntimeServicesCode",
		"RuntimeServicesData",
		"ConventionalMemory",
		"UnusableMemory",
		"ACPIReclaimMemory",
		"ACPIMemoryNVS",
		"MemoryMappedIO",
		"MemoryMappedIOPortSpace",
		"PalCode",
		"PersistentMemory"
	};

	if (p->md_type < nitems(types))
		type = types[p->md_type];
	else
		type = "<INVALID>";
	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
	    p->md_virt, p->md_pages);
	if (p->md_attr & EFI_MD_ATTR_UC)
		printf("UC ");
	if (p->md_attr & EFI_MD_ATTR_WC)
		printf("WC ");
	if (p->md_attr & EFI_MD_ATTR_WT)
		printf("WT ");
	if (p->md_attr & EFI_MD_ATTR_WB)
		printf("WB ");
	if (p->md_attr & EFI_MD_ATTR_UCE)
		printf("UCE ");
	if (p->md_attr & EFI_MD_ATTR_WP)
		printf("WP ");
	if (p->md_attr & EFI_MD_ATTR_RP)
		printf("RP ");
	if (p->md_attr & EFI_MD_ATTR_XP)
		printf("XP ");
	if (p->md_attr & EFI_MD_ATTR_NV)
		printf("NV ");
	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
		printf("MORE_RELIABLE ");
	if (p->md_attr & EFI_MD_ATTR_RO)
		printf("RO ");
	if (p->md_attr & EFI_MD_ATTR_RT)
		printf("RUNTIME");
	printf("\n");
}

static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}

/*
 * Map the passed-in VA in EFI space to a void *, using the EFI memory table
 * to find the PA and return it in the DMAP, if it exists. This is used
 * between the calls to pmap_bootstrap() and physmem_init_kernel_globals()
 * to parse CFG tables. We assume that either the entry you are mapping fits
 * within its page, or, if it spills into the next page, that page is
 * contiguous in PA and in the DMAP. All observed tables obey the first part
 * of this precondition.
 */
struct early_map_data
{
	vm_offset_t va;
	vm_offset_t pa;
};

static void
efi_early_map_entry(struct efi_md *p, void *argp)
{
	struct early_map_data *emdp = argp;
	vm_offset_t s, e;

	if (emdp->pa != 0)
		return;
	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
		return;
	s = p->md_virt;
	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
	if (emdp->va < s || emdp->va >= e)
		return;
	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
}

static void *
efi_early_map(vm_offset_t va)
{
	struct early_map_data emd = { .va = va };

	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
	if (emd.pa == 0)
		return (NULL);
	return ((void *)PHYS_TO_DMAP(emd.pa));
}

/*
 * When booted via kboot, the prior kernel will pass in reserved memory areas
 * in an EFI config table. We need to find that table and walk through it,
 * excluding the memory ranges in it. Note that this is called too early for
 * printf to do anything, since msgbufp isn't initialized, let alone a
 * console...
 */
static void
exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
{
	struct efi_systbl *systbl;
	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;

	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
	if (systbl == NULL) {
		printf("can't map systbl\n");
		return;
	}
	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
		return;
	}

	/*
	 * We don't yet have the pmap system booted enough to create a pmap for
	 * the efi firmware's preferred address space from the GetMemoryMap()
	 * table. The st_cfgtbl is a VA in this space, so we need to do the
	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
	 * noted below.
	 */
	if (systbl->st_cfgtbl == 0)	/* Failsafe: st_entries should be 0 in this case */
		return;
	for (int i = 0; i < systbl->st_entries; i++) {
		struct efi_cfgtbl *cfgtbl;
		struct linux_efi_memreserve *mr;

		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
		if (cfgtbl == NULL)
			panic("Can't map the config table entry %d\n", i);
		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
			continue;

		/*
		 * cfgtbl pointers are either VA or PA, depending on the GUID
		 * of the table. memreserve GUID pointers are PA and not
		 * converted after a SetVirtualAddressMap(). The list's
		 * mr_next pointer is also a PA.
		 */
		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
			(vm_offset_t)cfgtbl->ct_data);
		while (true) {
			for (int j = 0; j < mr->mr_count; j++) {
				struct linux_efi_memreserve_entry *mre;

				mre = &mr->mr_entry[j];
				physmem_exclude_region(mre->mre_base, mre->mre_size,
				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
			}
			if (mr->mr_next == 0)
				break;
			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
		}
	}
}

#ifdef FDT
static void
try_load_dtb(void)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(preload_kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (!OF_install(OFW_FDT, 0))
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif

static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

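	/*
	 * kern.cfg.order is a comma-separated preference list, e.g.
	 * "acpi,fdt"; the first method that is actually available wins.
	 */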
	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		order = env;
		while (order != NULL) {
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
		else if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
	}

	/*
	 * If no option was set the default is valid, otherwise we are
	 * setting one to get cninit() working, then calling panic to tell
	 * the user about the invalid bus setup.
	 */
	return (env == NULL);
}

static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* The BS field is log2 of the block size in 4-byte words */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}

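/*
 * Return the VM_MEMATTR_* type to use when mapping the given physical
 * address, based on the attributes of the covering UEFI memory map entry.
 * Addresses not described by the map are treated as device memory; with
 * no map at all, everything is assumed to be normal write-back memory.
 */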
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}

void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	boot_el = abp->boot_el;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	link_elf_ireloc();
#ifdef FDT
	try_load_dtb();
#endif

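	/*
	 * The loader passes the physical address of the EFI system table
	 * to the kernel as the firmware handle metadata.
	 */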
	efi_systbl_phys = MD_FETCH(preload_kmdp, MODINFOMD_FW_HANDLE,
	    vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(preload_kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(preload_kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();

	/* Bootstrap enough of pmap to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/* Do the same for reserved entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail.  There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use.  Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap();

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/* Detect early CPU feature support */
	enable_cpu_feat(CPU_FEAT_EARLY_BOOT);

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version,
		    sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}

void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}

#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}

DB_SHOW_COMMAND(vtop, db_show_vtop)
{
	uint64_t phys;

	if (have_addr) {
		phys = arm64_address_translate_s1e1r(addr);
		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e1w(addr);
		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0r(addr);
		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0w(addr);
		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
	} else
		db_printf("show vtop <virt_addr>\n");
}
#endif
1175