xref: /freebsd/sys/arm64/arm64/locore.S (revision f5e9c916afed4a948fe5c03bfaee038d165e12ab)
1/*-
2 * Copyright (c) 2012-2014 Andrew Turner
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include "assym.s"
30#include "opt_kstack_pages.h"
31#include <sys/syscall.h>
32#include <machine/asm.h>
33#include <machine/armreg.h>
34#include <machine/hypervisor.h>
35#include <machine/param.h>
36#include <machine/pte.h>
37
/*
 * Number of virtual address bits selected by the final TCR value
 * (see the TCR_TxSZ(64 - VIRT_BITS) term of the tcr constant).
 */
#define	VIRT_BITS	39

/*
 * Memory type indices. Each one names a MAIR_ATTR() slot of the mair
 * constant and is what callers pass in x7 to the
 * build_l1_block_pagetable / build_l2_block_pagetable routines.
 */
#define	DEVICE_MEM	0
#define	NORMAL_UNCACHED	1
#define	NORMAL_MEM	2

/* Publish KERNBASE as the global absolute symbol kernbase */
	.globl	kernbase
	.set	kernbase, KERNBASE
46
/*
 * _start: entry point of the kernel on the boot CPU.
 *
 * We assume:
 *  MMU      on with an identity map, or off
 *  D-Cache: off
 *  I-Cache: on or off
 *  We are loaded at a 2MiB aligned address
 *
 * x0 is not touched by any helper called below; its value on entry is
 * what ends up in the modulep slot handed to initarm.
 */

	.text
	.globl _start
_start:
	/* If we were entered in EL2, set it up and drop to EL1 */
	bl	drop_to_el1

	/*
	 * Disable the MMU. We may have entered the kernel with it on and
	 * will need to update the tables later. If this has been set up
	 * with anything other than a VA == PA map then this will fail,
	 * but in this case the code to find where we are running from
	 * would have also failed.
	 */
	dsb	sy
	mrs	x2, sctlr_el1
	bic	x2, x2, SCTLR_M
	msr	sctlr_el1, x2
	isb

	/* The boot CPU runs with context id 0 */
	msr	contextidr_el1, xzr

	/* Work out the virt -> phys offset */
	bl	get_virt_delta

	/*
	 * At this point:
	 * x29 = PA - VA
	 * x28 = Our physical load address
	 */

	/* Build the initial kernel and identity page tables */
	bl	create_pagetables

	/*
	 * At this point:
	 * x27 = TTBR0 table
	 * x26 = TTBR1 table
	 */

	/* Turn the MMU on */
	bl	start_mmu

	/*
	 * Leave the identity map: .Lvirtdone holds the link-time (virtual)
	 * address of virtdone.
	 */
	ldr	x15, .Lvirtdone
	br	x15

virtdone:
	/*
	 * Now that we are in virtual address space,
	 * we don't need the identity mapping in TTBR0 and
	 * can set the TCR to a more useful value.
	 *
	 * The low three bits of ID_AA64MMFR0_EL1 are copied into
	 * bits [34:32] of the value before it is written.
	 */
	ldr	x2, tcr
	mrs	x3, id_aa64mmfr0_el1
	bfi	x2, x3, #32, #3
	msr	tcr_el1, x2

	/*
	 * Set up the stack: x25 keeps the top of initstack, sp starts
	 * PCB_SIZE bytes below it.
	 */
	adr	x25, initstack_end
	sub	sp, x25, #PCB_SIZE

	/* Zero the BSS, one 8-byte word per iteration */
	ldr	x15, .Lbss
	ldr	x14, .Lend
.Lzero_bss:
	str	xzr, [x15], #8
	cmp	x15, x14
	b.lo	.Lzero_bss

	/* Make the page table base a virtual address (x29 is PA - VA) */
	sub	x26, x26, x29

	/* Negate the delta so it becomes VA - PA */
	neg	x29, x29

	/*
	 * Carve the boot parameter block out of the stack and fill it in.
	 * x1 keeps the module pointer, x0 becomes the argument to initarm.
	 */
	sub	sp, sp, #(64 * 4)
	mov	x1, x0
	mov	x0, sp
	stp	x1, x26, [x0]		/* modulep, kern_l1pt */
	stp	x29, x25, [x0, #16]	/* kern_delta, kern_stack */

	/* trace back starts here: clear the frame pointer (x29) */
	mov	x29, #0
	/* Branch to C code */
	bl	initarm
	bl	mi_startup

	/* We should not get here */
	brk	0

	/* Link-time addresses used above and by create_pagetables (.Lend) */
	.align 3
.Lvirtdone:
	.quad	virtdone
.Lbss:
	.quad	__bss_start
.Lend:
	.quad	_end
159
#ifdef SMP
/*
 * mpentry(unsigned long)
 *
 * Called by a core when it is being brought online.
 * The data in x0 is passed straight to init_secondary.
 *
 * x0 is also the index used to pick this core's slot in secondary_stacks,
 * so nothing below may modify it (drop_to_el1 and start_mmu only use
 * x1-x3 as scratch).
 */
ENTRY(mpentry)
	/* Disable interrupts */
	msr	daifset, #2

	/* Drop to EL1 */
	bl	drop_to_el1

	/*
	 * Set the context id. x1 is not an argument of mpentry and has just
	 * been used as scratch by drop_to_el1, so it must not be written
	 * here; use zero exactly as _start does on the boot CPU.
	 */
	msr	contextidr_el1, xzr

	/* Load the kernel page table */
	adr	x26, pagetable_l1_ttbr1
	/* Load the identity page table */
	adr	x27, pagetable_l0_ttbr0

	/* Enable the mmu */
	bl	start_mmu

	/* Jump to the virtual address space */
	ldr	x15, =mp_virtdone
	br	x15

mp_virtdone:
	/*
	 * Now that we are in virtual address space,
	 * we don't need the identity mapping in TTBR0 and
	 * can set the TCR to a more useful value.
	 *
	 * The low three bits of ID_AA64MMFR0_EL1 are copied into
	 * bits [34:32] of the value before it is written.
	 */
	ldr	x2, tcr
	mrs	x3, id_aa64mmfr0_el1
	bfi	x2, x3, #32, #3
	msr	tcr_el1, x2

	/* sp = secondary_stacks + x0 * (PAGE_SIZE * KSTACK_PAGES) */
	ldr	x4, =secondary_stacks
	mov	x5, #(PAGE_SIZE * KSTACK_PAGES)
	mul	x5, x0, x5
	add	sp, x4, x5

	/* Tail call; x0 still holds the value we were entered with */
	b	init_secondary
END(mpentry)
#endif
208
/*
 * If we are started in EL2, configure the required hypervisor
 * registers and drop to EL1.
 *
 * In both cases execution continues at the caller's return address (x30):
 * by a plain ret when we are not in EL2, or by an eret with elr_el2 = x30
 * when we are.
 *
 * x1 and x2 are trashed.
 */
drop_to_el1:
	/* CurrentEL keeps the exception level in bits [3:2] */
	mrs	x1, CurrentEL
	lsr	x1, x1, #2
	cmp	x1, #0x2
	b.ne	.Lnot_in_el2

	/* Configure the Hypervisor */
	mov	x2, #(HCR_RW)
	msr	hcr_el2, x2

	/* Load the Virtualization Process ID Register */
	mrs	x2, midr_el1
	msr	vpidr_el2, x2

	/* Load the Virtualization Multiprocess ID Register */
	mrs	x2, mpidr_el1
	msr	vmpidr_el2, x2

	/* Set the bits that need to be 1 in sctlr_el1 */
	ldr	x2, .Lsctlr_res1
	msr	sctlr_el1, x2

	/* Don't trap to EL2 for exceptions */
	mov	x2, #CPTR_RES1
	msr	cptr_el2, x2

	/* Don't trap to EL2 for CP15 traps */
	msr	hstr_el2, xzr

	/* Enable access to the physical timers at EL1 */
	mrs	x2, cnthctl_el2
	orr	x2, x2, #(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN)
	msr	cnthctl_el2, x2

	/* Set the counter offset to a known value */
	msr	cntvoff_el2, xzr

	/* Hypervisor trap functions */
	adr	x2, hyp_vectors
	msr	vbar_el2, x2

	/* Saved program status used by the eret below: EL1h, D/A/I/F set */
	mov	x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h)
	msr	spsr_el2, x2

	/* Configure GICv3 CPU interface */
	mrs	x2, id_aa64pfr0_el1
	/* Extract GIC bits from the register */
	ubfx	x2, x2, #ID_AA64PFR0_GIC_SHIFT, #ID_AA64PFR0_GIC_BITS
	/* GIC[3:0] == 0001 - GIC CPU interface via special regs. supported */
	cmp	x2, #(ID_AA64PFR0_GIC_CPUIF_EN >> ID_AA64PFR0_GIC_SHIFT)
	b.ne	.Lgic_sysregs_done

	/* The two bits are set by separate orr instructions */
	mrs	x2, icc_sre_el2
	orr	x2, x2, #ICC_SRE_EL2_EN	/* Enable access from non-secure EL1 */
	orr	x2, x2, #ICC_SRE_EL2_SRE	/* Enable system registers */
	msr	icc_sre_el2, x2
.Lgic_sysregs_done:

	/* Set the address to return to our return address */
	msr	elr_el2, x30
	isb

	eret

.Lnot_in_el2:
	/* Not in EL2: nothing to configure */
	ret

	.align 3
.Lsctlr_res1:
	.quad SCTLR_RES1
281
/*
 * An empty vector: aligned to 128 bytes (.align 7), it branches to
 * itself forever.
 */
#define	VECT_EMPTY	\
	.align 7;	\
	b	.

/*
 * EL2 exception vector table, 2KiB aligned (.align 11); drop_to_el1 loads
 * its address into vbar_el2. All sixteen entries are empty. In order:
 *
 *   Synchronous, IRQ, FIQ, Error	EL2t
 *   Synchronous, IRQ, FIQ, Error	EL2h
 *   Synchronous, IRQ, FIQ, Error	64-bit EL1
 *   Synchronous, IRQ, FIQ, Error	32-bit EL1
 */
	.align 11
hyp_vectors:
	.rept	16
	VECT_EMPTY
	.endr
307
/*
 * Get the delta between the physical address we were loaded to and the
 * virtual address we expect to run from. This is used when building the
 * initial page table.
 *
 * Assumes it is executed from the physical (or identity mapped) address,
 * so that adr yields a physical address.
 *
 * Returns:
 *  x29 = PA - VA
 *  x28 = KERNBASE + x29, the physical load address of the kernel
 */
get_virt_delta:
	/* x28 = virtual (link-time) address of virt_map, stored in virt_map */
	ldr	x28, virt_map
	/* x29 = physical (run-time) address of virt_map */
	adr	x29, virt_map
	/* x29 = PA - VA, so any other address converts as PA' = VA' + x29 */
	sub	x29, x29, x28
	/* Find the load address for the kernel */
	mov	x28, #(KERNBASE)
	add	x28, x28, x29
	ret

	.align 3
virt_map:
	.quad	virt_map
328
/*
 * This builds the page tables containing the identity map, and the kernel
 * virtual map.
 *
 * It relies on:
 *  We were loaded to an address that is on a 2MiB boundary
 *  All the memory must not cross a 1GiB boundary
 *  x28 contains the physical address we were loaded from
 *
 * On return:
 *  x26 = the kernel L1 table (TTBR1)
 *  x27 = the identity map L0 table (TTBR0)
 *  x5 - x12 are trashed
 *
 * TODO: This is out of date.
 *  There are at least 5 pages before that address for the page tables
 *   The pages used are:
 *    - The identity (PA = VA) table (TTBR0)
 *    - The Kernel L1 table          (TTBR1)(not yet)
 *    -  The PA != VA L2 table to jump into (not yet)
 *    -  The FDT L2 table                   (not yet)
 */
create_pagetables:
	/* The calls below overwrite x30, keep the return address in x5 */
	mov	x5, x30

	/* Zero everything from pagetable to pagetable_end, 64 bytes a pass */
	adr	x26, pagetable
	mov	x6, x26
	adr	x27, pagetable_end
.Lclear_tables:
	stp	xzr, xzr, [x6], #16
	stp	xzr, xzr, [x6], #16
	stp	xzr, xzr, [x6], #16
	stp	xzr, xzr, [x6], #16
	cmp	x6, x27
	b.lo	.Lclear_tables

	/*
	 * Build the TTBR1 maps.
	 */

	/* Size of the kernel: _end - KERNBASE */
	ldr	x10, .Lend
	mov	x8, #(KERNBASE)
	sub	x10, x10, x8
	/* Get the number of l2 pages to allocate, rounded down */
	lsr	x10, x10, #(L2_SHIFT)
	/* Add 8 MiB for any rounding above and the module data */
	add	x10, x10, #4

	/* Create the kernel space L2 table */
	mov	x6, x26
	mov	x7, #NORMAL_MEM
	mov	x8, #(KERNBASE & L2_BLOCK_MASK)
	mov	x9, x28
	bl	build_l2_block_pagetable

	/* Move to the l1 table */
	add	x26, x26, #PAGE_SIZE

	/* Link the l1 -> l2 table (x6 still points at the L2 table) */
	mov	x9, x6
	mov	x6, x26
	bl	link_l1_pagetable


	/*
	 * Build the TTBR0 maps.
	 */
	add	x27, x26, #PAGE_SIZE

	mov	x6, x27		/* The initial page table */
#if defined(SOCDEV_PA) && defined(SOCDEV_VA)
	/* Create a table for the UART */
	mov	x7, #DEVICE_MEM
	mov	x8, #(SOCDEV_VA)	/* VA start */
	mov	x9, #(SOCDEV_PA)	/* PA start */
	mov	x10, #1
	bl	build_l1_block_pagetable
#endif

	/*
	 * Create the VA = PA map: one 1GiB block covering the address of
	 * this table itself.
	 */
	mov	x7, #NORMAL_UNCACHED /* Uncached as it's only needed early on */
	mov	x8, x27		/* VA start (== PA start) */
	mov	x9, x27		/* PA start */
	mov	x10, #1
	bl	build_l1_block_pagetable

	/* Move to the l0 table */
	add	x27, x27, #PAGE_SIZE

	/* Link the l0 -> l1 table (x6 still points at the TTBR0 L1 table) */
	mov	x9, x6
	mov	x6, x27
	bl	link_l0_pagetable

	/* Restore the Link register */
	mov	x30, x5
	ret
425
/*
 * Builds an L0 -> L1 table descriptor
 *
 * This is a link for a 512GiB block of memory with up to 1GiB regions mapped
 * within it by build_l1_block_pagetable.
 *
 *  x6  = L0 table
 *  x8  = Virtual Address
 *  x9  = L1 PA (trashed)
 *  x11 and x12 are trashed (x13 is listed as scratch but is not written)
 */
link_l0_pagetable:
	/* Only use the output address bits: drop PA bits [11:0] */
	lsr	x9, x9, #12

	/* Build the L0 table descriptor: L0_TABLE | (L1 PA with [11:0] clear) */
	mov	x12, #L0_TABLE
	orr	x12, x12, x9, lsl #12

	/* Find the table index for the virtual address */
	lsr	x11, x8, #L0_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Store the entry; each slot is 8 bytes wide */
	str	x12, [x6, x11, lsl #3]

	ret
456
/*
 * Builds an L1 -> L2 table descriptor
 *
 * This is a link for a 1GiB block of memory with up to 2MiB regions mapped
 * within it by build_l2_block_pagetable.
 *
 *  x6  = L1 table
 *  x8  = Virtual Address
 *  x9  = L2 PA (trashed)
 *  x11 and x12 are trashed (x13 is listed as scratch but is not written)
 */
link_l1_pagetable:
	/* Only use the output address bits: drop PA bits [11:0] */
	lsr	x9, x9, #12

	/* Build the L1 table descriptor: L1_TABLE | (L2 PA with [11:0] clear) */
	mov	x12, #L1_TABLE
	orr	x12, x12, x9, lsl #12

	/* Find the table index for the virtual address */
	lsr	x11, x8, #L1_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Store the entry; each slot is 8 bytes wide */
	str	x12, [x6, x11, lsl #3]

	ret
487
/*
 * Builds count 1 GiB page table entries
 *  x6  = L1 table
 *  x7  = Memory type, used as the attribute index of the entries
 *        (DEVICE_MEM, NORMAL_UNCACHED or NORMAL_MEM)
 *  x8  = VA start
 *  x9  = PA start (trashed)
 *  x10 = Entry count (trashed); a count of zero writes nothing
 *  x11 and x12 are trashed (x13 is listed as scratch but is not written)
 *
 * No check is made that VA index + count stays within the table.
 */
build_l1_block_pagetable:
	/*
	 * An empty range must not enter the loop: the counter is decremented
	 * before it is tested, so starting from zero would never terminate.
	 */
	cbz	x10, 2f

	/*
	 * Build the L1 table entry.
	 */
	/* Find the table index */
	lsr	x11, x8, #L1_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Build the L1 block entry: the type goes in at bit 2 */
	lsl	x12, x7, #2
	orr	x12, x12, #L1_BLOCK
	orr	x12, x12, #(ATTR_AF)
#ifdef SMP
	orr	x12, x12, ATTR_SH(ATTR_SH_IS)
#endif

	/* Only use the output address bits: x9 = PA in units of 1 GiB */
	lsr	x9, x9, #L1_SHIFT

	/* Set the physical address for this virtual address */
1:	orr	x12, x12, x9, lsl #L1_SHIFT

	/* Store the entry */
	str	x12, [x6, x11, lsl #3]

	/* Clear the address bits so the next pass can insert its own */
	and	x12, x12, #ATTR_MASK_L

	/* One entry done: next slot, next 1 GiB of physical memory */
	sub	x10, x10, #1
	add	x11, x11, #1
	add	x9, x9, #1
	cbnz	x10, 1b

2:	ret
531
/*
 * Builds count 2 MiB page table entries
 *  x6  = L2 table
 *  x7  = Memory type, used as the attribute index of the entries
 *        (DEVICE_MEM, NORMAL_UNCACHED or NORMAL_MEM)
 *  x8  = VA start
 *  x9  = PA start (trashed)
 *  x10 = Entry count (trashed); a count of zero writes nothing
 *  x11 and x12 are trashed (x13 is listed as scratch but is not written)
 *
 * No check is made that VA index + count stays within the table.
 */
build_l2_block_pagetable:
	/*
	 * An empty range must not enter the loop: the counter is decremented
	 * before it is tested, so starting from zero would never terminate.
	 */
	cbz	x10, 2f

	/*
	 * Build the L2 table entry.
	 */
	/* Find the table index */
	lsr	x11, x8, #L2_SHIFT
	and	x11, x11, #Ln_ADDR_MASK

	/* Build the L2 block entry: the type goes in at bit 2 */
	lsl	x12, x7, #2
	orr	x12, x12, #L2_BLOCK
	orr	x12, x12, #(ATTR_AF)
#ifdef SMP
	orr	x12, x12, ATTR_SH(ATTR_SH_IS)
#endif

	/* Only use the output address bits: x9 = PA in units of 2 MiB */
	lsr	x9, x9, #L2_SHIFT

	/* Set the physical address for this virtual address */
1:	orr	x12, x12, x9, lsl #L2_SHIFT

	/* Store the entry */
	str	x12, [x6, x11, lsl #3]

	/* Clear the address bits so the next pass can insert its own */
	and	x12, x12, #ATTR_MASK_L

	/* One entry done: next slot, next 2 MiB of physical memory */
	sub	x10, x10, #1
	add	x11, x11, #1
	add	x9, x9, #1
	cbnz	x10, 1b

2:	ret
575
/*
 * Install the exception vectors and the translation tables, program
 * MAIR/TCR and enable the MMU through sctlr_el1.
 *
 *  x26 = TTBR1 table
 *  x27 = TTBR0 table
 *  x1, x2 and x3 are trashed
 */
start_mmu:
	dsb	sy

	/* Load the exception vectors */
	ldr	x2, =exception_vectors
	msr	vbar_el1, x2

	/* Load ttbr0 and ttbr1 */
	msr	ttbr0_el1, x27
	msr	ttbr1_el1, x26
	isb

	/* Clear the Monitor Debug System control register */
	msr	mdscr_el1, xzr

	/*
	 * Invalidate the TLB. The invalidation is only guaranteed to have
	 * completed after a dsb, and the isb makes sure the following
	 * instructions see its effect, so both must come before the MMU
	 * is switched on below.
	 */
	tlbi	vmalle1is
	dsb	ish
	isb

	ldr	x2, mair
	msr	mair_el1, x2

	/*
	 * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1.
	 * Some machines have physical memory mapped >512GiB, which can not
	 * be identity-mapped using the default 39 VA bits. Thus, use
	 * 48 VA bits for now and switch back to 39 after the VA jump.
	 */
	ldr	x2, tcr_early
	mrs	x3, id_aa64mmfr0_el1
	bfi	x2, x3, #32, #3
	msr	tcr_el1, x2

	/* Setup SCTLR: clear sctlr_clear, then set sctlr_set (includes M) */
	ldr	x2, sctlr_set
	ldr	x3, sctlr_clear
	mrs	x1, sctlr_el1
	bic	x1, x1, x3	/* Clear the required bits */
	orr	x1, x1, x2	/* Set the required bits */
	msr	sctlr_el1, x1
	isb

	ret
618
/*
 * 8-byte aligned constants read with PC-relative loads: mair, tcr_early,
 * sctlr_set and sctlr_clear by start_mmu, tcr after the jump to the
 * virtual address space.
 */
	.align 3
mair:
	/*
	 * Attribute index 0: Device
	 * Attribute index 1: Normal, no cache
	 * Attribute index 2: Normal, write-back
	 */
	.quad	(MAIR_ATTR(0x00, 0) | \
	    MAIR_ATTR(0x44, 1) | \
	    MAIR_ATTR(0xff, 2))
tcr:
	/* Final value: both halves sized for VIRT_BITS of address space */
	.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \
	    TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
tcr_early:
	/* Early value: TTBR0 covers 48 bits so the identity map always fits */
	.quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \
	    TCR_ASID_16 | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
sctlr_set:
	/* Bits to set */
	.quad (SCTLR_UCI | SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \
	    SCTLR_I | SCTLR_SED | SCTLR_SA0 | SCTLR_SA | SCTLR_C | SCTLR_M)
sctlr_clear:
	/* Bits to clear */
	.quad (SCTLR_EE | SCTLR_EOE | SCTLR_WXN | SCTLR_UMA | SCTLR_ITD | \
	    SCTLR_THEE | SCTLR_CP15BEN | SCTLR_A)
637
/* abort: global symbol that never returns; it branches to itself forever */
	.globl abort
abort:
	b	.
641
/*
 * Statically allocated boot page tables, 4KiB aligned and PAGE_SIZE bytes
 * each. create_pagetables walks them by adding PAGE_SIZE, so the order
 * matters. 4 initial tables (in the following order):
 *           L2 for kernel (High addresses)
 *           L1 for kernel
 *           L1 for user   (Low addresses)
 *           L0 for user   (Low addresses)
 */
	//.section .init_pagetable
	.p2align 12 /* 4KiB aligned */
pagetable:
	.space	PAGE_SIZE
pagetable_l1_ttbr1:
	.space	PAGE_SIZE
pagetable_l1_ttbr0:
	.space	PAGE_SIZE
pagetable_l0_ttbr0:
	.space	PAGE_SIZE
pagetable_end:

/* One more page, placed after pagetable_end so it is not cleared above */
el2_pagetable:
	.space	PAGE_SIZE

	.globl init_pt_va
init_pt_va:
	.quad pagetable		/* XXX: Keep page tables VA */

/* Boot stack of KSTACK_PAGES pages; _start points sp below initstack_end */
	.p2align	4
initstack:
	.space	(PAGE_SIZE * KSTACK_PAGES)
initstack_end:
671
672
/*
 * sigcode: calls sigreturn with x0 = sp + SF_UC. If that system call
 * returns, exit is called; if exit returns as well the sequence is retried.
 * The code between sigcode and esigcode is position independent and its
 * size is exported in szsigcode.
 */
ENTRY(sigcode)
	/* x0 = sp + SF_UC, the argument of sigreturn */
	mov	x0, sp
	add	x0, x0, #SF_UC

.Lsigcode_retry:
	/* The system call number is passed in x8 */
	mov	x8, #SYS_sigreturn
	svc	0

	/* sigreturn failed, exit */
	mov	x8, #SYS_exit
	svc	0

	b	.Lsigcode_retry
END(sigcode)
	/*
	 * This may be copied to the stack, keep the end aligned.
	 * NOTE(review): the original comment asks for 16-byte alignment but
	 * .align 3 only pads to 8 bytes - confirm which one is required.
	 */
	.align	3
esigcode:

	.data
	.align	3
	.globl	szsigcode
szsigcode:
	.quad	esigcode - sigcode
696