xref: /linux/arch/arm/boot/compressed/head.S (revision 32d7e03d26fd93187c87ed0fbf59ec7023a61404)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 *  linux/arch/arm/boot/compressed/head.S
4 *
5 *  Copyright (C) 1996-2002 Russell King
6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
7 */
8#include <linux/linkage.h>
9#include <asm/assembler.h>
10#include <asm/v7m.h>
11
12#include "efi-header.S"
13
/*
 * OF_DT_MAGIC is the value the code below compares directly against the
 * first word loaded from a candidate device tree blob.  It is defined
 * pre-swapped for little-endian builds so that no byte swap is needed
 * before the comparison.
 */
14#ifdef __ARMEB__
15#define OF_DT_MAGIC 0xd00dfeed
16#else
17#define OF_DT_MAGIC 0xedfe0dd0
18#endif
19
/*
 * Select the assembler target architecture.  AR_CLASS()/M_CLASS() are
 * defined in the included headers; presumably each emits its argument
 * only for A/R-class or M-class builds respectively - confirm there.
 */
20 AR_CLASS(	.arch	armv7-a	)
21 M_CLASS(	.arch	armv7-m	)
22
23/*
24 * Debugging stuff
25 *
26 * Note that these macros must not contain any code which is not
27 * 100% relocatable.  Any attempt to do so will result in a crash.
28 * Please select one of the following when turning on debugging.
29 */
/*
 * When DEBUG is defined, two low-level output macros are provided:
 *
 *   loadsp rb, tmp1, tmp2 - prepare \rb for use by writeb.  Expands to
 *                           nothing in the ICEDCC variants, to a fixed
 *                           physical address on SA1100, and to the
 *                           platform addruart macro otherwise.
 *   writeb ch, rb, tmp    - output the character held in register \ch.
 *                           The ICEDCC variants write \ch to a
 *                           coprocessor 14 register and ignore \rb/\tmp;
 *                           the UART variant uses the wait/send/busy
 *                           macros from CONFIG_DEBUG_LL_INCLUDE.
 */
30#ifdef DEBUG
31
32#if defined(CONFIG_DEBUG_ICEDCC)
33
34#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
35		.macro	loadsp, rb, tmp1, tmp2
36		.endm
37		.macro	writeb, ch, rb, tmp
38		mcr	p14, 0, \ch, c0, c5, 0
39		.endm
40#elif defined(CONFIG_CPU_XSCALE)
41		.macro	loadsp, rb, tmp1, tmp2
42		.endm
43		.macro	writeb, ch, rb, tmp
44		mcr	p14, 0, \ch, c8, c0, 0
45		.endm
46#else
47		.macro	loadsp, rb, tmp1, tmp2
48		.endm
49		.macro	writeb, ch, rb, tmp
50		mcr	p14, 0, \ch, c1, c0, 0
51		.endm
52#endif
53
54#else
55
56#include CONFIG_DEBUG_LL_INCLUDE
57
		/* UART variant: optional CTS wait, wait for TX ready, send, wait until idle */
58		.macro	writeb,	ch, rb, tmp
59#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
60		waituartcts \tmp, \rb
61#endif
62		waituarttxrdy \tmp, \rb
63		senduart \ch, \rb
64		busyuart \tmp, \rb
65		.endm
66
67#if defined(CONFIG_ARCH_SA1100)
		/* SA1100: \rb = 0x80000000 + serial port offset (Ser3 or Ser1) */
68		.macro	loadsp, rb, tmp1, tmp2
69		mov	\rb, #0x80000000	@ physical base address
70#ifdef CONFIG_DEBUG_LL_SER3
71		add	\rb, \rb, #0x00050000	@ Ser3
72#else
73		add	\rb, \rb, #0x00010000	@ Ser1
74#endif
75		.endm
76#else
77		.macro	loadsp,	rb, tmp1, tmp2
78		addruart \rb, \tmp1, \tmp2
79		.endm
80#endif
81#endif
82#endif
83
		/*
		 * kputc val - print one character.
		 * Moves \val into r0 and calls putc (defined elsewhere).
		 * Overwrites r0 and lr, plus whatever putc itself clobbers.
		 */
84		.macro	kputc,val
85		mov	r0, \val
86		bl	putc
87		.endm
88
		/*
		 * kphex val, len - print a value in hex.
		 * Passes \val in r0 and the immediate \len in r1 to phex
		 * (defined elsewhere; \len is presumably the number of hex
		 * digits - confirm against phex).
		 * Overwrites r0, r1 and lr, plus whatever phex clobbers.
		 */
89		.macro	kphex,val,len
90		mov	r0, \val
91		mov	r1, #\len
92		bl	phex
93		.endm
94
95		/*
96		 * Debug kernel copy by printing the memory addresses involved
97		 */
		/*
		 * dbgkc begin, end, cbegin, cend
		 * With DEBUG defined, prints
		 *   "C:0x<begin>-0x<end>->0x<cbegin>-0x<cend>\n"
		 * using kputc/kphex, so r0, r1 and lr are overwritten between
		 * the individual prints; the register arguments must therefore
		 * not be r0, r1 or lr.  Without DEBUG it expands to nothing.
		 */
98		.macro dbgkc, begin, end, cbegin, cend
99#ifdef DEBUG
100		kputc   #'C'
101		kputc   #':'
102		kputc   #'0'
103		kputc   #'x'
104		kphex   \begin, 8	/* Start of compressed kernel */
105		kputc	#'-'
106		kputc	#'0'
107		kputc	#'x'
108		kphex	\end, 8		/* End of compressed kernel */
109		kputc	#'-'
110		kputc	#'>'
111		kputc   #'0'
112		kputc   #'x'
113		kphex   \cbegin, 8	/* Start of kernel copy */
114		kputc	#'-'
115		kputc	#'0'
116		kputc	#'x'
117		kphex	\cend, 8	/* End of kernel copy */
118		kputc	#'\n'
119#endif
120		.endm
121
122		/*
123		 * Debug print of the final appended DTB location
124		 */
		/*
		 * dbgadtb begin, size
		 * With DEBUG defined, prints "DTB:0x<begin> (0x<size>)\n" using
		 * kputc/kphex, so r0, r1 and lr are overwritten; the register
		 * arguments must not be r0, r1 or lr.  Without DEBUG it expands
		 * to nothing.
		 */
125		.macro dbgadtb, begin, size
126#ifdef DEBUG
127		kputc   #'D'
128		kputc   #'T'
129		kputc   #'B'
130		kputc   #':'
131		kputc   #'0'
132		kputc   #'x'
133		kphex   \begin, 8	/* Start of appended DTB */
134		kputc	#' '
135		kputc	#'('
136		kputc	#'0'
137		kputc	#'x'
138		kphex	\size, 8	/* Size of appended DTB */
139		kputc	#')'
140		kputc	#'\n'
141#endif
142		.endm
143
		/*
		 * enable_cp15_barriers reg
		 * Ensure SCTLR bit 5 (CP15BEN) is set so that the CP15 barrier
		 * operations used later are enabled.  If the bit is already set
		 * SCTLR is not written; otherwise it is set, written back and
		 * followed by an ISB.
		 * Clobbers \reg and the condition flags.  The .L_\@ label uses
		 * the assembler macro-expansion counter so each expansion gets
		 * its own label.
		 */
144		.macro	enable_cp15_barriers, reg
145		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
146		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
147		bne	.L_\@
148		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
149		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
150 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
151 THUMB(		isb						)
152.L_\@:
153		.endm
154
155		/*
156		 * The kernel build system appends the size of the
157		 * decompressed kernel at the end of the compressed data
158		 * in little-endian form.
159		 */
		/*
		 * get_inflated_image_size res, tmp1, tmp2
		 * Returns that size in \res.  The word at
		 * .Linflated_image_size_offset holds the distance from itself
		 * to the last four bytes of the compressed data, so the lookup
		 * is position independent.  The value is assembled from four
		 * byte loads, which works at any alignment and on either CPU
		 * endianness.
		 * Clobbers \tmp1 and \tmp2; the condition flags are untouched.
		 */
160		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
161		adr	\res, .Linflated_image_size_offset
162		ldr	\tmp1, [\res]
163		add	\tmp1, \tmp1, \res	@ address of inflated image size
164
165		ldrb	\res, [\tmp1]		@ get_unaligned_le32
166		ldrb	\tmp2, [\tmp1, #1]
167		orr	\res, \res, \tmp2, lsl #8
168		ldrb	\tmp2, [\tmp1, #2]
169		ldrb	\tmp1, [\tmp1, #3]
170		orr	\res, \res, \tmp2, lsl #16
171		orr	\res, \res, \tmp1, lsl #24
172		.endm
173
		/*
		 * be32tocpu val, tmp
		 * Convert the big-endian 32-bit value in \val to CPU byte order
		 * in place.  On big-endian builds (__ARMEB__) this expands to
		 * nothing; otherwise it byte-reverses \val via rev_l (defined
		 * in the included assembler header), with \tmp as scratch.
		 */
174		.macro	be32tocpu, val, tmp
175#ifndef __ARMEB__
176		/* convert to little endian */
177		rev_l	\val, \tmp
178#endif
179		.endm
180
/*
 * Image entry point.
 *
 * On entry (as used by the code below):
 *   r1 = architecture ID          -> saved in r7
 *   r2 = atags/device tree pointer -> saved in r8
 * On A/R-class CPUs the entry CPSR is captured in r9 and, unless built
 * for CONFIG_CPU_V7M, later stored in SPSR so the boot mode can be
 * recovered after the switch to SVC mode.
 */
181		.section ".start", "ax"
182/*
183 * sort out different calling conventions
184 */
185		.align
186		/*
187		 * Always enter in ARM state for CPUs that support the ARM ISA.
188		 * As of today (2014) that's exactly the members of the A and R
189		 * classes.
190		 */
191 AR_CLASS(	.arm	)
192start:
193		.type	start,#function
194		/*
195		 * These 7 nops along with the 1 nop immediately below for
196		 * !THUMB2 form 8 nops that make the compressed kernel bootable
197		 * on legacy ARM systems that were assuming the kernel in a.out
198		 * binary format. The boot loaders on these systems would
199		 * jump 32 bytes into the image to skip the a.out header.
200		 * With these 8 nops filling exactly 32 bytes, things still
201		 * work as expected on these legacy systems. Thumb2 mode keeps
202		 * 7 of the nops as it turns out that some boot loaders
203		 * were patching the initial instructions of the kernel, i.e.
204		 * had started to exploit this "patch area".
205		 */
206		.rept	7
207		__nop
208		.endr
209#ifndef CONFIG_THUMB2_KERNEL
210		__nop
211#else
212 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
213  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
214		.thumb
215#endif
216		W(b)	1f
217
		/*
		 * Fixed-position header words read by boot loaders.  The
		 * _magic_* symbols are defined outside this file.
		 */
218		.word	_magic_sig	@ Magic numbers to help the loader
219		.word	_magic_start	@ absolute load/run zImage address
220		.word	_magic_end	@ zImage end address
221		.word	0x04030201	@ endianness flag
222		.word	0x45454545	@ another magic number to indicate
223		.word	_magic_table	@ additional data table
224
225		__EFI_HEADER
2261:
227 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
228 AR_CLASS(	mrs	r9, cpsr	)
229#ifdef CONFIG_ARM_VIRT_EXT
230		bl	__hyp_stub_install	@ get into SVC mode, reversibly
231#endif
232		mov	r7, r1			@ save architecture ID
233		mov	r8, r2			@ save atags pointer
234
235#ifndef CONFIG_CPU_V7M
236		/*
237		 * Booting from Angel - need to enter SVC mode and disable
238		 * FIQs/IRQs (numeric definitions from angel arm.h source).
239		 * We only do this if we were in user mode on entry.
240		 */
241		mrs	r2, cpsr		@ get current mode
242		tst	r2, #3			@ not user?
243		bne	not_angel
244		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
245 ARM(		swi	0x123456	)	@ angel_SWI_ARM
246 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
247not_angel:
248		safe_svcmode_maskall r0
249		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
250						@ SPSR
251#endif
252		/*
253		 * Note that some cache flushing and other stuff may
254		 * be needed here - is there an Angel SWI call for this?
255		 */
256
257		/*
258		 * some architecture specific code can be inserted
259		 * by the linker here, but it should preserve r7, r8, and r9.
260		 */
261
/*
 * Work out the final kernel address (r4) and decide whether the caches
 * can be enabled now.
 *
 * In:  r8 = atags/device tree pointer
 * Out: r4 = final kernel address; bit 0 is set if cache_on was skipped
 *           here and must be called later
 * Clobbers r0-r3 and, when fdt_check_mem_start is called, sp and lr,
 * plus whatever cache_on clobbers when it is called.
 */
262		.text
263
264#ifdef CONFIG_AUTO_ZRELADDR
265		/*
266		 * Find the start of physical memory.  As we are executing
267		 * without the MMU on, we are in the physical address space.
268		 * We just need to get rid of any offset by aligning the
269		 * address.
270		 *
271		 * This alignment is a balance between the requirements of
272		 * different platforms - we have chosen 128MB to allow
273		 * platforms which align the start of their physical memory
274		 * to 128MB to use this feature, while allowing the zImage
275		 * to be placed within the first 128MB of memory on other
276		 * platforms.  Increasing the alignment means we place
277		 * stricter alignment requirements on the start of physical
278		 * memory, but relaxing it means that we break people who
279		 * are already placing their zImage in (eg) the top 64MB
280		 * of this range.
281		 */
282		mov	r0, pc
283		and	r0, r0, #0xf8000000
284#ifdef CONFIG_USE_OF
285		adr	r1, LC1
286#ifdef CONFIG_ARM_APPENDED_DTB
287		/*
288		 * Look for an appended DTB.  If found, we cannot use it to
289		 * validate the calculated start of physical memory, as its
290		 * memory nodes may need to be augmented by ATAGS stored at
291		 * an offset from the same start of physical memory.
292		 */
293		ldr	r2, [r1, #4]	@ get &_edata
294		add	r2, r2, r1	@ relocate it
295		ldr	r2, [r2]	@ get DTB signature
296		ldr	r3, =OF_DT_MAGIC
297		cmp	r2, r3		@ do we have a DTB there?
298		beq	1f		@ if yes, skip validation
299#endif /* CONFIG_ARM_APPENDED_DTB */
300
301		/*
302		 * Make sure we have some stack before calling C code.
303		 * No GOT fixup has occurred yet, but none of the code we're
304		 * about to call uses any global variables.
305		 */
306		ldr	sp, [r1]	@ get stack location
307		add	sp, sp, r1	@ apply relocation
308
309		/* Validate calculated start against passed DTB */
		/*
		 * r0 = calculated start, r1 = pointer passed by the boot
		 * loader.  The value left in r0 by the call is what gets
		 * used below (presumably the validated or corrected start
		 * address - confirm against fdt_check_mem_start).
		 */
310		mov	r1, r8
311		bl	fdt_check_mem_start
3121:
313#endif /* CONFIG_USE_OF */
314		/* Determine final kernel image address. */
315		add	r4, r0, #TEXT_OFFSET
316#else
317		ldr	r4, =zreladdr
318#endif
319
320		/*
321		 * Set up a page table only if it won't overwrite ourself.
322		 * That means r4 < pc || r4 - 16k page directory > &_end.
323		 * Given that r4 > &_end is very infrequent, we add a rough
324		 * additional 1MB of room for a possible appended DTB.
325		 */
		/*
		 * Flag flow: if pc >= r4 the first cmp leaves carry set, all
		 * "cc" instructions are skipped and cache_on is called.
		 * Otherwise r0 = pc + headroom is compared with r4; cache_on
		 * is called only if r4 >= r0, else bit 0 of r4 is set to
		 * record that it was skipped.
		 */
326		mov	r0, pc
327		cmp	r0, r4
328		ldrcc	r0, .Lheadroom
329		addcc	r0, r0, pc
330		cmpcc	r4, r0
331		orrcc	r4, r4, #1		@ remember we skipped cache_on
332		blcs	cache_on
333
/*
 * restart - (re)compute the position-dependent working registers.
 * Also the address jumped to (plus the relocation offset) after the
 * image has copied itself out of the way.
 *
 * Out:
 *   sp  = .L_user_stack_end at its run-time address
 *   r6  = _edata at its run-time address
 *   r9  = size of the decompressed image
 *   r10 = sp + MALLOC_SIZE, or _edata when built with CONFIG_ZBOOT_ROM
 *   r5  = 0 (no appended DTB size known yet)
 * Clobbers r0 and lr.
 */
334restart:	adr	r0, LC1
335		ldr	sp, [r0]
336		ldr	r6, [r0, #4]
337		add	sp, sp, r0
338		add	r6, r6, r0
339
340		get_inflated_image_size	r9, r10, lr
341
342#ifndef CONFIG_ZBOOT_ROM
343		/* malloc space is above the relocated stack (64k max) */
344		add	r10, sp, #MALLOC_SIZE
345#else
346		/*
347		 * With ZBOOT_ROM the bss/stack is non relocatable,
348		 * but someone could still run this code from RAM,
349		 * in which case our reference is _edata.
350		 */
351		mov	r10, r6
352#endif
353
354		mov	r5, #0			@ init dtb size to 0
355#ifdef CONFIG_ARM_APPENDED_DTB
356/*
357 *   r4  = final kernel address (possibly with LSB set)
358 *   r5  = appended dtb size (still unknown)
359 *   r6  = _edata
360 *   r7  = architecture ID
361 *   r8  = atags/device tree pointer
362 *   r9  = size of decompressed image
363 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
364 *   sp  = stack pointer
365 *
366 * if there are device trees (dtb) appended to zImage, advance r10 so that the
367 * dtb data will get relocated along with the kernel if necessary.
368 */
/*
 * On exit through dtb_check_done with a DTB present:
 *   r5  = DTB size rounded up to a multiple of 8
 *   r6, r10, sp advanced by r5
 *   r8  = address of the appended DTB (the old _edata)
 *   r9  = possibly enlarged to keep the DTB out of the kernel .bss
 * With no DTB present only lr and r1 are modified and r5 stays 0.
 */
369
370		ldr	lr, [r6, #0]
371		ldr	r1, =OF_DT_MAGIC
372		cmp	lr, r1
373		bne	dtb_check_done		@ not found
374
375#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
376		/*
377		 * OK... Let's do some funky business here.
378		 * If we do have a DTB appended to zImage, and we do have
379		 * an ATAG list around, we want the latter to be translated
380		 * and folded into the former here. No GOT fixup has occurred
381		 * yet, but none of the code we're about to call uses any
382		 * global variable.
383		*/
384
385		/* Get the initial DTB size */
386		ldr	r5, [r6, #4]
387		be32tocpu r5, r1
388		dbgadtb	r6, r5
389		/* 50% DTB growth should be good enough */
390		add	r5, r5, r5, lsr #1
391		/* preserve 64-bit alignment */
392		add	r5, r5, #7
393		bic	r5, r5, #7
394		/* clamp to 32KB min and 1MB max */
395		cmp	r5, #(1 << 15)
396		movlo	r5, #(1 << 15)
397		cmp	r5, #(1 << 20)
398		movhi	r5, #(1 << 20)
399		/* temporarily relocate the stack past the DTB work space */
400		add	sp, sp, r5
401
		/* atags_to_fdt(r0 = atag list, r1 = DTB, r2 = work space size) */
402		mov	r0, r8
403		mov	r1, r6
404		mov	r2, r5
405		bl	atags_to_fdt
406
407		/*
408		 * If returned value is 1, there is no ATAG at the location
409		 * pointed by r8.  Try the typical 0x100 offset from start
410		 * of RAM and hope for the best.
411		 */
		/*
		 * The flags set by the cmp are consumed by the bleq below;
		 * none of the instructions in between alter them.
		 */
412		cmp	r0, #1
413		sub	r0, r4, #TEXT_OFFSET
414		bic	r0, r0, #1
415		add	r0, r0, #0x100
416		mov	r1, r6
417		mov	r2, r5
418		bleq	atags_to_fdt
419
420		sub	sp, sp, r5
421#endif
422
423		mov	r8, r6			@ use the appended device tree
424
425		/*
426		 * Make sure that the DTB doesn't end up in the final
427		 * kernel's .bss area. To do so, we adjust the decompressed
428		 * kernel size to compensate if that .bss size is larger
429		 * than the relocated code.
430		 */
		/*
		 * r1 = _kernel_bss_size - (_edata - wont_overwrite); r9 is
		 * increased by that amount only when the subtraction yields
		 * an unsigned "higher" result.
		 */
431		ldr	r5, =_kernel_bss_size
432		adr	r1, wont_overwrite
433		sub	r1, r6, r1
434		subs	r1, r5, r1
435		addhi	r9, r9, r1
436
437		/* Get the current DTB size */
438		ldr	r5, [r6, #4]
439		be32tocpu r5, r1
440
441		/* preserve 64-bit alignment */
442		add	r5, r5, #7
443		bic	r5, r5, #7
444
445		/* relocate some pointers past the appended dtb */
446		add	r6, r6, r5
447		add	r10, r10, r5
448		add	sp, sp, r5
449dtb_check_done:
450#endif
451
452/*
453 * Check to see if we will overwrite ourselves.
454 *   r4  = final kernel address (possibly with LSB set)
455 *   r9  = size of decompressed image
456 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
457 * We basically want:
458 *   r4 - 16k page directory >= r10 -> OK
459 *   r4 + image length <= address of wont_overwrite -> OK
460 * Note: the possible LSB in r4 is harmless here.
461 */
/*
 * If neither condition holds, execution falls through to the
 * self-relocation code with r10 = r4 + image length (end of the
 * decompressed kernel) and r9 = address of wont_overwrite.
 */
462		add	r10, r10, #16384
463		cmp	r4, r10
464		bhs	wont_overwrite
465		add	r10, r4, r9
466		adr	r9, wont_overwrite
467		cmp	r10, r9
468		bls	wont_overwrite
469
470/*
471 * Relocate ourselves past the end of the decompressed kernel.
472 *   r6  = _edata
473 *   r10 = end of the decompressed kernel
474 * Because we always copy ahead, we need to do it from the end and go
475 * backward in case the source and destination overlap.
476 */
/*
 * Everything from "restart" (aligned down to 32 bytes) up to r6 is
 * copied.  Afterwards the caches are cleaned for the copy and
 * execution resumes at the relocated "restart", which recomputes sp,
 * r6, r9, r10 and r5.  r4, r7 and r8 are not written here.
 */
477		/*
478		 * Bump to the next 256-byte boundary with the size of
479		 * the relocation code added. This avoids overwriting
480		 * ourself when the offset is small.
481		 */
482		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
483		bic	r10, r10, #255
484
485		/* Get start of code we want to copy and align it down. */
486		adr	r5, restart
487		bic	r5, r5, #31
488
489/* Relocate the hyp vector base if necessary */
490#ifdef CONFIG_ARM_VIRT_EXT
		/* SPSR holds the CPU mode saved at entry (see "start") */
491		mrs	r0, spsr
492		and	r0, r0, #MODE_MASK
493		cmp	r0, #HYP_MODE
494		bne	1f
495
496		/*
497		 * Compute the address of the hyp vectors after relocation.
498		 * Call __hyp_set_vectors with the new address so that we
499		 * can HVC again after the copy.
500		 */
501		adr_l	r0, __hyp_stub_vectors
502		sub	r0, r0, r5
503		add	r0, r0, r10
504		bl	__hyp_set_vectors
5051:
506#endif
507
		/* r6 = end of source (exclusive), r9 = end of destination */
508		sub	r9, r6, r5		@ size to copy
509		add	r9, r9, #31		@ rounded up to a multiple
510		bic	r9, r9, #31		@ ... of 32 bytes
511		add	r6, r9, r5
512		add	r9, r9, r10
513
514#ifdef DEBUG
515		sub     r10, r6, r5
516		sub     r10, r9, r10
517		/*
518		 * We are about to copy the kernel to a new memory area.
519		 * The boundaries of the new memory area can be found in
520		 * r10 and r9, whilst r5 and r6 contain the boundaries
521		 * of the memory we are going to copy.
522		 * Calling dbgkc will help with the printing of this
523		 * information.
524		 */
525		dbgkc	r5, r6, r10, r9
526#endif
527
		/*
		 * Copy 32 bytes (eight registers) per iteration, walking
		 * both pointers downward until the source pointer reaches
		 * r5.  The cmp is placed before the stmdb; stmdb does not
		 * change the flags tested by bhi.
		 */
5281:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
529		cmp	r6, r5
530		stmdb	r9!, {r0 - r3, r10 - r12, lr}
531		bhi	1b
532
533		/* Preserve offset to relocated code. */
534		sub	r6, r9, r6
535
536		mov	r0, r9			@ start of relocated zImage
537		add	r1, sp, r6		@ end of relocated zImage
538		bl	cache_clean_flush
539
		/* continue at the relocated copy of "restart" */
540		badr	r0, restart
541		add	r0, r0, r6
542		mov	pc, r0
543
/*
 * wont_overwrite - reached once the image is known not to overlap the
 * decompressed kernel.  Computes the load delta from LC0 and fixes up
 * the GOT (and the BSS pointers) for it and for an appended DTB.
 * Clobbers r0-r3, r11 and r12.
 */
544wont_overwrite:
545		adr	r0, LC0
546		ldmia	r0, {r1, r2, r3, r11, r12}
547		sub	r0, r0, r1		@ calculate the delta offset
548
549/*
550 * If delta is zero, we are running at the address we were linked at.
551 *   r0  = delta
552 *   r2  = BSS start
553 *   r3  = BSS end
554 *   r4  = kernel execution address (possibly with LSB set)
555 *   r5  = appended dtb size (0 if not present)
556 *   r7  = architecture ID
557 *   r8  = atags pointer
558 *   r11 = GOT start
559 *   r12 = GOT end
560 *   sp  = stack pointer
561 */
		/* nothing to fix up when both the delta and the DTB size are zero */
562		orrs	r1, r0, r5
563		beq	not_relocated
564
565		add	r11, r11, r0
566		add	r12, r12, r0
567
568#ifndef CONFIG_ZBOOT_ROM
569		/*
570		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
571		 * we need to fix up pointers into the BSS region.
572		 * Note that the stack pointer has already been fixed up.
573		 */
574		add	r2, r2, r0
575		add	r3, r3, r0
576
577		/*
578		 * Relocate all entries in the GOT table.
579		 * Bump bss entries to _edata + dtb size
580		 */
		/*
		 * addhi executes only when entry >= bss_start (the first
		 * cmp yields "hs", so cmphs runs) and bss_end > entry.
		 */
5811:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
582		add	r1, r1, r0		@ This fixes up C references
583		cmp	r1, r2			@ if entry >= bss_start &&
584		cmphs	r3, r1			@       bss_end > entry
585		addhi	r1, r1, r5		@    entry += dtb size
586		str	r1, [r11], #4		@ next entry
587		cmp	r11, r12
588		blo	1b
589
590		/* bump our bss pointers too */
591		add	r2, r2, r5
592		add	r3, r3, r5
593
594#else
595
596		/*
597		 * Relocate entries in the GOT table.  We only relocate
598		 * the entries that are outside the (relocated) BSS region.
599		 */
6001:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
601		cmp	r1, r2			@ entry < bss_start ||
602		cmphs	r3, r1			@ _end < entry
603		addlo	r1, r1, r0		@ table.  This fixes up the
604		str	r1, [r11], #4		@ C references.
605		cmp	r11, r12
606		blo	1b
607#endif
608
/*
 * not_relocated - clear the BSS, enable the caches if that was
 * deferred, decompress the kernel and hand over to it.
 *   r2 = BSS start, r3 = BSS end
 * The clear loop stores 16 bytes per iteration and tests only
 * afterwards, so at least 16 bytes are always written and up to 12
 * bytes beyond r3 may be cleared when the BSS size is not a multiple
 * of 16.
 */
609not_relocated:	mov	r0, #0
6101:		str	r0, [r2], #4		@ clear bss
611		str	r0, [r2], #4
612		str	r0, [r2], #4
613		str	r0, [r2], #4
614		cmp	r2, r3
615		blo	1b
616
617		/*
618		 * Did we skip the cache setup earlier?
619		 * That is indicated by the LSB in r4.
620		 * Do it now if so.
621		 */
		/* bic does not alter the flags set by tst, so blne still sees them */
622		tst	r4, #1
623		bic	r4, r4, #1
624		blne	cache_on
625
626/*
627 * The C runtime environment should now be setup sufficiently.
628 * Set up some pointers, and start decompressing.
629 *   r4  = kernel execution address
630 *   r7  = architecture ID
631 *   r8  = atags pointer
632 */
		/*
		 * decompress_kernel(r0 = output address, r1 = start of
		 * malloc space, r2 = end of malloc space, r3 = arch ID)
		 */
633		mov	r0, r4
634		mov	r1, sp			@ malloc space above stack
635		add	r2, sp, #MALLOC_SIZE	@ 64k max
636		mov	r3, r7
637		bl	decompress_kernel
638
639		get_inflated_image_size	r1, r2, r3
640
		/* clean the decompressed image out of the caches, then turn them off */
641		mov	r0, r4			@ start of inflated image
642		add	r1, r1, r0		@ end of inflated image
643		bl	cache_clean_flush
644		bl	cache_off
645
646#ifdef CONFIG_ARM_VIRT_EXT
647		mrs	r0, spsr		@ Get saved CPU boot mode
648		and	r0, r0, #MODE_MASK
649		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
650		bne	__enter_kernel		@ boot kernel directly
651
652		adr_l	r0, __hyp_reentry_vectors
653		bl	__hyp_set_vectors
654		__HVC(0)			@ otherwise bounce to hyp mode
655
656		b	.			@ should never be reached
657#else
658		b	__enter_kernel
659#endif
660
/*
 * Literal data used for position-independent address computation.
 *
 * LC0 holds link-time absolute addresses; its first word is its own
 * link address, so (run-time address of LC0 - first word) gives the
 * load delta.  It is loaded with a single ldmia into the registers
 * named in the comments.
 */
660		.align	2
661		.type	LC0, #object
662LC0:		.word	LC0			@ r1
663		.word	__bss_start		@ r2
664		.word	_end			@ r3
665		.word	_got_start		@ r11
666		.word	_got_end		@ ip
667		.size	LC0, . - LC0
668
/* LC1 holds offsets relative to LC1 itself; add the address of LC1 to use them */
669		.type	LC1, #object
670LC1:		.word	.L_user_stack_end - LC1	@ sp
671		.word	_edata - LC1		@ r6
672		.size	LC1, . - LC1
673
/* Size of the image from restart to _end, plus 16KB page directory and 1MB DTB allowance */
674.Lheadroom:
675		.word	_end - restart + 16384 + 1024*1024
676
/* Offset from this word to the last four bytes of the compressed data */
677.Linflated_image_size_offset:
678		.long	(input_data_end - 4) - .
680
/*
 * params (CONFIG_ARCH_RPC only) - return the constant 0x10000100 in r0.
 * Global so that code outside this file can call it.
 */
680#ifdef CONFIG_ARCH_RPC
681		.globl	params
682params:		ldr	r0, =0x10000100		@ params_phys for RPC
683		mov	pc, lr
684		.ltorg
685		.align
686#endif
688
689/*
690 * dcache_line_size - get the minimum D-cache line size from the CTR register
691 * on ARMv7.
692 */
693		.macro	dcache_line_size, reg, tmp
694#ifdef CONFIG_CPU_V7M
695		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
696		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
697		ldr	\tmp, [\tmp]
698#else
699		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
700#endif
701		lsr	\tmp, \tmp, #16
702		and	\tmp, \tmp, #0xf		@ cache line size encoding
703		mov	\reg, #4			@ bytes per word
704		mov	\reg, \reg, lsl \tmp		@ actual cache line size
705		.endm
706
707/*
708 * Turn on the cache.  We need to setup some page tables so that we
709 * can have both the I and D caches on.
710 *
711 * We place the page tables 16k down from the kernel execution address,
712 * and we hope that nothing else is using it.  If we're using it, we
713 * will go pop!
714 *
715 * On entry,
716 *  r4 = kernel execution address
717 *  r7 = architecture number
718 *  r8 = atags pointer
719 * On exit,
720 *  r0, r1, r2, r3, r9, r10, r12 corrupted
721 * This routine must preserve:
722 *  r4, r7, r8
723 */
724		.align	5
725cache_on:	mov	r3, #8			@ cache_on function
726		b	call_cache_fn
727
728/*
729 * Initialize the highest priority protection region, PR7
730 * to cover all 32bit address and cacheable and bufferable.
731 */
732__armv4_mpu_cache_on:
733		mov	r0, #0x3f		@ 4G, the whole
734		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
735		mcr 	p15, 0, r0, c6, c7, 1
736
737		mov	r0, #0x80		@ PR7
738		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
739		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
740		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
741
742		mov	r0, #0xc000
743		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
744		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
745
746		mov	r0, #0
747		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
748		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
749		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
750		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
751						@ ...I .... ..D. WC.M
752		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
753		orr	r0, r0, #0x1000		@ ...1 .... .... ....
754
755		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
756
757		mov	r0, #0
758		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
759		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
760		mov	pc, lr
761
/*
 * __armv3_mpu_cache_on - "cache on" method for the ARM74x proc_types
 * entry.  Sets up PR7 and invalidates the cache.
 * Clobbers r0.  Returns to lr.
 *
 * NOTE(review): as written, the value built by the mrc/orr pair is
 * discarded by the following "mov r0, #0", so the control register is
 * written with 0 and the W/C/M bits are not set.  The existing "??"
 * comments already question this; confirm the intended behaviour on
 * real hardware before changing it.
 */
761__armv3_mpu_cache_on:
762		mov	r0, #0x3f		@ 4G, the whole
763		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
764
765		mov	r0, #0x80		@ PR7
766		mcr	p15, 0, r0, c2, c0, 0	@ cache on
767		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
768
769		mov	r0, #0xc000
770		mcr	p15, 0, r0, c5, c0, 0	@ access permission
771
772		mov	r0, #0
773		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
774		/*
775		 * ?? ARMv3 MMU does not allow reading the control register,
776		 * does this really work on ARMv3 MPU?
777		 */
778		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
779						@ .... .... .... WC.M
780		orr	r0, r0, #0x000d		@ .... .... .... 11.1
781		/* ?? this overwrites the value constructed above? */
782		mov	r0, #0
783		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
784
785		/* ?? invalidate for the second time? */
786		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
787		mov	pc, lr
789
/*
 * CB_BITS - section descriptor bits 3:2 used for the RAM mappings
 * built by __setup_mmu: 0x08 sets only bit 3 for write-through data
 * cache configurations, 0x0c sets both bit 3 and bit 2 otherwise.
 */
789#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
790#define CB_BITS 0x08
791#else
792#define CB_BITS 0x0c
793#endif
795
/*
 * __setup_mmu - build a flat 1:1 section page table covering the whole
 * 4GB address space (4096 entries of 1MB each).
 *
 * In:
 *   r4 = kernel execution address
 *   r6 = descriptor bits to use for RAM sections (set by the caller)
 * Out:
 *   r3 = page table base: r4 - 16KB, rounded down to a 16KB boundary
 * Clobbers r0, r1, r2, r9, r10.  Returns to lr.
 */
795__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
796		bic	r3, r3, #0xff		@ Align the pointer
797		bic	r3, r3, #0x3f00
798/*
799 * Initialise the page tables, turning on the cacheable and bufferable
800 * bits for the RAM area only.
801 */
		/*
		 * "RAM" is taken to be the 256MB starting at the page table
		 * address rounded down to 256KB.  r1 carries both the section
		 * address and the descriptor flags; r2 marks the end of the
		 * table.  bic does not alter the flags, so the orrlo/orrhs
		 * pair still acts on the result of the two compares.
		 */
802		mov	r0, r3
803		mov	r9, r0, lsr #18
804		mov	r9, r9, lsl #18		@ start of RAM
805		add	r10, r9, #0x10000000	@ a reasonable RAM size
806		mov	r1, #0x12		@ XN|U + section mapping
807		orr	r1, r1, #3 << 10	@ AP=11
808		add	r2, r3, #16384
8091:		cmp	r1, r9			@ if virt > start of RAM
810		cmphs	r10, r1			@   && end of RAM > virt
811		bic	r1, r1, #0x1c		@ clear XN|U + C + B
812		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
813		orrhs	r1, r1, r6		@ set RAM section settings
814		str	r1, [r0], #4		@ 1:1 mapping
815		add	r1, r1, #1048576
816		teq	r0, r2
817		bne	1b
818/*
819 * If ever we are running from Flash, then we surely want the cache
820 * to be enabled also for our execution instance...  We map 2MB of it
821 * so there is no map overlap problem for up to 1 MB compressed kernel.
822 * If the execution is in RAM then we would only be duplicating the above.
823 */
		/* overwrite the two entries for the 1MB section holding pc and the next one */
824		orr	r1, r6, #0x04		@ ensure B is set for this
825		orr	r1, r1, #3 << 10
826		mov	r2, pc
827		mov	r2, r2, lsr #20
828		orr	r1, r1, r2, lsl #20
829		add	r0, r3, r2, lsl #2
830		str	r1, [r0], #4
831		add	r1, r1, #1048576
832		str	r1, [r0]
833		mov	pc, lr
834ENDPROC(__setup_mmu)
836
/*
 * __armv6_mmu_cache_on - "cache on" method for the ARMv6 proc_types
 * entry.  Clears SCTLR bit 1 (A) and sets bit 22 (U), then continues
 * in __armv4_mmu_cache_on, which returns to the original caller.
 */
836@ Enable unaligned access on v6, to allow better code generation
837@ for the decompressor C code:
838__armv6_mmu_cache_on:
839		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
840		bic	r0, r0, #2		@ A (no unaligned access fault)
841		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
842		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
843		b	__armv4_mmu_cache_on
845
/*
 * __arm926ejs_mmu_cache_on - ARM926EJ-S "cache on" method.  Optionally
 * selects write-through data cache mode, then falls straight through
 * into __armv4_mmu_cache_on.
 */
845__arm926ejs_mmu_cache_on:
846#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
847		mov	r0, #4			@ put dcache in WT mode
848		mcr	p15, 7, r0, c15, c0, 0
849#endif
850
/*
 * __armv4_mmu_cache_on - generic ARMv4/v5 "cache on" method.
 * With CONFIG_MMU: builds the page table (__setup_mmu, r3 = table
 * base), flushes the TLBs and enables MMU and caches through
 * __common_mmu_cache_on.  Without CONFIG_MMU it simply returns.
 * The return address is kept in r12 because the nested bl calls
 * overwrite lr.
 * In:  r4 = kernel execution address
 * Clobbers r0-r3, r6, r9, r10, r12.
 */
851__armv4_mmu_cache_on:
852		mov	r12, lr
853#ifdef CONFIG_MMU
854		mov	r6, #CB_BITS | 0x12	@ U
855		bl	__setup_mmu
856		mov	r0, #0
857		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
858		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
859		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
860		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
861		orr	r0, r0, #0x0030
862 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
863		bl	__common_mmu_cache_on
864		mov	r0, #0
865		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
866#endif
867		mov	pc, r12
869
/*
 * __armv7_mmu_cache_on - "cache on" method for the "new CPU Id"
 * proc_types entry.
 *
 * Enables the CP15 barrier operations, then (with CONFIG_MMU, and only
 * if the low nibble of ID_MMFR0 is non-zero) builds the page table and
 * programs TTBR0, the domain access control register and the TTB
 * control register before enabling the MMU together with the caches.
 * The I-cache, D-cache and write-buffer bits are set unconditionally.
 * The return address is kept in r12.
 *
 * In:  r4 = kernel execution address
 * Clobbers r0, r1, r6, r11, r12 and whatever __setup_mmu clobbers.
 */
869__armv7_mmu_cache_on:
870		enable_cp15_barriers	r11
871		mov	r12, lr
872#ifdef CONFIG_MMU
873		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
874		tst	r11, #0xf		@ VMSA
875		movne	r6, #CB_BITS | 0x02	@ !XN
876		blne	__setup_mmu
877		mov	r0, #0
878		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		/* re-test: the flags may have been changed inside __setup_mmu */
879		tst	r11, #0xf		@ VMSA
880		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
881#endif
		/*
		 * None of the instructions from here up to the conditional
		 * "ne" group below write the flags, so that group still acts
		 * on the VMSA test above.
		 */
882		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
883		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
884		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
885		orr	r0, r0, #0x003c		@ write buffer
886		bic	r0, r0, #2		@ A (no unaligned access fault)
887		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
888						@ (needed for ARM1176)
889#ifdef CONFIG_MMU
890 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
891		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
892		orrne	r0, r0, #1		@ MMU enabled
893		movne	r1, #0xfffffffd		@ domain 0 = client
894		bic     r6, r6, #1 << 31        @ 32-bit translation system
895		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
896		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
897		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
898		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
899#endif
900		mcr	p15, 0, r0, c7, c5, 4	@ ISB
901		mcr	p15, 0, r0, c1, c0, 0	@ load control register
902		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
903		mov	r0, #0
904		mcr	p15, 0, r0, c7, c5, 4	@ ISB
905		mov	pc, r12
907
/*
 * __fa526_cache_on - "cache on" method for the FA526 proc_types entry.
 * Builds the page table, invalidates the cache and TLB, then enables
 * the I-cache plus whatever __common_mmu_cache_on adds.
 * The return address is kept in r12.
 * In:  r4 = kernel execution address
 * Clobbers r0-r3, r6, r9, r10, r12.
 */
907__fa526_cache_on:
908		mov	r12, lr
909		mov	r6, #CB_BITS | 0x12	@ U
910		bl	__setup_mmu
911		mov	r0, #0
912		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
913		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
914		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
915		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
916		orr	r0, r0, #0x1000		@ I-cache enable
917		bl	__common_mmu_cache_on
918		mov	r0, #0
919		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
920		mov	pc, r12
922
/*
 * __common_mmu_cache_on - load the page table pointer and domain
 * access control register, then write the control register.
 * In:  r0 = control register value prepared by the caller
 *      r3 = page table base
 * Clobbers r0 and r1.  Returns to lr.
 *
 * With DEBUG defined the "Write buffer, mmu" bits are not added.
 * The body is empty for CONFIG_THUMB2_KERNEL builds, in which case the
 * label falls through into the code that follows; none of the callers
 * in this file look reachable in such a build - NOTE(review): confirm.
 */
921__common_mmu_cache_on:
922#ifndef CONFIG_THUMB2_KERNEL
923#ifndef DEBUG
924		orr	r0, r0, #0x000d		@ Write buffer, mmu
925#endif
926		mov	r1, #-1
927		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
928		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
929		b	1f
930		.align	5			@ cache line aligned
		/*
		 * "r0, lsr #32" always evaluates to 0, so this returns to lr
		 * while making the return depend on the value read back.
		 */
9311:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
932		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
933		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
934#endif
937
/* Size in bytes of one proc_types entry: two ID words plus three method slots */
936#define PROC_ENTRY_SIZE (4*5)
937
938/*
939 * Here follow the relocatable cache support functions for the
940 * various processors.  This is a generic hook for locating an
941 * entry and jumping to an instruction at the specified offset
942 * from the start of the block.  Please note this is all position
943 * independent code.
944 *
945 *  r1  = corrupted
946 *  r2  = corrupted
947 *  r3  = block offset
948 *  r9  = corrupted
949 *  r12 = corrupted
950 */
/*
 * The selected method is entered by writing pc, not by a call, so lr
 * still holds the return address of whoever called cache_on,
 * cache_off, etc.  The search always terminates because the last
 * proc_types entry has a mask of 0 and therefore matches every ID.
 */
951
952call_cache_fn:	adr	r12, proc_types
953#ifdef CONFIG_CPU_CP15
954		mrc	p15, 0, r9, c0, c0	@ get processor ID
955#elif defined(CONFIG_CPU_V7M)
956		/*
957		 * On v7-M the processor id is located in the V7M_SCB_CPUID
958		 * register, but as cache handling is IMPLEMENTATION DEFINED on
959		 * v7-M (if existent at all) we just return early here.
960		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
961		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
962		 * use cp15 registers that are not implemented on v7-M.
963		 */
964		bx	lr
965#else
966		ldr	r9, =CONFIG_PROCESSOR_ID
967#endif
9681:		ldr	r1, [r12, #0]		@ get value
969		ldr	r2, [r12, #4]		@ get mask
970		eor	r1, r1, r9		@ (real ^ match)
971		tst	r1, r2			@       & mask
972 ARM(		addeq	pc, r12, r3		) @ call cache function
973 THUMB(		addeq	r12, r3			)
974 THUMB(		moveq	pc, r12			) @ call cache function
975		add	r12, r12, #PROC_ENTRY_SIZE
976		b	1b
979
980/*
981 * Table for cache operations.  This is basically:
982 *   - CPU ID match
983 *   - CPU ID mask
984 *   - 'cache on' method instruction
985 *   - 'cache off' method instruction
986 *   - 'cache flush' method instruction
987 *
988 * We match an entry using: ((real_id ^ match) & mask) == 0
989 *
990 * Writethrough caches generally only need 'on' and 'off'
991 * methods.  Writeback caches _must_ have the flush method
992 * defined.
993 */
994		.align	2
995		.type	proc_types,#object
996proc_types:
997		.word	0x41000000		@ old ARM ID
998		.word	0xff00f000
999		mov	pc, lr
1000 THUMB(		nop				)
1001		mov	pc, lr
1002 THUMB(		nop				)
1003		mov	pc, lr
1004 THUMB(		nop				)
1005
1006		.word	0x41007000		@ ARM7/710
1007		.word	0xfff8fe00
1008		mov	pc, lr
1009 THUMB(		nop				)
1010		mov	pc, lr
1011 THUMB(		nop				)
1012		mov	pc, lr
1013 THUMB(		nop				)
1014
1015		.word	0x41807200		@ ARM720T (writethrough)
1016		.word	0xffffff00
1017		W(b)	__armv4_mmu_cache_on
1018		W(b)	__armv4_mmu_cache_off
1019		mov	pc, lr
1020 THUMB(		nop				)
1021
1022		.word	0x41007400		@ ARM74x
1023		.word	0xff00ff00
1024		W(b)	__armv3_mpu_cache_on
1025		W(b)	__armv3_mpu_cache_off
1026		W(b)	__armv3_mpu_cache_flush
1027
1028		.word	0x41009400		@ ARM94x
1029		.word	0xff00ff00
1030		W(b)	__armv4_mpu_cache_on
1031		W(b)	__armv4_mpu_cache_off
1032		W(b)	__armv4_mpu_cache_flush
1033
1034		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
1035		.word	0xff0ffff0
1036		W(b)	__arm926ejs_mmu_cache_on
1037		W(b)	__armv4_mmu_cache_off
1038		W(b)	__armv5tej_mmu_cache_flush
1039
1040		.word	0x00007000		@ ARM7 IDs
1041		.word	0x0000f000
1042		mov	pc, lr
1043 THUMB(		nop				)
1044		mov	pc, lr
1045 THUMB(		nop				)
1046		mov	pc, lr
1047 THUMB(		nop				)
1048
1049		@ Everything from here on will be the new ID system.
1050
1051		.word	0x4401a100		@ sa110 / sa1100
1052		.word	0xffffffe0
1053		W(b)	__armv4_mmu_cache_on
1054		W(b)	__armv4_mmu_cache_off
1055		W(b)	__armv4_mmu_cache_flush
1056
1057		.word	0x6901b110		@ sa1110
1058		.word	0xfffffff0
1059		W(b)	__armv4_mmu_cache_on
1060		W(b)	__armv4_mmu_cache_off
1061		W(b)	__armv4_mmu_cache_flush
1062
1063		.word	0x56056900
1064		.word	0xffffff00		@ PXA9xx
1065		W(b)	__armv4_mmu_cache_on
1066		W(b)	__armv4_mmu_cache_off
1067		W(b)	__armv4_mmu_cache_flush
1068
1069		.word	0x56158000		@ PXA168
1070		.word	0xfffff000
1071		W(b)	__armv4_mmu_cache_on
1072		W(b)	__armv4_mmu_cache_off
1073		W(b)	__armv5tej_mmu_cache_flush
1074
1075		.word	0x56050000		@ Feroceon
1076		.word	0xff0f0000
1077		W(b)	__armv4_mmu_cache_on
1078		W(b)	__armv4_mmu_cache_off
1079		W(b)	__armv5tej_mmu_cache_flush
1080
1081#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1082		/* this conflicts with the standard ARMv5TE entry */
1083		.long	0x41009260		@ Old Feroceon
1084		.long	0xff00fff0
1085		b	__armv4_mmu_cache_on
1086		b	__armv4_mmu_cache_off
1087		b	__armv5tej_mmu_cache_flush
1088#endif
1089
1090		.word	0x66015261		@ FA526
1091		.word	0xff01fff1
1092		W(b)	__fa526_cache_on
1093		W(b)	__armv4_mmu_cache_off
1094		W(b)	__fa526_cache_flush
1095
1096		@ These match on the architecture ID
1097
1098		.word	0x00020000		@ ARMv4T
1099		.word	0x000f0000
1100		W(b)	__armv4_mmu_cache_on
1101		W(b)	__armv4_mmu_cache_off
1102		W(b)	__armv4_mmu_cache_flush
1103
1104		.word	0x00050000		@ ARMv5TE
1105		.word	0x000f0000
1106		W(b)	__armv4_mmu_cache_on
1107		W(b)	__armv4_mmu_cache_off
1108		W(b)	__armv4_mmu_cache_flush
1109
1110		.word	0x00060000		@ ARMv5TEJ
1111		.word	0x000f0000
1112		W(b)	__armv4_mmu_cache_on
1113		W(b)	__armv4_mmu_cache_off
1114		W(b)	__armv5tej_mmu_cache_flush
1115
1116		.word	0x0007b000		@ ARMv6
1117		.word	0x000ff000
1118		W(b)	__armv6_mmu_cache_on
1119		W(b)	__armv4_mmu_cache_off
1120		W(b)	__armv6_mmu_cache_flush
1121
1122		.word	0x000f0000		@ new CPU Id
1123		.word	0x000f0000
1124		W(b)	__armv7_mmu_cache_on
1125		W(b)	__armv7_mmu_cache_off
1126		W(b)	__armv7_mmu_cache_flush
1127
1128		.word	0			@ unrecognised type
1129		.word	0
1130		mov	pc, lr
1131 THUMB(		nop				)
1132		mov	pc, lr
1133 THUMB(		nop				)
1134		mov	pc, lr
1135 THUMB(		nop				)
1136
1137		.size	proc_types, . - proc_types
1138
1139		/*
1140		 * If you get a "non-constant expression in ".if" statement"
1141		 * error from the assembler on this line, check that you have
1142		 * not accidentally written a "b" instruction where you should
1143		 * have written W(b).
1144		 */
1145		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1146		.error "The size of one or more proc_types entries is wrong."
1147		.endif
1148
1149/*
1150 * Turn off the Cache and MMU.  ARMv3 does not support
1151 * reading the control register, but ARMv4 does.
1152 *
 * This entry point only loads r3 with 12 and branches to call_cache_fn.
 * In the proc_types records each entry is two .word values followed by
 * the cache_on, cache_off and cache_flush branches (4 bytes each), so
 * 12 is the offset of the cache_off slot.  The dispatch itself is done
 * by call_cache_fn, which is outside this part of the file.
 *
1153 * On exit,
1154 *  r0, r1, r2, r3, r9, r12 corrupted
1155 * This routine must preserve:
1156 *  r4, r7, r8
1157 */
1158		.align	5
1159cache_off:	mov	r3, #12			@ cache_off function (entry offset 12)
1160		b	call_cache_fn		@ plain branch: lr is left untouched
1161
/*
 * cache_off handler used by the ARMv4 MPU proc_types entry.
 * Clears bits 0, 2 and 3 of the CP15 c1 control register, then drains
 * the write buffer and flushes the D- and I-caches.
 * Overwrites r0 and returns through lr.
 */
1162__armv4_mpu_cache_off:
1163		mrc	p15, 0, r0, c1, c0	@ read control register
1164		bic	r0, r0, #0x000d		@ clear bits 0, 2 and 3
1165		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1166		mov	r0, #0			@ zero operand for the ops below
1167		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1168		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1169		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1170		mov	pc, lr
1171
/*
 * ARMv3 MPU variant of the cache_off handler.
 * Clears bits 0, 2 and 3 of the CP15 c1 control register and then
 * invalidates the whole cache with the v3 operation (c7, c0, 0).
 * Overwrites r0 and returns through lr.
 * NOTE(review): no proc_types entry using this label is visible in
 * this part of the file - confirm where it is referenced.
 */
1172__armv3_mpu_cache_off:
1173		mrc	p15, 0, r0, c1, c0	@ read control register
1174		bic	r0, r0, #0x000d		@ clear bits 0, 2 and 3
1175		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1176		mov	r0, #0
1177		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1178		mov	pc, lr
1179
/*
 * cache_off handler shared by most of the MMU based proc_types entries.
 * With CONFIG_MMU: clears bits 0, 2 and 3 of the CP15 c1 control
 * register, then invalidates the whole cache and the whole TLB.
 * Without CONFIG_MMU the body is compiled out and this just returns.
 * Overwrites r0 (CONFIG_MMU only) and returns through lr.
 */
1180__armv4_mmu_cache_off:
1181#ifdef CONFIG_MMU
1182		mrc	p15, 0, r0, c1, c0	@ read control register
1183		bic	r0, r0, #0x000d		@ clear bits 0, 2 and 3
1184		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1185		mov	r0, #0
1186		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1187		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1188#endif
1189		mov	pc, lr
1190
/*
 * cache_off handler for the "new CPU Id" (ARMv7) proc_types entry.
 * With CONFIG_MMU bits 0 and 2 of the CP15 c1 control register are
 * cleared and the TLB is invalidated; without CONFIG_MMU only bit 2 is
 * cleared and the TLB operation is compiled out.  In both cases the
 * branch target cache is invalidated, followed by the CP15 DSB and ISB
 * operations.
 * Overwrites r0 and returns through lr.
 */
1191__armv7_mmu_cache_off:
1192		mrc	p15, 0, r0, c1, c0	@ read control register
1193#ifdef CONFIG_MMU
1194		bic	r0, r0, #0x0005		@ clear bits 0 and 2
1195#else
1196		bic	r0, r0, #0x0004		@ clear bit 2 only
1197#endif
1198		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1199		mov	r0, #0
1200#ifdef CONFIG_MMU
1201		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1202#endif
1203		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1204		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1205		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1206		mov	pc, lr
1207
1208/*
1209 * Clean and flush the cache to maintain consistency.
1210 *
 * Loads r3 with 16 (the offset of the cache_flush branch inside a
 * proc_types entry: two .word values plus two 4-byte branches come
 * first), copies the end address into r11 and branches to
 * call_cache_fn, which is outside this part of the file.  The copy in
 * r11 is what __armv7_mmu_cache_flush uses as its end address.
 *
1211 * On entry,
1212 *  r0 = start address
1213 *  r1 = end address (exclusive)
1214 * On exit,
1215 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1216 * This routine must preserve:
1217 *  r4, r6, r7, r8
1218 */
1219		.align	5
1220cache_clean_flush:
1221		mov	r3, #16			@ cache_flush function (entry offset 16)
1222		mov	r11, r1			@ keep end address where r1 may be reused
1223		b	call_cache_fn		@ plain branch: lr is left untouched
1224
/*
 * cache_flush handler for the ARMv4 MPU proc_types entry.
 * Returns at once when bit 0 of r4 is set.  Otherwise it walks 8
 * segments of 64 entries, issuing a clean & invalidate by index for
 * each, then invalidates the I-cache and drains the write buffer.
 * Overwrites r1, r2, r3; ip is passed as the operand of several CP15
 * writes without being initialised here.
 */
1225__armv4_mpu_cache_flush:
1226		tst	r4, #1			@ bit 0 of r4 set?
1227		movne	pc, lr			@ yes: nothing to do
1228		mov	r2, #1			@ r2 != 0, tested by the teq below
1229		mov	r3, #0
1230		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1231		mov	r1, #7 << 5		@ 8 segments
12321:		orr	r3, r1, #63 << 26	@ 64 entries
12332:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1234		subs	r3, r3, #1 << 26	@ next entry; carry clear on borrow
1235		bcs	2b			@ entries 63 to 0
1236		subs 	r1, r1, #1 << 5		@ next segment; carry clear on borrow
1237		bcs	1b			@ segments 7 to 0
1238
1239		teq	r2, #0			@ r2 was set to 1 above, so always NE
1240		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1241		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1242		mov	pc, lr
1243
/*
 * cache_flush handler for the FA526 proc_types entry.
 * Returns at once when bit 0 of r4 is set; otherwise cleans and
 * invalidates the D-cache, flushes the I-cache and drains the write
 * buffer.  Overwrites r1 and returns through lr.
 */
1244__fa526_cache_flush:
1245		tst	r4, #1			@ bit 0 of r4 set?
1246		movne	pc, lr			@ yes: nothing to do
1247		mov	r1, #0
1248		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1249		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1250		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1251		mov	pc, lr
1252
/*
 * cache_flush handler for the ARMv6 proc_types entry.
 * The two clean+invalidate operations are conditional (mcreq) and run
 * only when bit 0 of r4 is clear.  The I+BTB invalidate and the write
 * buffer drain always run.  Overwrites r1 and returns through lr.
 */
1253__armv6_mmu_cache_flush:
1254		mov	r1, #0
1255		tst	r4, #1			@ Z set when bit 0 of r4 is clear
1256		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1257		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1258		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1259		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1260		mov	pc, lr
1261
/*
 * cache_flush handler for the "new CPU Id" (ARMv7) proc_types entry.
 *
 * On entry,
 *  r0  = start address
 *  r11 = end address (exclusive), as copied from r1 by cache_clean_flush
 *  r4  = bit 0 set means skip the D-cache maintenance entirely
 *
 * When bits [19:16] of ID_MMFR1 are zero the range r0..r11 is cleaned
 * and invalidated line by line (by VA); otherwise a single whole
 * D-cache clean+invalidate is issued.  Both paths end in iflush, which
 * invalidates I+BTB between CP15 DSB/ISB operations.
 *
 * The loop bound is compared as an unsigned quantity: these are
 * addresses, and a signed comparison would end the loop immediately for
 * any range that crosses 0x80000000, leaving it uncleaned.
 *
 * Overwrites r0, r1, r2, r10, r11 and returns through lr.
 */
1262__armv7_mmu_cache_flush:
1263		enable_cp15_barriers	r10
1264		tst	r4, #1			@ bit 0 of r4 set?
1265		bne	iflush			@ yes: skip D-cache maintenance
1266		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1267		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1268		mov	r10, #0			@ mov leaves the flags from tst intact
1269		beq	hierarchical
1270		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1271		b	iflush
1272hierarchical:
1273		dcache_line_size r1, r2		@ r1 := dcache min line size
1274		sub	r2, r1, #1		@ r2 := line size mask
1275		bic	r0, r0, r2		@ round down start to line size
1276		sub	r11, r11, #1		@ end address is exclusive
1277		bic	r11, r11, r2		@ round down end to line size
12780:		cmp	r0, r11			@ finished?
1279		bhi	iflush			@ unsigned compare: these are addresses
1280		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
1281		add	r0, r0, r1		@ advance by one cache line
1282		b	0b
1283iflush:
1284		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1285		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1286		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1287		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1288		mov	pc, lr
1289
/*
 * cache_flush handler used by the ARM926EJ-S, PXA168, Feroceon and
 * ARMv5TEJ proc_types entries.
 * Returns at once when bit 0 of r4 is set.  Otherwise it repeats the
 * "test, clean and invalidate" D-cache operation, which writes its
 * result straight into the condition flags, until Z is set; then it
 * flushes the I-cache and drains the write buffer.
 * r0 is passed unchanged as the operand of the last two CP15 writes.
 */
1290__armv5tej_mmu_cache_flush:
1291		tst	r4, #1			@ bit 0 of r4 set?
1292		movne	pc, lr			@ yes: nothing to do
12931:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
1294		bne	1b			@ repeat until the operation sets Z
1295		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1296		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1297		mov	pc, lr
1298
/*
 * cache_flush handler used by the ARMv4/ARMv4T/ARMv5TE style entries.
 * Returns at once when bit 0 of r4 is set.  Otherwise the D-cache is
 * flushed in software: starting at the current pc rounded down to 64
 * bytes, one word is loaded per cache line over a window of twice the
 * D-cache size.  The size and line length come from the cache type
 * register when it differs from r9, else the 32K / 32 byte defaults are
 * used.  Afterwards the I- and D-caches are flushed and the write
 * buffer is drained.
 * NOTE(review): r9 is presumably the main ID value left there by
 * call_cache_fn (a CPU without a cache type register returns the main
 * ID instead) - confirm against call_cache_fn.
 * Overwrites r1, r2, r3, r11 and returns through lr.
 */
1299__armv4_mmu_cache_flush:
1300		tst	r4, #1			@ bit 0 of r4 set?
1301		movne	pc, lr			@ yes: nothing to do
1302		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1303		mov	r11, #32		@ default: 32 byte line size
1304		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1305		teq	r3, r9			@ cache ID register present?
1306		beq	no_cache_id		@ same as r9: keep the defaults
1307		mov	r1, r3, lsr #18
1308		and	r1, r1, #7		@ r1 = bits [20:18] of cache type
1309		mov	r2, #1024
1310		mov	r2, r2, lsl r1		@ base dcache size *2
1311		tst	r3, #1 << 14		@ test M bit
1312		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1313		mov	r3, r3, lsr #12
1314		and	r3, r3, #3		@ r3 = bits [13:12] of cache type
1315		mov	r11, #8
1316		mov	r11, r11, lsl r3	@ cache line size in bytes
1317no_cache_id:
1318		mov	r1, pc			@ read from the code around here
1319		bic	r1, r1, #63		@ align to longest cache line
1320		add	r2, r1, r2		@ r2 = end of the read window
13211:
1322 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1323 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1324 THUMB(		add     r1, r1, r11		)
1325		teq	r1, r2			@ reached the end of the window?
1326		bne	1b
1327
1328		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1329		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1330		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1331		mov	pc, lr
1332
/*
 * ARMv3 cache_flush handler; the MMU and MPU variants share one body.
 * Returns at once when bit 0 of r4 is set, otherwise invalidates the
 * whole cache with the v3 operation.  Overwrites r1.
 */
1333__armv3_mmu_cache_flush:
1334__armv3_mpu_cache_flush:
1335		tst	r4, #1			@ bit 0 of r4 set?
1336		movne	pc, lr			@ yes: nothing to do
1337		mov	r1, #0
1338		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1339		mov	pc, lr
1340
1341/*
1342 * Various debugging routines for printing hex characters and
1343 * memory, which again must be relocatable.
1344 */
1345#ifdef DEBUG
1346		.align	2
1347		.type	phexbuf,#object
1348phexbuf:	.space	12		@ scratch buffer for phex: digits plus NUL
1349		.size	phexbuf, . - phexbuf
1350
@ phex: print the low r1 hex digits of r0 (upper case), most significant
@ digit first.  The string is built backwards in phexbuf and then
@ handed to puts, which also performs the return to the caller.
@ On entry: r0 = value, r1 = number of digits (memdump passes 8;
@ phexbuf holds 12 bytes including the terminating NUL).
1351@ phex corrupts {r0, r1, r2, r3}
1352phex:		adr	r3, phexbuf
1353		mov	r2, #0
1354		strb	r2, [r3, r1]		@ NUL terminator after the last digit
13551:		subs	r1, r1, #1		@ step to the previous digit position
1356		movmi	r0, r3			@ all digits stored: r0 = string
1357		bmi	puts			@ print it; puts returns through lr
1358		and	r2, r0, #15		@ lowest nibble
1359		mov	r0, r0, lsr #4
1360		cmp	r2, #10
1361		addge	r2, r2, #7		@ 10..15: skip from after 9 to A
1362		add	r2, r2, #'0'
1363		strb	r2, [r3, r1]
1364		b	1b
1365
@ puts: write the NUL terminated string at r0 to the debug port set up
@ by loadsp/writeb.  A fixed delay loop follows every character, and
@ each newline is followed by a carriage return.
1366@ puts corrupts {r0, r1, r2, r3}
1367puts:		loadsp	r3, r2, r1
13681:		ldrb	r2, [r0], #1		@ next character, advance pointer
1369		teq	r2, #0
1370		moveq	pc, lr			@ NUL: done
13712:		writeb	r2, r3, r1
1372		mov	r1, #0x00020000		@ delay loop count
13733:		subs	r1, r1, #1
1374		bne	3b
1375		teq	r2, #'\n'
1376		moveq	r2, #'\r'		@ newline: send a carriage return too
1377		beq	2b
1378		teq	r0, #0			@ r0 == 0 only when entered from putc
1379		bne	1b
1380		mov	pc, lr
@ putc: write the single character in r0.  It reuses the output path of
@ puts by jumping to local label 2 above with r0 = 0, which makes the
@ "teq r0, #0" check return after that one character.
1381@ putc corrupts {r0, r1, r2, r3}
1382putc:
1383		mov	r2, r0			@ character goes in r2, as in puts
1384		loadsp	r3, r1, r0
1385		mov	r0, #0			@ marks the single character case
1386		b	2b
1387
@ memdump: hex dump of 64 words starting at the address in r0.
@ Each output line shows the address, a colon and 8 words, with an
@ extra space after the fourth word.  The return address is kept in r10
@ because the bl calls below overwrite lr.
1388@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1389memdump:	mov	r12, r0			@ r12 = base address
1390		mov	r10, lr			@ save return address
1391		mov	r11, #0			@ r11 = word index
13922:		mov	r0, r11, lsl #2
1393		add	r0, r0, r12		@ address of the first word of the line
1394		mov	r1, #8			@ 8 hex digits
1395		bl	phex
1396		mov	r0, #':'
1397		bl	putc
13981:		mov	r0, #' '
1399		bl	putc
1400		ldr	r0, [r12, r11, lsl #2]	@ word at index r11
1401		mov	r1, #8
1402		bl	phex
1403		and	r0, r11, #7
1404		teq	r0, #3			@ after the fourth word of a line
1405		moveq	r0, #' '
1406		bleq	putc			@ print an extra space
1407		and	r0, r11, #7
1408		add	r11, r11, #1
1409		teq	r0, #7			@ eighth word of the line done?
1410		bne	1b
1411		mov	r0, #'\n'
1412		bl	putc
1413		cmp	r11, #64		@ 64 words in total
1414		blt	2b
1415		mov	pc, r10			@ return through the saved lr
1416#endif
1417
1418		.ltorg
1419
/*
 * Vector table with eight 4-byte branch slots, aligned to 32 bytes.
 * Every slot spins in place except the "hyp" slot, which branches to
 * __enter_kernel, and, with CONFIG_EFI_STUB, the third slot, which
 * branches to __enter_kernel_from_hyp.  efi_enter_kernel installs this
 * table as the HYP vector base when it keeps running in HYP mode.
 */
1420#ifdef CONFIG_ARM_VIRT_EXT
1421.align 5
1422__hyp_reentry_vectors:
1423		W(b)	.			@ reset
1424		W(b)	.			@ undef
1425#ifdef CONFIG_EFI_STUB
1426		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
1427#else
1428		W(b)	.			@ svc
1429#endif
1430		W(b)	.			@ pabort
1431		W(b)	.			@ dabort
1432		W(b)	__enter_kernel		@ hyp
1433		W(b)	.			@ irq
1434		W(b)	.			@ fiq
1435#endif /* CONFIG_ARM_VIRT_EXT */
1436
/*
 * Final jump into the kernel image.
 * Loads the kernel entry registers (r0 = 0, r1 = r7, r2 = r8) and
 * transfers control to the address held in r4.  Does not return.
 */
1437__enter_kernel:
1438		mov	r0, #0			@ must be 0
1439		mov	r1, r7			@ restore architecture number
1440		mov	r2, r8			@ restore atags pointer
1441 ARM(		mov	pc, r4		)	@ call kernel
1442 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1443 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1444
1445reloc_code_end:
1446
1447#ifdef CONFIG_EFI_STUB
/*
 * Target of the third slot of __hyp_reentry_vectors (CONFIG_EFI_STUB).
 * Clears bits 0 and 2 of HSCTLR, issues an isb and continues in
 * __enter_kernel.  Overwrites r0.
 */
1448__enter_kernel_from_hyp:
1449		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
1450		bic	r0, r0, #0x5		@ disable MMU and caches
1451		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
1452		isb				@ make the change take effect first
1453		b	__enter_kernel
1454
/*
 * Entry point used by the EFI stub to hand over to the decompressor.
 *
 * On entry,
 *  r0 = image base (kept in r4)
 *  r1 = DT pointer (kept in r8)
 *  r2 = DT size in bytes (DT end is computed below as r8 + r2)
 *
 * The code between call_cache_fn and the literal at 0: is cleaned to
 * memory first, the boot mode is recorded in SPSR, bit 0 of r4 is set
 * when the MMU is found disabled, the DT is cleaned to memory, and
 * finally the private stack is selected before branching to
 * wont_overwrite with r5 = 0 and r7 = 0xFFFFFFFF.
 *
 * cache_clean_flush corrupts r2 but preserves r7, so the DT size is
 * parked in r7 across the first call.  r7 has no other use until it is
 * loaded with the machine ID at the very end.
 */
1455ENTRY(efi_enter_kernel)
1456		mov	r4, r0			@ preserve image base
1457		mov	r8, r1			@ preserve DT pointer
		mov	r7, r2			@ preserve DT size across the flush
1458
1459		adr_l	r0, call_cache_fn
1460		adr	r1, 0f			@ clean the region of code we
1461		bl	cache_clean_flush	@ may run with the MMU off
		mov	r2, r7			@ restore DT size for .Lefi_hyp
1462
1463#ifdef CONFIG_ARM_VIRT_EXT
1464		@
1465		@ The EFI spec does not support booting on ARM in HYP mode,
1466		@ since it mandates that the MMU and caches are on, with all
1467		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
1468		@
1469		@ While the EDK2 reference implementation adheres to this,
1470		@ U-Boot might decide to enter the EFI stub in HYP mode
1471		@ anyway, with the MMU and caches either on or off.
1472		@
1473		mrs	r0, cpsr		@ get the current mode
1474		msr	spsr_cxsf, r0		@ record boot mode
1475		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
1476		cmp	r0, #HYP_MODE
1477		bne	.Lefi_svc
1478
1479		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
1480		tst	r1, #0x1		@ MMU enabled at HYP?
1481		beq	1f
1482
1483		@
1484		@ When running in HYP mode with the caches on, we're better
1485		@ off just carrying on using the cached 1:1 mapping that the
1486		@ firmware provided. Set up the HYP vectors so HVC instructions
1487		@ issued from HYP mode take us to the correct handler code. We
1488		@ will disable the MMU before jumping to the kernel proper.
1489		@
1490 ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
1491 THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
1492		mcr	p15, 4, r1, c1, c0, 0
1493		adr	r0, __hyp_reentry_vectors
1494		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
1495		isb
1496		b	.Lefi_hyp
1497
1498		@
1499		@ When running in HYP mode with the caches off, we need to drop
1500		@ into SVC mode now, and let the decompressor set up its cached
1501		@ 1:1 mapping as usual.
1502		@
15031:		mov	r9, r4			@ preserve image base
1504		bl	__hyp_stub_install	@ install HYP stub vectors
1505		safe_svcmode_maskall	r1	@ drop to SVC mode
1506		msr	spsr_cxsf, r0		@ record boot mode
1507		orr	r4, r9, #1		@ restore image base and set LSB
1508		b	.Lefi_hyp
1509.Lefi_svc:
1510#endif
1511		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
1512		tst	r0, #0x1		@ MMU enabled?
1513		orreq	r4, r4, #1		@ set LSB if not
1514
1515.Lefi_hyp:
1516		mov	r0, r8			@ DT start
1517		add	r1, r8, r2		@ DT end
1518		bl	cache_clean_flush
1519
1520		adr	r0, 0f			@ switch to our stack
1521		ldr	sp, [r0]		@ offset from 0: to the stack top
1522		add	sp, sp, r0		@ make it an absolute address
1523
1524		mov	r5, #0			@ appended DTB size
1525		mov	r7, #0xFFFFFFFF		@ machine ID
1526		b	wont_overwrite
1527ENDPROC(efi_enter_kernel)
15280:		.long	.L_user_stack_end - .
1529#endif
1530
/*
 * Stack used by the decompressor: 4096 bytes in the writable, zero
 * sized on disk (%nobits) ".stack" section.  .L_user_stack_end marks
 * the top; efi_enter_kernel loads sp from an offset to this label.
 */
1531		.align
1532		.section ".stack", "aw", %nobits
1533.L_user_stack:	.space	4096
1534.L_user_stack_end:
1535