xref: /linux/arch/arm/boot/compressed/head.S (revision d7bf4786b5250b0e490a937d1f8a16ee3a54adbe)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 *  linux/arch/arm/boot/compressed/head.S
4 *
5 *  Copyright (C) 1996-2002 Russell King
6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
7 */
8#include <linux/linkage.h>
9#include <asm/assembler.h>
10#include <asm/v7m.h>
11
12#include "efi-header.S"
13
14#ifdef __ARMEB__
15#define OF_DT_MAGIC 0xd00dfeed
16#else
17#define OF_DT_MAGIC 0xedfe0dd0
18#endif
19
20 AR_CLASS(	.arch	armv7-a	)
21 M_CLASS(	.arch	armv7-m	)
22
23/*
24 * Debugging stuff
25 *
26 * Note that these macros must not contain any code which is not
27 * 100% relocatable.  Any attempt to do so will result in a crash.
28 * Please select one of the following when turning on debugging.
29 */
30#ifdef DEBUG
31
32#if defined(CONFIG_DEBUG_ICEDCC)
33
34#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
35		.macro	loadsp, rb, tmp1, tmp2
36		.endm
37		.macro	writeb, ch, rb, tmp
38		mcr	p14, 0, \ch, c0, c5, 0
39		.endm
40#elif defined(CONFIG_CPU_XSCALE)
41		.macro	loadsp, rb, tmp1, tmp2
42		.endm
43		.macro	writeb, ch, rb, tmp
44		mcr	p14, 0, \ch, c8, c0, 0
45		.endm
46#else
47		.macro	loadsp, rb, tmp1, tmp2
48		.endm
49		.macro	writeb, ch, rb, tmp
50		mcr	p14, 0, \ch, c1, c0, 0
51		.endm
52#endif
53
54#else
55
56#include CONFIG_DEBUG_LL_INCLUDE
57
58		.macro	writeb,	ch, rb, tmp
59#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
60		waituartcts \tmp, \rb
61#endif
62		waituarttxrdy \tmp, \rb
63		senduart \ch, \rb
64		busyuart \tmp, \rb
65		.endm
66
67#if defined(CONFIG_ARCH_SA1100)
68		.macro	loadsp, rb, tmp1, tmp2
69		mov	\rb, #0x80000000	@ physical base address
70		add	\rb, \rb, #0x00010000	@ Ser1
71		.endm
72#else
73		.macro	loadsp,	rb, tmp1, tmp2
74		addruart \rb, \tmp1, \tmp2
75		.endm
76#endif
77#endif
78#endif
79
		/*
		 * kputc - hand one character to putc.
		 *   val: register or #immediate operand, moved into r0.
		 * Clobbers r0 and lr (bl), plus whatever putc itself corrupts;
		 * putc is defined outside this excerpt.
		 */
80		.macro	kputc,val
81		mov	r0, \val
82		bl	putc
83		.endm
84
		/*
		 * kphex - call phex with a value and a length.
		 *   val: register or #immediate operand, moved into r0.
		 *   len: assemble-time constant moved into r1; the callers in
		 *        this file pass 8 (presumably the number of hex digits
		 *        to print - confirm against phex).
		 * Clobbers r0, r1 and lr (bl), plus whatever phex corrupts.
		 */
85		.macro	kphex,val,len
86		mov	r0, \val
87		mov	r1, #\len
88		bl	phex
89		.endm
90
91		/*
92		 * Debug kernel copy by printing the memory addresses involved
93		 */
94		.macro dbgkc, begin, end, cbegin, cend
95#ifdef DEBUG
96		kputc   #'C'
97		kputc   #':'
98		kputc   #'0'
99		kputc   #'x'
100		kphex   \begin, 8	/* Start of compressed kernel */
101		kputc	#'-'
102		kputc	#'0'
103		kputc	#'x'
104		kphex	\end, 8		/* End of compressed kernel */
105		kputc	#'-'
106		kputc	#'>'
107		kputc   #'0'
108		kputc   #'x'
109		kphex   \cbegin, 8	/* Start of kernel copy */
110		kputc	#'-'
111		kputc	#'0'
112		kputc	#'x'
113		kphex	\cend, 8	/* End of kernel copy */
114		kputc	#'\n'
115#endif
116		.endm
117
118		/*
119		 * Debug print of the final appended DTB location
120		 */
121		.macro dbgadtb, begin, size
122#ifdef DEBUG
123		kputc   #'D'
124		kputc   #'T'
125		kputc   #'B'
126		kputc   #':'
127		kputc   #'0'
128		kputc   #'x'
129		kphex   \begin, 8	/* Start of appended DTB */
130		kputc	#' '
131		kputc	#'('
132		kputc	#'0'
133		kputc	#'x'
134		kphex	\size, 8	/* Size of appended DTB */
135		kputc	#')'
136		kputc	#'\n'
137#endif
138		.endm
139
		/*
		 * enable_cp15_barriers - make sure SCTLR bit 5 (CP15BEN) is set.
		 *   reg: scratch register, corrupted.
		 * If the bit is already set, SCTLR is not written.  Otherwise
		 * SCTLR is rewritten with the bit set and an ISB follows (emitted
		 * as a raw opcode in ARM state, as the isb mnemonic in Thumb).
		 * The tst also overwrites the condition flags.
		 */
140		.macro	enable_cp15_barriers, reg
141		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
142		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
143		bne	.L_\@
144		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
145		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
146 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
147 THUMB(		isb						)
148.L_\@:
149		.endm
150
151		/*
152		 * The kernel build system appends the size of the
153		 * decompressed kernel at the end of the compressed data
154		 * in little-endian form.
		 *
		 * get_inflated_image_size reads that 32-bit value.
		 *   res:  receives the size.
		 *   tmp1: scratch, corrupted (holds the address of the size).
		 *   tmp2: scratch, corrupted.
		 * The address is found position-independently: the word at
		 * .Linflated_image_size_offset holds the distance from itself
		 * to the size field.  The value is assembled from four byte
		 * loads, so neither its alignment nor the CPU's data endianness
		 * matters.
155		 */
156		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
157		adr	\res, .Linflated_image_size_offset
158		ldr	\tmp1, [\res]
159		add	\tmp1, \tmp1, \res	@ address of inflated image size
160
161		ldrb	\res, [\tmp1]		@ get_unaligned_le32
162		ldrb	\tmp2, [\tmp1, #1]
163		orr	\res, \res, \tmp2, lsl #8
164		ldrb	\tmp2, [\tmp1, #2]
165		ldrb	\tmp1, [\tmp1, #3]
166		orr	\res, \res, \tmp2, lsl #16
167		orr	\res, \res, \tmp1, lsl #24
168		.endm
169
		/*
		 * be32tocpu - convert a big-endian 32-bit value to CPU order.
		 *   val: register converted in place.
		 *   tmp: scratch register handed to rev_l (defined outside
		 *        this file; presumably corrupted).
		 * Expands to nothing when building big-endian (__ARMEB__).
		 */
170		.macro	be32tocpu, val, tmp
171#ifndef __ARMEB__
172		/* convert to little endian */
173		rev_l	\val, \tmp
174#endif
175		.endm
176
177		.section ".start", "ax"
178/*
179 * sort out different calling conventions
180 */
181		.align
182		/*
183		 * Always enter in ARM state for CPUs that support the ARM ISA.
184		 * As of today (2014) that's exactly the members of the A and R
185		 * classes.
186		 */
187 AR_CLASS(	.arm	)
188start:
189		.type	start,#function
190		/*
191		 * These 7 nops along with the 1 nop immediately below for
192		 * !THUMB2 form 8 nops that make the compressed kernel bootable
193		 * on legacy ARM systems that were assuming the kernel in a.out
194		 * binary format. The boot loaders on these systems would
195		 * jump 32 bytes into the image to skip the a.out header.
196		 * with these 8 nops filling exactly 32 bytes, things still
197		 * work as expected on these legacy systems. Thumb2 mode keeps
198		 * 7 of the nops as it turns out that some boot loaders
199		 * were patching the initial instructions of the kernel, i.e
200		 * had started to exploit this "patch area".
201		 */
202		__initial_nops
203		.rept	5
204		__nop
205		.endr
206#ifndef CONFIG_THUMB2_KERNEL
207		__nop
208#else
209 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
210  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
211		.thumb
212#endif
213		W(b)	1f
214
215		.word	_magic_sig	@ Magic numbers to help the loader
216		.word	_magic_start	@ absolute load/run zImage address
217		.word	_magic_end	@ zImage end address
218		.word	0x04030201	@ endianness flag
219		.word	0x45454545	@ another magic number to indicate
220		.word	_magic_table	@ additional data table
221
222		__EFI_HEADER
2231:
224 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
225 AR_CLASS(	mrs	r9, cpsr	)
226#ifdef CONFIG_ARM_VIRT_EXT
227		bl	__hyp_stub_install	@ get into SVC mode, reversibly
228#endif
229		mov	r7, r1			@ save architecture ID
230		mov	r8, r2			@ save atags pointer
231
232#ifndef CONFIG_CPU_V7M
233		/*
234		 * Booting from Angel - need to enter SVC mode and disable
235		 * FIQs/IRQs (numeric definitions from angel arm.h source).
236		 * We only do this if we were in user mode on entry.
237		 */
238		mrs	r2, cpsr		@ get current mode
239		tst	r2, #3			@ not user?
240		bne	not_angel
241		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
242 ARM(		swi	0x123456	)	@ angel_SWI_ARM
243 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
244not_angel:
245		safe_svcmode_maskall r0
246		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
247						@ SPSR
248#endif
249		/*
250		 * Note that some cache flushing and other stuff may
251		 * be needed here - is there an Angel SWI call for this?
252		 */
253
254		/*
255		 * some architecture specific code can be inserted
256		 * by the linker here, but it should preserve r7, r8, and r9.
257		 */
258
259		.text
260
261#ifdef CONFIG_AUTO_ZRELADDR
262		/*
263		 * Find the start of physical memory.  As we are executing
264		 * without the MMU on, we are in the physical address space.
265		 * We just need to get rid of any offset by aligning the
266		 * address.
267		 *
268		 * This alignment is a balance between the requirements of
269		 * different platforms - we have chosen 128MB to allow
270		 * platforms which align the start of their physical memory
271		 * to 128MB to use this feature, while allowing the zImage
272		 * to be placed within the first 128MB of memory on other
273		 * platforms.  Increasing the alignment means we place
274		 * stricter alignment requirements on the start of physical
275		 * memory, but relaxing it means that we break people who
276		 * are already placing their zImage in (eg) the top 64MB
277		 * of this range.
278		 */
279		mov	r0, pc
280		and	r0, r0, #0xf8000000
281#ifdef CONFIG_USE_OF
282		adr	r1, LC1
283#ifdef CONFIG_ARM_APPENDED_DTB
284		/*
285		 * Look for an appended DTB.  If found, we cannot use it to
286		 * validate the calculated start of physical memory, as its
287		 * memory nodes may need to be augmented by ATAGS stored at
288		 * an offset from the same start of physical memory.
289		 */
290		ldr	r2, [r1, #4]	@ get &_edata
291		add	r2, r2, r1	@ relocate it
292		ldr	r2, [r2]	@ get DTB signature
293		ldr	r3, =OF_DT_MAGIC
294		cmp	r2, r3		@ do we have a DTB there?
295		beq	1f		@ if yes, skip validation
296#endif /* CONFIG_ARM_APPENDED_DTB */
297
298		/*
299		 * Make sure we have some stack before calling C code.
300		 * No GOT fixup has occurred yet, but none of the code we're
301		 * about to call uses any global variables.
302		 */
303		ldr	sp, [r1]	@ get stack location
304		add	sp, sp, r1	@ apply relocation
305
306		/* Validate calculated start against passed DTB */
307		mov	r1, r8
308		bl	fdt_check_mem_start
3091:
310#endif /* CONFIG_USE_OF */
311		/* Determine final kernel image address. */
312		add	r4, r0, #TEXT_OFFSET
313#else
314		ldr	r4, =zreladdr
315#endif
316
317		/*
318		 * Set up a page table only if it won't overwrite ourself.
319		 * That means r4 < pc || r4 - 16k page directory > &_end.
320		 * Given that r4 > &_end is most infrequent, we add a rough
321		 * additional 1MB of room for a possible appended DTB.
322		 */
323		mov	r0, pc
324		cmp	r0, r4
325		ldrcc	r0, .Lheadroom
326		addcc	r0, r0, pc
327		cmpcc	r4, r0
328		orrcc	r4, r4, #1		@ remember we skipped cache_on
329		blcs	cache_on
330
331restart:	adr	r0, LC1
332		ldr	sp, [r0]
333		ldr	r6, [r0, #4]
334		add	sp, sp, r0
335		add	r6, r6, r0
336
337		get_inflated_image_size	r9, r10, lr
338
339#ifndef CONFIG_ZBOOT_ROM
340		/* malloc space is above the relocated stack (64k max) */
341		add	r10, sp, #MALLOC_SIZE
342#else
343		/*
344		 * With ZBOOT_ROM the bss/stack is non relocatable,
345		 * but someone could still run this code from RAM,
346		 * in which case our reference is _edata.
347		 */
348		mov	r10, r6
349#endif
350
351		mov	r5, #0			@ init dtb size to 0
352#ifdef CONFIG_ARM_APPENDED_DTB
353/*
354 *   r4  = final kernel address (possibly with LSB set)
355 *   r5  = appended dtb size (still unknown)
356 *   r6  = _edata
357 *   r7  = architecture ID
358 *   r8  = atags/device tree pointer
359 *   r9  = size of decompressed image
360 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
361 *   sp  = stack pointer
362 *
363 * if there are device trees (dtb) appended to zImage, advance r10 so that the
364 * dtb data will get relocated along with the kernel if necessary.
365 */
366
367		ldr	lr, [r6, #0]
368		ldr	r1, =OF_DT_MAGIC
369		cmp	lr, r1
370		bne	dtb_check_done		@ not found
371
372#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
373		/*
374		 * OK... Let's do some funky business here.
375		 * If we do have a DTB appended to zImage, and we do have
376		 * an ATAG list around, we want the latter to be translated
377		 * and folded into the former here. No GOT fixup has occurred
378		 * yet, but none of the code we're about to call uses any
379		 * global variable.
380		*/
381
382		/* Get the initial DTB size */
383		ldr	r5, [r6, #4]
384		be32tocpu r5, r1
385		dbgadtb	r6, r5
386		/* 50% DTB growth should be good enough */
387		add	r5, r5, r5, lsr #1
388		/* preserve 64-bit alignment */
389		add	r5, r5, #7
390		bic	r5, r5, #7
391		/* clamp to 32KB min and 1MB max */
392		cmp	r5, #(1 << 15)
393		movlo	r5, #(1 << 15)
394		cmp	r5, #(1 << 20)
395		movhi	r5, #(1 << 20)
396		/* temporarily relocate the stack past the DTB work space */
397		add	sp, sp, r5
398
399		mov	r0, r8
400		mov	r1, r6
401		mov	r2, r5
402		bl	atags_to_fdt
403
404		/*
405		 * If returned value is 1, there is no ATAG at the location
406		 * pointed by r8.  Try the typical 0x100 offset from start
407		 * of RAM and hope for the best.
408		 */
409		cmp	r0, #1
410		sub	r0, r4, #TEXT_OFFSET
411		bic	r0, r0, #1
412		add	r0, r0, #0x100
413		mov	r1, r6
414		mov	r2, r5
415		bleq	atags_to_fdt
416
417		sub	sp, sp, r5
418#endif
419
420		mov	r8, r6			@ use the appended device tree
421
422		/*
423		 * Make sure that the DTB doesn't end up in the final
424		 * kernel's .bss area. To do so, we adjust the decompressed
425		 * kernel size to compensate if that .bss size is larger
426		 * than the relocated code.
427		 */
428		ldr	r5, =_kernel_bss_size
429		adr	r1, wont_overwrite
430		sub	r1, r6, r1
431		subs	r1, r5, r1
432		addhi	r9, r9, r1
433
434		/* Get the current DTB size */
435		ldr	r5, [r6, #4]
436		be32tocpu r5, r1
437
438		/* preserve 64-bit alignment */
439		add	r5, r5, #7
440		bic	r5, r5, #7
441
442		/* relocate some pointers past the appended dtb */
443		add	r6, r6, r5
444		add	r10, r10, r5
445		add	sp, sp, r5
446dtb_check_done:
447#endif
448
449/*
450 * Check to see if we will overwrite ourselves.
451 *   r4  = final kernel address (possibly with LSB set)
452 *   r9  = size of decompressed image
453 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
454 * We basically want:
455 *   r4 - 16k page directory >= r10 -> OK
456 *   r4 + image length <= address of wont_overwrite -> OK
457 * Note: the possible LSB in r4 is harmless here.
458 */
459		add	r10, r10, #16384
460		cmp	r4, r10
461		bhs	wont_overwrite
462		add	r10, r4, r9
463		adr	r9, wont_overwrite
464		cmp	r10, r9
465		bls	wont_overwrite
466
467/*
468 * Relocate ourselves past the end of the decompressed kernel.
469 *   r6  = _edata
470 *   r10 = end of the decompressed kernel
471 * Because we always copy ahead, we need to do it from the end and go
472 * backward in case the source and destination overlap.
473 */
474		/*
475		 * Bump to the next 256-byte boundary with the size of
476		 * the relocation code added. This avoids overwriting
477		 * ourself when the offset is small.
478		 */
479		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
480		bic	r10, r10, #255
481
482		/* Get start of code we want to copy and align it down. */
483		adr	r5, restart
484		bic	r5, r5, #31
485
486/* Relocate the hyp vector base if necessary */
487#ifdef CONFIG_ARM_VIRT_EXT
488		mrs	r0, spsr
489		and	r0, r0, #MODE_MASK
490		cmp	r0, #HYP_MODE
491		bne	1f
492
493		/*
494		 * Compute the address of the hyp vectors after relocation.
495		 * Call __hyp_set_vectors with the new address so that we
496		 * can HVC again after the copy.
497		 */
498		adr_l	r0, __hyp_stub_vectors
499		sub	r0, r0, r5
500		add	r0, r0, r10
501		bl	__hyp_set_vectors
5021:
503#endif
504
505		sub	r9, r6, r5		@ size to copy
506		add	r9, r9, #31		@ rounded up to a multiple
507		bic	r9, r9, #31		@ ... of 32 bytes
508		add	r6, r9, r5
509		add	r9, r9, r10
510
511#ifdef DEBUG
512		sub     r10, r6, r5
513		sub     r10, r9, r10
514		/*
515		 * We are about to copy the kernel to a new memory area.
516		 * The boundaries of the new memory area can be found in
517		 * r10 and r9, whilst r5 and r6 contain the boundaries
518		 * of the memory we are going to copy.
519		 * Calling dbgkc will help with the printing of this
520		 * information.
521		 */
522		dbgkc	r5, r6, r10, r9
523#endif
524
5251:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
526		cmp	r6, r5
527		stmdb	r9!, {r0 - r3, r10 - r12, lr}
528		bhi	1b
529
530		/* Preserve offset to relocated code. */
531		sub	r6, r9, r6
532
533		mov	r0, r9			@ start of relocated zImage
534		add	r1, sp, r6		@ end of relocated zImage
535		bl	cache_clean_flush
536
537		badr	r0, restart
538		add	r0, r0, r6
539		mov	pc, r0
540
541wont_overwrite:
542		adr	r0, LC0
543		ldmia	r0, {r1, r2, r3, r11, r12}
544		sub	r0, r0, r1		@ calculate the delta offset
545
546/*
547 * If delta is zero, we are running at the address we were linked at.
548 *   r0  = delta
549 *   r2  = BSS start
550 *   r3  = BSS end
551 *   r4  = kernel execution address (possibly with LSB set)
552 *   r5  = appended dtb size (0 if not present)
553 *   r7  = architecture ID
554 *   r8  = atags pointer
555 *   r11 = GOT start
556 *   r12 = GOT end
557 *   sp  = stack pointer
558 */
559		orrs	r1, r0, r5
560		beq	not_relocated
561
562		add	r11, r11, r0
563		add	r12, r12, r0
564
565#ifndef CONFIG_ZBOOT_ROM
566		/*
567		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
568		 * we need to fix up pointers into the BSS region.
569		 * Note that the stack pointer has already been fixed up.
570		 */
571		add	r2, r2, r0
572		add	r3, r3, r0
573
574		/*
575		 * Relocate all entries in the GOT table.
576		 * Bump bss entries to _edata + dtb size
577		 */
5781:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
579		add	r1, r1, r0		@ This fixes up C references
580		cmp	r1, r2			@ if entry >= bss_start &&
581		cmphs	r3, r1			@       bss_end > entry
582		addhi	r1, r1, r5		@    entry += dtb size
583		str	r1, [r11], #4		@ next entry
584		cmp	r11, r12
585		blo	1b
586
587		/* bump our bss pointers too */
588		add	r2, r2, r5
589		add	r3, r3, r5
590
591#else
592
593		/*
594		 * Relocate entries in the GOT table.  We only relocate
595		 * the entries that are outside the (relocated) BSS region.
596		 */
5971:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
598		cmp	r1, r2			@ entry < bss_start ||
599		cmphs	r3, r1			@ _end < entry
600		addlo	r1, r1, r0		@ table.  This fixes up the
601		str	r1, [r11], #4		@ C references.
602		cmp	r11, r12
603		blo	1b
604#endif
605
606not_relocated:	mov	r0, #0
6071:		str	r0, [r2], #4		@ clear bss
608		str	r0, [r2], #4
609		str	r0, [r2], #4
610		str	r0, [r2], #4
611		cmp	r2, r3
612		blo	1b
613
614		/*
615		 * Did we skip the cache setup earlier?
616		 * That is indicated by the LSB in r4.
617		 * Do it now if so.
618		 */
619		tst	r4, #1
620		bic	r4, r4, #1
621		blne	cache_on
622
623/*
624 * The C runtime environment should now be setup sufficiently.
625 * Set up some pointers, and start decompressing.
626 *   r4  = kernel execution address
627 *   r7  = architecture ID
628 *   r8  = atags pointer
629 */
630		mov	r0, r4
631		mov	r1, sp			@ malloc space above stack
632		add	r2, sp, #MALLOC_SIZE	@ 64k max
633		mov	r3, r7
634		bl	decompress_kernel
635
636		get_inflated_image_size	r1, r2, r3
637
638		mov	r0, r4			@ start of inflated image
639		add	r1, r1, r0		@ end of inflated image
640		bl	cache_clean_flush
641		bl	cache_off
642
643#ifdef CONFIG_ARM_VIRT_EXT
644		mrs	r0, spsr		@ Get saved CPU boot mode
645		and	r0, r0, #MODE_MASK
646		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
647		bne	__enter_kernel		@ boot kernel directly
648
649		adr_l	r0, __hyp_reentry_vectors
650		bl	__hyp_set_vectors
651		__HVC(0)			@ otherwise bounce to hyp mode
652
653		b	.			@ should never be reached
654#else
655		b	__enter_kernel
656#endif
657
/*
 * LC0: link-time (absolute) addresses, loaded with one ldmia at
 * wont_overwrite into the registers named on each line.  The first
 * word is LC0's own link address, so "run-time address of LC0 minus
 * that word" gives the relocation delta.
 */
658		.align	2
659		.type	LC0, #object
660LC0:		.word	LC0			@ r1
661		.word	__bss_start		@ r2
662		.word	_end			@ r3
663		.word	_got_start		@ r11
664		.word	_got_end		@ ip
665		.size	LC0, . - LC0
666
/*
 * LC1: offsets relative to LC1 itself.  Users add the run-time address
 * of LC1 (obtained with adr) to get the run-time stack end and _edata.
 */
667		.type	LC1, #object
668LC1:		.word	.L_user_stack_end - LC1	@ sp
669		.word	_edata - LC1		@ r6
670		.size	LC1, . - LC1
671
/*
 * Image size from restart to _end, plus the 16k page directory and the
 * rough 1MB allowance used when deciding whether cache_on can be called
 * before restart.
 */
672.Lheadroom:
673		.word	_end - restart + 16384 + 1024*1024
674
/*
 * Distance from this word to the last four bytes before input_data_end;
 * consumed by the get_inflated_image_size macro.
 */
675.Linflated_image_size_offset:
676		.long	(input_data_end - 4) - .
677
678#ifdef CONFIG_ARCH_RPC
679		.globl	params
680params:		ldr	r0, =0x10000100		@ params_phys for RPC
681		mov	pc, lr
682		.ltorg
683		.align
684#endif
685
686/*
687 * dcache_line_size - get the minimum D-cache line size from the CTR register
688 * on ARMv7.
 *
 *   reg: receives the line size in bytes, computed as 4 << CTR[19:16].
 *   tmp: scratch register, corrupted.
 *
 * With CONFIG_CPU_V7M the CTR is read from memory at
 * BASEADDR_V7M_SCB + V7M_SCB_CTR; otherwise it is read through CP15.
689 */
690		.macro	dcache_line_size, reg, tmp
691#ifdef CONFIG_CPU_V7M
692		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
693		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
694		ldr	\tmp, [\tmp]
695#else
696		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
697#endif
698		lsr	\tmp, \tmp, #16
699		and	\tmp, \tmp, #0xf		@ cache line size encoding
700		mov	\reg, #4			@ bytes per word
701		mov	\reg, \reg, lsl \tmp		@ actual cache line size
702		.endm
703
704/*
705 * Turn on the cache.  We need to setup some page tables so that we
706 * can have both the I and D caches on.
707 *
708 * We place the page tables 16k down from the kernel execution address,
709 * and we hope that nothing else is using it.  If we're using it, we
710 * will go pop!
711 *
712 * On entry,
713 *  r4 = kernel execution address
714 *  r7 = architecture number
715 *  r8 = atags pointer
716 * On exit,
717 *  r0, r1, r2, r3, r9, r10, r12 corrupted
718 * This routine must preserve:
719 *  r4, r7, r8
 *
 * The value 8 placed in r3 is the byte offset of the 'cache on' method
 * inside a proc_types entry (it follows the two ID words);
 * call_cache_fn jumps there and the method returns to our caller.
 *
 * NOTE(review): the MMU-based methods in this file also write r6
 * (section flags for __setup_mmu), and __armv7_mmu_cache_on writes r11;
 * neither register is in the corrupted list above.
720 */
721		.align	5
722cache_on:	mov	r3, #8			@ cache_on function
723		b	call_cache_fn
724
725/*
726 * Initialize the highest priority protection region, PR7
727 * to cover all 32bit address and cacheable and bufferable.
728 */
729__armv4_mpu_cache_on:
730		mov	r0, #0x3f		@ 4G, the whole
731		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
732		mcr 	p15, 0, r0, c6, c7, 1
733
734		mov	r0, #0x80		@ PR7
735		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
736		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
737		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
738
739		mov	r0, #0xc000
740		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
741		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
742
743		mov	r0, #0
744		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
745		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
746		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
747		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
748						@ ...I .... ..D. WC.M
749		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
750		orr	r0, r0, #0x1000		@ ...1 .... .... ....
751
752		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
753
754		mov	r0, #0
755		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
756		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
757		mov	pc, lr
758
759__armv3_mpu_cache_on:
760		mov	r0, #0x3f		@ 4G, the whole
761		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
762
763		mov	r0, #0x80		@ PR7
764		mcr	p15, 0, r0, c2, c0, 0	@ cache on
765		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
766
767		mov	r0, #0xc000
768		mcr	p15, 0, r0, c5, c0, 0	@ access permission
769
770		mov	r0, #0
771		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
772		/*
773		 * ?? ARMv3 MMU does not allow reading the control register,
774		 * does this really work on ARMv3 MPU?
775		 */
776		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
777						@ .... .... .... WC.M
778		orr	r0, r0, #0x000d		@ .... .... .... 11.1
779		/* ?? this overwrites the value constructed above? */
780		mov	r0, #0
781		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
782
783		/* ?? invalidate for the second time? */
784		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
785		mov	pc, lr
786
787#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
788#define CB_BITS 0x08
789#else
790#define CB_BITS 0x0c
791#endif
792
/*
 * __setup_mmu - build a flat 1:1 first-level page table of 1MB sections.
 *
 * On entry,
 *  r4 = kernel execution address (the table goes 16k below it)
 *  r6 = descriptor bits to OR into sections considered to be RAM
 * On exit,
 *  r3 = base of the page directory (16k aligned)
 *  r0, r1, r2, r9, r10 and the condition flags corrupted
 *
 * "RAM" is a guess: 256MB starting at the page directory address
 * rounded down to a 256k boundary.  Every other section gets the
 * non-RAM attribute bit 0x10 only.
 */
793__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
794		bic	r3, r3, #0xff		@ Align the pointer
795		bic	r3, r3, #0x3f00
796/*
797 * Initialise the page tables, turning on the cacheable and bufferable
798 * bits for the RAM area only.
799 */
800		mov	r0, r3
801		mov	r9, r0, lsr #18
802		mov	r9, r9, lsl #18		@ start of RAM
803		add	r10, r9, #0x10000000	@ a reasonable RAM size
804		mov	r1, #0x12		@ XN|U + section mapping
805		orr	r1, r1, #3 << 10	@ AP=11
806		add	r2, r3, #16384
		/*
		 * r1 is the descriptor for the current section (address plus
		 * attribute bits), r0 the slot being written, r2 the end of
		 * the table.  The bic below does not touch the flags, so the
		 * lo/hs conditions still reflect the two compares.
		 */
8071:		cmp	r1, r9			@ if virt >= start of RAM
808		cmphs	r10, r1			@   && end of RAM >= virt
809		bic	r1, r1, #0x1c		@ clear XN|U + C + B
810		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
811		orrhs	r1, r1, r6		@ set RAM section settings
812		str	r1, [r0], #4		@ 1:1 mapping
813		add	r1, r1, #1048576
814		teq	r0, r2
815		bne	1b
816/*
817 * If ever we are running from Flash, then we surely want the cache
818 * to be enabled also for our execution instance...  We map 2MB of it
819 * so there is no map overlap problem for up to 1 MB compressed kernel.
820 * If the execution is in RAM then we would only be duplicating the above.
821 */
822		orr	r1, r6, #0x04		@ ensure B is set for this
823		orr	r1, r1, #3 << 10
824		mov	r2, pc
825		mov	r2, r2, lsr #20
826		orr	r1, r1, r2, lsl #20
827		add	r0, r3, r2, lsl #2
828		str	r1, [r0], #4
829		add	r1, r1, #1048576
830		str	r1, [r0]
831		mov	pc, lr
832ENDPROC(__setup_mmu)
833
834@ Enable unaligned access on v6, to allow better code generation
835@ for the decompressor C code:
@ SCTLR.A (bit 1) is cleared and SCTLR.U (bit 22) is set, then control
@ passes to __armv4_mmu_cache_on with lr untouched, so that routine
@ returns straight to our caller.  Corrupts r0 in addition to whatever
@ __armv4_mmu_cache_on corrupts.
836__armv6_mmu_cache_on:
837		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
838		bic	r0, r0, #2		@ A (no unaligned access fault)
839		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
840		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
841		b	__armv4_mmu_cache_on
842
/*
 * ARM926EJ-S 'cache on' method: with CONFIG_CPU_DCACHE_WRITETHROUGH it
 * first performs the CP15 write below (selecting write-through mode per
 * the inline comment), then falls through into the generic ARMv4 method.
 */
843__arm926ejs_mmu_cache_on:
844#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
845		mov	r0, #4			@ put dcache in WT mode
846		mcr	p15, 7, r0, c15, c0, 0
847#endif
848
/*
 * Generic ARMv4 'cache on' method.
 *
 * With CONFIG_MMU: builds the 1:1 page table via __setup_mmu (r6 holds
 * the RAM section flags), drains the write buffer, flushes the TLBs and
 * lets __common_mmu_cache_on load the table pointer, domain register
 * and control register.  Without CONFIG_MMU it simply returns.
 *
 * The return address is kept in r12 because the nested bl calls
 * overwrite lr.  Corrupts r0-r3, r6, r9, r10, r12.
 */
849__armv4_mmu_cache_on:
850		mov	r12, lr
851#ifdef CONFIG_MMU
852		mov	r6, #CB_BITS | 0x12	@ U
853		bl	__setup_mmu
854		mov	r0, #0
855		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
856		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
857		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
858		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
859		orr	r0, r0, #0x0030
860 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
861		bl	__common_mmu_cache_on
862		mov	r0, #0
863		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
864#endif
865		mov	pc, r12
866
/*
 * ARMv7 'cache on' method.
 *
 * CP15 barrier operations are enabled first, since the ISBs below are
 * issued through CP15.  With CONFIG_MMU, a non-zero low nibble of
 * ID_MMFR0 (VMSA present) selects the page-table path: __setup_mmu
 * builds the table and the MMU enable bit, TTBR0, DACR and TTBCR are
 * programmed.  Otherwise only the cache/alignment bits of SCTLR change.
 *
 * The return address is kept in r12 because blne overwrites lr.
 * Corrupts r0-r3, r6, r9-r12 and the condition flags.
 */
867__armv7_mmu_cache_on:
868		enable_cp15_barriers	r11
869		mov	r12, lr
870#ifdef CONFIG_MMU
871		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
872		tst	r11, #0xf		@ VMSA
873		movne	r6, #CB_BITS | 0x02	@ !XN
874		blne	__setup_mmu
875		mov	r0, #0
876		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		/*
		 * __setup_mmu changed the flags, so test again.  Nothing
		 * between here and the final mcrne in the next CONFIG_MMU
		 * block sets flags, so every "ne" below still means
		 * "VMSA present".
		 */
877		tst	r11, #0xf		@ VMSA
878		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
879#endif
880		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
881		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
882		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
883		orr	r0, r0, #0x003c		@ write buffer
884		bic	r0, r0, #2		@ A (no unaligned access fault)
885		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
886						@ (needed for ARM1176)
887#ifdef CONFIG_MMU
888 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
889		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
890		orrne	r0, r0, #1		@ MMU enabled
891		movne	r1, #0xfffffffd		@ domain 0 = client
		/*
		 * The two bic instructions are unconditional; without VMSA
		 * they operate on a stale r6, which is harmless because the
		 * write-back to the ttb control register is conditional.
		 */
892		bic     r6, r6, #1 << 31        @ 32-bit translation system
893		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
894		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
895		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
896		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
897#endif
898		mcr	p15, 0, r0, c7, c5, 4	@ ISB
899		mcr	p15, 0, r0, c1, c0, 0	@ load control register
900		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
901		mov	r0, #0
902		mcr	p15, 0, r0, c7, c5, 4	@ ISB
903		mov	pc, r12
904
/*
 * FA526 'cache on' method.  Same shape as __armv4_mmu_cache_on, but it
 * invalidates the whole cache before enabling, sets only the I-cache
 * bit (0x1000) itself, and is not wrapped in CONFIG_MMU.
 *
 * The return address is kept in r12 because the nested bl calls
 * overwrite lr.  Corrupts r0-r3, r6, r9, r10, r12.
 */
905__fa526_cache_on:
906		mov	r12, lr
907		mov	r6, #CB_BITS | 0x12	@ U
908		bl	__setup_mmu
909		mov	r0, #0
910		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
911		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
912		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
913		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
914		orr	r0, r0, #0x1000		@ I-cache enable
915		bl	__common_mmu_cache_on
916		mov	r0, #0
917		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
918		mov	pc, r12
919
/*
 * __common_mmu_cache_on - load the translation table base, the domain
 * access register and the control register.
 *
 * On entry,
 *  r0 = control register value prepared by the caller
 *  r3 = page table pointer (as left by __setup_mmu)
 *  lr = return address
 * On exit,
 *  r0 = control register as read back, r1 corrupted
 *
 * Bits 0x000d are added to r0 here unless DEBUG is defined.  All
 * domains get access value 3 (r1 = -1).  The control register write,
 * its read-back and the return start on a 32-byte boundary.  The return
 * is "lr - (r0 lsr #32)": the shift yields zero, so this is a plain
 * return that depends on the read-back value.
 *
 * NOTE(review): with CONFIG_THUMB2_KERNEL the body is compiled out and
 * the label has no instructions of its own.
 */
920__common_mmu_cache_on:
921#ifndef CONFIG_THUMB2_KERNEL
922#ifndef DEBUG
923		orr	r0, r0, #0x000d		@ Write buffer, mmu
924#endif
925		mov	r1, #-1
926		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
927		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
928		b	1f
929		.align	5			@ cache line aligned
9301:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
931		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
932		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
933#endif
934
/*
 * Size in bytes of one proc_types entry: ID match word, ID mask word
 * and three 4-byte method instructions.
 */
935#define PROC_ENTRY_SIZE (4*5)
936
937/*
938 * Here follow the relocatable cache support functions for the
939 * various processors.  This is a generic hook for locating an
940 * entry and jumping to an instruction at the specified offset
941 * from the start of the block.  Please note this is all position
942 * independent code.
943 *
944 *  r1  = corrupted
945 *  r2  = corrupted
946 *  r3  = block offset
947 *  r9  = corrupted
948 *  r12 = corrupted
 *
 * The block offset is in bytes from the start of the matching entry:
 * cache_on passes 8 and cache_off passes 12.  lr is not touched, so
 * the selected method returns directly to the original caller.
 *
 * The search loop has no explicit end test; it relies on the final
 * proc_types entry (match 0, mask 0) matching every processor ID.
949 */
950
951call_cache_fn:	adr	r12, proc_types
952#ifdef CONFIG_CPU_CP15
953		mrc	p15, 0, r9, c0, c0	@ get processor ID
954#elif defined(CONFIG_CPU_V7M)
955		/*
956		 * On v7-M the processor id is located in the V7M_SCB_CPUID
957		 * register, but as cache handling is IMPLEMENTATION DEFINED on
958		 * v7-M (if existent at all) we just return early here.
959		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
960		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
961		 * use cp15 registers that are not implemented on v7-M.
962		 */
963		bx	lr
964#else
965		ldr	r9, =CONFIG_PROCESSOR_ID
966#endif
9671:		ldr	r1, [r12, #0]		@ get value
968		ldr	r2, [r12, #4]		@ get mask
969		eor	r1, r1, r9		@ (real ^ match)
970		tst	r1, r2			@       & mask
971 ARM(		addeq	pc, r12, r3		) @ call cache function
972 THUMB(		addeq	r12, r3			)
973 THUMB(		moveq	pc, r12			) @ call cache function
974		add	r12, r12, #PROC_ENTRY_SIZE
975		b	1b
976
977/*
978 * Table for cache operations.  This is basically:
979 *   - CPU ID match
980 *   - CPU ID mask
981 *   - 'cache on' method instruction
982 *   - 'cache off' method instruction
983 *   - 'cache flush' method instruction
984 *
985 * We match an entry using: ((real_id ^ match) & mask) == 0
986 *
987 * Writethrough caches generally only need 'on' and 'off'
988 * methods.  Writeback caches _must_ have the flush method
989 * defined.
990 */
991		.align	2
992		.type	proc_types,#object
993proc_types:
994		.word	0x41000000		@ old ARM ID
995		.word	0xff00f000
996		mov	pc, lr
997 THUMB(		nop				)
998		mov	pc, lr
999 THUMB(		nop				)
1000		mov	pc, lr
1001 THUMB(		nop				)
1002
1003		.word	0x41007000		@ ARM7/710
1004		.word	0xfff8fe00
1005		mov	pc, lr
1006 THUMB(		nop				)
1007		mov	pc, lr
1008 THUMB(		nop				)
1009		mov	pc, lr
1010 THUMB(		nop				)
1011
1012		.word	0x41807200		@ ARM720T (writethrough)
1013		.word	0xffffff00
1014		W(b)	__armv4_mmu_cache_on
1015		W(b)	__armv4_mmu_cache_off
1016		mov	pc, lr
1017 THUMB(		nop				)
1018
1019		.word	0x41007400		@ ARM74x
1020		.word	0xff00ff00
1021		W(b)	__armv3_mpu_cache_on
1022		W(b)	__armv3_mpu_cache_off
1023		W(b)	__armv3_mpu_cache_flush
1024
1025		.word	0x41009400		@ ARM94x
1026		.word	0xff00ff00
1027		W(b)	__armv4_mpu_cache_on
1028		W(b)	__armv4_mpu_cache_off
1029		W(b)	__armv4_mpu_cache_flush
1030
1031		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
1032		.word	0xff0ffff0
1033		W(b)	__arm926ejs_mmu_cache_on
1034		W(b)	__armv4_mmu_cache_off
1035		W(b)	__armv5tej_mmu_cache_flush
1036
1037		.word	0x00007000		@ ARM7 IDs
1038		.word	0x0000f000
1039		mov	pc, lr
1040 THUMB(		nop				)
1041		mov	pc, lr
1042 THUMB(		nop				)
1043		mov	pc, lr
1044 THUMB(		nop				)
1045
1046		@ Everything from here on will be the new ID system.
1047
1048		.word	0x4401a100		@ sa110 / sa1100
1049		.word	0xffffffe0
1050		W(b)	__armv4_mmu_cache_on
1051		W(b)	__armv4_mmu_cache_off
1052		W(b)	__armv4_mmu_cache_flush
1053
1054		.word	0x6901b110		@ sa1110
1055		.word	0xfffffff0
1056		W(b)	__armv4_mmu_cache_on
1057		W(b)	__armv4_mmu_cache_off
1058		W(b)	__armv4_mmu_cache_flush
1059
1060		.word	0x56056900
1061		.word	0xffffff00		@ PXA9xx
1062		W(b)	__armv4_mmu_cache_on
1063		W(b)	__armv4_mmu_cache_off
1064		W(b)	__armv4_mmu_cache_flush
1065
1066		.word	0x56158000		@ PXA168
1067		.word	0xfffff000
1068		W(b)	__armv4_mmu_cache_on
1069		W(b)	__armv4_mmu_cache_off
1070		W(b)	__armv5tej_mmu_cache_flush
1071
1072		.word	0x56050000		@ Feroceon
1073		.word	0xff0f0000
1074		W(b)	__armv4_mmu_cache_on
1075		W(b)	__armv4_mmu_cache_off
1076		W(b)	__armv5tej_mmu_cache_flush
1077
1078#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1079		/* this conflicts with the standard ARMv5TE entry */
		/*
		 * Like every other entry this one must be exactly
		 * PROC_ENTRY_SIZE bytes (two ID words plus three 4-byte
		 * method instructions), so the branches use W(b) and the
		 * ID words use .word, matching the rest of the table.
		 */
1080		.word	0x41009260		@ Old Feroceon
1081		.word	0xff00fff0
1082		W(b)	__armv4_mmu_cache_on
1083		W(b)	__armv4_mmu_cache_off
1084		W(b)	__armv5tej_mmu_cache_flush
1085#endif
1086
1087		.word	0x66015261		@ FA526
1088		.word	0xff01fff1
1089		W(b)	__fa526_cache_on
1090		W(b)	__armv4_mmu_cache_off
1091		W(b)	__fa526_cache_flush
1092
1093		@ These match on the architecture ID
1094
1095		.word	0x00020000		@ ARMv4T
1096		.word	0x000f0000
1097		W(b)	__armv4_mmu_cache_on
1098		W(b)	__armv4_mmu_cache_off
1099		W(b)	__armv4_mmu_cache_flush
1100
1101		.word	0x00050000		@ ARMv5TE
1102		.word	0x000f0000
1103		W(b)	__armv4_mmu_cache_on
1104		W(b)	__armv4_mmu_cache_off
1105		W(b)	__armv4_mmu_cache_flush
1106
1107		.word	0x00060000		@ ARMv5TEJ
1108		.word	0x000f0000
1109		W(b)	__armv4_mmu_cache_on
1110		W(b)	__armv4_mmu_cache_off
1111		W(b)	__armv5tej_mmu_cache_flush
1112
1113		.word	0x0007b000		@ ARMv6
1114		.word	0x000ff000
1115		W(b)	__armv6_mmu_cache_on
1116		W(b)	__armv4_mmu_cache_off
1117		W(b)	__armv6_mmu_cache_flush
1118
1119		.word	0x000f0000		@ new CPU Id
1120		.word	0x000f0000
1121		W(b)	__armv7_mmu_cache_on
1122		W(b)	__armv7_mmu_cache_off
1123		W(b)	__armv7_mmu_cache_flush
1124
1125		.word	0			@ unrecognised type
1126		.word	0
1127		mov	pc, lr
1128 THUMB(		nop				)
1129		mov	pc, lr
1130 THUMB(		nop				)
1131		mov	pc, lr
1132 THUMB(		nop				)
1133
1134		.size	proc_types, . - proc_types
1135
1136		/*
		 * Assembly-time sanity check: the number of bytes between
		 * proc_types and this point must be a whole multiple of
		 * PROC_ENTRY_SIZE, otherwise assembly is aborted with the
		 * .error message below.
		 *
1137		 * If you get a "non-constant expression in ".if" statement"
1138		 * error from the assembler on this line, check that you have
1139		 * not accidentally written a "b" instruction where you should
1140		 * have written W(b).
1141		 */
1142		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1143		.error "The size of one or more proc_types entries is wrong."
1144		.endif
1145
1146/*
1147 * Turn off the Cache and MMU.  ARMv3 does not support
1148 * reading the control register, but ARMv4 does.
1149 *
 * cache_off itself only selects the handler: it loads r3 with 12 and
 * branches to call_cache_fn (defined elsewhere in this file).  In the
 * proc_types entries, 12 is the byte offset of the second branch slot
 * (after the two ID words and the cache_on branch), i.e. the
 * *_cache_off handler.  call_cache_fn is presumably what matches the
 * CPU ID against the table and jumps to that slot.
 *
1150 * On exit,
1151 *  r0, r1, r2, r3, r9, r12 corrupted
1152 * This routine must preserve:
1153 *  r4, r7, r8
1154 */
1155		.align	5
1156cache_off:	mov	r3, #12			@ cache_off function
1157		b	call_cache_fn
1158
/*
 * cache_off handler for the ARMv4 MPU entries of proc_types.
 *
 * Reads the CP15 c1 control register, clears bits 0, 2 and 3
 * (mask 0x000d) and writes it back, then drains the write buffer and
 * invalidates the D- and I-caches.
 *
 * Corrupts r0.  Returns to lr.
 */
1159__armv4_mpu_cache_off:
1160		mrc	p15, 0, r0, c1, c0
1161		bic	r0, r0, #0x000d
1162		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1163		mov	r0, #0
1164		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1165		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1166		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1167		mov	pc, lr
1168
/*
 * cache_off handler for the ARMv3 MPU entries of proc_types.
 *
 * Clears bits 0, 2 and 3 (mask 0x000d) of the CP15 c1 control register
 * and then invalidates the whole cache with the v3 operation (c7, c0).
 *
 * NOTE(review): the comment above cache_off says ARMv3 does not support
 * reading the control register, yet this handler starts with an mrc of
 * c1 - presumably the cores using this entry do support the read;
 * confirm.
 *
 * Corrupts r0.  Returns to lr.
 */
1169__armv3_mpu_cache_off:
1170		mrc	p15, 0, r0, c1, c0
1171		bic	r0, r0, #0x000d
1172		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1173		mov	r0, #0
1174		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1175		mov	pc, lr
1176
/*
 * cache_off handler shared by the ARMv4/v5/v6 MMU entries of
 * proc_types.
 *
 * With CONFIG_MMU: clears bits 0, 2 and 3 (mask 0x000d) of the CP15 c1
 * control register, then invalidates the whole cache and the whole TLB.
 * Without CONFIG_MMU the routine is just a return.
 *
 * Corrupts r0 (CONFIG_MMU only).  Returns to lr.
 */
1177__armv4_mmu_cache_off:
1178#ifdef CONFIG_MMU
1179		mrc	p15, 0, r0, c1, c0
1180		bic	r0, r0, #0x000d
1181		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1182		mov	r0, #0
1183		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1184		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1185#endif
1186		mov	pc, lr
1187
/*
 * cache_off handler for the "new CPU Id" (ARMv7) entry of proc_types.
 *
 * Clears bits 0 and 2 (mask 0x0005) of the CP15 c1 control register
 * when CONFIG_MMU is set, or only bit 2 (mask 0x0004) otherwise.  It
 * then invalidates the whole TLB (CONFIG_MMU only) and the branch
 * target cache, and finishes with the CP15 encodings of DSB and ISB.
 *
 * Corrupts r0.  Returns to lr.
 */
1188__armv7_mmu_cache_off:
1189		mrc	p15, 0, r0, c1, c0
1190#ifdef CONFIG_MMU
1191		bic	r0, r0, #0x0005
1192#else
1193		bic	r0, r0, #0x0004
1194#endif
1195		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1196		mov	r0, #0
1197#ifdef CONFIG_MMU
1198		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1199#endif
1200		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1201		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1202		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1203		mov	pc, lr
1204
1205/*
1206 * Clean and flush the cache to maintain consistency.
1207 *
 * Like cache_off, this is only a dispatcher: it loads r3 with 16 (the
 * byte offset of the third branch slot of a proc_types entry, i.e. the
 * *_cache_flush handler), copies the end address into r11 and branches
 * to call_cache_fn.  Of the handlers in this file only
 * __armv7_mmu_cache_flush consumes the r0/r11 range; that handler also
 * modifies r0 while walking the range.
 *
 * All flush handlers except __armv6_mmu_cache_flush begin with
 * "tst r4, #1" and skip their data cache work when bit 0 of r4 is set;
 * the ARMv6 handler makes its clean operations conditional on the same
 * test.
 *
1208 * On entry,
1209 *  r0 = start address
1210 *  r1 = end address (exclusive)
1211 * On exit,
1212 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1213 * This routine must preserve:
1214 *  r4, r6, r7, r8
1215 */
1216		.align	5
1217cache_clean_flush:
1218		mov	r3, #16
1219		mov	r11, r1
1220		b	call_cache_fn
1221
/*
 * cache_flush handler for the ARMv4 MPU entries of proc_types.
 *
 * Returns immediately when bit 0 of r4 is set.  Otherwise it issues an
 * "invalidate D cache" operation, then walks the D-cache by index:
 * the outer loop (label 1) steps the segment field (bits 5 and up)
 * from 7 down to 0, the inner loop (label 2) steps the entry field
 * (bits 26 and up) from 63 down to 0, issuing a clean & invalidate for
 * every combination.  Both loops end when the subs borrows (carry
 * clear).
 *
 * r2 is set to 1 and never changed before the "teq r2, #0", so the
 * I-cache invalidate is always issued.  ip is used as the source
 * register of three of the MCRs but is not loaded in this routine.
 * NOTE(review): presumably the register value is ignored by these
 * operations - confirm.
 *
 * Corrupts r1, r2, r3.  Returns to lr.
 */
1222__armv4_mpu_cache_flush:
1223		tst	r4, #1
1224		movne	pc, lr
1225		mov	r2, #1
1226		mov	r3, #0
1227		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1228		mov	r1, #7 << 5		@ 8 segments
12291:		orr	r3, r1, #63 << 26	@ 64 entries
12302:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1231		subs	r3, r3, #1 << 26
1232		bcs	2b			@ entries 63 to 0
1233		subs 	r1, r1, #1 << 5
1234		bcs	1b			@ segments 7 to 0
1235
1236		teq	r2, #0
1237		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1238		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1239		mov	pc, lr
1240
/*
 * cache_flush handler for the FA526 entry of proc_types.
 *
 * Returns immediately when bit 0 of r4 is set.  Otherwise cleans and
 * invalidates the whole D-cache, flushes the I-cache and drains the
 * write buffer.
 *
 * Corrupts r1.  Returns to lr.
 */
1241__fa526_cache_flush:
1242		tst	r4, #1
1243		movne	pc, lr
1244		mov	r1, #0
1245		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1246		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1247		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1248		mov	pc, lr
1249
/*
 * cache_flush handler for the ARMv6 entry of proc_types.
 *
 * Unlike the other handlers it never returns early: the I-cache/BTB
 * invalidate and the write buffer drain are always issued, while the
 * two clean+invalidate operations (D and unified) are only issued when
 * bit 0 of r4 is clear (mcreq after "tst r4, #1").
 *
 * Corrupts r1.  Returns to lr.
 */
1250__armv6_mmu_cache_flush:
1251		mov	r1, #0
1252		tst	r4, #1
1253		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1254		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1255		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1256		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1257		mov	pc, lr
1258
/*
 * cache_flush handler for the "new CPU Id" (ARMv7) entry of proc_types.
 *
 * On entry (as set up by cache_clean_flush),
 *  r0  = start address
 *  r11 = end address (exclusive)
 *  r4  = bit 0 set: skip the data cache work and go straight to iflush
 *
 * When the ID_MMFR1 field tested below is non-zero the whole D-cache is
 * cleaned and invalidated in one operation.  Otherwise ("hierarchical")
 * the range r0..r11 is cleaned and invalidated line by line, by VA.
 *
 * The by-VA loop counts down the distance that is left to the last
 * line instead of comparing the current address against the end
 * address.  A signed address comparison (cmp/bgt) treats any address
 * at or above 0x80000000 as negative, so a region straddling that
 * boundary looked "finished" before the first line was cleaned.  The
 * distance is a small number whatever the absolute addresses are; the
 * region only has to be smaller than 2 GiB.
 *
 * On exit,
 *  r0, r1, r2, r10, r11 corrupted
 */
1259__armv7_mmu_cache_flush:
1260		enable_cp15_barriers	r10
1261		tst	r4, #1
1262		bne	iflush
1263		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1264		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1265		mov	r10, #0
1266		beq	hierarchical
1267		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1268		b	iflush
1269hierarchical:
1270		dcache_line_size r1, r2		@ r1 := dcache min line size
1271		sub	r2, r1, #1		@ r2 := line size mask
1272		bic	r0, r0, r2		@ round down start to line size
1273		sub	r11, r11, #1		@ end address is exclusive
1274		bic	r11, r11, r2		@ round down end to line size
		sub	r11, r11, r0		@ r11 := offset of last line from r0
						@ (negative for an empty region)
12750:		cmp	r11, #0			@ finished?
1276		blt	iflush
1277		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
1278		add	r0, r0, r1
		sub	r11, r11, r1		@ one line less to go
1279		b	0b
1280iflush:
1281		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1282		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1283		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1284		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1285		mov	pc, lr
1286
/*
 * cache_flush handler for the ARMv5TEJ-style entries of proc_types
 * (ARM926EJ-S, PXA168, Feroceon, ARMv5TEJ).
 *
 * Returns immediately when bit 0 of r4 is set.  Otherwise it repeats
 * the "test, clean and invalidate D cache" operation, which writes its
 * result straight into the condition flags, until Z is set, then
 * flushes the I-cache and drains the write buffer.  r0 is used as the
 * MCR source register with whatever value the caller left in it.
 *
 * Corrupts the condition flags only.  Returns to lr.
 */
1287__armv5tej_mmu_cache_flush:
1288		tst	r4, #1
1289		movne	pc, lr
12901:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
1291		bne	1b
1292		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1293		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1294		mov	pc, lr
1295
/*
 * cache_flush handler for the ARMv4/ARMv5TE-style entries of
 * proc_types.  The D-cache is flushed in software, by reading a block
 * of memory twice the size of the D-cache.
 *
 * Returns immediately when bit 0 of r4 is set.  Otherwise:
 *  - start with defaults of 64K to read (twice a 32K cache) and a
 *    32 byte line;
 *  - read the cache type register; if the value equals r9 the defaults
 *    are kept (presumably r9 holds the main ID register as read by
 *    call_cache_fn, and an absent cache type register reads back as
 *    that ID - confirm against call_cache_fn);
 *  - otherwise derive the amount to read from bits 20:18 (1024 shifted
 *    left by the field, plus one half if bit 14, M, is set) and the
 *    line size from bits 13:12 (8 shifted left by the field);
 *  - load one word per cache line, starting at the current pc rounded
 *    down to 64 bytes, until the computed amount has been covered;
 *  - flush the I-cache and D-cache and drain the write buffer.
 *
 * Corrupts r1, r2, r3, r11.  Returns to lr.
 */
1296__armv4_mmu_cache_flush:
1297		tst	r4, #1
1298		movne	pc, lr
1299		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1300		mov	r11, #32		@ default: 32 byte line size
1301		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1302		teq	r3, r9			@ cache ID register present?
1303		beq	no_cache_id
1304		mov	r1, r3, lsr #18
1305		and	r1, r1, #7
1306		mov	r2, #1024
1307		mov	r2, r2, lsl r1		@ base dcache size *2
1308		tst	r3, #1 << 14		@ test M bit
1309		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1310		mov	r3, r3, lsr #12
1311		and	r3, r3, #3
1312		mov	r11, #8
1313		mov	r11, r11, lsl r3	@ cache line size in bytes
1314no_cache_id:
1315		mov	r1, pc
1316		bic	r1, r1, #63		@ align to longest cache line
1317		add	r2, r1, r2
13181:
1319 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1320 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1321 THUMB(		add     r1, r1, r11		)
1322		teq	r1, r2
1323		bne	1b
1324
1325		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1326		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1327		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1328		mov	pc, lr
1329
/*
 * cache_flush handler for ARMv3; the MMU and MPU variants share one
 * body (both labels refer to the same code).
 *
 * Returns immediately when bit 0 of r4 is set, otherwise invalidates
 * the whole cache with the v3 operation (c7, c0).
 *
 * Corrupts r1.  Returns to lr.
 */
1330__armv3_mmu_cache_flush:
1331__armv3_mpu_cache_flush:
1332		tst	r4, #1
1333		movne	pc, lr
1334		mov	r1, #0
1335		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1336		mov	pc, lr
1337
1338/*
1339 * Various debugging routines for printing hex characters and
1340 * memory, which again must be relocatable.
1341 */
1342#ifdef DEBUG
/*
 * phexbuf: 12 byte scratch buffer in which phex builds its string.
 */
1343		.align	2
1344		.type	phexbuf,#object
1345phexbuf:	.space	12
1346		.size	phexbuf, . - phexbuf
1347
/*
 * phex: print a value as upper-case hexadecimal.
 *
 * On entry,
 *  r0 = value
 *  r1 = number of hex digits to print; the terminating NUL is stored
 *       at phexbuf[r1], so r1 must not exceed 11
 *
 * The digits are produced from the least significant nibble upwards
 * and stored from the end of the buffer towards its start.  Values
 * 10..15 get an extra 7 added so that they map to 'A'..'F'.  Once all
 * digits are stored, phex branches to puts with r0 = phexbuf; puts
 * returns to phex's caller.
 */
1348@ phex corrupts {r0, r1, r2, r3}
1349phex:		adr	r3, phexbuf
1350		mov	r2, #0
1351		strb	r2, [r3, r1]
13521:		subs	r1, r1, #1
1353		movmi	r0, r3
1354		bmi	puts
1355		and	r2, r0, #15
1356		mov	r0, r0, lsr #4
1357		cmp	r2, #10
1358		addge	r2, r2, #7
1359		add	r2, r2, #'0'
1360		strb	r2, [r3, r1]
1361		b	1b
1362
/*
 * puts: write the NUL-terminated string at r0 to the debug port.
 * putc: write the single character in r0 to the debug port.
 *
 * Both use the loadsp and writeb macros selected at the top of the
 * file to set up the port register (r3) and to emit one character.
 * After every character a fixed busy-wait of 0x00020000 iterations is
 * executed, and a '\n' is followed by an additional '\r'.
 *
 * putc shares the output code of puts: it places the character in r2,
 * sets r0 to 0 and branches to label 2 inside puts.  The
 * "teq r0, #0" after the output then ends the routine for putc,
 * whereas for puts r0 is the (non-zero) string pointer and the loop
 * continues with the next character.
 */
1363@ puts corrupts {r0, r1, r2, r3}
1364puts:		loadsp	r3, r2, r1
13651:		ldrb	r2, [r0], #1
1366		teq	r2, #0
1367		moveq	pc, lr
13682:		writeb	r2, r3, r1
1369		mov	r1, #0x00020000
13703:		subs	r1, r1, #1
1371		bne	3b
1372		teq	r2, #'\n'
1373		moveq	r2, #'\r'
1374		beq	2b
1375		teq	r0, #0
1376		bne	1b
1377		mov	pc, lr
1378@ putc corrupts {r0, r1, r2, r3}
1379putc:
1380		mov	r2, r0
1381		loadsp	r3, r1, r0
1382		mov	r0, #0
1383		b	2b
1384
/*
 * memdump: hex dump of 64 words of memory.
 *
 * On entry,
 *  r0 = address of the first word
 *
 * Output is 8 lines of 8 words.  Each line starts with the address of
 * its first word (8 hex digits) and a ':'; every word is preceded by a
 * space, and one extra space is printed after the fourth word of a
 * line.
 *
 * Register use: r12 = base address, r11 = index of the current word,
 * r10 = saved return address (lr is overwritten by the bl calls).
 */
1385@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1386memdump:	mov	r12, r0
1387		mov	r10, lr
1388		mov	r11, #0
13892:		mov	r0, r11, lsl #2
1390		add	r0, r0, r12
1391		mov	r1, #8
1392		bl	phex
1393		mov	r0, #':'
1394		bl	putc
13951:		mov	r0, #' '
1396		bl	putc
1397		ldr	r0, [r12, r11, lsl #2]
1398		mov	r1, #8
1399		bl	phex
1400		and	r0, r11, #7
1401		teq	r0, #3
1402		moveq	r0, #' '
1403		bleq	putc
1404		and	r0, r11, #7
1405		add	r11, r11, #1
1406		teq	r0, #7
1407		bne	1b
1408		mov	r0, #'\n'
1409		bl	putc
1410		cmp	r11, #64
1411		blt	2b
1412		mov	pc, r10
1413#endif
1414
1415		.ltorg
1416
1417#ifdef CONFIG_ARM_VIRT_EXT
/*
 * Vector table of eight branch slots used while in HYP mode.
 * efi_enter_kernel writes the address of this table to the HYP vector
 * base register (HVBAR) when it was entered in HYP mode with the MMU
 * on.
 *
 * Only two slots lead anywhere: the third slot branches to
 * __enter_kernel_from_hyp when CONFIG_EFI_STUB is set, and the sixth
 * slot branches to __enter_kernel.  Every other slot branches to
 * itself.
 */
1418.align 5
1419__hyp_reentry_vectors:
1420		W(b)	.			@ reset
1421		W(b)	.			@ undef
1422#ifdef CONFIG_EFI_STUB
1423		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
1424#else
1425		W(b)	.			@ svc
1426#endif
1427		W(b)	.			@ pabort
1428		W(b)	.			@ dabort
1429		W(b)	__enter_kernel		@ hyp
1430		W(b)	.			@ irq
1431		W(b)	.			@ fiq
1432#endif /* CONFIG_ARM_VIRT_EXT */
1433
/*
 * Final jump into the kernel.
 *
 * On entry,
 *  r4 = kernel entry address
 *  r7 = architecture number
 *  r8 = atags pointer
 *
 * Sets r0 = 0, r1 = r7, r2 = r8 and transfers control to r4.  ARM
 * builds use "mov pc, r4"; Thumb builds use "bx r4", with bit 0 of r4
 * set first on M class so that the kernel is entered in Thumb mode.
 * Does not return.
 */
1434__enter_kernel:
1435		mov	r0, #0			@ must be 0
1436		mov	r1, r7			@ restore architecture number
1437		mov	r2, r8			@ restore atags pointer
1438 ARM(		mov	pc, r4		)	@ call kernel
1439 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1440 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1441
1442reloc_code_end:
1443
1444#ifdef CONFIG_EFI_STUB
/*
 * Target of the "hvc from HYP" slot of __hyp_reentry_vectors.
 *
 * Clears bits 0 and 2 (mask 0x5) of HSCTLR to disable the HYP mode MMU
 * and caches, synchronises with an isb and continues at
 * __enter_kernel.  Corrupts r0, which __enter_kernel sets again.
 */
1445__enter_kernel_from_hyp:
1446		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
1447		bic	r0, r0, #0x5		@ disable MMU and caches
1448		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
1449		isb
1450		b	__enter_kernel
1451
/*
 * efi_enter_kernel: entry into the decompressor from the EFI stub.
 *
 * On entry,
 *  r0 = image base
 *  r1 = DT pointer
 *  r2 = DT size (DT end is computed as r1 + r2)
 *
 * Steps:
 *  - clean the code between call_cache_fn and the literal at label 0,
 *    since it may later run with the MMU off;
 *  - with CONFIG_ARM_VIRT_EXT, record the boot mode in SPSR and handle
 *    an entry in HYP mode (see the comments in the body);
 *  - set bit 0 of r4 when the MMU is found disabled;
 *  - clean the DT, switch to the decompressor's own stack and branch
 *    to wont_overwrite with r4 = image base, r5 = 0, r7 = 0xFFFFFFFF
 *    and r8 = DT pointer.
 *
 * The DT size is kept in r6 rather than r2: cache_clean_flush is
 * documented to corrupt r2 (and __armv7_mmu_cache_flush overwrites it
 * with the line size mask), so after the first call r2 no longer holds
 * the size and the DT end would be wrong.  r6 is one of the registers
 * cache_clean_flush must preserve, and nothing else in this routine
 * writes it.
 * NOTE(review): __hyp_stub_install is defined elsewhere and may not
 * preserve r6.  It is only called on the path that sets bit 0 of r4,
 * and the flush handlers in this file skip their data cache work when
 * that bit is set - confirm.
 *
 * Does not return.
 */
1452ENTRY(efi_enter_kernel)
1453		mov	r4, r0			@ preserve image base
1454		mov	r8, r1			@ preserve DT pointer
		mov	r6, r2			@ preserve DT size
1455
1456		adr_l	r0, call_cache_fn
1457		adr	r1, 0f			@ clean the region of code we
1458		bl	cache_clean_flush	@ may run with the MMU off
1459
1460#ifdef CONFIG_ARM_VIRT_EXT
1461		@
1462		@ The EFI spec does not support booting on ARM in HYP mode,
1463		@ since it mandates that the MMU and caches are on, with all
1464		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
1465		@
1466		@ While the EDK2 reference implementation adheres to this,
1467		@ U-Boot might decide to enter the EFI stub in HYP mode
1468		@ anyway, with the MMU and caches either on or off.
1469		@
1470		mrs	r0, cpsr		@ get the current mode
1471		msr	spsr_cxsf, r0		@ record boot mode
1472		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
1473		cmp	r0, #HYP_MODE
1474		bne	.Lefi_svc
1475
1476		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
1477		tst	r1, #0x1		@ MMU enabled at HYP?
1478		beq	1f
1479
1480		@
1481		@ When running in HYP mode with the caches on, we're better
1482		@ off just carrying on using the cached 1:1 mapping that the
1483		@ firmware provided. Set up the HYP vectors so HVC instructions
1484		@ issued from HYP mode take us to the correct handler code. We
1485		@ will disable the MMU before jumping to the kernel proper.
1486		@
1487 ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
1488 THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
1489		mcr	p15, 4, r1, c1, c0, 0
1490		adr	r0, __hyp_reentry_vectors
1491		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
1492		isb
1493		b	.Lefi_hyp
1494
1495		@
1496		@ When running in HYP mode with the caches off, we need to drop
1497		@ into SVC mode now, and let the decompressor set up its cached
1498		@ 1:1 mapping as usual.
1499		@
15001:		mov	r9, r4			@ preserve image base
1501		bl	__hyp_stub_install	@ install HYP stub vectors
1502		safe_svcmode_maskall	r1	@ drop to SVC mode
1503		msr	spsr_cxsf, r0		@ record boot mode
1504		orr	r4, r9, #1		@ restore image base and set LSB
1505		b	.Lefi_hyp
1506.Lefi_svc:
1507#endif
1508		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
1509		tst	r0, #0x1		@ MMU enabled?
1510		orreq	r4, r4, #1		@ set LSB if not
1511
1512.Lefi_hyp:
1513		mov	r0, r8			@ DT start
1514		add	r1, r8, r6		@ DT end
1515		bl	cache_clean_flush
1516
1517		adr	r0, 0f			@ switch to our stack
1518		ldr	sp, [r0]
1519		add	sp, sp, r0
1520
1521		mov	r5, #0			@ appended DTB size
1522		mov	r7, #0xFFFFFFFF		@ machine ID
1523		b	wont_overwrite
1524ENDPROC(efi_enter_kernel)
15250:		.long	.L_user_stack_end - .
1526#endif
1527
/*
 * The decompressor's stack: 4096 bytes in the ".stack" section, which
 * is allocatable and writable and occupies no space in the file
 * (%nobits).  .L_user_stack_end marks the end of the area;
 * efi_enter_kernel loads sp with its address.
 */
1528		.align
1529		.section ".stack", "aw", %nobits
1530.L_user_stack:	.space	4096
1531.L_user_stack_end:
1532