xref: /linux/arch/arm/boot/compressed/head.S (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1/*
2 *  linux/arch/arm/boot/compressed/head.S
3 *
4 *  Copyright (C) 1996-2002 Russell King
5 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13#include <asm/v7m.h>
14
15#include "efi-header.S"
16
17 AR_CLASS(	.arch	armv7-a	)
18 M_CLASS(	.arch	armv7-m	)
19
20/*
21 * Debugging stuff
22 *
23 * Note that these macros must not contain any code which is not
24 * 100% relocatable.  Any attempt to do so will result in a crash.
25 * Please select one of the following when turning on debugging.
26 */
/*
 * Low-level debug output primitives (assembled only when DEBUG is defined).
 * Every variant below provides the same two macros:
 *   loadsp rb, tmp - prepare register rb for a following writeb (tmp is a
 *                    scratch register); the ICEDCC variants need no base
 *                    address and expand to nothing
 *   writeb ch, rb  - output the character held in register ch
 */
27#ifdef DEBUG
28
29#if defined(CONFIG_DEBUG_ICEDCC)
30
		/*
		 * ICEDCC: the character is written with a single
		 * coprocessor 14 transfer; only the target register
		 * encoding differs between CPU families.
		 */
31#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
32		.macro	loadsp, rb, tmp
33		.endm
34		.macro	writeb, ch, rb
35		mcr	p14, 0, \ch, c0, c5, 0
36		.endm
37#elif defined(CONFIG_CPU_XSCALE)
38		.macro	loadsp, rb, tmp
39		.endm
40		.macro	writeb, ch, rb
41		mcr	p14, 0, \ch, c8, c0, 0
42		.endm
43#else
44		.macro	loadsp, rb, tmp
45		.endm
46		.macro	writeb, ch, rb
47		mcr	p14, 0, \ch, c1, c0, 0
48		.endm
49#endif
50
51#else
52
		/*
		 * UART based output.  senduart and addruart are not defined
		 * in this file; presumably they are supplied by the header
		 * named by CONFIG_DEBUG_LL_INCLUDE.
		 */
53#include CONFIG_DEBUG_LL_INCLUDE
54
55		.macro	writeb,	ch, rb
56		senduart \ch, \rb
57		.endm
58
59#if defined(CONFIG_ARCH_SA1100)
		/* SA1100: the base is built here as 0x80000000 + port offset */
60		.macro	loadsp, rb, tmp
61		mov	\rb, #0x80000000	@ physical base address
62#ifdef CONFIG_DEBUG_LL_SER3
63		add	\rb, \rb, #0x00050000	@ Ser3
64#else
65		add	\rb, \rb, #0x00010000	@ Ser1
66#endif
67		.endm
68#else
69		.macro	loadsp,	rb, tmp
70		addruart \rb, \tmp
71		.endm
72#endif
73#endif
74#endif
75
/*
 * kputc val - print one character.
 * val is any operand accepted as the source of "mov r0, ..."; the callers
 * in this file pass character immediates.  The macro overwrites r0 and,
 * through bl, lr.  putc itself is defined outside this excerpt, so any
 * further registers it changes are not visible here.
 */
76		.macro	kputc,val
77		mov	r0, \val
78		bl	putc
79		.endm
80
/*
 * kphex val, len - call phex with r0 = val and r1 = len.
 * len must be an assembly-time constant because it is used as an immediate.
 * Every caller in this file passes 8; presumably len is the number of hex
 * digits to print (phex is defined outside this excerpt).
 * Overwrites r0, r1 and lr.
 */
81		.macro	kphex,val,len
82		mov	r0, \val
83		mov	r1, #\len
84		bl	phex
85		.endm
86
/*
 * debug_reloc_start - when DEBUG is defined, print one line with
 * r6:r7[:control register] and a second line "r5-r9>r4".  The per-line
 * comments below give the intended meaning of each register.  Expands to
 * nothing without DEBUG.  Uses kputc/kphex, so r0, r1 and lr are overwritten.
 */
87		.macro	debug_reloc_start
88#ifdef DEBUG
89		kputc	#'\n'
90		kphex	r6, 8		/* processor id */
91		kputc	#':'
92		kphex	r7, 8		/* architecture id */
93#ifdef CONFIG_CPU_CP15
94		kputc	#':'
95		mrc	p15, 0, r0, c1, c0
96		kphex	r0, 8		/* control reg */
97#endif
98		kputc	#'\n'
99		kphex	r5, 8		/* decompressed kernel start */
100		kputc	#'-'
101		kphex	r9, 8		/* decompressed kernel end  */
102		kputc	#'>'
103		kphex	r4, 8		/* kernel execution address */
104		kputc	#'\n'
105#endif
106		.endm
107
/*
 * debug_reloc_end - when DEBUG is defined, print r5 followed by a newline
 * and then call memdump with r0 = r4.  memdump is defined outside this
 * excerpt; the existing comment says it dumps 256 bytes.  Expands to
 * nothing without DEBUG.
 */
108		.macro	debug_reloc_end
109#ifdef DEBUG
110		kphex	r5, 8		/* end of kernel */
111		kputc	#'\n'
112		mov	r0, r4
113		bl	memdump		/* dump 256 bytes at start of kernel */
114#endif
115		.endm
116
/*
 * zImage entry point, placed in the ".start" section.
 *
 * Register use visible in this block:
 *   r1 (in) -> copied to r7 as the architecture ID
 *   r2 (in) -> copied to r8 as the atags pointer
 *   r9      <- CPSR at entry (A/R class only); written to the SPSR below
 *              so that the boot mode can be inspected later
 *
 * Layout: seven __nop instructions, then either "mov r0, r0; b 1f" (ARM)
 * or "badr r12, 1f; bx r12" (Thumb), then four words for the loader, then
 * the __EFI_HEADER macro at local label 1 (presumably provided by the
 * efi-header.S include).
 */
117		.section ".start", #alloc, #execinstr
118/*
119 * sort out different calling conventions
120 */
121		.align
122		/*
123		 * Always enter in ARM state for CPUs that support the ARM ISA.
124		 * As of today (2014) that's exactly the members of the A and R
125		 * classes.
126		 */
127 AR_CLASS(	.arm	)
128start:
129		.type	start,#function
130		.rept	7
131		__nop
132		.endr
133   ARM(		mov	r0, r0		)
134   ARM(		b	1f		)
135 THUMB(		badr	r12, 1f		)
136 THUMB(		bx	r12		)
137
138		.word	_magic_sig	@ Magic numbers to help the loader
139		.word	_magic_start	@ absolute load/run zImage address
140		.word	_magic_end	@ zImage end address
141		.word	0x04030201	@ endianness flag
142
143 THUMB(		.thumb			)
1441:		__EFI_HEADER
145
146 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
147 AR_CLASS(	mrs	r9, cpsr	)
148#ifdef CONFIG_ARM_VIRT_EXT
149		bl	__hyp_stub_install	@ get into SVC mode, reversibly
150#endif
151		mov	r7, r1			@ save architecture ID
152		mov	r8, r2			@ save atags pointer
153
154#ifndef CONFIG_CPU_V7M
155		/*
156		 * Booting from Angel - need to enter SVC mode and disable
157		 * FIQs/IRQs (numeric definitions from angel arm.h source).
158		 * We only do this if we were in user mode on entry.
159		 */
160		mrs	r2, cpsr		@ get current mode
		/*
		 * Only the two low mode bits are tested: if either is set
		 * the Angel call is skipped.
		 */
161		tst	r2, #3			@ not user?
162		bne	not_angel
163		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
164 ARM(		swi	0x123456	)	@ angel_SWI_ARM
165 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
166not_angel:
		/*
		 * safe_svcmode_maskall is not defined in this file
		 * (presumably asm/assembler.h); r0 is handed to it as a
		 * scratch register.
		 */
167		safe_svcmode_maskall r0
168		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
169						@ SPSR
170#endif
171		/*
172		 * Note that some cache flushing and other stuff may
173		 * be needed here - is there an Angel SWI call for this?
174		 */
175
176		/*
177		 * some architecture specific code can be inserted
178		 * by the linker here, but it should preserve r7, r8, and r9.
179		 */
180
/*
 * Start of the .text part of the decompressor (execution falls through
 * from the ".start" section code).
 *
 * This block
 *   1. computes r4 = final kernel image address, and
 *   2. decides whether the page tables used by cache_on can be built right
 *      now without overwriting this image.  If not, bit 0 of r4 is set as
 *      a "cache_on was skipped" marker that later code tests and clears.
 *
 * Overwrites r0 and r4, plus everything cache_on overwrites when called.
 */
181		.text
182
183#ifdef CONFIG_AUTO_ZRELADDR
184		/*
185		 * Find the start of physical memory.  As we are executing
186		 * without the MMU on, we are in the physical address space.
187		 * We just need to get rid of any offset by aligning the
188		 * address.
189		 *
190		 * This alignment is a balance between the requirements of
191		 * different platforms - we have chosen 128MB to allow
192		 * platforms which align the start of their physical memory
193		 * to 128MB to use this feature, while allowing the zImage
194		 * to be placed within the first 128MB of memory on other
195		 * platforms.  Increasing the alignment means we place
196		 * stricter alignment requirements on the start of physical
197		 * memory, but relaxing it means that we break people who
198		 * are already placing their zImage in (eg) the top 64MB
199		 * of this range.
200		 */
201		mov	r4, pc
202		and	r4, r4, #0xf8000000
203		/* Determine final kernel image address. */
204		add	r4, r4, #TEXT_OFFSET
205#else
206		ldr	r4, =zreladdr
207#endif
208
209		/*
210		 * Set up a page table only if it won't overwrite ourself.
211		 * That means r4 < pc || r4 - 16k page directory > &_end.
212		 * Given that r4 > &_end is quite infrequent, we add a rough
213		 * additional 1MB of room for a possible appended DTB.
		 *
		 * Flow of the conditional sequence below:
		 *   pc >= r4 (carry set)   -> every "cc" instruction is
		 *                             skipped and cache_on is called.
		 *   pc <  r4 (carry clear) -> r0 = pc + the ninth word of LC0
		 *                             (_end - restart + 16K + 1M);
		 *                             r4 <  r0: set bit 0 of r4, no call
		 *                             r4 >= r0: call cache_on
214		 */
215		mov	r0, pc
216		cmp	r0, r4
217		ldrcc	r0, LC0+32
218		addcc	r0, r0, pc
219		cmpcc	r4, r0
220		orrcc	r4, r4, #1		@ remember we skipped cache_on
221		blcs	cache_on
222
/*
 * restart - (re)load the link-time addresses from LC0 and convert the ones
 * needed immediately into run-time addresses.  The self-relocation code
 * further down jumps back to the relocated copy of this label.
 *
 * On exit:
 *   r0  = delta between run-time and link-time address of LC0
 *   r1  = link-time address of LC0
 *   r2  = __bss_start (link-time value)
 *   r3  = _end (link-time value)
 *   r5  = 0 (appended DTB size, not yet known)
 *   r6  = _edata, run-time address
 *   r9  = size of the decompressed kernel
 *   r10 = end of this image including stack and malloc space, or _edata
 *         when CONFIG_ZBOOT_ROM is set
 *   r11 = _got_start, r12 = _got_end (link-time values)
 *   sp  = stack pointer (relocated by delta unless CONFIG_ZBOOT_ROM)
 */
223restart:	adr	r0, LC0
224		ldmia	r0, {r1, r2, r3, r6, r10, r11, r12}
		/* eighth word of LC0: sp cannot be part of the ldmia list */
225		ldr	sp, [r0, #28]
226
227		/*
228		 * We might be running at a different address.  We need
229		 * to fix up various pointers.
230		 */
231		sub	r0, r0, r1		@ calculate the delta offset
232		add	r6, r6, r0		@ _edata
233		add	r10, r10, r0		@ inflated kernel size location
234
235		/*
236		 * The kernel build system appends the size of the
237		 * decompressed kernel at the end of the compressed data
238		 * in little-endian form.
		 *
		 * It is assembled from four byte loads, so the result does
		 * not depend on CPU endianness or on the alignment of the
		 * location.  lr is used as a scratch register.
239		 */
240		ldrb	r9, [r10, #0]
241		ldrb	lr, [r10, #1]
242		orr	r9, r9, lr, lsl #8
243		ldrb	lr, [r10, #2]
244		ldrb	r10, [r10, #3]
245		orr	r9, r9, lr, lsl #16
246		orr	r9, r9, r10, lsl #24
247
248#ifndef CONFIG_ZBOOT_ROM
249		/* malloc space is above the relocated stack (64k max) */
250		add	sp, sp, r0
251		add	r10, sp, #0x10000
252#else
253		/*
254		 * With ZBOOT_ROM the bss/stack is non relocatable,
255		 * but someone could still run this code from RAM,
256		 * in which case our reference is _edata.
257		 */
258		mov	r10, r6
259#endif
260
261		mov	r5, #0			@ init dtb size to 0
/*
 * Appended device tree handling (CONFIG_ARM_APPENDED_DTB only).
 *
 * The word at _edata (r6) is compared with the DTB signature.  If it does
 * not match, control goes straight to dtb_check_done with r5 still 0.
 * Otherwise:
 *   - with CONFIG_ARM_ATAG_DTB_COMPAT, atags_to_fdt is called to fold an
 *     ATAG list into the DTB,
 *   - r8 is pointed at the DTB,
 *   - r9 may be enlarged so that the DTB stays clear of the kernel .bss,
 *   - r5 becomes the DTB size rounded up to 8 bytes, and r6, r10 and sp
 *     are advanced by that amount.
 */
262#ifdef CONFIG_ARM_APPENDED_DTB
263/*
264 *   r0  = delta
265 *   r2  = BSS start
266 *   r3  = BSS end
267 *   r4  = final kernel address (possibly with LSB set)
268 *   r5  = appended dtb size (still unknown)
269 *   r6  = _edata
270 *   r7  = architecture ID
271 *   r8  = atags/device tree pointer
272 *   r9  = size of decompressed image
273 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
274 *   r11 = GOT start
275 *   r12 = GOT end
276 *   sp  = stack pointer
277 *
278 * if there are device trees (dtb) appended to zImage, advance r10 so that the
279 * dtb data will get relocated along with the kernel if necessary.
280 */
281
282		ldr	lr, [r6, #0]
283#ifndef __ARMEB__
284		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
285#else
286		ldr	r1, =0xd00dfeed
287#endif
288		cmp	lr, r1
289		bne	dtb_check_done		@ not found
290
291#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
292		/*
293		 * OK... Let's do some funky business here.
294		 * If we do have a DTB appended to zImage, and we do have
295		 * an ATAG list around, we want the latter to be translated
296		 * and folded into the former here. No GOT fixup has occurred
297		 * yet, but none of the code we're about to call uses any
298		 * global variable.
299		*/
300
301		/* Get the initial DTB size */
302		ldr	r5, [r6, #4]
303#ifndef __ARMEB__
304		/* convert to little endian */
		/* (four-instruction 32-bit byte reversal, r1 is scratch) */
305		eor	r1, r5, r5, ror #16
306		bic	r1, r1, #0x00ff0000
307		mov	r5, r5, ror #8
308		eor	r5, r5, r1, lsr #8
309#endif
310		/* 50% DTB growth should be good enough */
311		add	r5, r5, r5, lsr #1
312		/* preserve 64-bit alignment */
313		add	r5, r5, #7
314		bic	r5, r5, #7
315		/* clamp to 32KB min and 1MB max */
316		cmp	r5, #(1 << 15)
317		movlo	r5, #(1 << 15)
318		cmp	r5, #(1 << 20)
319		movhi	r5, #(1 << 20)
320		/* temporarily relocate the stack past the DTB work space */
321		add	sp, sp, r5
322
		/*
		 * Save the registers that are still needed afterwards, then
		 * call atags_to_fdt with r0 = r8 (atags pointer), r1 = r6
		 * (DTB address) and r2 = r5 (work space size).
		 */
323		stmfd	sp!, {r0-r3, ip, lr}
324		mov	r0, r8
325		mov	r1, r6
326		mov	r2, r5
327		bl	atags_to_fdt
328
329		/*
330		 * If returned value is 1, there is no ATAG at the location
331		 * pointed by r8.  Try the typical 0x100 offset from start
332		 * of RAM and hope for the best.
		 *
		 * None of the instructions between the cmp and the bleq
		 * below sets the flags, so the second call is made only
		 * when the first one returned 1.  The bic removes the
		 * "cache_on skipped" marker that r4 may carry in bit 0.
333		 */
334		cmp	r0, #1
335		sub	r0, r4, #TEXT_OFFSET
336		bic	r0, r0, #1
337		add	r0, r0, #0x100
338		mov	r1, r6
339		mov	r2, r5
340		bleq	atags_to_fdt
341
342		ldmfd	sp!, {r0-r3, ip, lr}
343		sub	sp, sp, r5
344#endif
345
346		mov	r8, r6			@ use the appended device tree
347
348		/*
349		 * Make sure that the DTB doesn't end up in the final
350		 * kernel's .bss area. To do so, we adjust the decompressed
351		 * kernel size to compensate if that .bss size is larger
352		 * than the relocated code.
		 *
		 * r1 = _kernel_bss_size - (_edata - wont_overwrite); it is
		 * added to r9 only when the subtraction result is "higher"
		 * (unsigned, non-zero).
353		 */
354		ldr	r5, =_kernel_bss_size
355		adr	r1, wont_overwrite
356		sub	r1, r6, r1
357		subs	r1, r5, r1
358		addhi	r9, r9, r1
359
360		/* Get the current DTB size */
361		ldr	r5, [r6, #4]
362#ifndef __ARMEB__
363		/* convert r5 (dtb size) to little endian */
364		eor	r1, r5, r5, ror #16
365		bic	r1, r1, #0x00ff0000
366		mov	r5, r5, ror #8
367		eor	r5, r5, r1, lsr #8
368#endif
369
370		/* preserve 64-bit alignment */
371		add	r5, r5, #7
372		bic	r5, r5, #7
373
374		/* relocate some pointers past the appended dtb */
375		add	r6, r6, r5
376		add	r10, r10, r5
377		add	sp, sp, r5
378dtb_check_done:
379#endif
380
/*
 * Overlap check and self-relocation.
 *
 * If decompressing to r4 cannot overwrite this image, control continues at
 * wont_overwrite.  Otherwise everything from restart (aligned down to 32
 * bytes) up to r6 is copied above the end of the future decompressed
 * kernel, the caches are cleaned, and execution resumes at the copy of
 * restart.
 */
381/*
382 * Check to see if we will overwrite ourselves.
383 *   r4  = final kernel address (possibly with LSB set)
384 *   r9  = size of decompressed image
385 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
386 * We basically want:
387 *   r4 - 16k page directory >= r10 -> OK
388 *   r4 + image length <= address of wont_overwrite -> OK
389 * Note: the possible LSB in r4 is harmless here.
390 */
391		add	r10, r10, #16384
392		cmp	r4, r10
393		bhs	wont_overwrite
394		add	r10, r4, r9
395		adr	r9, wont_overwrite
396		cmp	r10, r9
397		bls	wont_overwrite
398
399/*
400 * Relocate ourselves past the end of the decompressed kernel.
401 *   r6  = _edata
402 *   r10 = end of the decompressed kernel
403 * Because we always copy ahead, we need to do it from the end and go
404 * backward in case the source and destination overlap.
405 */
406		/*
407		 * Bump to the next 256-byte boundary with the size of
408		 * the relocation code added. This avoids overwriting
409		 * ourself when the offset is small.
410		 */
411		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
412		bic	r10, r10, #255
413
414		/* Get start of code we want to copy and align it down. */
415		adr	r5, restart
416		bic	r5, r5, #31
417
418/* Relocate the hyp vector base if necessary */
419#ifdef CONFIG_ARM_VIRT_EXT
		/*
		 * The SPSR holds the boot mode saved at entry.  When that
		 * mode was HYP, the vector address obtained from
		 * __hyp_get_vectors is moved by the same distance as the
		 * code (old base r5, new base r10) and installed again.
		 */
420		mrs	r0, spsr
421		and	r0, r0, #MODE_MASK
422		cmp	r0, #HYP_MODE
423		bne	1f
424
425		bl	__hyp_get_vectors
426		sub	r0, r0, r5
427		add	r0, r0, r10
428		bl	__hyp_set_vectors
4291:
430#endif
431
432		sub	r9, r6, r5		@ size to copy
433		add	r9, r9, #31		@ rounded up to a multiple
434		bic	r9, r9, #31		@ ... of 32 bytes
435		add	r6, r9, r5
436		add	r9, r9, r10
437
		/*
		 * Copy loop: 32 bytes (eight registers) per iteration,
		 * walking downwards from the end.  r6 = source cursor,
		 * r9 = destination cursor, r5 = lowest source address.
		 * The stmdb does not touch the flags, so the bhi acts on
		 * the result of the cmp.
		 */
4381:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
439		cmp	r6, r5
440		stmdb	r9!, {r0 - r3, r10 - r12, lr}
441		bhi	1b
442
443		/* Preserve offset to relocated code. */
444		sub	r6, r9, r6
445
446#ifndef CONFIG_ZBOOT_ROM
447		/* cache_clean_flush may use the stack, so relocate it */
448		add	sp, sp, r6
449#endif
450
451		bl	cache_clean_flush
452
		/* continue at the relocated copy of restart */
453		badr	r0, restart
454		add	r0, r0, r6
455		mov	pc, r0
456
/*
 * wont_overwrite - reached once decompression can no longer clobber this
 * image.  When either the load delta (r0) or the appended DTB size (r5) is
 * non-zero, the GOT entries and BSS bounds are adjusted; otherwise control
 * goes directly to not_relocated.
 */
457wont_overwrite:
458/*
459 * If delta is zero, we are running at the address we were linked at.
460 *   r0  = delta
461 *   r2  = BSS start
462 *   r3  = BSS end
463 *   r4  = kernel execution address (possibly with LSB set)
464 *   r5  = appended dtb size (0 if not present)
465 *   r7  = architecture ID
466 *   r8  = atags pointer
467 *   r11 = GOT start
468 *   r12 = GOT end
469 *   sp  = stack pointer
470 */
		/* r1 is only a scratch result: zero means nothing to fix up */
471		orrs	r1, r0, r5
472		beq	not_relocated
473
474		add	r11, r11, r0
475		add	r12, r12, r0
476
477#ifndef CONFIG_ZBOOT_ROM
478		/*
479		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
480		 * we need to fix up pointers into the BSS region.
481		 * Note that the stack pointer has already been fixed up.
482		 */
483		add	r2, r2, r0
484		add	r3, r3, r0
485
486		/*
487		 * Relocate all entries in the GOT table.
488		 * Bump bss entries to _edata + dtb size
		 *
		 * The cmp/cmphs pair leaves "hi" true only when
		 * bss_start <= entry < bss_end, so only those entries
		 * also receive the DTB size.
489		 */
4901:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
491		add	r1, r1, r0		@ This fixes up C references
492		cmp	r1, r2			@ if entry >= bss_start &&
493		cmphs	r3, r1			@       bss_end > entry
494		addhi	r1, r1, r5		@    entry += dtb size
495		str	r1, [r11], #4		@ next entry
496		cmp	r11, r12
497		blo	1b
498
499		/* bump our bss pointers too */
500		add	r2, r2, r5
501		add	r3, r3, r5
502
503#else
504
505		/*
506		 * Relocate entries in the GOT table.  We only relocate
507		 * the entries that are outside the (relocated) BSS region.
		 *
		 * "lo" is true when entry < bss_start (first compare) or
		 * when _end < entry (second compare, executed only if the
		 * first one gave "hs").
508		 */
5091:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
510		cmp	r1, r2			@ entry < bss_start ||
511		cmphs	r3, r1			@ _end < entry
512		addlo	r1, r1, r0		@ table.  This fixes up the
513		str	r1, [r11], #4		@ C references.
514		cmp	r11, r12
515		blo	1b
516#endif
517
/*
 * not_relocated - clear the BSS, enable the cache if that was postponed,
 * decompress the kernel and hand over to it.
 *
 * In:  r2 = BSS start, r3 = BSS end, r4 = kernel execution address
 *      (bit 0 set if cache_on was skipped), r7 = architecture ID,
 *      r8 = atags pointer, sp = stack pointer.
 * Does not return; ends with a branch to __enter_kernel or an HVC.
 */
518not_relocated:	mov	r0, #0
		/*
		 * The loop stores 16 bytes before testing the bound, so it
		 * writes at least 16 bytes and may run up to 12 bytes past
		 * r3 when the BSS size is not a multiple of 16.
		 * NOTE(review): presumably the linker script guarantees
		 * this is harmless - confirm.
		 */
5191:		str	r0, [r2], #4		@ clear bss
520		str	r0, [r2], #4
521		str	r0, [r2], #4
522		str	r0, [r2], #4
523		cmp	r2, r3
524		blo	1b
525
526		/*
527		 * Did we skip the cache setup earlier?
528		 * That is indicated by the LSB in r4.
529		 * Do it now if so.
		 * (bic leaves the flags from tst intact for the blne.)
530		 */
531		tst	r4, #1
532		bic	r4, r4, #1
533		blne	cache_on
534
535/*
536 * The C runtime environment should now be setup sufficiently.
537 * Set up some pointers, and start decompressing.
538 *   r4  = kernel execution address
539 *   r7  = architecture ID
540 *   r8  = atags pointer
541 */
		/*
		 * Arguments for decompress_kernel: r0 = output address,
		 * r1 = start of malloc space, r2 = end of malloc space,
		 * r3 = architecture ID.
		 */
542		mov	r0, r4
543		mov	r1, sp			@ malloc space above stack
544		add	r2, sp, #0x10000	@ 64k max
545		mov	r3, r7
546		bl	decompress_kernel
547		bl	cache_clean_flush
548		bl	cache_off
549		mov	r1, r7			@ restore architecture number
550		mov	r2, r8			@ restore atags pointer
551
552#ifdef CONFIG_ARM_VIRT_EXT
553		mrs	r0, spsr		@ Get saved CPU boot mode
554		and	r0, r0, #MODE_MASK
555		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
556		bne	__enter_kernel		@ boot kernel directly
557
		/*
		 * Position independent address computation: the literal
		 * below holds the distance from itself to
		 * __hyp_reentry_vectors, so adding its own run-time
		 * address (r12) yields the run-time address of the vectors.
		 */
558		adr	r12, .L__hyp_reentry_vectors_offset
559		ldr	r0, [r12]
560		add	r0, r0, r12
561
562		bl	__hyp_set_vectors
563		__HVC(0)			@ otherwise bounce to hyp mode
564
565		b	.			@ should never be reached
566
567		.align	2
568.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
569#else
570		b	__enter_kernel
571#endif
572
/*
 * LC0 - table of link-time addresses read by the code at restart.
 * The first seven words are loaded with a single ldmia in the order of the
 * register comments below; the word at offset 28 is loaded into sp
 * separately.  The word at offset 32 is read on its own as "LC0+32" by the
 * early cache_on decision: image size from restart to _end plus 16K (page
 * directory) plus 1M (room for an appended DTB).
 * The first word lets the code compute the load delta as
 * (run-time address of LC0) - (this word).
 */
573		.align	2
574		.type	LC0, #object
575LC0:		.word	LC0			@ r1
576		.word	__bss_start		@ r2
577		.word	_end			@ r3
578		.word	_edata			@ r6
579		.word	input_data_end - 4	@ r10 (inflated size location)
580		.word	_got_start		@ r11
581		.word	_got_end		@ ip
582		.word	.L_user_stack_end	@ sp
583		.word	_end - restart + 16384 + 1024*1024
584		.size	LC0, . - LC0
585
/*
 * params (CONFIG_ARCH_RPC only) - global function returning the constant
 * 0x10000100 in r0.  The .ltorg places the literal pool for the ldr
 * pseudo-instruction directly after the function.
 */
586#ifdef CONFIG_ARCH_RPC
587		.globl	params
588params:		ldr	r0, =0x10000100		@ params_phys for RPC
589		mov	pc, lr
590		.ltorg
591		.align
592#endif
593
594/*
595 * Turn on the cache.  We need to setup some page tables so that we
596 * can have both the I and D caches on.
597 *
598 * We place the page tables 16k down from the kernel execution address,
599 * and we hope that nothing else is using it.  If we're using it, we
600 * will go pop!
601 *
602 * On entry,
603 *  r4 = kernel execution address
604 *  r7 = architecture number
605 *  r8 = atags pointer
606 * On exit,
607 *  r0, r1, r2, r3, r9, r10, r12 corrupted
608 * This routine must preserve:
609 *  r4, r7, r8
 *
 * Note: the MMU based implementations in this file additionally write r6
 * (section flags handed to __setup_mmu), and __armv7_mmu_cache_on also
 * writes r11.
 *
 * The routine only selects slot offset 8 (the "cache on" method) and lets
 * call_cache_fn dispatch to the entry of proc_types matching this CPU.
610 */
611		.align	5
612cache_on:	mov	r3, #8			@ cache_on function
613		b	call_cache_fn
614
615/*
616 * Initialize the highest priority protection region, PR7
617 * to cover all 32bit address and cacheable and bufferable.
 *
 * "cache on" method of the ARM94x entry in proc_types.
 * Steps: program region 7 in both c6,c7 registers (opcode2 0 and 1), mark
 * it cacheable for data and instructions and bufferable, set the access
 * permissions, invalidate both caches, set bits 0x102d in the control
 * register, then invalidate both caches once more.
 * Overwrites r0; returns through lr.
618 */
619__armv4_mpu_cache_on:
620		mov	r0, #0x3f		@ 4G, the whole
621		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
622		mcr 	p15, 0, r0, c6, c7, 1
623
624		mov	r0, #0x80		@ PR7
625		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
626		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
627		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
628
629		mov	r0, #0xc000
630		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
631		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
632
633		mov	r0, #0
634		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
635		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
636		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
637		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
638						@ ...I .... ..D. WC.M
639		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
640		orr	r0, r0, #0x1000		@ ...1 .... .... ....
641
642		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
643
644		mov	r0, #0
645		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
646		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
647		mov	pc, lr
648
/*
 * __armv3_mpu_cache_on - "cache on" method of the ARM74x entry in
 * proc_types.  Programs protection region 7, marks it cacheable and
 * bufferable, sets the access permission and invalidates the cache.
 * Overwrites r0; returns through lr.
 *
 * NOTE(review): as the inline remarks below already point out, the value
 * built from the control register is replaced by zero before it is written
 * back, so as written this routine stores 0 in the control register.
 * Whether that is intended cannot be determined from this file.
 */
649__armv3_mpu_cache_on:
650		mov	r0, #0x3f		@ 4G, the whole
651		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
652
653		mov	r0, #0x80		@ PR7
654		mcr	p15, 0, r0, c2, c0, 0	@ cache on
655		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
656
657		mov	r0, #0xc000
658		mcr	p15, 0, r0, c5, c0, 0	@ access permission
659
660		mov	r0, #0
661		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
662		/*
663		 * ?? ARMv3 MMU does not allow reading the control register,
664		 * does this really work on ARMv3 MPU?
665		 */
666		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
667						@ .... .... .... WC.M
668		orr	r0, r0, #0x000d		@ .... .... .... 11.1
669		/* ?? this overwrites the value constructed above? */
670		mov	r0, #0
671		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
672
673		/* ?? invalidate for the second time? */
674		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
675		mov	pc, lr
676
/*
 * CB_BITS - cacheable/bufferable bits that the cache_on methods OR into
 * the section flags passed to __setup_mmu in r6: only bit 3 (0x08) for a
 * write-through D-cache configuration, bits 3 and 2 (0x0c) otherwise.
 */
677#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
678#define CB_BITS 0x08
679#else
680#define CB_BITS 0x0c
681#endif
682
/*
 * __setup_mmu - build a 16K first-level page directory of 1MB section
 * entries that maps every address onto itself (4096 entries).
 *
 * In:  r4 = kernel execution address (the directory is placed 16K below
 *           it, aligned down to 16K)
 *      r6 = descriptor bits to use for sections treated as RAM
 * Out: r3 = page directory base
 * Overwrites r0, r1, r2, r9, r10.  Returns through lr.
 *
 * "RAM" is taken to be the 256MB window that starts at the directory
 * address rounded down to 256KB.
 */
683__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
684		bic	r3, r3, #0xff		@ Align the pointer
685		bic	r3, r3, #0x3f00
686/*
687 * Initialise the page tables, turning on the cacheable and bufferable
688 * bits for the RAM area only.
689 */
690		mov	r0, r3
691		mov	r9, r0, lsr #18
692		mov	r9, r9, lsl #18		@ start of RAM
693		add	r10, r9, #0x10000000	@ a reasonable RAM size
694		mov	r1, #0x12		@ XN|U + section mapping
695		orr	r1, r1, #3 << 10	@ AP=11
696		add	r2, r3, #16384
		/*
		 * Loop over all entries: r0 = entry pointer, r1 = current
		 * descriptor (its upper bits are the section address),
		 * r2 = end of the directory.  The bic does not alter the
		 * flags, so orrlo/orrhs act on the cmp/cmphs result.
		 */
6971:		cmp	r1, r9			@ if virt > start of RAM
698		cmphs	r10, r1			@   && end of RAM > virt
699		bic	r1, r1, #0x1c		@ clear XN|U + C + B
700		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
701		orrhs	r1, r1, r6		@ set RAM section settings
702		str	r1, [r0], #4		@ 1:1 mapping
703		add	r1, r1, #1048576
704		teq	r0, r2
705		bne	1b
706/*
707 * If ever we are running from Flash, then we surely want the cache
708 * to be enabled also for our execution instance...  We map 2MB of it
709 * so there is no map overlap problem for up to 1 MB compressed kernel.
710 * If the execution is in RAM then we would only be duplicating the above.
711 */
712		orr	r1, r6, #0x04		@ ensure B is set for this
713		orr	r1, r1, #3 << 10
		/* r2 = index of the 1MB section this code is running from */
714		mov	r2, pc
715		mov	r2, r2, lsr #20
716		orr	r1, r1, r2, lsl #20
717		add	r0, r3, r2, lsl #2
718		str	r1, [r0], #4
719		add	r1, r1, #1048576
720		str	r1, [r0]
721		mov	pc, lr
722ENDPROC(__setup_mmu)
723
/*
 * __armv6_mmu_cache_on - "cache on" method of the ARMv6 entry in
 * proc_types.  Adjusts two SCTLR bits and then continues in
 * __armv4_mmu_cache_on with a plain branch, so lr is untouched and that
 * routine returns to the original caller.
 */
724@ Enable unaligned access on v6, to allow better code generation
725@ for the decompressor C code:
726__armv6_mmu_cache_on:
727		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
728		bic	r0, r0, #2		@ A (no unaligned access fault)
729		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
730		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
731		b	__armv4_mmu_cache_on
732
/*
 * __arm926ejs_mmu_cache_on - "cache on" method of the ARM926EJ-S entry.
 * With CONFIG_CPU_DCACHE_WRITETHROUGH it first issues one coprocessor
 * write (see the comment on that line), then falls through into
 * __armv4_mmu_cache_on.
 */
733__arm926ejs_mmu_cache_on:
734#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
735		mov	r0, #4			@ put dcache in WT mode
736		mcr	p15, 7, r0, c15, c0, 0
737#endif
738
/*
 * __armv4_mmu_cache_on - build the identity page tables and switch on the
 * MMU and caches.  Without CONFIG_MMU it returns immediately.
 *
 * In:  r4 = kernel execution address (used by __setup_mmu)
 * Overwrites r0, r1, r2, r3, r6, r9, r10 and r12.  r12 keeps the return
 * address because the nested bl instructions overwrite lr.
 */
739__armv4_mmu_cache_on:
740		mov	r12, lr
741#ifdef CONFIG_MMU
742		mov	r6, #CB_BITS | 0x12	@ U
743		bl	__setup_mmu
744		mov	r0, #0
745		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
746		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
747		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
748		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
749		orr	r0, r0, #0x0030
750 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		/* r0 = new control value, r3 = page directory base */
751		bl	__common_mmu_cache_on
752		mov	r0, #0
753		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
754#endif
755		mov	pc, r12
756
/*
 * __armv7_mmu_cache_on - "cache on" method of the catch-all "new CPU Id"
 * entry in proc_types.
 *
 * With CONFIG_MMU, and only if ID_MMFR0 reports a VMSA, the identity page
 * tables are built by __setup_mmu and the MMU is enabled together with
 * the caches.  In every other case only the caches and the unaligned
 * access model are configured.
 *
 * In:  r4 = kernel execution address (used by __setup_mmu)
 * Overwrites r0, r1, r2, r3, r6, r9, r10, r11 and r12.  r12 keeps the
 * return address because blne overwrites lr.
 *
 * The TTBCR is set up for the short-descriptor format using TTBR0 only:
 * EAE (bit 31), the complete three-bit N field (bits 2:0) and PD0 (bit 4)
 * are cleared.  Clearing only two bits of N, as was done before, leaves
 * N[2] as found: a boot loader that left it set would make the MMU
 * consult TTBR1 for part of the address space, and a set PD0 would
 * disable table walks through TTBR0 altogether.
 */
757__armv7_mmu_cache_on:
758		mov	r12, lr
759#ifdef CONFIG_MMU
760		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
761		tst	r11, #0xf		@ VMSA
762		movne	r6, #CB_BITS | 0x02	@ !XN
763		blne	__setup_mmu
764		mov	r0, #0
765		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		/*
		 * Test again: the flags from this tst must stay valid up
		 * to the last conditional mcrne below, and none of the
		 * instructions in between sets the flags.
		 */
766		tst	r11, #0xf		@ VMSA
767		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
768#endif
769		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
770		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
771		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
772		orr	r0, r0, #0x003c		@ write buffer
773		bic	r0, r0, #2		@ A (no unaligned access fault)
774		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
775						@ (needed for ARM1176)
776#ifdef CONFIG_MMU
777 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
778		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
779		orrne	r0, r0, #1		@ MMU enabled
780		movne	r1, #0xfffffffd		@ domain 0 = client
781		bic     r6, r6, #1 << 31        @ 32-bit translation system
782		bic	r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
783		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
784		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
785		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
786#endif
787		mcr	p15, 0, r0, c7, c5, 4	@ ISB
788		mcr	p15, 0, r0, c1, c0, 0	@ load control register
789		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
790		mov	r0, #0
791		mcr	p15, 0, r0, c7, c5, 4	@ ISB
792		mov	pc, r12
793
/*
 * __fa526_cache_on - "cache on" method of the FA526 entry in proc_types.
 * Builds the identity page tables, invalidates the cache and the TLB and
 * enables the MMU and caches through __common_mmu_cache_on.  Unlike
 * __armv4_mmu_cache_on, the body is not guarded by CONFIG_MMU.
 *
 * In:  r4 = kernel execution address (used by __setup_mmu)
 * Overwrites r0, r1, r2, r3, r6, r9, r10 and r12 (r12 keeps the return
 * address).
 */
794__fa526_cache_on:
795		mov	r12, lr
796		mov	r6, #CB_BITS | 0x12	@ U
797		bl	__setup_mmu
798		mov	r0, #0
799		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
800		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
801		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
802		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
803		orr	r0, r0, #0x1000		@ I-cache enable
804		bl	__common_mmu_cache_on
805		mov	r0, #0
806		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
807		mov	pc, r12
808
/*
 * __common_mmu_cache_on - shared tail of the ARMv4-style cache_on methods.
 *
 * In:  r0 = control register value prepared by the caller
 *      r3 = page directory base
 * Loads the page table pointer, sets the domain access control register
 * to all ones, writes the control register and returns through lr.
 * Overwrites r0 and r1.
 *
 * Notes:
 *  - When DEBUG is defined the bits 0x000d are not added to r0 here.
 *  - With CONFIG_THUMB2_KERNEL the whole body is compiled out and the
 *    label is immediately followed by whatever comes next in the file.
 *    NOTE(review): presumably no caller of this routine exists in such a
 *    build - confirm.
 */
809__common_mmu_cache_on:
810#ifndef CONFIG_THUMB2_KERNEL
811#ifndef DEBUG
812		orr	r0, r0, #0x000d		@ Write buffer, mmu
813#endif
814		mov	r1, #-1
815		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
816		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
817		b	1f
818		.align	5			@ cache line aligned
8191:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
820		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		/*
		 * "r0, lsr #32" is always zero, so this is a return to lr
		 * whose operand depends on the value just read back.
		 */
821		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
822#endif
823
/*
 * Size in bytes of one proc_types entry: two data words (match, mask)
 * followed by three method slots of four bytes each.
 */
824#define PROC_ENTRY_SIZE (4*5)
825
826/*
827 * Here follow the relocatable cache support functions for the
828 * various processors.  This is a generic hook for locating an
829 * entry and jumping to an instruction at the specified offset
830 * from the start of the block.  Please note this is all position
831 * independent code.
832 *
833 *  r1  = corrupted
834 *  r2  = corrupted
835 *  r3  = block offset
836 *  r9  = corrupted
837 *  r12 = corrupted
 *
 * The callers in this file pass r3 = 8 (cache on), 12 (cache off) or
 * 16 (cache flush).  lr is left untouched, so the selected method returns
 * directly to the caller of cache_on/cache_off/cache_clean_flush.
 * The search always terminates because the last proc_types entry has a
 * mask of zero and therefore matches every ID.
838 */
839
840call_cache_fn:	adr	r12, proc_types
841#ifdef CONFIG_CPU_CP15
842		mrc	p15, 0, r9, c0, c0	@ get processor ID
843#elif defined(CONFIG_CPU_V7M)
844		/*
845		 * On v7-M the processor id is located in the V7M_SCB_CPUID
846		 * register, but as cache handling is IMPLEMENTATION DEFINED on
847		 * v7-M (if existent at all) we just return early here.
848		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
849		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
850		 * use cp15 registers that are not implemented on v7-M.
851		 */
852		bx	lr
853#else
854		ldr	r9, =CONFIG_PROCESSOR_ID
855#endif
		/* r12 walks the table; r9 holds the ID being looked up */
8561:		ldr	r1, [r12, #0]		@ get value
857		ldr	r2, [r12, #4]		@ get mask
858		eor	r1, r1, r9		@ (real ^ match)
859		tst	r1, r2			@       & mask
860 ARM(		addeq	pc, r12, r3		) @ call cache function
861 THUMB(		addeq	r12, r3			)
862 THUMB(		moveq	pc, r12			) @ call cache function
863		add	r12, r12, #PROC_ENTRY_SIZE
864		b	1b
865
866/*
867 * Table for cache operations.  This is basically:
868 *   - CPU ID match
869 *   - CPU ID mask
870 *   - 'cache on' method instruction
871 *   - 'cache off' method instruction
872 *   - 'cache flush' method instruction
873 *
874 * We match an entry using: ((real_id ^ match) & mask) == 0
875 *
876 * Writethrough caches generally only need 'on' and 'off'
877 * methods.  Writeback caches _must_ have the flush method
878 * defined.
 *
 * Every entry must be exactly PROC_ENTRY_SIZE bytes long, because
 * call_cache_fn steps through the table in units of that size and jumps
 * to a fixed offset inside the matching entry.  Each method slot is
 * therefore a single 4-byte item: either W(b) to the implementation, or
 * "mov pc, lr" (padded with a nop in Thumb builds) when there is nothing
 * to do.  Entries are searched in order, so more specific IDs must come
 * before the generic architecture matches; the final all-zero mask entry
 * matches anything.
879 */
880		.align	2
881		.type	proc_types,#object
882proc_types:
883		.word	0x41000000		@ old ARM ID
884		.word	0xff00f000
885		mov	pc, lr
886 THUMB(		nop				)
887		mov	pc, lr
888 THUMB(		nop				)
889		mov	pc, lr
890 THUMB(		nop				)
891
892		.word	0x41007000		@ ARM7/710
893		.word	0xfff8fe00
894		mov	pc, lr
895 THUMB(		nop				)
896		mov	pc, lr
897 THUMB(		nop				)
898		mov	pc, lr
899 THUMB(		nop				)
900
901		.word	0x41807200		@ ARM720T (writethrough)
902		.word	0xffffff00
903		W(b)	__armv4_mmu_cache_on
904		W(b)	__armv4_mmu_cache_off
905		mov	pc, lr
906 THUMB(		nop				)
907
908		.word	0x41007400		@ ARM74x
909		.word	0xff00ff00
910		W(b)	__armv3_mpu_cache_on
911		W(b)	__armv3_mpu_cache_off
912		W(b)	__armv3_mpu_cache_flush
913
914		.word	0x41009400		@ ARM94x
915		.word	0xff00ff00
916		W(b)	__armv4_mpu_cache_on
917		W(b)	__armv4_mpu_cache_off
918		W(b)	__armv4_mpu_cache_flush
919
920		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
921		.word	0xff0ffff0
922		W(b)	__arm926ejs_mmu_cache_on
923		W(b)	__armv4_mmu_cache_off
924		W(b)	__armv5tej_mmu_cache_flush
925
926		.word	0x00007000		@ ARM7 IDs
927		.word	0x0000f000
928		mov	pc, lr
929 THUMB(		nop				)
930		mov	pc, lr
931 THUMB(		nop				)
932		mov	pc, lr
933 THUMB(		nop				)
934
935		@ Everything from here on will be the new ID system.
936
937		.word	0x4401a100		@ sa110 / sa1100
938		.word	0xffffffe0
939		W(b)	__armv4_mmu_cache_on
940		W(b)	__armv4_mmu_cache_off
941		W(b)	__armv4_mmu_cache_flush
942
943		.word	0x6901b110		@ sa1110
944		.word	0xfffffff0
945		W(b)	__armv4_mmu_cache_on
946		W(b)	__armv4_mmu_cache_off
947		W(b)	__armv4_mmu_cache_flush
948
949		.word	0x56056900		@ PXA9xx
950		.word	0xffffff00
951		W(b)	__armv4_mmu_cache_on
952		W(b)	__armv4_mmu_cache_off
953		W(b)	__armv4_mmu_cache_flush
954
955		.word	0x56158000		@ PXA168
956		.word	0xfffff000
957		W(b)	__armv4_mmu_cache_on
958		W(b)	__armv4_mmu_cache_off
959		W(b)	__armv5tej_mmu_cache_flush
960
961		.word	0x56050000		@ Feroceon
962		.word	0xff0f0000
963		W(b)	__armv4_mmu_cache_on
964		W(b)	__armv4_mmu_cache_off
965		W(b)	__armv5tej_mmu_cache_flush
966
967#ifdef CONFIG_CPU_FEROCEON_OLD_ID
968		/* this conflicts with the standard ARMv5TE entry */
		/*
		 * Written with .word and W(b) like every other entry so
		 * that the entry size cannot depend on the instruction set
		 * selected for the build.
		 */
969		.word	0x41009260		@ Old Feroceon
970		.word	0xff00fff0
971		W(b)	__armv4_mmu_cache_on
972		W(b)	__armv4_mmu_cache_off
973		W(b)	__armv5tej_mmu_cache_flush
974#endif
975
976		.word	0x66015261		@ FA526
977		.word	0xff01fff1
978		W(b)	__fa526_cache_on
979		W(b)	__armv4_mmu_cache_off
980		W(b)	__fa526_cache_flush
981
982		@ These match on the architecture ID
983
984		.word	0x00020000		@ ARMv4T
985		.word	0x000f0000
986		W(b)	__armv4_mmu_cache_on
987		W(b)	__armv4_mmu_cache_off
988		W(b)	__armv4_mmu_cache_flush
989
990		.word	0x00050000		@ ARMv5TE
991		.word	0x000f0000
992		W(b)	__armv4_mmu_cache_on
993		W(b)	__armv4_mmu_cache_off
994		W(b)	__armv4_mmu_cache_flush
995
996		.word	0x00060000		@ ARMv5TEJ
997		.word	0x000f0000
998		W(b)	__armv4_mmu_cache_on
999		W(b)	__armv4_mmu_cache_off
1000		W(b)	__armv5tej_mmu_cache_flush
1001
1002		.word	0x0007b000		@ ARMv6
1003		.word	0x000ff000
1004		W(b)	__armv6_mmu_cache_on
1005		W(b)	__armv4_mmu_cache_off
1006		W(b)	__armv6_mmu_cache_flush
1007
1008		.word	0x000f0000		@ new CPU Id
1009		.word	0x000f0000
1010		W(b)	__armv7_mmu_cache_on
1011		W(b)	__armv7_mmu_cache_off
1012		W(b)	__armv7_mmu_cache_flush
1013
1014		.word	0			@ unrecognised type
1015		.word	0
1016		mov	pc, lr
1017 THUMB(		nop				)
1018		mov	pc, lr
1019 THUMB(		nop				)
1020		mov	pc, lr
1021 THUMB(		nop				)
1022
1023		.size	proc_types, . - proc_types
1024
1025		/*
1026		 * If you get a "non-constant expression in ".if" statement"
1027		 * error from the assembler on this line, check that you have
1028		 * not accidentally written a "b" instruction where you should
1029		 * have written W(b).
1030		 */
1031		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1032		.error "The size of one or more proc_types entries is wrong."
1033		.endif
1034
1035/*
1036 * Turn off the Cache and MMU.  ARMv3 does not support
1037 * reading the control register, but ARMv4 does.
1038 *
1039 * On exit,
1040 *  r0, r1, r2, r3, r9, r12 corrupted
1041 * This routine must preserve:
1042 *  r4, r7, r8
 *
 * The routine only selects slot offset 12 (the "cache off" method) and
 * lets call_cache_fn dispatch to the entry of proc_types matching this
 * CPU.
1043 */
1044		.align	5
1045cache_off:	mov	r3, #12			@ cache_off function
1046		b	call_cache_fn
1047
/*
 * __armv4_mpu_cache_off - "cache off" method of the ARM94x entry.
 * Clears bits 0x000d of the control register, drains the write buffer and
 * issues the D-cache and I-cache operations noted on the lines below.
 * Overwrites r0; returns through lr.
 */
1048__armv4_mpu_cache_off:
1049		mrc	p15, 0, r0, c1, c0
1050		bic	r0, r0, #0x000d
1051		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1052		mov	r0, #0
1053		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1054		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1055		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1056		mov	pc, lr
1057
/*
 * __armv3_mpu_cache_off - "cache off" method of the ARM74x entry.
 * Clears bits 0x000d of the control register and invalidates the whole
 * cache.  Overwrites r0; returns through lr.
 */
1058__armv3_mpu_cache_off:
1059		mrc	p15, 0, r0, c1, c0
1060		bic	r0, r0, #0x000d
1061		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1062		mov	r0, #0
1063		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1064		mov	pc, lr
1065
/*
 * __armv4_mmu_cache_off - "cache off" method shared by the ARMv4/v5/v6
 * style entries of proc_types.  With CONFIG_MMU it clears bits 0x000d of
 * the control register and invalidates the whole cache and TLB; without
 * CONFIG_MMU it just returns.  Overwrites r0; returns through lr.
 */
1066__armv4_mmu_cache_off:
1067#ifdef CONFIG_MMU
1068		mrc	p15, 0, r0, c1, c0
1069		bic	r0, r0, #0x000d
1070		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1071		mov	r0, #0
1072		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1073		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1074#endif
1075		mov	pc, lr
1076
/*
 * __armv7_mmu_cache_off - "cache off" method of the "new CPU Id" entry.
 * Clears bits 0x000d of the control register (0x000c without CONFIG_MMU,
 * which leaves bit 0 as it was), then flushes the caches through
 * __armv7_mmu_cache_flush, invalidates the TLB (MMU builds only) and the
 * BTC, and finishes with DSB and ISB.
 *
 * r12 keeps the return address because bl overwrites lr.  Overwrites r0
 * and r12 plus whatever __armv7_mmu_cache_flush overwrites.
 */
1077__armv7_mmu_cache_off:
1078		mrc	p15, 0, r0, c1, c0
1079#ifdef CONFIG_MMU
1080		bic	r0, r0, #0x000d
1081#else
1082		bic	r0, r0, #0x000c
1083#endif
1084		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1085		mov	r12, lr
1086		bl	__armv7_mmu_cache_flush
1087		mov	r0, #0
1088#ifdef CONFIG_MMU
1089		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1090#endif
1091		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1092		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1093		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1094		mov	pc, r12
1095
1096/*
1097 * Clean and flush the cache to maintain consistency.
1098 *
1099 * On exit,
1100 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1101 * This routine must preserve:
1102 *  r4, r6, r7, r8
 *
 * The routine only selects slot offset 16 (the "cache flush" method) and
 * lets call_cache_fn dispatch to the entry of proc_types matching this
 * CPU.  The flush methods in this file test bit 0 of r4, the marker set
 * when cache_on was skipped.
1103 */
1104		.align	5
1105cache_clean_flush:
1106		mov	r3, #16
1107		b	call_cache_fn
1108
/*
 * __armv4_mpu_cache_flush - "cache flush" method of the ARM94x entry.
 * Returns immediately when bit 0 of r4 is set (cache_on was skipped).
 * Otherwise it cleans and invalidates the D-cache by index, 8 segments of
 * 64 entries each, invalidates the I-cache and drains the write buffer.
 * Overwrites r1, r2 and r3; returns through lr.
 *
 * NOTE(review): r2 is set to 1 and never changed, so the "teq r2, #0"
 * test always selects the I-cache invalidate.  The value of ip passed to
 * the mcr instructions is whatever the caller left there (call_cache_fn
 * uses r12 as its table pointer); presumably these operations ignore the
 * register value - confirm.
 */
1109__armv4_mpu_cache_flush:
1110		tst	r4, #1
1111		movne	pc, lr
1112		mov	r2, #1
1113		mov	r3, #0
1114		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1115		mov	r1, #7 << 5		@ 8 segments
11161:		orr	r3, r1, #63 << 26	@ 64 entries
11172:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1118		subs	r3, r3, #1 << 26
1119		bcs	2b			@ entries 63 to 0
1120		subs 	r1, r1, #1 << 5
1121		bcs	1b			@ segments 7 to 0
1122
1123		teq	r2, #0
1124		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1125		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1126		mov	pc, lr
1127
/*
 * __fa526_cache_flush - clean and invalidate the FA526 caches
 *
 * Does nothing when bit 0 of r4 is set (the flag comes from code outside
 * this routine).  Otherwise cleans and invalidates the D cache, flushes
 * the I cache and drains the write buffer.
 *
 * Corrupts r1 (only when the flush is performed).  Returns through lr.
 */
__fa526_cache_flush:
		tst	r4, #1
		bne	9f			@ flag set: skip straight to the exit
		mov	r1, #0			@ zero operand for the cache ops
		mcr	p15, 0, r1, c7, c14, 0	@ D cache: clean and invalidate
		mcr	p15, 0, r1, c7, c5, 0	@ I cache: flush
		mcr	p15, 0, r1, c7, c10, 4	@ write buffer: drain
9:		mov	pc, lr
1136
/*
 * __armv6_mmu_cache_flush - clean and invalidate the ARMv6 caches
 *
 * The two clean+invalidate operations (D cache and unified cache) are
 * only issued when bit 0 of r4 is clear; the I+BTB invalidate and the
 * write buffer drain are issued unconditionally.
 *
 * Corrupts r1.  Returns through lr.
 */
__armv6_mmu_cache_flush:
		tst	r4, #1			@ eq => cleaning is wanted
		mov	r1, #0			@ zero operand; leaves the flags alone
		mcreq	p15, 0, r1, c7, c14, 0	@ D cache: clean + invalidate
		mcr	p15, 0, r1, c7, c5, 0	@ I cache + BTB: invalidate
		mcreq	p15, 0, r1, c7, c15, 0	@ unified cache: clean + invalidate
		mcr	p15, 0, r1, c7, c10, 4	@ write buffer: drain
		mov	pc, lr
1145
/*
 * __armv7_mmu_cache_flush - clean and invalidate the ARMv7 caches
 *
 * When bit 0 of r4 is set only the tail at 'iflush' runs (DSB, I+BTB
 * invalidate, DSB, ISB).  Otherwise ID_MMFR1 bits [19:16] pick between a
 * single whole-D-cache clean+invalidate and the 'hierarchical' walk, which
 * visits every data/unified cache level reported by CLIDR and issues a
 * clean & invalidate by set/way for each way and index.
 *
 * The hierarchical walk saves r0-r7 and r9-r11 on the stack and restores
 * them at 'finished'; r10 is then zeroed.  On the non-skipped paths r10
 * is therefore 0 on exit.
 *
 * NOTE(review): on the early 'bne iflush' path r10 has not been written
 * by this routine before it is used as the operand of the barrier and
 * invalidate operations -- confirm that is acceptable for every core that
 * dispatches here.
 */
__armv7_mmu_cache_flush:
		tst	r4, #1
		bne	iflush			@ flag set: no D-side cleaning
		mrc	p15, 0, r10, c0, c1, 5	@ r10 = ID_MMFR1
		tst	r10, #(0xf << 16)	@ field zero => hierarchical cache (ARMv7)
		mov	r10, #0			@ flags from the tst are untouched
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ whole D cache: clean + invalidate
		b	iflush
hierarchical:
		mcr	p15, 0, r10, c7, c10, 5	@ DMB
		stmfd	sp!, {r0-r7, r9-r11}	@ working registers, restored at 'finished'
		mrc	p15, 1, r0, c0, c0, 1	@ r0 = clidr
		ands	r3, r0, #0x07000000	@ isolate the loc field
		mov	r3, r3, lsr #23		@ r3 = loc * 2 (flags still from the ands)
		beq	finished		@ loc == 0: nothing to clean
		mov	r10, #0			@ r10 = cache level * 2, start at level 0
loop1:
		add	r2, r10, r10, lsr #1	@ r2 = 3 * cache level
		mov	r1, r0, lsr r2		@ bring this level's type field down
		and	r1, r1, #7		@ keep only this level's cache type
		cmp	r1, #2			@ what kind of cache is here?
		blt	skip			@ none, or i-cache only: next level
		mcr	p15, 2, r10, c0, c0, 0	@ select this level in cssr
		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr & csidr
		mrc	p15, 1, r1, c0, c0, 0	@ r1 = csidr for the selected level
		and	r2, r1, #7		@ line length field
		add	r2, r2, #4		@ + 4 (line length offset) = index shift
		ldr	r4, =0x3ff
		ands	r4, r4, r1, lsr #3	@ r4 = maximum way number
		clz	r5, r4			@ r5 = bit position of the way field
		ldr	r7, =0x7fff
		ands	r7, r7, r1, lsr #13	@ r7 = maximum index number
loop2:
		mov	r9, r4			@ r9 = way counter, restarted per index
loop3:
 ARM(		orr	r11, r10, r9, lsl r5	) @ r11 = level | way
 ARM(		orr	r11, r11, r7, lsl r2	) @ r11 |= index
 THUMB(		lsl	r6, r9, r5		)
 THUMB(		orr	r11, r10, r6		) @ r11 = level | way
 THUMB(		lsl	r6, r7, r2		)
 THUMB(		orr	r11, r11, r6		) @ r11 |= index
		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
		subs	r9, r9, #1		@ next way down
		bge	loop3
		subs	r7, r7, #1		@ next index down
		bge	loop2
skip:
		add	r10, r10, #2		@ advance to the next cache level
		cmp	r3, r10
		bgt	loop1
finished:
		ldmfd	sp!, {r0-r7, r9-r11}
		mov	r10, #0			@ switch back to cache level 0
		mcr	p15, 2, r10, c0, c0, 0	@ select level 0 in cssr
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I cache + BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr
1207
/*
 * __armv5tej_mmu_cache_flush - clean and invalidate the ARMv5TEJ caches
 *
 * Returns immediately when bit 0 of r4 is set (the flag is set up by code
 * outside this routine).  Otherwise repeats the "test, clean and
 * invalidate D cache" operation until it reports completion through the
 * condition flags, then flushes the I cache and drains the write buffer.
 *
 * The data operand of the last two operations is specified as
 * should-be-zero.  Nothing in this routine initialises r0, so a zeroed r1
 * is used instead; r1 is on the list of registers the cache flush
 * routines are allowed to corrupt, and r0 is left untouched.
 *
 * Corrupts r1.  Returns through lr.
 */
__armv5tej_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr			@ flag set: nothing to do
1:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b			@ repeat until the flags say done
		mov	r1, #0			@ should-be-zero operand for c7 ops
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
1216
/*
 * __armv4_mmu_cache_flush - software-flush the D cache on ARMv4 MMU cores
 *
 * Returns immediately when bit 0 of r4 is set (the flag is set up by code
 * outside this routine).  Otherwise the D cache is flushed in software by
 * reading a window of memory twice the D cache size, one load per cache
 * line, starting at the 64-byte aligned address of this code.  The size
 * and line length default to 32K / 32 bytes and are refined from the
 * cache type register when that register does not simply read back as
 * the value held in r9 (r9 is loaded by the caller, outside this routine).
 *
 * After the read loop r1 holds the end address of the window.  The data
 * operand of the whole-cache and write-buffer operations is specified as
 * should-be-zero, so r1 is cleared before it is passed to them.
 *
 * Corrupts r1, r2, r3, r11.  Returns through lr.
 */
__armv4_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr			@ flag set: nothing to do
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7		@ 3-bit size field
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3		@ 2-bit line length field
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2		@ r2 = end of the read window
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
 THUMB(		add     r1, r1, r11		)
		teq	r1, r2
		bne	1b

		mov	r1, #0			@ should-be-zero operand for c7 ops
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
1250
/*
 * __armv3_mmu_cache_flush / __armv3_mpu_cache_flush - invalidate the v3 cache
 *
 * Both names refer to the same code.  Does nothing when bit 0 of r4 is
 * set (the flag comes from code outside this routine); otherwise issues
 * the v3 "invalidate whole cache" operation.
 *
 * Corrupts r1 (only when the invalidate is performed).  Returns through lr.
 */
__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		tst	r4, #1
		bne	9f			@ flag set: skip straight to the exit
		mov	r1, #0			@ zero operand for the cache op
		mcr	p15, 0, r1, c7, c0, 0	@ v3: invalidate the entire cache
9:		mov	pc, lr
1258
1259/*
1260 * Various debugging routines for printing hex characters and
1261 * memory, which again must be relocatable.
1262 */
1263#ifdef DEBUG
1264		.align	2
1265		.type	phexbuf,#object
1266phexbuf:	.space	12
1267		.size	phexbuf, . - phexbuf
1268
1269@ phex corrupts {r0, r1, r2, r3}
1270phex:		adr	r3, phexbuf
1271		mov	r2, #0
1272		strb	r2, [r3, r1]
12731:		subs	r1, r1, #1
1274		movmi	r0, r3
1275		bmi	puts
1276		and	r2, r0, #15
1277		mov	r0, r0, lsr #4
1278		cmp	r2, #10
1279		addge	r2, r2, #7
1280		add	r2, r2, #'0'
1281		strb	r2, [r3, r1]
1282		b	1b
1283
/*
 * puts - write the NUL-terminated string at r0 through the debug channel
 * putc - write the single character in r0 through the debug channel
 *
 * loadsp and writeb are the debug macros selected at the top of the file.
 * Every character is followed by a busy-wait of 0x00020000 iterations,
 * and a '\n' is followed by a '\r'.
 *
 * putc shares the output loop with puts: it enters at label 2 with the
 * character in r2 and r0 = 0, and the r0 == 0 test after the character
 * has been sent makes the loop return instead of fetching another byte.
 *
 * puts corrupts {r0, r1, r2, r3}
 * putc corrupts {r0, r1, r2, r3}
 */
puts:		loadsp	r3, r1
1:		ldrb	r2, [r0], #1		@ next character, advance pointer
		teq	r2, #0
		beq	4f			@ terminator reached: done
2:		writeb	r2, r3
		mov	r1, #0x00020000		@ delay counter
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'		@ follow a newline with a return
		beq	2b
		teq	r0, #0			@ r0 == 0 marks the putc case
		bne	1b
4:		mov	pc, lr

putc:
		mov	r2, r0			@ character to send
		mov	r0, #0			@ tells the loop to stop afterwards
		loadsp	r3, r1
		b	2b
1305
/*
 * memdump - hex dump 64 words starting at the address in r0
 *
 * Each output line shows the address of its first word (8 hex digits)
 * followed by ':' and eight words of 8 hex digits each.  Words are
 * preceded by a space and an extra space is printed after the fourth
 * word of a line.
 *
 * Register use: r12 = base address, r11 = word index, r10 = saved lr
 * (lr itself is overwritten by the calls to phex and putc).
 *
 * memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
 */
memdump:	mov	r12, r0			@ base address
		mov	r10, lr			@ return address
		mov	r11, #0			@ word index
2:		add	r0, r12, r11, lsl #2	@ address of this line's first word
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]	@ fetch the word to print
		mov	r1, #8
		bl	phex
		and	r0, r11, #7		@ position within the line
		teq	r0, #3
		bne	5f
		mov	r0, #' '		@ gap after the fourth word
		bl	putc
5:		and	r0, r11, #7		@ recompute, the calls corrupt r0
		add	r11, r11, #1
		teq	r0, #7			@ was that the eighth word?
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b			@ more lines to go
		mov	pc, r10
1334#endif
1335
1336		.ltorg
1337
#ifdef CONFIG_ARM_VIRT_EXT
/*
 * __hyp_reentry_vectors - 32-byte aligned table of eight branch slots
 *
 * Only the hyp slot goes anywhere: it branches to __enter_kernel.  Every
 * other slot branches to itself.  The code that points the CPU at this
 * table is outside this part of the file.
 */
.align 5
__hyp_reentry_vectors:
		W(b)	.			@ reset:  spin
		W(b)	.			@ undef:  spin
		W(b)	.			@ svc:    spin
		W(b)	.			@ pabort: spin
		W(b)	.			@ dabort: spin
		W(b)	__enter_kernel		@ hyp:    continue into the kernel
		W(b)	.			@ irq:    spin
		W(b)	.			@ fiq:    spin
#endif /* CONFIG_ARM_VIRT_EXT */
1350
/*
 * __enter_kernel - jump to the kernel entry point held in r4
 *
 * r0 is cleared first.  ARM builds load pc directly from r4.  Thumb
 * builds use bx, so bit 0 of r4 selects the instruction set: M class
 * sets it to enter in Thumb state, A/R classes leave it clear because
 * the kernel entry point is always ARM code there.
 */
__enter_kernel:
		mov	r0, #0			@ r0 is required to be zero
 ARM(		mov	pc, r4		)	@ jump to the kernel
 M_CLASS(	add	r4, r4, #1	)	@ M class: set the Thumb bit
 THUMB(		bx	r4		)	@ A/R class: target is ARM code
1356
1357reloc_code_end:
1358
#ifdef CONFIG_EFI_STUB
		.align	2
_start:		.long	start - .		@ offset from this word to 'start'

/*
 * efi_stub_entry - entry point used when the zImage is run by EFI firmware
 *
 * r0 and r1 arrive from the firmware and are passed through to
 * efi_entry() unchanged.  r2 is set to point at a two-word stack frame
 * {zImage address, lr}: the first word carries the current run-time
 * address of 'start' in, and the (possibly relocated) zImage address out.
 *
 * efi_entry() returns the FDT address, or -1 on failure.  On failure
 * EFI_LOAD_ERROR is returned to the firmware through the saved lr.  On
 * success the caches are cleaned and turned off and control is passed to
 * the zImage at [sp] + start_offset with r0 = 0, r1 = 0xFFFFFFFF and
 * r2 = FDT address; the stack frame is deliberately not popped.
 *
 * The cache flush routines test bit 0 of r4 and skip the flush when it is
 * set; r4 holds the efi_entry() return value while they run.
 */
ENTRY(efi_stub_entry)
		@ allocate space on stack for passing current zImage address
		@ and for the EFI stub to return the new entry point of the
		@ zImage, as the EFI stub may copy the kernel. Pointer address
		@ is passed in r2. r0 and r1 are passed through from the
		@ EFI firmware to efi_entry
		adr	ip, _start
		ldr	r3, [ip]
		add	r3, r3, ip		@ r3 = run-time address of 'start'
		stmfd	sp!, {r3, lr}
		mov	r2, sp			@ pass zImage address in r2
		bl	efi_entry

		@ Check for error return from EFI stub. r0 has FDT address
		@ or error code.
		cmn	r0, #1
		beq	efi_load_fail

		@ Preserve return value of efi_entry() in r4
		mov	r4, r0

		@ The cache maintenance code relies on the CP15 barrier
		@ operations.  We get here with the MMU and caches configured
		@ by UEFI, so SCTLR.CP15BEN (bit 5) cannot be assumed to be
		@ set and must be checked.  Per the ARM ARM the bit reads as
		@ one on v6 and earlier, so the enable path, including the
		@ ISB, is only ever executed on v7+.
		mrc	p15, 0, r1, c1, c0, 0	@ read SCTLR
		tst	r1, #(1 << 5)		@ CP15BEN bit set?
		bne	0f
		orr	r1, r1, #(1 << 5)	@ enable CP15 barrier instructions
		mcr	p15, 0, r1, c1, c0, 0	@ write SCTLR
 ARM(		.inst	0xf57ff06f	)	@ v7+ isb
 THUMB(		isb			)

0:		bl	cache_clean_flush
		bl	cache_off

		@ Set parameters for booting zImage according to boot protocol
		@ put FDT address in r2, it was returned by efi_entry()
		@ r1 is the machine type, and r0 needs to be 0
		mov	r0, #0
		mov	r1, #0xFFFFFFFF
		mov	r2, r4

		@ Branch to (possibly) relocated zImage that is in [sp]
		ldr	lr, [sp]
		ldr	ip, =start_offset
		add	lr, lr, ip
		mov	pc, lr				@ no mode switch

efi_load_fail:
		@ Return EFI_LOAD_ERROR to EFI firmware on error.
		ldr	r0, =0x80000001
		ldmfd	sp!, {ip, pc}		@ drop the zImage slot, return via saved lr
ENDPROC(efi_stub_entry)
#endif
1405
/*
 * 4096 bytes of zero-initialised (%nobits) storage in its own allocatable,
 * writable ".stack" section, bracketed by .L_user_stack (lowest address)
 * and .L_user_stack_end (one past the end).  Both labels are file-local;
 * the code that uses them is outside this part of the file.
 */
		.align
		.section ".stack", "aw", %nobits
.L_user_stack:	.space	4096			@ reserve the region
.L_user_stack_end:
1410