xref: /linux/arch/arm/boot/compressed/head.S (revision 88e24c3a4b30a6bd361f2b5ce602667a8161b2e8)
/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)

#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#elif defined(CONFIG_CPU_XSCALE)
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c8, c0, 0
		.endm
#else
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c1, c0, 0
		.endm
#endif

#else

#include <mach/debug-macro.S>

		.macro	writeb,	ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb, tmp
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro	loadsp, rb, tmp
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp,	rb, tmp
		addruart \rb, \tmp
		.endm
#endif
#endif
#endif

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
#ifdef CONFIG_CPU_CP15
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
#endif
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r9, 8		/* decompressed kernel end  */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
		.arm				@ Always enter in ARM state
start:
		.type	start,#function
		.rept	7
		mov	r0, r0
		.endr
   ARM(		mov	r0, r0		)
   ARM(		b	1f		)
 THUMB(		adr	r12, BSYM(1f)	)
 THUMB(		bx	r12		)

		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
 THUMB(		.thumb			)
1:		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text

#ifdef CONFIG_AUTO_ZRELADDR
		@ determine final kernel image address
		mov	r4, pc
		and	r4, r4, #0xf8000000
		add	r4, r4, #TEXT_OFFSET
#else
		ldr	r4, =zreladdr
#endif
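
		/*
		 * Illustrative note (not from the original source): with
		 * CONFIG_AUTO_ZRELADDR the final kernel address is derived
		 * from wherever we happen to be running, by rounding pc
		 * down to a 128MB boundary.  In C-style pseudocode:
		 *
		 *	r4 = (pc & ~0x07ffffff) + TEXT_OFFSET;
		 *
		 * so a zImage executing anywhere within a 128MB bank places
		 * the kernel at TEXT_OFFSET from the start of that bank.
		 */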

		bl	cache_on

restart:	adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r6, r10, r11, r12}
		ldr	sp, [r0, #28]

		/*
		 * We might be running at a different address.  We need
		 * to fix up various pointers.
		 */
		sub	r0, r0, r1		@ calculate the delta offset
		add	r6, r6, r0		@ _edata
		add	r10, r10, r0		@ inflated kernel size location

		/*
		 * The kernel build system appends the size of the
		 * decompressed kernel at the end of the compressed data
		 * in little-endian form.
		 */
		ldrb	r9, [r10, #0]
		ldrb	lr, [r10, #1]
		orr	r9, r9, lr, lsl #8
		ldrb	lr, [r10, #2]
		ldrb	r10, [r10, #3]
		orr	r9, r9, lr, lsl #16
		orr	r9, r9, r10, lsl #24
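
		/*
		 * Illustrative note (not from the original source): the
		 * byte-wise loads above are an endian- and alignment-safe
		 * way to read a 32-bit little-endian value, equivalent to
		 * this C-style sketch:
		 *
		 *	u8 *p = inflated_size_location;
		 *	u32 size = p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
		 */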

#ifndef CONFIG_ZBOOT_ROM
		/* malloc space is above the relocated stack (64k max) */
		add	sp, sp, r0
		add	r10, sp, #0x10000
#else
		/*
		 * With ZBOOT_ROM the bss/stack is non-relocatable,
		 * but someone could still run this code from RAM,
		 * in which case our reference is _edata.
		 */
		mov	r10, r6
#endif

/*
 * Check to see if we will overwrite ourselves.
 *   r4  = final kernel address
 *   r9  = size of decompressed image
 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
 * We basically want:
 *   r4 - 16k page directory >= r10 -> OK
 *   r4 + image length <= current position (pc) -> OK
 */
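/*
 * Worked example (illustrative, not from the original source): with
 * r4 = 0x40008000, r9 = 0x00400000 and r10 = 0x40210000, the first
 * test fails (0x40008000 is below 0x40210000 + 16k), so we fall
 * through and compare r4 + r9 = 0x40408000 against pc; if the zImage
 * is currently executing above that address, decompressing in place
 * is still safe and we branch to wont_overwrite.
 */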
		add	r10, r10, #16384
		cmp	r4, r10
		bhs	wont_overwrite
		add	r10, r4, r9
   ARM(		cmp	r10, pc		)
 THUMB(		mov	lr, pc		)
 THUMB(		cmp	r10, lr		)
		bls	wont_overwrite

/*
 * Relocate ourselves past the end of the decompressed kernel.
 *   r6  = _edata
 *   r10 = end of the decompressed kernel
 * Because we always copy ahead, we need to do it from the end and go
 * backward in case the source and destination overlap.
 */
		/*
		 * Bump to the next 256-byte boundary with the size of
		 * the relocation code added. This avoids overwriting
		 * ourselves when the offset is small.
		 */
		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
		bic	r10, r10, #255
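
		/*
		 * Illustrative arithmetic (not from the original source):
		 * the add/bic pair rounds r10 up to a 256-byte boundary
		 * at least reloc_code_end - restart bytes further on.
		 * E.g. with r10 = 0x40210010 and a 0x180-byte relocator:
		 *
		 *	r10 += (0x180 + 256) & ~255;	// += 0x200
		 *	r10 &= ~255;			// -> 0x40210200
		 *
		 * leaving a gap large enough that the copy below cannot
		 * clobber the code doing the copying.
		 */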

		/* Get start of code we want to copy and align it down. */
		adr	r5, restart
		bic	r5, r5, #31

		sub	r9, r6, r5		@ size to copy
		add	r9, r9, #31		@ rounded up to a multiple
		bic	r9, r9, #31		@ ... of 32 bytes
		add	r6, r9, r5
		add	r9, r9, r10

1:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
		cmp	r6, r5
		stmdb	r9!, {r0 - r3, r10 - r12, lr}
		bhi	1b
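
		/*
		 * Illustrative C-style sketch (not from the original
		 * source) of the backward copy above, which moves 32
		 * bytes per iteration, ending at r9:
		 *
		 *	while (src > src_start) {
		 *		src -= 8; dst -= 8;
		 *		memcpy(dst, src, 8 * sizeof(u32));
		 *	}
		 *
		 * Copying from the top down is what makes the move safe
		 * when the source and destination ranges overlap.
		 */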

		/* Preserve offset to relocated code. */
		sub	r6, r9, r6

#ifndef CONFIG_ZBOOT_ROM
		/* cache_clean_flush may use the stack, so relocate it */
		add	sp, sp, r6
#endif

		bl	cache_clean_flush

		adr	r0, BSYM(restart)
		add	r0, r0, r6
		mov	pc, r0

wont_overwrite:
/*
 * If delta is zero, we are running at the address we were linked at.
 *   r0  = delta
 *   r2  = BSS start
 *   r3  = BSS end
 *   r4  = kernel execution address
 *   r7  = architecture ID
 *   r8  = atags pointer
 *   r11 = GOT start
 *   r12 = GOT end
 *   sp  = stack pointer
 */
		teq	r0, #0
		beq	not_relocated
		add	r11, r11, r0
		add	r12, r12, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (CONFIG_ZBOOT_ROM = n),
		 * we need to fix up pointers into the BSS region.
		 * Note that the stack pointer has already been fixed up.
		 */
		add	r2, r2, r0
		add	r3, r3, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r11], #4		@ C references.
		cmp	r11, r12
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r11], #4		@ C references.
		cmp	r11, r12
		blo	1b
#endif
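
		/*
		 * Illustrative C-style sketch (not from the original
		 * source) of the two GOT fixup loops above:
		 *
		 *	for (p = got_start; p < got_end; p++)
		 *		if (fully_pic || *p < bss_start || *p > bss_end)
		 *			*p += delta;
		 *
		 * In the ZBOOT_ROM case the cmp/cmphs pair skips entries
		 * pointing into the already-relocated BSS.
		 */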

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b
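
		/*
		 * Illustrative note (not from the original source): the
		 * loop above is unrolled four times, zeroing 16 bytes per
		 * iteration and testing the bound only once per pass,
		 * roughly:
		 *
		 *	for (p = bss_start; p < bss_end; )
		 *		{ *p++ = 0; *p++ = 0; *p++ = 0; *p++ = 0; }
		 *
		 * It may therefore write up to 12 bytes past the end of
		 * BSS; the surrounding layout is assumed to tolerate this.
		 */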

/*
 * The C runtime environment should now be set up sufficiently.
 * Set up some pointers, and start decompressing.
 *   r4  = kernel execution address
 *   r7  = architecture ID
 *   r8  = atags pointer
 */
		mov	r0, r4
		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max
		mov	r3, r7
		bl	decompress_kernel
		bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
 ARM(		mov	pc, r4	)		@ call kernel
 THUMB(		bx	r4	)		@ entry point is always ARM

		.align	2
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_edata			@ r6
		.word	input_data_end - 4	@ r10 (inflated size location)
		.word	_got_start		@ r11
		.word	_got_end		@ ip
		.word	.L_user_stack_end	@ sp
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =0x10000100		@ params_phys for RPC
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r7 = architecture number
 *  r8 = atags pointer
 * On exit,
 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

/*
 * Initialize the highest-priority protection region, PR7, to cover
 * the whole 32-bit address space, cacheable and bufferable.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr

__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		/*
		 * ?? ARMv3 MMU does not allow reading the control register,
		 * does this really work on ARMv3 MPU?
		 */
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
		/* ?? this overwrites the value constructed above? */
		mov	r0, #0
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		/* ?? invalidate for the second time? */
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r9			@ if virt >= start of RAM
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
		orrhs	r1, r1, #0x08		@ set cacheable
#else
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
#endif
		cmp	r1, r10			@ if virt >= end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
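
/*
 * Illustrative C-style sketch (not from the original source) of the
 * loop above: it fills all 4096 level-1 entries with 1MB section
 * descriptors mapping each address to itself, marking only the
 * presumed RAM window cacheable/bufferable:
 *
 *	for (i = 0; i < 4096; i++) {
 *		u32 desc = (i << 20) | (3 << 10) | 0x12;  // section, AP=11
 *		if (addr_in_ram(i << 20))
 *			desc |= 0x0c;                     // C + B bits
 *		pgdir[i] = desc;
 *	}
 *
 * addr_in_ram() stands in for the start/end comparisons against r9
 * and r10 above.
 */
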
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc
		mov	r2, r2, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr
ENDPROC(__setup_mmu)

__arm926ejs_mmu_cache_on:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
		mov	r0, #4			@ put dcache in WT mode
		mcr	p15, 7, r0, c15, c0, 0
#endif

__armv4_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
#ifdef CONFIG_CPU_ENDIAN_BE8
		orr	r0, r0, #1 << 25	@ big-endian page tables
#endif
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mov	pc, r12

__armv7_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
		tst	r11, #0xf		@ VMSA
		blne	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		tst	r11, #0xf		@ VMSA
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer
#ifdef CONFIG_MMU
#ifdef CONFIG_CPU_ENDIAN_BE8
		orr	r0, r0, #1 << 25	@ big-endian page tables
#endif
		orrne	r0, r0, #1		@ MMU enabled
		movne	r1, #-1
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
#endif
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

__fa526_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mov	pc, r12

__arm6_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_mmu_cache_on:
#ifndef CONFIG_THUMB2_KERNEL
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
#endif
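
		/*
		 * Illustrative note (not from the original source): the
		 * "sub pc, lr, r0, lsr #32" above is an idiom, not real
		 * arithmetic.  "r0, lsr #32" evaluates to 0, so this is
		 * effectively "mov pc, lr"; but phrasing the return as a
		 * data-processing instruction that depends on r0 forces
		 * the read-back of the control register to complete
		 * before the branch, serialising the pipeline on
		 * pre-barrier architectures.
		 */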

#define PROC_ENTRY_SIZE (4*5)

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r9  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r9, c0, c0	@ get processor ID
#else
		ldr	r9, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r9		@ (real ^ match)
		tst	r1, r2			@       & mask
 ARM(		addeq	pc, r12, r3		) @ call cache function
 THUMB(		addeq	r12, r3			)
 THUMB(		moveq	pc, r12			) @ call cache function
		add	r12, r12, #PROC_ENTRY_SIZE
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
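
/*
 * Illustrative C-style sketch (not from the original source) of the
 * call_cache_fn dispatch above.  Each PROC_ENTRY_SIZE (20-byte) entry
 * holds two words followed by three branch instructions, and r3
 * selects which of the three to jump to (8 = on, 12 = off, 16 = flush):
 *
 *	struct proc_entry { u32 match, mask; u32 insn[3]; };
 *
 *	for (e = proc_types; ; e++)
 *		if (((cpu_id ^ e->match) & e->mask) == 0)
 *			goto *((char *)e + offset);	// execute the insn
 *
 * The final all-zero entry matches any ID, so the walk always
 * terminates.
 */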
		.align	2
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		W(b)	__arm6_mmu_cache_off	@ works, but slow
		W(b)	__arm6_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)
@		b	__arm6_mmu_cache_on		@ untested
@		b	__arm6_mmu_cache_off
@		b	__armv3_mmu_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		W(b)	__arm7_mmu_cache_off
		W(b)	__arm7_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		W(b)	__armv3_mpu_cache_on
		W(b)	__armv3_mpu_cache_off
		W(b)	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		W(b)	__armv4_mpu_cache_on
		W(b)	__armv4_mpu_cache_off
		W(b)	__armv4_mpu_cache_flush

		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
		.word	0xff0ffff0
		W(b)	__arm926ejs_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56056900
		.word	0xffffff00		@ PXA9xx
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56158000		@ PXA168
		.word	0xfffff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x56050000		@ Feroceon
		.word	0xff0f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

#ifdef CONFIG_CPU_FEROCEON_OLD_ID
		/* this conflicts with the standard ARMv5TE entry */
		.long	0x41009260		@ Old Feroceon
		.long	0xff00fff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush
#endif

		.word	0x66015261		@ FA526
		.word	0xff01fff1
		W(b)	__fa526_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__fa526_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x000ff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.size	proc_types, . - proc_types

		/*
		 * If you get a "non-constant expression in ".if" statement"
		 * error from the assembler on this line, check that you have
		 * not accidentally written a "b" instruction where you should
		 * have written W(b).
		 */
		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
		.error "The size of one or more proc_types entries is wrong."
		.endif

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On exit,
 *  r0, r1, r2, r3, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr

__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
#endif
		mov	pc, lr

__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
#ifdef CONFIG_MMU
		bic	r0, r0, #0x000d
#else
		bic	r0, r0, #0x000c
#endif
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r12, lr
		bl	__armv7_mmu_cache_flush
		mov	r0, #0
#ifdef CONFIG_MMU
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
#endif
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

__arm6_mmu_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_mmu_cache_off

__arm7_mmu_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_mmu_cache_off

__armv3_mmu_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On exit,
 *  r1, r2, r3, r9, r10, r11, r12 corrupted
 * This routine must preserve:
 *  r4, r6, r7, r8
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv4_mpu_cache_flush:
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr

__fa526_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv6_mmu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv7_mmu_cache_flush:
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		mcr	p15, 0, r10, c7, c10, 5	@ DMB
		stmfd	sp!, {r0-r7, r9-r11}
		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
		ands	r3, r0, #0x7000000	@ extract loc from clidr
		mov	r3, r3, lsr #23		@ left align loc bit field
		beq	finished		@ if loc is 0, then no need to clean
		mov	r10, #0			@ start clean at cache level 0
loop1:
		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
		and	r1, r1, #7		@ mask of the bits for current cache only
		cmp	r1, #2			@ see what cache we have at this level
		blt	skip			@ skip if no cache, or just i-cache
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr&csidr
		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
		and	r2, r1, #7		@ extract the length of the cache lines
		add	r2, r2, #4		@ add 4 (line length offset)
		ldr	r4, =0x3ff
		ands	r4, r4, r1, lsr #3	@ find maximum way number (ways - 1)
		clz	r5, r4			@ find bit position of way size increment
		ldr	r7, =0x7fff
		ands	r7, r7, r1, lsr #13	@ extract maximum index number (sets - 1)
loop2:
		mov	r9, r4			@ create working copy of max way size
loop3:
 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
 THUMB(		lsl	r6, r9, r5		)
 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
 THUMB(		lsl	r6, r7, r2		)
 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
		subs	r9, r9, #1		@ decrement the way
		bge	loop3
		subs	r7, r7, #1		@ decrement the index
		bge	loop2
skip:
		add	r10, r10, #2		@ increment cache number
		cmp	r3, r10
		bgt	loop1
finished:
		ldmfd	sp!, {r0-r7, r9-r11}
		mov	r10, #0			@ switch back to cache level 0
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr
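
		/*
		 * Illustrative C-style sketch (not from the original
		 * source) of the set/way walk above, following the ARMv7
		 * cache maintenance recipe: for each data/unified level
		 * below LoC, clean+invalidate every set and way.
		 *
		 *	for (level = 0; level < loc; level++) {
		 *		if (cache_type(clidr, level) < 2)
		 *			continue;	// none, or I-only
		 *		select_level(level);	// CSSELR + ISB
		 *		csidr = read_ccsidr();
		 *		line_shift = (csidr & 7) + 4;
		 *		ways = ((csidr >> 3) & 0x3ff) + 1;
		 *		sets = ((csidr >> 13) & 0x7fff) + 1;
		 *		way_shift = clz(ways - 1);
		 *		for (w = 0; w < ways; w++)
		 *			for (s = 0; s < sets; s++)
		 *				dccisw((w << way_shift) |
		 *				       (s << line_shift) |
		 *				       (level << 1));
		 *	}
		 */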

__armv5tej_mmu_cache_flush:
1:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv4_mmu_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr	r3, [r1]		) @ s/w flush D cache
 THUMB(		add	r1, r1, r11		)
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
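
		/*
		 * Illustrative note (not from the original source): on
		 * ARMv4 parts with no clean-by-index operation, the D
		 * cache is flushed by reading a dummy buffer large enough
		 * to evict every line, roughly:
		 *
		 *	for (p = base; p < base + 2 * dcache_size; p += line)
		 *		(void)*(volatile u32 *)p;  // displace dirty lines
		 *
		 * Reading twice the cache size guarantees eviction
		 * regardless of the replacement policy.
		 */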

__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.align	2
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

@ phex corrupts {r0, r1, r2, r3}
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b
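
@ Illustrative C-style sketch (not from the original source) of phex:
@ it fills phexbuf from the least significant nibble backwards, then
@ falls through to puts:
@
@	buf[len] = '\0';
@	while (len--) {
@		int nib = val & 15;
@		buf[len] = nib < 10 ? '0' + nib : 'A' + nib - 10;
@		val >>= 4;
@	}
@	puts(buf);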

@ puts corrupts {r0, r1, r2, r3}
puts:		loadsp	r3, r1
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr

@ putc corrupts {r0, r1, r2, r3}
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3, r1
		b	2b

@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

		.ltorg
reloc_code_end:

		.align
		.section ".stack", "aw", %nobits
.L_user_stack:	.space	4096
.L_user_stack_end: