/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
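/*
 * The macros below provide the minimal output interface used by the
 * putc, puts and phex routines further down: loadsp leaves the debug
 * UART (or debug comms channel) base in \rb, and writeb sends the
 * character \ch to the port addressed by \rb.
 */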
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)

#ifdef CONFIG_CPU_V6
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#elif defined(CONFIG_CPU_V7)
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
wait:		mrc	p14, 0, pc, c0, c1, 0
		bcs	wait
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#elif defined(CONFIG_CPU_XSCALE)
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c8, c0, 0
		.endm
#else
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c1, c0, 0
		.endm
#endif

#else

#include <mach/debug-macro.S>

		.macro	writeb,	ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb, tmp
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro loadsp, rb, tmp
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp,	rb, tmp
		addruart \rb, \tmp
		.endm
#endif
#endif
#endif

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
#ifdef CONFIG_CPU_CP15
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
#endif
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r9, 8		/* decompressed kernel end  */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r5, r6, r11, ip}
		ldr	sp, [r0, #28]
#ifdef CONFIG_AUTO_ZRELADDR
		@ determine final kernel image address
		and	r4, pc, #0xf8000000
		add	r4, r4, #TEXT_OFFSET
#else
		ldr	r4, =zreladdr
#endif
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address (_start)
		 *   r6 - size of decompressed image
		 *   r11 - GOT start
		 *   ip - GOT end
		 */
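		/*
		 * r0 holds the delta between where we are executing and
		 * where we were linked (for example 0x20000000 if we were
		 * loaded 512MB above our link address), so each link-time
		 * pointer is fixed up with a simple add.
		 */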
		add	r5, r5, r0
		add	r11, r11, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM = n),
		 * we need to fix up pointers into the BSS region.
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r11], #4		@ C references.
		cmp	r11, ip
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r11], #4		@ C references.
		cmp	r11, ip
		blo	1b
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * The C runtime environment should now be set up
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r6 = size of decompressed image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
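/*
 * If either test fails we would trample on ourselves, so we instead
 * decompress above the malloc area and let the relocation code copied
 * out after the decompressed image (see reloc_start below) move the
 * kernel down to r4 afterwards.
 */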
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, r6
		cmp	r0, r5
		bls	wont_overwrite

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127 + 128	@ alignment + stack
		bic	r0, r0, #127		@ align the kernel length
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r12,r14 = corrupted
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r9 - r12, r14}	@ copy relocation code
		stmia	r1!, {r9 - r12, r14}
		ldmia	r2!, {r9 - r12, r14}
		stmia	r1!, {r9 - r12, r14}
		cmp	r2, r3
		blo	1b
		mov	sp, r1
		add	sp, sp, #128		@ relocate the stack

		bl	cache_clean_flush
 ARM(		add	pc, r5, r0		) @ call relocation code
 THUMB(		add	r12, r5, r0		)
 THUMB(		mov	pc, r12			) @ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.align	2
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_start			@ r5
		.word	_image_size		@ r6
		.word	_got_start		@ r11
		.word	_got_end		@ ip
		.word	user_stack_end		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0
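/*
 * LC0 deliberately stores its own link-time address as the first word:
 * "adr r0, LC0" above yields its run-time address, so subtracting the
 * two gives the relocation delta applied to the rest of the table.
 */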

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =0x10000100		@ params_phys for RPC
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r7 = architecture number
 *  r8 = atags pointer
 * On exit,
 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

/*
 * Initialize the highest priority protection region, PR7,
 * to cover the whole 32-bit address space, cacheable and bufferable.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr

__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		/*
		 * ?? ARMv3 MMU does not allow reading the control register,
		 * does this really work on ARMv3 MPU?
		 */
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
		/* ?? this overwrites the value constructed above? */
		mov	r0, #0
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		/* ?? invalidate for the second time? */
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
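/*
 * Each entry below is a 1MB section descriptor: 0x12 gives a section
 * mapping (with the legacy "should be one" bit 4 set), 3 << 10 sets AP
 * to read/write, and 0x0c (the C and B bits) marks a section cacheable
 * and bufferable; C and B are only applied to the 256MB assumed to be
 * RAM.
 */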
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r9			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r10			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for a compressed kernel of up to
 * 1 MB.  If the execution is in RAM then we would only be duplicating
 * the above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr
ENDPROC(__setup_mmu)

__armv4_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
#ifdef CONFIG_CPU_ENDIAN_BE8
		orr	r0, r0, #1 << 25	@ big-endian page tables
#endif
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mov	pc, r12

__armv7_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
		tst	r11, #0xf		@ VMSA
		blne	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		tst	r11, #0xf		@ VMSA
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer
#ifdef CONFIG_MMU
#ifdef CONFIG_CPU_ENDIAN_BE8
		orr	r0, r0, #1 << 25	@ big-endian page tables
#endif
		orrne	r0, r0, #1		@ MMU enabled
		movne	r1, #-1
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
#endif
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

__fa526_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mov	pc, r12

__arm6_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_mmu_cache_on:
#ifndef CONFIG_THUMB2_KERNEL
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
#endif

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r12,r14 = corrupted
 */
		.align	5
reloc_start:	add	r9, r5, r0
		sub	r9, r9, #128		@ do not copy the stack
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r10 - r12, r14}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r10 - r12, r14}
		.endr

		cmp	r5, r9
		blo	1b
		mov	sp, r1
		add	sp, sp, #128		@ relocate the stack
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r9  = corrupted
 *  r12 = corrupted
 */
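/*
 * r3 is the byte offset of the wanted method within a proc_types
 * entry: each entry is five words (ID, mask, on, off, flush), so
 * cache_on passes 8, cache_off passes 12 and cache_clean_flush 16.
 */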

call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r9, c0, c0	@ get processor ID
#else
		ldr	r9, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r9		@ (real ^ match)
		tst	r1, r2			@       & mask
 ARM(		addeq	pc, r12, r3		) @ call cache function
 THUMB(		addeq	r12, r3			)
 THUMB(		moveq	pc, r12			) @ call cache function
		add	r12, r12, #4*5
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
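/*
 * For example, the sa1110 entry below matches any ID of the form
 * 0x6901b11x, since xor-ing with 0x6901b110 and masking with
 * 0xfffffff0 leaves zero; the later architecture-ID entries use a
 * mask of 0x000f0000 and so compare only the architecture nibble.
 */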
		.align	2
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		W(b)	__arm6_mmu_cache_off	@ works, but slow
		W(b)	__arm6_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)
@		b	__arm6_mmu_cache_on		@ untested
@		b	__arm6_mmu_cache_off
@		b	__armv3_mmu_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		W(b)	__arm7_mmu_cache_off
		W(b)	__arm7_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		W(b)	__armv3_mpu_cache_on
		W(b)	__armv3_mpu_cache_off
		W(b)	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		W(b)	__armv4_mpu_cache_on
		W(b)	__armv4_mpu_cache_off
		W(b)	__armv4_mpu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56056900
		.word	0xffffff00		@ PXA9xx
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56158000		@ PXA168
		.word	0xfffff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x56050000		@ Feroceon
		.word	0xff0f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

#ifdef CONFIG_CPU_FEROCEON_OLD_ID
		/* this conflicts with the standard ARMv5TE entry */
		.long	0x41009260		@ Old Feroceon
		.long	0xff00fff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush
#endif

		.word	0x66015261		@ FA526
		.word	0xff01fff1
		W(b)	__fa526_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__fa526_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x000ff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x560f5810		@ Marvell PJ4 ARMv6
		.word	0xff0ffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On exit,
 *  r0, r1, r2, r3, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr

__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
#endif
		mov	pc, lr

__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
#ifdef CONFIG_MMU
		bic	r0, r0, #0x000d
#else
		bic	r0, r0, #0x000c
#endif
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r12, lr
		bl	__armv7_mmu_cache_flush
		mov	r0, #0
#ifdef CONFIG_MMU
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
#endif
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

__arm6_mmu_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_mmu_cache_off

__arm7_mmu_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_mmu_cache_off

__armv3_mmu_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On exit,
 *  r1, r2, r3, r9, r10, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv4_mpu_cache_flush:
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr

__fa526_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv6_mmu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv7_mmu_cache_flush:
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
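/*
 * Clean the whole hierarchy described by CLIDR by set/way, level by
 * level, much like the example sequence in the ARM ARM: for each
 * level the way and set indices are packed into r11 together with the
 * level number and passed to the clean & invalidate by set/way op.
 */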
hierarchical:
		mcr	p15, 0, r10, c7, c10, 5	@ DMB
		stmfd	sp!, {r0-r7, r9-r11}
		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
		ands	r3, r0, #0x7000000	@ extract loc from clidr
		mov	r3, r3, lsr #23		@ left align loc bit field
		beq	finished		@ if loc is 0, then no need to clean
		mov	r10, #0			@ start clean at cache level 0
loop1:
		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
		and	r1, r1, #7		@ mask off the bits for the current cache only
		cmp	r1, #2			@ see what cache we have at this level
		blt	skip			@ skip if no cache, or just i-cache
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr & csidr
		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
		and	r2, r1, #7		@ extract the length of the cache lines
		add	r2, r2, #4		@ add 4 (line length offset)
		ldr	r4, =0x3ff
		ands	r4, r4, r1, lsr #3	@ find maximum way number (ways - 1)
		clz	r5, r4			@ find bit position of way size increment
		ldr	r7, =0x7fff
		ands	r7, r7, r1, lsr #13	@ extract maximum set (index) number
loop2:
		mov	r9, r4			@ create working copy of max way size
loop3:
 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
 THUMB(		lsl	r6, r9, r5		)
 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
 THUMB(		lsl	r6, r7, r2		)
 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
		subs	r9, r9, #1		@ decrement the way
		bge	loop3
		subs	r7, r7, #1		@ decrement the index
		bge	loop2
skip:
		add	r10, r10, #2		@ increment cache number
		cmp	r3, r10
		bgt	loop1
finished:
		ldmfd	sp!, {r0-r7, r9-r11}
		mov	r10, #0			@ switch back to cache level 0
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr

__armv5tej_mmu_cache_flush:
1:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr

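/*
 * Writeback D-cache flush for ARMv4: read the cache type register to
 * work out the D-cache size and line length (falling back to 32K and
 * 32-byte lines if it is not implemented), then read a region roughly
 * twice that size so every dirty line is displaced, and finally flush
 * the I and D caches and drain the write buffer.
 */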
__armv4_mmu_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
 THUMB(		add     r1, r1, r11		)
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
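/*
 * phex prints r0 as an r1-digit hex number, puts prints the
 * NUL-terminated string at r0, putc prints the single character in
 * r0, and memdump dumps the 64 words (256 bytes) starting at r0.
 * All of them write out through the loadsp/writeb macros above.
 */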
#ifdef DEBUG
		.align	2
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

@ phex corrupts {r0, r1, r2, r3}
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

@ puts corrupts {r0, r1, r2, r3}
puts:		loadsp	r3, r1
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
@ putc corrupts {r0, r1, r2, r3}
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3, r1
		b	2b

@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

		.ltorg
reloc_end:

		.align
		.section ".stack", "w"
user_stack:	.space	4096
user_stack_end: