/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/config.h>
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c1, 0
		.endm
#else

#include <asm/arch/debug-macro.S>

		.macro	writeb,	ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_IOP331)
		.macro	loadsp, rb
		mov	\rb, #0xff000000
		orr	\rb, \rb, #0x00ff0000
		orr	\rb, \rb, #0x0000f700	@ location of the UART
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro	loadsp, rb
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C2410_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp,	rb
		addruart \rb
		.endm
#endif
#endif
#endif
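/*
 * Whichever variant is selected above, the contract is the same:
 *   loadsp <rb>       - load the debug UART base address into <rb>
 *   writeb <ch>, <rb> - emit character <ch> on the UART at <rb>
 * (with ICEDCC, writeb goes via the debug comms channel and
 * loadsp is a no-op).
 */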

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r9, 8		/* decompressed kernel end  */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer
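		/*
		 * The boot protocol hands us r0 = 0, r1 = architecture
		 * (machine) number and r2 = atags pointer; r1 and r2 are
		 * stashed in r7/r8 above before anything can clobber them.
		 */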

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
		swi	0x123456		@ angel_SWI_ARM
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.
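
		/*
		 * Example (addresses illustrative only): if the image was
		 * linked at 0x00008000 but is running at 0x10008000, r0 now
		 * holds the delta 0x10000000, which must be added to every
		 * absolute pointer fixed up below.
		 */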

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address
		 *   r6 - GOT start
		 *   ip - GOT end
		 */
		add	r5, r5, r0
		add	r6, r6, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (CONFIG_ZBOOT_ROM = n),
		 * we need to fix up pointers into the BSS region.
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#endif
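
		/*
		 * Each GOT slot holds the link-time address of a C object;
		 * continuing the example above, a slot containing
		 * 0x00009000 would be rewritten to 0x10009000.
		 */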

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * The C runtime environment should now be set up
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
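/*
 * Worked example (illustrative values): with r4 = 0x00008000,
 * r5 = 0x00208000 and r2 = 0x00218000, neither test passes
 * (0x00008000 < 0x00218000, and 0x00008000 + 4MB > 0x00208000),
 * so we must decompress elsewhere and relocate afterwards.
 */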
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		cmp	r0, r5
		bls	wont_overwrite

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127
		bic	r0, r0, #127		@ align the kernel length
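		/*
		 * (The length is rounded up to a multiple of 128 bytes
		 * because the relocation loop at reloc_start below moves
		 * 128 bytes per iteration.)
		 */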
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r14 = corrupted
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r9 - r14}		@ copy relocation code
		stmia	r1!, {r9 - r14}
		ldmia	r2!, {r9 - r14}
		stmia	r1!, {r9 - r14}
		cmp	r2, r3
		blo	1b

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code
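
		/*
		 * That jump lands in the copy of the relocation code just
		 * placed at the end of the decompressed image (r5 + r0):
		 * running from there is safe while reloc_start copies the
		 * kernel down to its execution address in r4.
		 */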

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	zreladdr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =params_phys
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If anything else is
 * using that memory, we will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = atags pointer
 *  r9 = run-time address of "start"  (???)
 * On exit,
 *  r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn
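
/*
 * r3 is the byte offset of the method to call within a proc_types
 * entry: 8 = 'cache on', 12 = 'cache off', 16 = 'cache flush'
 * (each entry is two data words followed by three instructions -
 * see call_cache_fn and proc_types below).
 */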

/*
 * Initialize the highest priority protection region, PR7,
 * to cover the whole 32-bit address space, cacheable and bufferable.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr

__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
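		/*
		 * ?? the following mov discards the control register value
		 * just constructed - writing 0 here looks suspicious.
		 */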
		mov	r0, #0
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
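		/*
		 * r1 is now a level-1 section descriptor: bits [1:0] = 10
		 * (section), bit 4 set, AP = 11 (read/write); the C and B
		 * bits ([3:2]) are toggled per-section in the loop below.
		 */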
		add	r2, r3, #16384
1:		cmp	r1, r9			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r10			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If we are ever running from Flash, we surely want the cache to be
 * enabled for our execution instance as well.  We map 2MB of it so
 * there is no map overlap problem for up to a 1MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr

__armv4_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
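		/*
		 * 0x1000 enables the I-cache, 0x4000 selects round-robin
		 * replacement; 0x0030 covers the 32-bit program/data space
		 * bits (believed should-be-one on these cores).  The MMU,
		 * D-cache and write buffer bits are added in
		 * __common_mmu_cache_on.
		 */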
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mov	pc, r12

__arm6_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_mmu_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mov	pc, lr

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r14 = corrupted
 */
		.align	5
reloc_start:	add	r9, r5, r0
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r10 - r14}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r10 - r14}
		.endr

		cmp	r5, r9
		blo	1b
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
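/*
 * For example, an SA1110 whose ID register reads (say) 0x6901b119
 * matches the 0x6901b110/0xfffffff0 entry below, since
 * (0x6901b119 ^ 0x6901b110) & 0xfffffff0 == 0.
 */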
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_mmu_cache_off	@ works, but slow
		b	__arm6_mmu_cache_off
		mov	pc, lr
@		b	__arm6_mmu_cache_on		@ untested
@		b	__arm6_mmu_cache_off
@		b	__armv3_mmu_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_mmu_cache_off
		b	__arm7_mmu_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		mov	pc, lr

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		b	__armv3_mpu_cache_on
		b	__armv3_mpu_cache_off
		b	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		b	__armv4_mpu_cache_on
		b	__armv4_mpu_cache_off
		b	__armv4_mpu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x0007f000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv6_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr

__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__armv4_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__arm6_mmu_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_mmu_cache_off

__arm7_mmu_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_mmu_cache_off

__armv3_mmu_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv4_mpu_cache_flush:
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0
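
		/*
		 * The c7, c14, 2 operand above encodes the segment in
		 * bits [7:5] and the entry index in bits [31:26], so the
		 * two nested loops touch every line of the 8-segment,
		 * 64-entry D-cache.
		 */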

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr


__armv6_mmu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv4_mmu_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r6			@ cache ID register present?
		beq	no_cache_id
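		/*
		 * Decode the cache type register's D-cache fields:
		 * size in bits [20:18] (r2 holds size * 2), M bit at
		 * bit 14 (adds half again), line length in bits [13:12].
		 */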
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		bic	r1, pc, #63		@ align to longest cache line
		add	r2, r1, r2
1:		ldr	r3, [r1], r11		@ s/w flush D cache
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
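						@ crude fixed delay so the
						@ character can drain (no
						@ UART status polling here)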
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
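
/*
 * putc shares puts' output path: it places the character in r2,
 * zeroes r0 and branches to label 2 above, where the r0 == 0 test
 * ends the loop after that single character (plus any CR expansion).
 */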
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b

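/*
 * memdump: dump 64 words (256 bytes) starting at the address in r0,
 * eight words per line, each line prefixed with its address.
 */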
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

reloc_end:

		.align
		.section ".stack", "w"
user_stack:	.space	4096