1/*
2 *  linux/arch/arm/boot/compressed/head.S
3 *
4 *  Copyright (C) 1996-2002 Russell King
5 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/linkage.h>
12
13/*
14 * Debugging stuff
15 *
16 * Note that these macros must not contain any code which is not
17 * 100% relocatable.  Any attempt to do so will result in a crash.
18 * Please select one of the following when turning on debugging.
19 */
20#ifdef DEBUG
21
22#if defined(CONFIG_DEBUG_ICEDCC)
23
24#ifdef CONFIG_CPU_V6
25		.macro	loadsp, rb, tmp
26		.endm
27		.macro	writeb, ch, rb
28		mcr	p14, 0, \ch, c0, c5, 0
29		.endm
30#elif defined(CONFIG_CPU_V7)
31		.macro	loadsp, rb, tmp
32		.endm
33		.macro	writeb, ch, rb
34wait:		mrc	p14, 0, pc, c0, c1, 0
35		bcs	wait
36		mcr	p14, 0, \ch, c0, c5, 0
37		.endm
38#elif defined(CONFIG_CPU_XSCALE)
39		.macro	loadsp, rb, tmp
40		.endm
41		.macro	writeb, ch, rb
42		mcr	p14, 0, \ch, c8, c0, 0
43		.endm
44#else
45		.macro	loadsp, rb, tmp
46		.endm
47		.macro	writeb, ch, rb
48		mcr	p14, 0, \ch, c1, c0, 0
49		.endm
50#endif
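/*
 * All of the ICEDCC writeb variants above send the character over the
 * CP14 debug comms channel; only the coprocessor register encoding
 * differs between CPU generations, and the v7 variant polls the DSCR
 * first so a pending character is not overwritten.
 */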
51
52#else
53
54#include <mach/debug-macro.S>
55
56		.macro	writeb,	ch, rb
57		senduart \ch, \rb
58		.endm
59
60#if defined(CONFIG_ARCH_SA1100)
61		.macro	loadsp, rb, tmp
62		mov	\rb, #0x80000000	@ physical base address
63#ifdef CONFIG_DEBUG_LL_SER3
64		add	\rb, \rb, #0x00050000	@ Ser3
65#else
66		add	\rb, \rb, #0x00010000	@ Ser1
67#endif
68		.endm
69#elif defined(CONFIG_ARCH_S3C2410)
70		.macro loadsp, rb, tmp
71		mov	\rb, #0x50000000
72		add	\rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT
73		.endm
74#else
75		.macro	loadsp,	rb, tmp
76		addruart \rb, \tmp
77		.endm
78#endif
79#endif
80#endif
81
82		.macro	kputc,val
83		mov	r0, \val
84		bl	putc
85		.endm
86
87		.macro	kphex,val,len
88		mov	r0, \val
89		mov	r1, #\len
90		bl	phex
91		.endm
92
93		.macro	debug_reloc_start
94#ifdef DEBUG
95		kputc	#'\n'
96		kphex	r6, 8		/* processor id */
97		kputc	#':'
98		kphex	r7, 8		/* architecture id */
99#ifdef CONFIG_CPU_CP15
100		kputc	#':'
101		mrc	p15, 0, r0, c1, c0
102		kphex	r0, 8		/* control reg */
103#endif
104		kputc	#'\n'
105		kphex	r5, 8		/* decompressed kernel start */
106		kputc	#'-'
107		kphex	r9, 8		/* decompressed kernel end  */
108		kputc	#'>'
109		kphex	r4, 8		/* kernel execution address */
110		kputc	#'\n'
111#endif
112		.endm
113
114		.macro	debug_reloc_end
115#ifdef DEBUG
116		kphex	r5, 8		/* end of kernel */
117		kputc	#'\n'
118		mov	r0, r4
119		bl	memdump		/* dump 256 bytes at start of kernel */
120#endif
121		.endm
122
123		.section ".start", #alloc, #execinstr
124/*
125 * sort out different calling conventions
126 */
127		.align
128start:
129		.type	start,#function
130		.rept	8
131		mov	r0, r0
132		.endr
133
134		b	1f
135		.word	0x016f2818		@ Magic numbers to help the loader
136		.word	start			@ absolute load/run zImage address
137		.word	_edata			@ zImage end address
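		/*
		 * Together with the eight NOPs above, these three words form
		 * the zImage header: a boot loader can use the magic number to
		 * recognise a zImage and the start/end words to determine its
		 * load address and size.
		 */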
1381:		mov	r7, r1			@ save architecture ID
139		mov	r8, r2			@ save atags pointer
140
141#ifndef __ARM_ARCH_2__
142		/*
143		 * Booting from Angel - need to enter SVC mode and disable
144		 * FIQs/IRQs (numeric definitions from angel arm.h source).
145		 * We only do this if we were in user mode on entry.
146		 */
147		mrs	r2, cpsr		@ get current mode
148		tst	r2, #3			@ not user?
149		bne	not_angel
150		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
151 ARM(		swi	0x123456	)	@ angel_SWI_ARM
152 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
153not_angel:
154		mrs	r2, cpsr		@ turn off interrupts to
155		orr	r2, r2, #0xc0		@ prevent angel from running
156		msr	cpsr_c, r2
157#else
158		teqp	pc, #0x0c000003		@ turn off interrupts
159#endif
160
161		/*
162		 * Note that some cache flushing and other stuff may
163		 * be needed here - is there an Angel SWI call for this?
164		 */
165
166		/*
167		 * some architecture specific code can be inserted
168		 * by the linker here, but it should preserve r7, r8, and r9.
169		 */
170
171		.text
172		adr	r0, LC0
173 ARM(		ldmia	r0, {r1, r2, r3, r5, r6, r11, ip, sp})
174 THUMB(		ldmia	r0, {r1, r2, r3, r5, r6, r11, ip}	)
175 THUMB(		ldr	sp, [r0, #32]				)
176#ifdef CONFIG_AUTO_ZRELADDR
177		@ determine final kernel image address
178		and	r4, pc, #0xf8000000
179		add	r4, r4, #TEXT_OFFSET
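		@ NB: this assumes the zImage was loaded into the same 128MB
		@ region of RAM that the kernel will run in: the pc is simply
		@ rounded down to a 128MB boundary and TEXT_OFFSET added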
180#else
181		ldr	r4, =CONFIG_ZRELADDR
182#endif
183		subs	r0, r0, r1		@ calculate the delta offset
184
185						@ if delta is zero, we are
186		beq	not_relocated		@ running at the address we
187						@ were linked at.
188
189		/*
190		 * We're running at a different address.  We need to fix
191		 * up various pointers:
192		 *   r5 - zImage base address (_start)
193		 *   r6 - size of decompressed image
194		 *   r11 - GOT start
195		 *   ip - GOT end
196		 */
197		add	r5, r5, r0
198		add	r11, r11, r0
199		add	ip, ip, r0
200
201#ifndef CONFIG_ZBOOT_ROM
202		/*
203		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM = n),
204		 * we need to fix up pointers into the BSS region.
205		 *   r2 - BSS start
206		 *   r3 - BSS end
207		 *   sp - stack pointer
208		 */
209		add	r2, r2, r0
210		add	r3, r3, r0
211		add	sp, sp, r0
212
213		/*
214		 * Relocate all entries in the GOT table.
215		 */
2161:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
217		add	r1, r1, r0		@ table.  This fixes up the
218		str	r1, [r11], #4		@ C references.
219		cmp	r11, ip
220		blo	1b
221#else
222
223		/*
224		 * Relocate entries in the GOT table.  We only relocate
225		 * the entries that are outside the (relocated) BSS region.
226		 */
2271:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
228		cmp	r1, r2			@ entry < bss_start ||
229		cmphs	r3, r1			@ _end < entry
230		addlo	r1, r1, r0		@ table.  This fixes up the
231		str	r1, [r11], #4		@ C references.
232		cmp	r11, ip
233		blo	1b
234#endif
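		/*
		 * The GOT holds the link-time absolute addresses that the
		 * compiled C code uses to reach its data; the fixups above
		 * adjust those entries so they remain valid for wherever the
		 * data actually resides now.
		 */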
235
236not_relocated:	mov	r0, #0
2371:		str	r0, [r2], #4		@ clear bss
238		str	r0, [r2], #4
239		str	r0, [r2], #4
240		str	r0, [r2], #4
241		cmp	r2, r3
242		blo	1b
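		@ note: BSS is cleared four words per iteration, so up to 12
		@ bytes beyond _end may also be written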
243
244		/*
245		 * The C runtime environment should now be set up
246		 * sufficiently.  Turn the cache on, set up some
247		 * pointers, and start decompressing.
248		 */
249		bl	cache_on
250
251		mov	r1, sp			@ malloc space above stack
252		add	r2, sp, #0x10000	@ 64k max
253
254/*
255 * Check to see if we will overwrite ourselves.
256 *   r4 = final kernel address
257 *   r5 = start of this image
258 *   r6 = size of decompressed image
259 *   r2 = end of malloc space (and therefore this image)
260 * We basically want:
261 *   r4 >= r2 -> OK
262 *   r4 + image length <= r5 -> OK
263 */
264		cmp	r4, r2
265		bhs	wont_overwrite
266		add	r0, r4, r6
267		cmp	r0, r5
268		bls	wont_overwrite
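		/*
		 * Neither condition holds, so the decompressed kernel would
		 * overlap this image: decompress just past the malloc space
		 * instead, and relocate the result into place afterwards.
		 */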
269
270		mov	r5, r2			@ decompress after malloc space
271		mov	r0, r5
272		mov	r3, r7
273		bl	decompress_kernel
274
275		add	r0, r0, #127 + 128	@ alignment + stack
276		bic	r0, r0, #127		@ align the kernel length
277/*
278 * r0     = decompressed kernel length
279 * r1-r3  = unused
280 * r4     = kernel execution address
281 * r5     = decompressed kernel start
282 * r7     = architecture ID
283 * r8     = atags pointer
284 * r9-r12,r14 = corrupted
285 */
286		add	r1, r5, r0		@ end of decompressed kernel
287		adr	r2, reloc_start
288		ldr	r3, LC1
289		add	r3, r2, r3
2901:		ldmia	r2!, {r9 - r12, r14}	@ copy relocation code
291		stmia	r1!, {r9 - r12, r14}
292		ldmia	r2!, {r9 - r12, r14}
293		stmia	r1!, {r9 - r12, r14}
294		cmp	r2, r3
295		blo	1b
296		mov	sp, r1
297		add	sp, sp, #128		@ relocate the stack
298
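		/*
		 * The relocation code (reloc_start..reloc_end) has just been
		 * copied to the end of the decompressed image, with a fresh
		 * stack above it.  Jumping to r5 + r0 below executes that
		 * copy, which then moves the kernel down to its execution
		 * address.
		 */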
299		bl	cache_clean_flush
300 ARM(		add	pc, r5, r0		) @ call relocation code
301 THUMB(		add	r12, r5, r0		)
302 THUMB(		mov	pc, r12			) @ call relocation code
303
304/*
305 * We're not in danger of overwriting ourselves.  Do this the simple way.
306 *
307 * r4     = kernel execution address
308 * r7     = architecture ID
309 */
310wont_overwrite:	mov	r0, r4
311		mov	r3, r7
312		bl	decompress_kernel
313		b	call_kernel
314
315		.align	2
316		.type	LC0, #object
317LC0:		.word	LC0			@ r1
318		.word	__bss_start		@ r2
319		.word	_end			@ r3
320		.word	_start			@ r5
321		.word	_image_size		@ r6
322		.word	_got_start		@ r11
323		.word	_got_end		@ ip
324		.word	user_stack_end		@ sp
325LC1:		.word	reloc_end - reloc_start
326		.size	LC0, . - LC0
327
328#ifdef CONFIG_ARCH_RPC
329		.globl	params
330params:		ldr	r0, =0x10000100		@ params_phys for RPC
331		mov	pc, lr
332		.ltorg
333		.align
334#endif
335
336/*
337 * Turn on the cache.  We need to setup some page tables so that we
338 * can have both the I and D caches on.
339 *
340 * We place the page tables 16k down from the kernel execution address,
341 * and we hope that nothing else is using it.  If something else is, we
342 * will go pop!
343 *
344 * On entry,
345 *  r4 = kernel execution address
346 *  r7 = architecture number
347 *  r8 = atags pointer
348 * On exit,
349 *  r0, r1, r2, r3, r9, r10, r12 corrupted
350 * This routine must preserve:
351 *  r4, r5, r6, r7, r8
352 */
353		.align	5
354cache_on:	mov	r3, #8			@ cache_on function
355		b	call_cache_fn
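		@ r3 is the byte offset of the method to invoke within each
		@ proc_types entry: 8 = 'cache on', 12 = 'cache off',
		@ 16 = 'cache flush' (see call_cache_fn below)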
356
357/*
358 * Initialize the highest priority protection region, PR7,
359 * to cover the whole 32-bit address space, cacheable and bufferable.
360 */
361__armv4_mpu_cache_on:
362		mov	r0, #0x3f		@ 4G, the whole
363		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
364		mcr 	p15, 0, r0, c6, c7, 1
365
366		mov	r0, #0x80		@ PR7
367		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
368		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
369		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
370
371		mov	r0, #0xc000
372		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
373		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
374
375		mov	r0, #0
376		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
377		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
378		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
379		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
380						@ ...I .... ..D. WC.M
381		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
382		orr	r0, r0, #0x1000		@ ...1 .... .... ....
383
384		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
385
386		mov	r0, #0
387		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
388		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
389		mov	pc, lr
390
391__armv3_mpu_cache_on:
392		mov	r0, #0x3f		@ 4G, the whole
393		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
394
395		mov	r0, #0x80		@ PR7
396		mcr	p15, 0, r0, c2, c0, 0	@ cache on
397		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
398
399		mov	r0, #0xc000
400		mcr	p15, 0, r0, c5, c0, 0	@ access permission
401
402		mov	r0, #0
403		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
404		/*
405		 * ?? ARMv3 MMU does not allow reading the control register,
406		 * does this really work on ARMv3 MPU?
407		 */
408		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
409						@ .... .... .... WC.M
410		orr	r0, r0, #0x000d		@ .... .... .... 11.1
411		/* ?? this overwrites the value constructed above? */
412		mov	r0, #0
413		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
414
415		/* ?? invalidate for the second time? */
416		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
417		mov	pc, lr
418
419__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
420		bic	r3, r3, #0xff		@ Align the pointer
421		bic	r3, r3, #0x3f00
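		@ r3 now points at a 16KB aligned block just below the kernel
		@ execution address; it is used as the level 1 translation
		@ table (4096 one-word section entries)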
422/*
423 * Initialise the page tables, turning on the cacheable and bufferable
424 * bits for the RAM area only.
425 */
426		mov	r0, r3
427		mov	r9, r0, lsr #18
428		mov	r9, r9, lsl #18		@ start of RAM
429		add	r10, r9, #0x10000000	@ a reasonable RAM size
430		mov	r1, #0x12
431		orr	r1, r1, #3 << 10
432		add	r2, r3, #16384
4331:		cmp	r1, r9			@ if virt > start of RAM
434		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
435		cmp	r1, r10			@ if virt > end of RAM
436		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
437		str	r1, [r0], #4		@ 1:1 mapping
438		add	r1, r1, #1048576
439		teq	r0, r2
440		bne	1b
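/*
 * The loop above identity-maps the whole 4GB address space with 1MB
 * section entries, marking only an assumed 256MB of RAM (starting at
 * the page table address rounded down to 256KB) as cacheable and
 * bufferable.
 */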
441/*
442 * If we are ever running from Flash, we also want the cache enabled
443 * for the region we are executing from.  We map 2MB of it so there is
444 * no overlap problem for compressed kernels up to 1MB in size.
445 * If we are executing from RAM, this merely duplicates the mapping above.
446 */
447		mov	r1, #0x1e
448		orr	r1, r1, #3 << 10
449		mov	r2, pc, lsr #20
450		orr	r1, r1, r2, lsl #20
451		add	r0, r3, r2, lsl #2
452		str	r1, [r0], #4
453		add	r1, r1, #1048576
454		str	r1, [r0]
455		mov	pc, lr
456ENDPROC(__setup_mmu)
457
458__armv4_mmu_cache_on:
459		mov	r12, lr
460#ifdef CONFIG_MMU
461		bl	__setup_mmu
462		mov	r0, #0
463		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
464		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
465		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
466		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
467		orr	r0, r0, #0x0030
468#ifdef CONFIG_CPU_ENDIAN_BE8
469		orr	r0, r0, #1 << 25	@ big-endian page tables
470#endif
471		bl	__common_mmu_cache_on
472		mov	r0, #0
473		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
474#endif
475		mov	pc, r12
476
477__armv7_mmu_cache_on:
478		mov	r12, lr
479#ifdef CONFIG_MMU
480		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
481		tst	r11, #0xf		@ VMSA
482		blne	__setup_mmu
483		mov	r0, #0
484		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
485		tst	r11, #0xf		@ VMSA
486		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
487#endif
488		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
489		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
490		orr	r0, r0, #0x003c		@ write buffer
491#ifdef CONFIG_MMU
492#ifdef CONFIG_CPU_ENDIAN_BE8
493		orr	r0, r0, #1 << 25	@ big-endian page tables
494#endif
495		orrne	r0, r0, #1		@ MMU enabled
496		movne	r1, #-1
497		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
498		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
499#endif
500		mcr	p15, 0, r0, c1, c0, 0	@ load control register
501		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
502		mov	r0, #0
503		mcr	p15, 0, r0, c7, c5, 4	@ ISB
504		mov	pc, r12
505
506__fa526_cache_on:
507		mov	r12, lr
508		bl	__setup_mmu
509		mov	r0, #0
510		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
511		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
512		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
513		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
514		orr	r0, r0, #0x1000		@ I-cache enable
515		bl	__common_mmu_cache_on
516		mov	r0, #0
517		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
518		mov	pc, r12
519
520__arm6_mmu_cache_on:
521		mov	r12, lr
522		bl	__setup_mmu
523		mov	r0, #0
524		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
525		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
526		mov	r0, #0x30
527		bl	__common_mmu_cache_on
528		mov	r0, #0
529		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
530		mov	pc, r12
531
532__common_mmu_cache_on:
533#ifndef CONFIG_THUMB2_KERNEL
534#ifndef DEBUG
535		orr	r0, r0, #0x000d		@ Write buffer, mmu
536#endif
537		mov	r1, #-1
538		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
539		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
540		b	1f
541		.align	5			@ cache line aligned
5421:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
543		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
544		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
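		@ ("r0, lsr #32" is 0, so this is a "mov pc, lr" written as an
		@ ALU operation on pc that uses the control register value read
		@ back above, which is what forces the pipeline flush)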
545#endif
546
547/*
548 * All code following this line is relocatable.  It is relocated by
549 * the above code to the end of the decompressed kernel image and
550 * executed there.  During this time, we have no stacks.
551 *
552 * r0     = decompressed kernel length
553 * r1-r3  = unused
554 * r4     = kernel execution address
555 * r5     = decompressed kernel start
556 * r7     = architecture ID
557 * r8     = atags pointer
558 * r9-r12,r14 = corrupted
559 */
560		.align	5
561reloc_start:	add	r9, r5, r0
562		sub	r9, r9, #128		@ do not copy the stack
563		debug_reloc_start
564		mov	r1, r4
5651:
566		.rept	4
567		ldmia	r5!, {r0, r2, r3, r10 - r12, r14}	@ relocate kernel
568		stmia	r1!, {r0, r2, r3, r10 - r12, r14}
569		.endr
570
571		cmp	r5, r9
572		blo	1b
573		mov	sp, r1
574		add	sp, sp, #128		@ relocate the stack
575		debug_reloc_end
576
577call_kernel:	bl	cache_clean_flush
578		bl	cache_off
579		mov	r0, #0			@ must be zero
580		mov	r1, r7			@ restore architecture number
581		mov	r2, r8			@ restore atags pointer
582		mov	pc, r4			@ call kernel
583
584/*
585 * Here follow the relocatable cache support functions for the
586 * various processors.  This is a generic hook for locating an
587 * entry and jumping to an instruction at the specified offset
588 * from the start of the block.  Please note this is all position
589 * independent code.
590 *
591 *  r1  = corrupted
592 *  r2  = corrupted
593 *  r3  = block offset
594 *  r9  = corrupted
595 *  r12 = corrupted
596 */
597
598call_cache_fn:	adr	r12, proc_types
599#ifdef CONFIG_CPU_CP15
600		mrc	p15, 0, r9, c0, c0	@ get processor ID
601#else
602		ldr	r9, =CONFIG_PROCESSOR_ID
603#endif
6041:		ldr	r1, [r12, #0]		@ get value
605		ldr	r2, [r12, #4]		@ get mask
606		eor	r1, r1, r9		@ (real ^ match)
607		tst	r1, r2			@       & mask
608 ARM(		addeq	pc, r12, r3		) @ call cache function
609 THUMB(		addeq	r12, r3			)
610 THUMB(		moveq	pc, r12			) @ call cache function
611		add	r12, r12, #4*5
612		b	1b
613
614/*
615 * Table for cache operations.  This is basically:
616 *   - CPU ID match
617 *   - CPU ID mask
618 *   - 'cache on' method instruction
619 *   - 'cache off' method instruction
620 *   - 'cache flush' method instruction
621 *
622 * We match an entry using: ((real_id ^ match) & mask) == 0
623 *
624 * Writethrough caches generally only need 'on' and 'off'
625 * methods.  Writeback caches _must_ have the flush method
626 * defined.
627 */
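/*
 * For example, a Cortex-A9 (MIDR 0x410fc090) hits none of the specific
 * entries and falls through to the final "new CPU Id" (ARMv7) entry,
 * since (0x410fc090 ^ 0x000f0000) & 0x000f0000 == 0.
 */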
628		.align	2
629		.type	proc_types,#object
630proc_types:
631		.word	0x41560600		@ ARM6/610
632		.word	0xffffffe0
633		W(b)	__arm6_mmu_cache_off	@ works, but slow
634		W(b)	__arm6_mmu_cache_off
635		mov	pc, lr
636 THUMB(		nop				)
637@		b	__arm6_mmu_cache_on		@ untested
638@		b	__arm6_mmu_cache_off
639@		b	__armv3_mmu_cache_flush
640
641		.word	0x00000000		@ old ARM ID
642		.word	0x0000f000
643		mov	pc, lr
644 THUMB(		nop				)
645		mov	pc, lr
646 THUMB(		nop				)
647		mov	pc, lr
648 THUMB(		nop				)
649
650		.word	0x41007000		@ ARM7/710
651		.word	0xfff8fe00
652		W(b)	__arm7_mmu_cache_off
653		W(b)	__arm7_mmu_cache_off
654		mov	pc, lr
655 THUMB(		nop				)
656
657		.word	0x41807200		@ ARM720T (writethrough)
658		.word	0xffffff00
659		W(b)	__armv4_mmu_cache_on
660		W(b)	__armv4_mmu_cache_off
661		mov	pc, lr
662 THUMB(		nop				)
663
664		.word	0x41007400		@ ARM74x
665		.word	0xff00ff00
666		W(b)	__armv3_mpu_cache_on
667		W(b)	__armv3_mpu_cache_off
668		W(b)	__armv3_mpu_cache_flush
669
670		.word	0x41009400		@ ARM94x
671		.word	0xff00ff00
672		W(b)	__armv4_mpu_cache_on
673		W(b)	__armv4_mpu_cache_off
674		W(b)	__armv4_mpu_cache_flush
675
676		.word	0x00007000		@ ARM7 IDs
677		.word	0x0000f000
678		mov	pc, lr
679 THUMB(		nop				)
680		mov	pc, lr
681 THUMB(		nop				)
682		mov	pc, lr
683 THUMB(		nop				)
684
685		@ Everything from here on will be the new ID system.
686
687		.word	0x4401a100		@ sa110 / sa1100
688		.word	0xffffffe0
689		W(b)	__armv4_mmu_cache_on
690		W(b)	__armv4_mmu_cache_off
691		W(b)	__armv4_mmu_cache_flush
692
693		.word	0x6901b110		@ sa1110
694		.word	0xfffffff0
695		W(b)	__armv4_mmu_cache_on
696		W(b)	__armv4_mmu_cache_off
697		W(b)	__armv4_mmu_cache_flush
698
699		.word	0x56056900
700		.word	0xffffff00		@ PXA9xx
701		W(b)	__armv4_mmu_cache_on
702		W(b)	__armv4_mmu_cache_off
703		W(b)	__armv4_mmu_cache_flush
704
705		.word	0x56158000		@ PXA168
706		.word	0xfffff000
707		W(b)	__armv4_mmu_cache_on
708		W(b)	__armv4_mmu_cache_off
709		W(b)	__armv5tej_mmu_cache_flush
710
711		.word	0x56050000		@ Feroceon
712		.word	0xff0f0000
713		W(b)	__armv4_mmu_cache_on
714		W(b)	__armv4_mmu_cache_off
715		W(b)	__armv5tej_mmu_cache_flush
716
717#ifdef CONFIG_CPU_FEROCEON_OLD_ID
718		/* this conflicts with the standard ARMv5TE entry */
719		.long	0x41009260		@ Old Feroceon
720		.long	0xff00fff0
721		b	__armv4_mmu_cache_on
722		b	__armv4_mmu_cache_off
723		b	__armv5tej_mmu_cache_flush
724#endif
725
726		.word	0x66015261		@ FA526
727		.word	0xff01fff1
728		W(b)	__fa526_cache_on
729		W(b)	__armv4_mmu_cache_off
730		W(b)	__fa526_cache_flush
731
732		@ These match on the architecture ID
733
734		.word	0x00020000		@ ARMv4T
735		.word	0x000f0000
736		W(b)	__armv4_mmu_cache_on
737		W(b)	__armv4_mmu_cache_off
738		W(b)	__armv4_mmu_cache_flush
739
740		.word	0x00050000		@ ARMv5TE
741		.word	0x000f0000
742		W(b)	__armv4_mmu_cache_on
743		W(b)	__armv4_mmu_cache_off
744		W(b)	__armv4_mmu_cache_flush
745
746		.word	0x00060000		@ ARMv5TEJ
747		.word	0x000f0000
748		W(b)	__armv4_mmu_cache_on
749		W(b)	__armv4_mmu_cache_off
750		W(b)	__armv5tej_mmu_cache_flush
751
752		.word	0x0007b000		@ ARMv6
753		.word	0x000ff000
754		W(b)	__armv4_mmu_cache_on
755		W(b)	__armv4_mmu_cache_off
756		W(b)	__armv6_mmu_cache_flush
757
758		.word	0x560f5810		@ Marvell PJ4 ARMv6
759		.word	0xff0ffff0
760		W(b)	__armv4_mmu_cache_on
761		W(b)	__armv4_mmu_cache_off
762		W(b)	__armv6_mmu_cache_flush
763
764		.word	0x000f0000		@ new CPU Id
765		.word	0x000f0000
766		W(b)	__armv7_mmu_cache_on
767		W(b)	__armv7_mmu_cache_off
768		W(b)	__armv7_mmu_cache_flush
769
770		.word	0			@ unrecognised type
771		.word	0
772		mov	pc, lr
773 THUMB(		nop				)
774		mov	pc, lr
775 THUMB(		nop				)
776		mov	pc, lr
777 THUMB(		nop				)
778
779		.size	proc_types, . - proc_types
780
781/*
782 * Turn off the Cache and MMU.  ARMv3 does not support
783 * reading the control register, but ARMv4 does.
784 *
785 * On exit,
786 *  r0, r1, r2, r3, r9, r12 corrupted
787 * This routine must preserve:
788 *  r4, r6, r7
789 */
790		.align	5
791cache_off:	mov	r3, #12			@ cache_off function
792		b	call_cache_fn
793
794__armv4_mpu_cache_off:
795		mrc	p15, 0, r0, c1, c0
796		bic	r0, r0, #0x000d
797		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
798		mov	r0, #0
799		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
800		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
801		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
802		mov	pc, lr
803
804__armv3_mpu_cache_off:
805		mrc	p15, 0, r0, c1, c0
806		bic	r0, r0, #0x000d
807		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
808		mov	r0, #0
809		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
810		mov	pc, lr
811
812__armv4_mmu_cache_off:
813#ifdef CONFIG_MMU
814		mrc	p15, 0, r0, c1, c0
815		bic	r0, r0, #0x000d
816		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
817		mov	r0, #0
818		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
819		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
820#endif
821		mov	pc, lr
822
823__armv7_mmu_cache_off:
824		mrc	p15, 0, r0, c1, c0
825#ifdef CONFIG_MMU
826		bic	r0, r0, #0x000d
827#else
828		bic	r0, r0, #0x000c
829#endif
830		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
831		mov	r12, lr
832		bl	__armv7_mmu_cache_flush
833		mov	r0, #0
834#ifdef CONFIG_MMU
835		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
836#endif
837		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
838		mcr	p15, 0, r0, c7, c10, 4	@ DSB
839		mcr	p15, 0, r0, c7, c5, 4	@ ISB
840		mov	pc, r12
841
842__arm6_mmu_cache_off:
843		mov	r0, #0x00000030		@ ARM6 control reg.
844		b	__armv3_mmu_cache_off
845
846__arm7_mmu_cache_off:
847		mov	r0, #0x00000070		@ ARM7 control reg.
848		b	__armv3_mmu_cache_off
849
850__armv3_mmu_cache_off:
851		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
852		mov	r0, #0
853		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
854		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
855		mov	pc, lr
856
857/*
858 * Clean and flush the cache to maintain consistency.
859 *
860 * On exit,
861 *  r1, r2, r3, r9, r10, r11, r12 corrupted
862 * This routine must preserve:
863 *  r0, r4, r5, r6, r7
864 */
865		.align	5
866cache_clean_flush:
867		mov	r3, #16
868		b	call_cache_fn
869
870__armv4_mpu_cache_flush:
871		mov	r2, #1
872		mov	r3, #0
873		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
874		mov	r1, #7 << 5		@ 8 segments
8751:		orr	r3, r1, #63 << 26	@ 64 entries
8762:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
877		subs	r3, r3, #1 << 26
878		bcs	2b			@ entries 63 to 0
879		subs 	r1, r1, #1 << 5
880		bcs	1b			@ segments 7 to 0
881
882		teq	r2, #0
883		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
884		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
885		mov	pc, lr
886
887__fa526_cache_flush:
888		mov	r1, #0
889		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
890		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
891		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
892		mov	pc, lr
893
894__armv6_mmu_cache_flush:
895		mov	r1, #0
896		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
897		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
898		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
899		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
900		mov	pc, lr
901
902__armv7_mmu_cache_flush:
903		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
904		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
905		mov	r10, #0
906		beq	hierarchical
907		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
908		b	iflush
909hierarchical:
910		mcr	p15, 0, r10, c7, c10, 5	@ DMB
911		stmfd	sp!, {r0-r7, r9-r11}
912		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
913		ands	r3, r0, #0x7000000	@ extract loc from clidr
914		mov	r3, r3, lsr #23		@ left align loc bit field
915		beq	finished		@ if loc is 0, then no need to clean
916		mov	r10, #0			@ start clean at cache level 0
917loop1:
918		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
919		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
920		and	r1, r1, #7		@ mask off the bits for current cache only
921		cmp	r1, #2			@ see what cache we have at this level
922		blt	skip			@ skip if no cache, or just i-cache
923		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
924		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr & csidr
925		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
926		and	r2, r1, #7		@ extract the length of the cache lines
927		add	r2, r2, #4		@ add 4 (line length offset)
928		ldr	r4, =0x3ff
929		ands	r4, r4, r1, lsr #3	@ find maximum way number (associativity - 1)
930		clz	r5, r4			@ find bit position of way size increment
931		ldr	r7, =0x7fff
932		ands	r7, r7, r1, lsr #13	@ extract maximum set index (number of sets - 1)
933loop2:
934		mov	r9, r4			@ create working copy of max way size
935loop3:
936 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
937 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
938 THUMB(		lsl	r6, r9, r5		)
939 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
940 THUMB(		lsl	r6, r7, r2		)
941 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
942		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
943		subs	r9, r9, #1		@ decrement the way
944		bge	loop3
945		subs	r7, r7, #1		@ decrement the index
946		bge	loop2
947skip:
948		add	r10, r10, #2		@ increment cache number
949		cmp	r3, r10
950		bgt	loop1
951finished:
952		ldmfd	sp!, {r0-r7, r9-r11}
953		mov	r10, #0			@ switch back to cache level 0
954		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
955iflush:
956		mcr	p15, 0, r10, c7, c10, 4	@ DSB
957		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
958		mcr	p15, 0, r10, c7, c10, 4	@ DSB
959		mcr	p15, 0, r10, c7, c5, 4	@ ISB
960		mov	pc, lr
961
962__armv5tej_mmu_cache_flush:
9631:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
964		bne	1b
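		@ the "test, clean and invalidate" operation above sets the
		@ condition flags (its destination is pc), so we loop until the
		@ whole D cache has been cleaned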
965		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
966		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
967		mov	pc, lr
968
969__armv4_mmu_cache_flush:
970		mov	r2, #64*1024		@ default: 32K dcache size (*2)
971		mov	r11, #32		@ default: 32 byte line size
972		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
973		teq	r3, r9			@ cache ID register present?
974		beq	no_cache_id
975		mov	r1, r3, lsr #18
976		and	r1, r1, #7
977		mov	r2, #1024
978		mov	r2, r2, lsl r1		@ base dcache size *2
979		tst	r3, #1 << 14		@ test M bit
980		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
981		mov	r3, r3, lsr #12
982		and	r3, r3, #3
983		mov	r11, #8
984		mov	r11, r11, lsl r3	@ cache line size in bytes
985no_cache_id:
986		mov	r1, pc
987		bic	r1, r1, #63		@ align to longest cache line
988		add	r2, r1, r2
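		@ reading twice the D-cache size worth of memory, starting at
		@ our own (cached) code, forces every dirty line out of a
		@ writeback cache without a clean-by-index operation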
9891:
990 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
991 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
992 THUMB(		add     r1, r1, r11		)
993		teq	r1, r2
994		bne	1b
995
996		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
997		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
998		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
999		mov	pc, lr
1000
1001__armv3_mmu_cache_flush:
1002__armv3_mpu_cache_flush:
1003		mov	r1, #0
1004		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1005		mov	pc, lr
1006
1007/*
1008 * Various debugging routines for printing hex characters and
1009 * memory, which again must be relocatable.
1010 */
1011#ifdef DEBUG
1012		.align	2
1013		.type	phexbuf,#object
1014phexbuf:	.space	12
1015		.size	phexbuf, . - phexbuf
1016
1017@ phex corrupts {r0, r1, r2, r3}
1018phex:		adr	r3, phexbuf
1019		mov	r2, #0
1020		strb	r2, [r3, r1]
10211:		subs	r1, r1, #1
1022		movmi	r0, r3
1023		bmi	puts
1024		and	r2, r0, #15
1025		mov	r0, r0, lsr #4
1026		cmp	r2, #10
1027		addge	r2, r2, #7
1028		add	r2, r2, #'0'
1029		strb	r2, [r3, r1]
1030		b	1b
1031
1032@ puts corrupts {r0, r1, r2, r3}
1033puts:		loadsp	r3, r1
10341:		ldrb	r2, [r0], #1
1035		teq	r2, #0
1036		moveq	pc, lr
10372:		writeb	r2, r3
1038		mov	r1, #0x00020000
10393:		subs	r1, r1, #1
1040		bne	3b
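		@ crude busy-wait: writeb/senduart does not poll the UART here,
		@ so give the character time to be transmitted (the 0x20000
		@ count is assumed to be simply "long enough")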
1041		teq	r2, #'\n'
1042		moveq	r2, #'\r'
1043		beq	2b
1044		teq	r0, #0
1045		bne	1b
1046		mov	pc, lr
1047@ putc corrupts {r0, r1, r2, r3}
1048putc:
1049		mov	r2, r0
1050		mov	r0, #0
1051		loadsp	r3, r1
1052		b	2b
1053
1054@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1055memdump:	mov	r12, r0
1056		mov	r10, lr
1057		mov	r11, #0
10582:		mov	r0, r11, lsl #2
1059		add	r0, r0, r12
1060		mov	r1, #8
1061		bl	phex
1062		mov	r0, #':'
1063		bl	putc
10641:		mov	r0, #' '
1065		bl	putc
1066		ldr	r0, [r12, r11, lsl #2]
1067		mov	r1, #8
1068		bl	phex
1069		and	r0, r11, #7
1070		teq	r0, #3
1071		moveq	r0, #' '
1072		bleq	putc
1073		and	r0, r11, #7
1074		add	r11, r11, #1
1075		teq	r0, #7
1076		bne	1b
1077		mov	r0, #'\n'
1078		bl	putc
1079		cmp	r11, #64
1080		blt	2b
1081		mov	pc, r10
1082#endif
1083
1084		.ltorg
1085reloc_end:
1086
1087		.align
1088		.section ".stack", "w"
1089user_stack:	.space	4096
1090user_stack_end:
1091