xref: /linux/arch/arm/boot/compressed/head.S (revision edabd38e1a017e922e3e3b485ee3ddb4df433aa4)
1/*
2 *  linux/arch/arm/boot/compressed/head.S
3 *
4 *  Copyright (C) 1996-2002 Russell King
5 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/linkage.h>
12
13/*
14 * Debugging stuff
15 *
16 * Note that these macros must not contain any code which is not
17 * 100% relocatable.  Any attempt to do so will result in a crash.
18 * Please select one of the following when turning on debugging.
19 */
20#ifdef DEBUG
21
22#if defined(CONFIG_DEBUG_ICEDCC)
23
24#ifdef CONFIG_CPU_V6
25		.macro	loadsp, rb
26		.endm
27		.macro	writeb, ch, rb
28		mcr	p14, 0, \ch, c0, c5, 0
29		.endm
30#elif defined(CONFIG_CPU_XSCALE)
31		.macro	loadsp, rb
32		.endm
33		.macro	writeb, ch, rb
34		mcr	p14, 0, \ch, c8, c0, 0
35		.endm
36#else
37		.macro	loadsp, rb
38		.endm
39		.macro	writeb, ch, rb
40		mcr	p14, 0, \ch, c1, c0, 0
41		.endm
42#endif
43
44#else
45
46#include <mach/debug-macro.S>
47
48		.macro	writeb,	ch, rb
49		senduart \ch, \rb
50		.endm
51
52#if defined(CONFIG_ARCH_SA1100)
53		.macro	loadsp, rb
54		mov	\rb, #0x80000000	@ physical base address
55#ifdef CONFIG_DEBUG_LL_SER3
56		add	\rb, \rb, #0x00050000	@ Ser3
57#else
58		add	\rb, \rb, #0x00010000	@ Ser1
59#endif
60		.endm
61#elif defined(CONFIG_ARCH_S3C2410)
62		.macro loadsp, rb
63		mov	\rb, #0x50000000
64		add	\rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT
65		.endm
66#else
67		.macro	loadsp,	rb
68		addruart \rb
69		.endm
70#endif
71#endif
72#endif
73
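		/*
		 * Debug helpers: kputc prints a single character and kphex
		 * prints a value as a hex number of the given width.  Both go
		 * through the putc/phex routines defined later in this file
		 * and clobber r0-r3.
		 */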
74		.macro	kputc,val
75		mov	r0, \val
76		bl	putc
77		.endm
78
79		.macro	kphex,val,len
80		mov	r0, \val
81		mov	r1, #\len
82		bl	phex
83		.endm
84
85		.macro	debug_reloc_start
86#ifdef DEBUG
87		kputc	#'\n'
88		kphex	r6, 8		/* processor id */
89		kputc	#':'
90		kphex	r7, 8		/* architecture id */
91#ifdef CONFIG_CPU_CP15
92		kputc	#':'
93		mrc	p15, 0, r0, c1, c0
94		kphex	r0, 8		/* control reg */
95#endif
96		kputc	#'\n'
97		kphex	r5, 8		/* decompressed kernel start */
98		kputc	#'-'
99		kphex	r9, 8		/* decompressed kernel end  */
100		kputc	#'>'
101		kphex	r4, 8		/* kernel execution address */
102		kputc	#'\n'
103#endif
104		.endm
105
106		.macro	debug_reloc_end
107#ifdef DEBUG
108		kphex	r5, 8		/* end of kernel */
109		kputc	#'\n'
110		mov	r0, r4
111		bl	memdump		/* dump 256 bytes at start of kernel */
112#endif
113		.endm
114
115		.section ".start", #alloc, #execinstr
116/*
117 * sort out different calling conventions
118 */
119		.align
120start:
121		.type	start,#function
122		.rept	8
123		mov	r0, r0
124		.endr
125
126		b	1f
127		.word	0x016f2818		@ Magic numbers to help the loader
128		.word	start			@ absolute load/run zImage address
129		.word	_edata			@ zImage end address
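		/*
		 * These three words form a small header: the magic value lets
		 * a boot loader recognise a zImage, and the two words after it
		 * give the image's linked load/run address and its end address.
		 */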
1301:		mov	r7, r1			@ save architecture ID
131		mov	r8, r2			@ save atags pointer
132
133#ifndef __ARM_ARCH_2__
134		/*
135		 * Booting from Angel - need to enter SVC mode and disable
136		 * FIQs/IRQs (numeric definitions from angel arm.h source).
137		 * We only do this if we were in user mode on entry.
138		 */
139		mrs	r2, cpsr		@ get current mode
140		tst	r2, #3			@ not user?
141		bne	not_angel
142		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
143 ARM(		swi	0x123456	)	@ angel_SWI_ARM
144 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
145not_angel:
146		mrs	r2, cpsr		@ turn off interrupts to
147		orr	r2, r2, #0xc0		@ prevent angel from running
148		msr	cpsr_c, r2
149#else
150		teqp	pc, #0x0c000003		@ turn off interrupts
151#endif
152
153		/*
154		 * Note that some cache flushing and other stuff may
155		 * be needed here - is there an Angel SWI call for this?
156		 */
157
158		/*
159		 * some architecture specific code can be inserted
160		 * by the linker here, but it should preserve r7, r8, and r9.
161		 */
162
163		.text
164		adr	r0, LC0
165 ARM(		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}	)
166 THUMB(		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip}	)
167 THUMB(		ldr	sp, [r0, #28]				)
168		subs	r0, r0, r1		@ calculate the delta offset
169
170						@ if delta is zero, we are
171		beq	not_relocated		@ running at the address we
172						@ were linked at.
173
174		/*
175		 * We're running at a different address.  We need to fix
176		 * up various pointers:
177		 *   r5 - zImage base address
178		 *   r6 - GOT start
179		 *   ip - GOT end
180		 */
181		add	r5, r5, r0
182		add	r6, r6, r0
183		add	ip, ip, r0
184
185#ifndef CONFIG_ZBOOT_ROM
186		/*
187		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM = n),
188		 * we need to fix up pointers into the BSS region.
189		 *   r2 - BSS start
190		 *   r3 - BSS end
191		 *   sp - stack pointer
192		 */
193		add	r2, r2, r0
194		add	r3, r3, r0
195		add	sp, sp, r0
196
197		/*
198		 * Relocate all entries in the GOT table.
199		 */
2001:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
201		add	r1, r1, r0		@ table.  This fixes up the
202		str	r1, [r6], #4		@ C references.
203		cmp	r6, ip
204		blo	1b
205#else
206
207		/*
208		 * Relocate entries in the GOT table.  We only relocate
209		 * the entries that are outside the (relocated) BSS region.
210		 */
2111:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
212		cmp	r1, r2			@ entry < bss_start ||
213		cmphs	r3, r1			@ _end < entry
214		addlo	r1, r1, r0		@ table.  This fixes up the
215		str	r1, [r6], #4		@ C references.
216		cmp	r6, ip
217		blo	1b
218#endif
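		/*
		 * Illustrative C equivalent of the GOT fixup above (a sketch,
		 * not part of the build), with the load/link delta in r0:
		 *
		 *	for (p = got_start; p < got_end; p++)
		 *		if (!ZBOOT_ROM || *p < bss_start || *p > bss_end)
		 *			*p += delta;
		 */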
219
220not_relocated:	mov	r0, #0
2211:		str	r0, [r2], #4		@ clear bss
222		str	r0, [r2], #4
223		str	r0, [r2], #4
224		str	r0, [r2], #4
225		cmp	r2, r3
226		blo	1b
227
228		/*
229		 * The C runtime environment should now be set up
230		 * sufficiently.  Turn the cache on, set up some
231		 * pointers, and start decompressing.
232		 */
233		bl	cache_on
234
235		mov	r1, sp			@ malloc space above stack
236		add	r2, sp, #0x10000	@ 64k max
237
238/*
239 * Check to see if we will overwrite ourselves.
240 *   r4 = final kernel address
241 *   r5 = start of this image
242 *   r2 = end of malloc space (and therefore this image)
243 * We basically want:
244 *   r4 >= r2 -> OK
245 *   r4 + image length <= r5 -> OK
246 */
247		cmp	r4, r2
248		bhs	wont_overwrite
249		sub	r3, sp, r5		@ > compressed kernel size
250		add	r0, r4, r3, lsl #2	@ allow for 4x expansion
251		cmp	r0, r5
252		bls	wont_overwrite
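		/*
		 * In other words (illustrative only), decompressing in place
		 * is safe when
		 *
		 *	exec_addr >= malloc_end ||
		 *	exec_addr + 4 * (sp - image_start) <= image_start
		 *
		 * otherwise we decompress above the malloc space and relocate
		 * the kernel afterwards, as done below.
		 */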
253
254		mov	r5, r2			@ decompress after malloc space
255		mov	r0, r5
256		mov	r3, r7
257		bl	decompress_kernel
258
259		add	r0, r0, #127 + 128	@ alignment + stack
260		bic	r0, r0, #127		@ align the kernel length
261/*
262 * r0     = decompressed kernel length
263 * r1-r3  = unused
264 * r4     = kernel execution address
265 * r5     = decompressed kernel start
266 * r6     = processor ID
267 * r7     = architecture ID
268 * r8     = atags pointer
269 * r9-r12,r14 = corrupted
270 */
271		add	r1, r5, r0		@ end of decompressed kernel
272		adr	r2, reloc_start
273		ldr	r3, LC1
274		add	r3, r2, r3
2751:		ldmia	r2!, {r9 - r12, r14}	@ copy relocation code
276		stmia	r1!, {r9 - r12, r14}
277		ldmia	r2!, {r9 - r12, r14}
278		stmia	r1!, {r9 - r12, r14}
279		cmp	r2, r3
280		blo	1b
281		mov	sp, r1
282		add	sp, sp, #128		@ relocate the stack
283
284		bl	cache_clean_flush
285 ARM(		add	pc, r5, r0		) @ call relocation code
286 THUMB(		add	r12, r5, r0		)
287 THUMB(		mov	pc, r12			) @ call relocation code
288
289/*
290 * We're not in danger of overwriting ourselves.  Do this the simple way.
291 *
292 * r4     = kernel execution address
293 * r7     = architecture ID
294 */
295wont_overwrite:	mov	r0, r4
296		mov	r3, r7
297		bl	decompress_kernel
298		b	call_kernel
299
300		.align	2
301		.type	LC0, #object
302LC0:		.word	LC0			@ r1
303		.word	__bss_start		@ r2
304		.word	_end			@ r3
305		.word	zreladdr		@ r4
306		.word	_start			@ r5
307		.word	_got_start		@ r6
308		.word	_got_end		@ ip
309		.word	user_stack+4096		@ sp
310LC1:		.word	reloc_end - reloc_start
311		.size	LC0, . - LC0
312
313#ifdef CONFIG_ARCH_RPC
314		.globl	params
315params:		ldr	r0, =params_phys
316		mov	pc, lr
317		.ltorg
318		.align
319#endif
320
321/*
322 * Turn on the cache.  We need to set up some page tables so that we
323 * can have both the I and D caches on.
324 *
325 * We place the page tables 16k down from the kernel execution address,
326 * and we hope that nothing else is using that memory.  If something
327 * else is, we will go pop!
328 *
329 * On entry,
330 *  r4 = kernel execution address
331 *  r6 = processor ID
332 *  r7 = architecture number
333 *  r8 = atags pointer
334 *  r9 = run-time address of "start"  (???)
335 * On exit,
336 *  r1, r2, r3, r9, r10, r12 corrupted
337 * This routine must preserve:
338 *  r4, r5, r6, r7, r8
339 */
340		.align	5
341cache_on:	mov	r3, #8			@ cache_on function
342		b	call_cache_fn
343
344/*
345 * Initialize the highest priority protection region, PR7,
346 * to cover the whole 32-bit address space, cacheable and bufferable.
347 */
348__armv4_mpu_cache_on:
349		mov	r0, #0x3f		@ 4G, the whole
350		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
351		mcr 	p15, 0, r0, c6, c7, 1
352
353		mov	r0, #0x80		@ PR7
354		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
355		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
356		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
357
358		mov	r0, #0xc000
359		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
360		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
361
362		mov	r0, #0
363		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
364		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
365		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
366		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
367						@ ...I .... ..D. WC.M
368		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
369		orr	r0, r0, #0x1000		@ ...1 .... .... ....
370
371		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
372
373		mov	r0, #0
374		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
375		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
376		mov	pc, lr
377
378__armv3_mpu_cache_on:
379		mov	r0, #0x3f		@ 4G, the whole
380		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
381
382		mov	r0, #0x80		@ PR7
383		mcr	p15, 0, r0, c2, c0, 0	@ cache on
384		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
385
386		mov	r0, #0xc000
387		mcr	p15, 0, r0, c5, c0, 0	@ access permission
388
389		mov	r0, #0
390		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
391		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
392						@ .... .... .... WC.M
393		orr	r0, r0, #0x000d		@ .... .... .... 11.1
394		mov	r0, #0
395		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
396
397		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
398		mov	pc, lr
399
400__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
401		bic	r3, r3, #0xff		@ Align the pointer
402		bic	r3, r3, #0x3f00
403/*
404 * Initialise the page tables, turning on the cacheable and bufferable
405 * bits for the RAM area only.
406 */
407		mov	r0, r3
408		mov	r9, r0, lsr #18
409		mov	r9, r9, lsl #18		@ start of RAM
410		add	r10, r9, #0x10000000	@ a reasonable RAM size
411		mov	r1, #0x12
412		orr	r1, r1, #3 << 10
413		add	r2, r3, #16384
4141:		cmp	r1, r9			@ if virt > start of RAM
415		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
416		cmp	r1, r10			@ if virt > end of RAM
417		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
418		str	r1, [r0], #4		@ 1:1 mapping
419		add	r1, r1, #1048576
420		teq	r0, r2
421		bne	1b
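		/*
		 * Each entry written above is a 1MB section descriptor:
		 * bits [1:0] = 10 (section) and AP = 11, with the C and B bits
		 * (0x0c) set only within the assumed 256MB of RAM, giving a
		 * flat, cacheable 1:1 mapping of that region.
		 */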
422/*
423 * If we happen to be running from Flash, we certainly want the cache
424 * to be enabled for our own execution as well.  We map 2MB of it so
425 * there is no map overlap problem for up to a 1 MB compressed kernel.
426 * If we are executing from RAM, this merely duplicates the mapping above.
427 */
428		mov	r1, #0x1e
429		orr	r1, r1, #3 << 10
430		mov	r2, pc, lsr #20
431		orr	r1, r1, r2, lsl #20
432		add	r0, r3, r2, lsl #2
433		str	r1, [r0], #4
434		add	r1, r1, #1048576
435		str	r1, [r0]
436		mov	pc, lr
437ENDPROC(__setup_mmu)
438
439__armv4_mmu_cache_on:
440		mov	r12, lr
441#ifdef CONFIG_MMU
442		bl	__setup_mmu
443		mov	r0, #0
444		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
445		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
446		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
447		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
448		orr	r0, r0, #0x0030
449#ifdef CONFIG_CPU_ENDIAN_BE8
450		orr	r0, r0, #1 << 25	@ big-endian page tables
451#endif
452		bl	__common_mmu_cache_on
453		mov	r0, #0
454		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
455#endif
456		mov	pc, r12
457
458__armv7_mmu_cache_on:
459		mov	r12, lr
460#ifdef CONFIG_MMU
461		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
462		tst	r11, #0xf		@ VMSA
463		blne	__setup_mmu
464		mov	r0, #0
465		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
466		tst	r11, #0xf		@ VMSA
467		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
468#endif
469		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
470		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
471		orr	r0, r0, #0x003c		@ write buffer
472#ifdef CONFIG_MMU
473#ifdef CONFIG_CPU_ENDIAN_BE8
474		orr	r0, r0, #1 << 25	@ big-endian page tables
475#endif
476		orrne	r0, r0, #1		@ MMU enabled
477		movne	r1, #-1
478		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
479		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
480#endif
481		mcr	p15, 0, r0, c1, c0, 0	@ load control register
482		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
483		mov	r0, #0
484		mcr	p15, 0, r0, c7, c5, 4	@ ISB
485		mov	pc, r12
486
487__fa526_cache_on:
488		mov	r12, lr
489		bl	__setup_mmu
490		mov	r0, #0
491		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
492		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
493		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
494		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
495		orr	r0, r0, #0x1000		@ I-cache enable
496		bl	__common_mmu_cache_on
497		mov	r0, #0
498		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
499		mov	pc, r12
500
501__arm6_mmu_cache_on:
502		mov	r12, lr
503		bl	__setup_mmu
504		mov	r0, #0
505		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
506		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
507		mov	r0, #0x30
508		bl	__common_mmu_cache_on
509		mov	r0, #0
510		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
511		mov	pc, r12
512
513__common_mmu_cache_on:
514#ifndef CONFIG_THUMB2_KERNEL
515#ifndef DEBUG
516		orr	r0, r0, #0x000d		@ Write buffer, mmu
517#endif
518		mov	r1, #-1
519		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
520		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
521		b	1f
522		.align	5			@ cache line aligned
5231:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
524		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
525		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
526#endif
527
528/*
529 * All code following this line is relocatable.  It is relocated by
530 * the above code to the end of the decompressed kernel image and
531 * executed there.  During this time, we have no stacks.
532 *
533 * r0     = decompressed kernel length
534 * r1-r3  = unused
535 * r4     = kernel execution address
536 * r5     = decompressed kernel start
537 * r6     = processor ID
538 * r7     = architecture ID
539 * r8     = atags pointer
540 * r9-r12,r14 = corrupted
541 */
542		.align	5
543reloc_start:	add	r9, r5, r0
544		sub	r9, r9, #128		@ do not copy the stack
545		debug_reloc_start
546		mov	r1, r4
5471:
548		.rept	4
549		ldmia	r5!, {r0, r2, r3, r10 - r12, r14}	@ relocate kernel
550		stmia	r1!, {r0, r2, r3, r10 - r12, r14}
551		.endr
552
553		cmp	r5, r9
554		blo	1b
555		mov	sp, r1
556		add	sp, sp, #128		@ relocate the stack
557		debug_reloc_end
558
559call_kernel:	bl	cache_clean_flush
560		bl	cache_off
561		mov	r0, #0			@ must be zero
562		mov	r1, r7			@ restore architecture number
563		mov	r2, r8			@ restore atags pointer
564		mov	pc, r4			@ call kernel
565
566/*
567 * Here follow the relocatable cache support functions for the
568 * various processors.  This is a generic hook for locating an
569 * entry and jumping to an instruction at the specified offset
570 * from the start of the block.  Please note this is all position
571 * independent code.
572 *
573 *  r1  = corrupted
574 *  r2  = corrupted
575 *  r3  = block offset
576 *  r6  = corrupted
577 *  r12 = corrupted
578 */
579
580call_cache_fn:	adr	r12, proc_types
581#ifdef CONFIG_CPU_CP15
582		mrc	p15, 0, r6, c0, c0	@ get processor ID
583#else
584		ldr	r6, =CONFIG_PROCESSOR_ID
585#endif
5861:		ldr	r1, [r12, #0]		@ get value
587		ldr	r2, [r12, #4]		@ get mask
588		eor	r1, r1, r6		@ (real ^ match)
589		tst	r1, r2			@       & mask
590 ARM(		addeq	pc, r12, r3		) @ call cache function
591 THUMB(		addeq	r12, r3			)
592 THUMB(		moveq	pc, r12			) @ call cache function
593		add	r12, r12, #4*5
594		b	1b
595
596/*
597 * Table for cache operations.  This is basically:
598 *   - CPU ID match
599 *   - CPU ID mask
600 *   - 'cache on' method instruction
601 *   - 'cache off' method instruction
602 *   - 'cache flush' method instruction
603 *
604 * We match an entry using: ((real_id ^ match) & mask) == 0
605 *
606 * Writethrough caches generally only need 'on' and 'off'
607 * methods.  Writeback caches _must_ have the flush method
608 * defined.
609 */
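/*
 * Illustrative C-style sketch of the lookup performed by call_cache_fn
 * (not part of the build); r3 selects the method slot within each
 * five-word entry (8 = on, 12 = off, 16 = flush):
 *
 *	for (e = proc_types; ; e += 5)
 *		if (((cpu_id ^ e[0]) & e[1]) == 0)
 *			goto (char *)e + method_offset;
 */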
610		.align	2
611		.type	proc_types,#object
612proc_types:
613		.word	0x41560600		@ ARM6/610
614		.word	0xffffffe0
615		W(b)	__arm6_mmu_cache_off	@ works, but slow
616		W(b)	__arm6_mmu_cache_off
617		mov	pc, lr
618 THUMB(		nop				)
619@		b	__arm6_mmu_cache_on		@ untested
620@		b	__arm6_mmu_cache_off
621@		b	__armv3_mmu_cache_flush
622
623		.word	0x00000000		@ old ARM ID
624		.word	0x0000f000
625		mov	pc, lr
626 THUMB(		nop				)
627		mov	pc, lr
628 THUMB(		nop				)
629		mov	pc, lr
630 THUMB(		nop				)
631
632		.word	0x41007000		@ ARM7/710
633		.word	0xfff8fe00
634		W(b)	__arm7_mmu_cache_off
635		W(b)	__arm7_mmu_cache_off
636		mov	pc, lr
637 THUMB(		nop				)
638
639		.word	0x41807200		@ ARM720T (writethrough)
640		.word	0xffffff00
641		W(b)	__armv4_mmu_cache_on
642		W(b)	__armv4_mmu_cache_off
643		mov	pc, lr
644 THUMB(		nop				)
645
646		.word	0x41007400		@ ARM74x
647		.word	0xff00ff00
648		W(b)	__armv3_mpu_cache_on
649		W(b)	__armv3_mpu_cache_off
650		W(b)	__armv3_mpu_cache_flush
651
652		.word	0x41009400		@ ARM94x
653		.word	0xff00ff00
654		W(b)	__armv4_mpu_cache_on
655		W(b)	__armv4_mpu_cache_off
656		W(b)	__armv4_mpu_cache_flush
657
658		.word	0x00007000		@ ARM7 IDs
659		.word	0x0000f000
660		mov	pc, lr
661 THUMB(		nop				)
662		mov	pc, lr
663 THUMB(		nop				)
664		mov	pc, lr
665 THUMB(		nop				)
666
667		@ Everything from here on will be the new ID system.
668
669		.word	0x4401a100		@ sa110 / sa1100
670		.word	0xffffffe0
671		W(b)	__armv4_mmu_cache_on
672		W(b)	__armv4_mmu_cache_off
673		W(b)	__armv4_mmu_cache_flush
674
675		.word	0x6901b110		@ sa1110
676		.word	0xfffffff0
677		W(b)	__armv4_mmu_cache_on
678		W(b)	__armv4_mmu_cache_off
679		W(b)	__armv4_mmu_cache_flush
680
681		.word	0x56056930
682		.word	0xff0ffff0		@ PXA935
683		W(b)	__armv4_mmu_cache_on
684		W(b)	__armv4_mmu_cache_off
685		W(b)	__armv4_mmu_cache_flush
686
687		.word	0x56158000		@ PXA168
688		.word	0xfffff000
689		W(b)	__armv4_mmu_cache_on
690		W(b)	__armv4_mmu_cache_off
691		W(b)	__armv5tej_mmu_cache_flush
692
699		.word	0x56050000		@ Feroceon
700		.word	0xff0f0000
701		W(b)	__armv4_mmu_cache_on
702		W(b)	__armv4_mmu_cache_off
703		W(b)	__armv5tej_mmu_cache_flush
704
705#ifdef CONFIG_CPU_FEROCEON_OLD_ID
706		/* this conflicts with the standard ARMv5TE entry */
707		.long	0x41009260		@ Old Feroceon
708		.long	0xff00fff0
709		b	__armv4_mmu_cache_on
710		b	__armv4_mmu_cache_off
711		b	__armv5tej_mmu_cache_flush
712#endif
713
714		.word	0x66015261		@ FA526
715		.word	0xff01fff1
716		W(b)	__fa526_cache_on
717		W(b)	__armv4_mmu_cache_off
718		W(b)	__fa526_cache_flush
719
720		@ These match on the architecture ID
721
722		.word	0x00020000		@ ARMv4T
723		.word	0x000f0000
724		W(b)	__armv4_mmu_cache_on
725		W(b)	__armv4_mmu_cache_off
726		W(b)	__armv4_mmu_cache_flush
727
728		.word	0x00050000		@ ARMv5TE
729		.word	0x000f0000
730		W(b)	__armv4_mmu_cache_on
731		W(b)	__armv4_mmu_cache_off
732		W(b)	__armv4_mmu_cache_flush
733
734		.word	0x00060000		@ ARMv5TEJ
735		.word	0x000f0000
736		W(b)	__armv4_mmu_cache_on
737		W(b)	__armv4_mmu_cache_off
738		W(b)	__armv4_mmu_cache_flush
739
740		.word	0x0007b000		@ ARMv6
741		.word	0x000ff000
742		W(b)	__armv4_mmu_cache_on
743		W(b)	__armv4_mmu_cache_off
744		W(b)	__armv6_mmu_cache_flush
745
746		.word	0x560f5810		@ Marvell PJ4 ARMv6
747		.word	0xff0ffff0
748		W(b)	__armv4_mmu_cache_on
749		W(b)	__armv4_mmu_cache_off
750		W(b)	__armv6_mmu_cache_flush
751
752		.word	0x000f0000		@ new CPU Id
753		.word	0x000f0000
754		W(b)	__armv7_mmu_cache_on
755		W(b)	__armv7_mmu_cache_off
756		W(b)	__armv7_mmu_cache_flush
757
758		.word	0			@ unrecognised type
759		.word	0
760		mov	pc, lr
761 THUMB(		nop				)
762		mov	pc, lr
763 THUMB(		nop				)
764		mov	pc, lr
765 THUMB(		nop				)
766
767		.size	proc_types, . - proc_types
768
769/*
770 * Turn off the Cache and MMU.  ARMv3 does not support
771 * reading the control register, but ARMv4 does.
772 *
773 * On entry,  r6 = processor ID
774 * On exit,   r0, r1, r2, r3, r12 corrupted
775 * This routine must preserve: r4, r6, r7
776 */
777		.align	5
778cache_off:	mov	r3, #12			@ cache_off function
779		b	call_cache_fn
780
781__armv4_mpu_cache_off:
782		mrc	p15, 0, r0, c1, c0
783		bic	r0, r0, #0x000d
784		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
785		mov	r0, #0
786		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
787		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
788		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
789		mov	pc, lr
790
791__armv3_mpu_cache_off:
792		mrc	p15, 0, r0, c1, c0
793		bic	r0, r0, #0x000d
794		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
795		mov	r0, #0
796		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
797		mov	pc, lr
798
799__armv4_mmu_cache_off:
800#ifdef CONFIG_MMU
801		mrc	p15, 0, r0, c1, c0
802		bic	r0, r0, #0x000d
803		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
804		mov	r0, #0
805		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
806		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
807#endif
808		mov	pc, lr
809
810__armv7_mmu_cache_off:
811		mrc	p15, 0, r0, c1, c0
812#ifdef CONFIG_MMU
813		bic	r0, r0, #0x000d
814#else
815		bic	r0, r0, #0x000c
816#endif
817		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
818		mov	r12, lr
819		bl	__armv7_mmu_cache_flush
820		mov	r0, #0
821#ifdef CONFIG_MMU
822		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
823#endif
824		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
825		mcr	p15, 0, r0, c7, c10, 4	@ DSB
826		mcr	p15, 0, r0, c7, c5, 4	@ ISB
827		mov	pc, r12
828
829__arm6_mmu_cache_off:
830		mov	r0, #0x00000030		@ ARM6 control reg.
831		b	__armv3_mmu_cache_off
832
833__arm7_mmu_cache_off:
834		mov	r0, #0x00000070		@ ARM7 control reg.
835		b	__armv3_mmu_cache_off
836
837__armv3_mmu_cache_off:
838		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
839		mov	r0, #0
840		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
841		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
842		mov	pc, lr
843
844/*
845 * Clean and flush the cache to maintain consistency.
846 *
847 * On entry,
848 *  r6 = processor ID
849 * On exit,
850 *  r1, r2, r3, r11, r12 corrupted
851 * This routine must preserve:
852 *  r0, r4, r5, r6, r7
853 */
854		.align	5
855cache_clean_flush:
856		mov	r3, #16
857		b	call_cache_fn
858
859__armv4_mpu_cache_flush:
860		mov	r2, #1
861		mov	r3, #0
862		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
863		mov	r1, #7 << 5		@ 8 segments
8641:		orr	r3, r1, #63 << 26	@ 64 entries
8652:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
866		subs	r3, r3, #1 << 26
867		bcs	2b			@ entries 63 to 0
868		subs 	r1, r1, #1 << 5
869		bcs	1b			@ segments 7 to 0
870
871		teq	r2, #0
872		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
873		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
874		mov	pc, lr
875
876__fa526_cache_flush:
877		mov	r1, #0
878		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
879		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
880		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
881		mov	pc, lr
882
883__armv6_mmu_cache_flush:
884		mov	r1, #0
885		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
886		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
887		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
888		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
889		mov	pc, lr
890
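/*
 * __armv7_mmu_cache_flush: if ID_MMFR1 indicates that the v6-style
 * whole-cache operations are present, a single clean+invalidate of the
 * D-cache is used.  Otherwise the cache hierarchy described by CLIDR is
 * walked up to the Level of Coherency; for each data/unified level the
 * line size, ways and sets are read from CCSIDR and every set/way is
 * cleaned and invalidated.  The I-cache and branch predictor are then
 * invalidated.
 */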
891__armv7_mmu_cache_flush:
892		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
893		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
894		mov	r10, #0
895		beq	hierarchical
896		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
897		b	iflush
898hierarchical:
899		mcr	p15, 0, r10, c7, c10, 5	@ DMB
900		stmfd	sp!, {r0-r7, r9-r11}
901		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
902		ands	r3, r0, #0x7000000	@ extract loc from clidr
903		mov	r3, r3, lsr #23		@ shift loc down: r3 = loc * 2
904		beq	finished		@ if loc is 0, then no need to clean
905		mov	r10, #0			@ start clean at cache level 0
906loop1:
907		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
908		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
909		and	r1, r1, #7		@ mask off the bits for the current cache only
910		cmp	r1, #2			@ see what cache we have at this level
911		blt	skip			@ skip if no cache, or just i-cache
912		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
913		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr&csidr
914		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
915		and	r2, r1, #7		@ extract the length of the cache lines
916		add	r2, r2, #4		@ add 4 (line length offset)
917		ldr	r4, =0x3ff
918		ands	r4, r4, r1, lsr #3	@ find maximum way number (number of ways - 1)
919		clz	r5, r4			@ find bit position of way size increment
920		ldr	r7, =0x7fff
921		ands	r7, r7, r1, lsr #13	@ extract maximum set index (number of sets - 1)
922loop2:
923		mov	r9, r4			@ create working copy of max way size
924loop3:
925 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
926 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
927 THUMB(		lsl	r6, r9, r5		)
928 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
929 THUMB(		lsl	r6, r7, r2		)
930 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
931		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
932		subs	r9, r9, #1		@ decrement the way
933		bge	loop3
934		subs	r7, r7, #1		@ decrement the index
935		bge	loop2
936skip:
937		add	r10, r10, #2		@ increment cache number
938		cmp	r3, r10
939		bgt	loop1
940finished:
941		ldmfd	sp!, {r0-r7, r9-r11}
942		mov	r10, #0			@ switch back to cache level 0
943		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
944iflush:
945		mcr	p15, 0, r10, c7, c10, 4	@ DSB
946		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
947		mcr	p15, 0, r10, c7, c10, 4	@ DSB
948		mcr	p15, 0, r10, c7, c5, 4	@ ISB
949		mov	pc, lr
950
951__armv5tej_mmu_cache_flush:
9521:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
953		bne	1b
954		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
955		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
956		mov	pc, lr
957
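/*
 * __armv4_mmu_cache_flush reads the cache type register to work out the
 * D-cache size and line length (falling back to 32K/32 bytes if the
 * register is not implemented), flushes the D-cache by reading a region
 * of twice the cache size, then invalidates the I and D caches and
 * drains the write buffer.
 */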
958__armv4_mmu_cache_flush:
959		mov	r2, #64*1024		@ default: 32K dcache size (*2)
960		mov	r11, #32		@ default: 32 byte line size
961		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
962		teq	r3, r6			@ cache ID register present?
963		beq	no_cache_id
964		mov	r1, r3, lsr #18
965		and	r1, r1, #7
966		mov	r2, #1024
967		mov	r2, r2, lsl r1		@ base dcache size *2
968		tst	r3, #1 << 14		@ test M bit
969		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
970		mov	r3, r3, lsr #12
971		and	r3, r3, #3
972		mov	r11, #8
973		mov	r11, r11, lsl r3	@ cache line size in bytes
974no_cache_id:
975		mov	r1, pc
976		bic	r1, r1, #63		@ align to longest cache line
977		add	r2, r1, r2
9781:
979 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
980 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
981 THUMB(		add     r1, r1, r11		)
982		teq	r1, r2
983		bne	1b
984
985		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
986		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
987		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
988		mov	pc, lr
989
990__armv3_mmu_cache_flush:
991__armv3_mpu_cache_flush:
992		mov	r1, #0
993		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
994		mov	pc, lr
995
996/*
997 * Various debugging routines for printing hex characters and
998 * memory, which again must be relocatable.
999 */
1000#ifdef DEBUG
1001		.align	2
1002		.type	phexbuf,#object
1003phexbuf:	.space	12
1004		.size	phexbuf, . - phexbuf
1005
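/*
 * phex prints the value in r0 as an r1-digit hex number: the digits are
 * built in phexbuf and the resulting string is emitted through puts.
 * puts and putc write characters via the board-specific loadsp/writeb
 * debug macros, sending a carriage return after each newline.
 */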
1006phex:		adr	r3, phexbuf
1007		mov	r2, #0
1008		strb	r2, [r3, r1]
10091:		subs	r1, r1, #1
1010		movmi	r0, r3
1011		bmi	puts
1012		and	r2, r0, #15
1013		mov	r0, r0, lsr #4
1014		cmp	r2, #10
1015		addge	r2, r2, #7
1016		add	r2, r2, #'0'
1017		strb	r2, [r3, r1]
1018		b	1b
1019
1020puts:		loadsp	r3
10211:		ldrb	r2, [r0], #1
1022		teq	r2, #0
1023		moveq	pc, lr
10242:		writeb	r2, r3
1025		mov	r1, #0x00020000
10263:		subs	r1, r1, #1
1027		bne	3b
1028		teq	r2, #'\n'
1029		moveq	r2, #'\r'
1030		beq	2b
1031		teq	r0, #0
1032		bne	1b
1033		mov	pc, lr
1034putc:
1035		mov	r2, r0
1036		mov	r0, #0
1037		loadsp	r3
1038		b	2b
1039
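/*
 * memdump prints 64 words (256 bytes) starting at the address in r0,
 * eight words per line, each line prefixed with its address.
 */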
1040memdump:	mov	r12, r0
1041		mov	r10, lr
1042		mov	r11, #0
10432:		mov	r0, r11, lsl #2
1044		add	r0, r0, r12
1045		mov	r1, #8
1046		bl	phex
1047		mov	r0, #':'
1048		bl	putc
10491:		mov	r0, #' '
1050		bl	putc
1051		ldr	r0, [r12, r11, lsl #2]
1052		mov	r1, #8
1053		bl	phex
1054		and	r0, r11, #7
1055		teq	r0, #3
1056		moveq	r0, #' '
1057		bleq	putc
1058		and	r0, r11, #7
1059		add	r11, r11, #1
1060		teq	r0, #7
1061		bne	1b
1062		mov	r0, #'\n'
1063		bl	putc
1064		cmp	r11, #64
1065		blt	2b
1066		mov	pc, r10
1067#endif
1068
1069		.ltorg
1070reloc_end:
1071
1072		.align
1073		.section ".stack", "w"
1074user_stack:	.space	4096
1075