/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/config.h>
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#include <asm/arch/debug-macro.S>

#if defined(CONFIG_DEBUG_ICEDCC)
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c1, 0
		.endm
#else
		.macro	writeb,	ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_FOOTBRIDGE) || \
    defined(CONFIG_ARCH_RPC) || \
    defined(CONFIG_ARCH_INTEGRATOR) || \
    defined(CONFIG_ARCH_PXA) || \
    defined(CONFIG_ARCH_IXP4XX) || \
    defined(CONFIG_ARCH_IXP2000) || \
    defined(CONFIG_ARCH_LH7A40X) || \
    defined(CONFIG_ARCH_OMAP)
		.macro	loadsp,	rb
		addruart \rb
		.endm
#elif defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb
		mov	\rb, #0x80000000	@ physical base address
#  if defined(CONFIG_DEBUG_LL_SER3)
		add	\rb, \rb, #0x00050000	@ Ser3
#  else
		add	\rb, \rb, #0x00010000	@ Ser1
#  endif
		.endm
#elif defined(CONFIG_ARCH_IOP331)
		.macro	loadsp, rb
		mov	\rb, #0xff000000
		orr	\rb, \rb, #0x00ff0000
		orr	\rb, \rb, #0x0000f700	@ location of the UART
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro	loadsp, rb
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C2410_LOWLEVEL_UART_PORT
		.endm
#else
#error no serial architecture defined
#endif
#endif
#endif

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r8, 8		/* decompressed kernel end  */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
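		@ bootloaders recognise a zImage by the 0x016f2818 magic at
		@ offset 0x24 and can read the two following words to learn
		@ the image's load/run and end addresses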
1:		mov	r7, r1			@ save architecture ID
		mov	r8, #0			@ save r0

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
		swi	0x123456		@ angel_SWI_ARM
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
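		/*
		 * 26-bit CPUs keep the PSR in the spare bits of r15; the
		 * P suffix makes TEQ write those bits, so this sets the
		 * I and F flags (0x0c000000) and mode SVC26 (mode bits 3).
		 */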
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7 and r8.
		 */

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address
		 *   r6 - GOT start
		 *   ip - GOT end
		 */
		add	r5, r5, r0
		add	r6, r6, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM is not set),
		 * we need to fix up pointers into the BSS region.
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * The C runtime environment should now be set up
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max
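		@ r1/r2 delimit the scratch heap used by the decompressor's
		@ trivial allocator; they are passed to decompress_kernel()
		@ below as the free-memory bounds (see misc.c)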

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		cmp	r0, r5
		bls	wont_overwrite

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127
		bic	r0, r0, #127		@ align the kernel length
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
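		@ copy in 48-byte bursts; the loop may overrun reloc_end by
		@ up to one burst, which is harmless here since the extra
		@ bytes are never executed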
1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
		stmia	r1!, {r8 - r13}
		ldmia	r2!, {r8 - r13}
		stmia	r1!, {r8 - r13}
		cmp	r2, r3
		blo	1b

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	zreladdr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =params_phys
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = run-time address of "start"
 * On exit,
 *  r1, r2, r3, r8, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r8, r0, lsr #18
		mov	r8, r8, lsl #18		@ start of RAM
		add	r9, r8, #0x10000000	@ a reasonable RAM size
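/*
 * Build a 1:1 section mapping: 0x12 sets the section type (bit 1)
 * and the ARMv4/v5 "should be one" bit (bit 4), 3 << 10 sets the AP
 * bits for full read/write access, and 0x0c (the C and B bits) is
 * toggled below depending on whether the section covers RAM.
 */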
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r8			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r9			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
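		@ 0x1e = section entry with the C and B bits already set;
		@ the AP bits are OR'ed in as above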
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr

__armv4_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
		bl	__common_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mov	pc, r12

__arm6_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ MMU, D-cache, write buffer
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mov	pc, lr

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		.align	5
reloc_start:	add	r8, r5, r0
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r9 - r13}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r9 - r13}
		.endr

		cmp	r5, r8
		blo	1b
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0
		mov	r1, r7			@ restore architecture number
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5
		b	1b
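		@ each entry below is 5 words long: ID match, ID mask, then
		@ one branch each for 'on' (offset 8), 'off' (12) and
		@ 'flush' (16); r3 selects which slot "addeq pc" lands on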

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_cache_off	@ works, but slow
		b	__arm6_cache_off
		mov	pc, lr
@		b	__arm6_cache_on		@ untested
@		b	__arm6_cache_off
@		b	__armv3_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_cache_off
		b	__arm7_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_cache_on
		b	__armv4_cache_off
		mov	pc, lr

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00070000		@ ARMv6
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv6_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__arm6_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_cache_off

__arm7_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_cache_off

__armv3_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv6_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

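/*
 * Decode the cache type register: bits 20:18 give the D-cache size
 * (the code works with twice the real size, presumably so the walk
 * below is guaranteed to evict everything), bit 14 is the 'M'
 * multiplier adding half the size again, and bits 13:12 give the
 * line length as 8 << N bytes.
 */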
__armv4_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r6			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		bic	r1, pc, #63		@ align to longest cache line
		add	r2, r1, r2
1:		ldr	r3, [r1], r11		@ s/w flush D cache
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
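		@ crude fixed busy-wait to let the UART drain; this generic
		@ path polls no status register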
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b

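/*
 * memdump: print 64 words (256 bytes) from the address in r0,
 * eight words per line, each line prefixed with its address.
 */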
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

reloc_end:

		.align
		.section ".stack", "w"
user_stack:	.space	4096
