/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/config.h>
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c1, 0
		.endm
#else

#include <asm/arch/debug-macro.S>

		.macro	writeb,	ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_IOP331)
		.macro	loadsp, rb
		mov	\rb, #0xff000000
		orr	\rb, \rb, #0x00ff0000
		orr	\rb, \rb, #0x0000f700	@ location of the UART
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro	loadsp, rb
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C2410_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp,	rb
		addruart \rb
		.endm
#endif
#endif
#endif

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r9, 8		/* decompressed kernel end  */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm
		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer
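
		/*
		 * The architecture number and the atags pointer arrive
		 * from the boot loader in r1 and r2; they are kept in
		 * r7/r8 until call_kernel hands them back to the real
		 * kernel (together with r0 = 0) at the very end.
		 */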

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
		swi	0x123456		@ angel_SWI_ARM
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
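		/*
		 * From the LC0 table: r1 = link-time address of LC0,
		 * r2 = __bss_start, r3 = _end, r4 = zreladdr, r5 = _start,
		 * r6 = _got_start, ip = _got_end, sp = user_stack + 4096.
		 */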
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address
		 *   r6 - GOT start
		 *   ip - GOT end
		 */
		add	r5, r5, r0
		add	r6, r6, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (CONFIG_ZBOOT_ROM = n),
		 * we need to fix up pointers into the BSS region.
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
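		 * Each GOT entry holds an absolute address that was fixed
		 * at link time; adding the delta makes the C code's
		 * references to global data valid at the run-time address.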
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
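		 * The cmp/cmphs pair leaves the LO condition set exactly
		 * when entry < bss_start or _end < entry, so the addlo
		 * applies the delta only to pointers outside the BSS.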
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b
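		@ (four words are stored per pass before the limit check,
		@ so up to 12 bytes beyond _end may also be zeroed)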

		/*
		 * The C runtime environment should now be set up
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max
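		/*
		 * r1/r2 bound the scratch heap handed to the C decompressor
		 * (presumably free_mem_ptr/free_mem_ptr_end in misc.c);
		 * r0 and r3 are loaded just before each call below.
		 */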

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
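/*
 * Worked example with illustrative values (not from any real boot
 * loader): r4 = 0x00008000 (zreladdr), image at r5 = 0x00400000,
 * malloc end r2 = 0x00410000.  Then r4 < r2 and r4 + 4MB > r5, so
 * neither test passes and we must move out of our own way below.
 */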
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		cmp	r0, r5
		bls	wont_overwrite

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127
		bic	r0, r0, #127		@ align the kernel length
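		@ (a 128-byte multiple matches the 128 bytes moved per
		@ iteration of the reloc_start copy loop below)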
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r14 = corrupted
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r9 - r14}		@ copy relocation code
		stmia	r1!, {r9 - r14}
		ldmia	r2!, {r9 - r14}
		stmia	r1!, {r9 - r14}
		cmp	r2, r3
		blo	1b

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	zreladdr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =params_phys
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = atags pointer
 *  r9 = run-time address of "start"  (???)
 * On exit,
 *  r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
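		@ (the two BICs clear the low 14 bits: the first-level
		@ page table must sit on a 16K boundary)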
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
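		@ r1 = 0xc12: section descriptor with AP = 11 (full access),
		@ domain 0, C and B clear; the loop below sets C+B (0x0c)
		@ for the RAM area only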
		add	r2, r3, #16384
1:		cmp	r1, r9			@ if virt >= start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r10			@ if virt >= end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If we are ever running from Flash, we surely want the cache to be
 * enabled for our execution instance as well...  We map 2MB of it so
 * there is no map overlap problem for up to a 1MB compressed kernel.
 * If we are executing from RAM, this merely duplicates part of the
 * mapping made above.
 */
		mov	r1, #0x1e		@ section descriptor with C+B set
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr
__armv4_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030		@ 32-bit exceptions/data (P, D bits)
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mov	pc, r12

__arm6_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30		@ 32-bit exceptions/data (P, D bits)
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_mmu_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ MMU, D-cache, write buffer on
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
						@ (manager for all domains)
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mov	pc, lr

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r14 = corrupted
 */
		.align	5
reloc_start:	add	r9, r5, r0
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r10 - r14}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r10 - r14}
		.endr

		cmp	r5, r9
		blo	1b
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5
		b	1b
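		@ (the final proc_types entry has an all-zero mask and so
		@ matches any processor ID, guaranteeing this walk
		@ terminates)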

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
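 * For example, an SA-1100 with ID 0x4401a113 matches its entry:
 * (0x4401a113 ^ 0x4401a100) & 0xffffffe0 = 0x13 & 0xffffffe0 = 0.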
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_mmu_cache_off	@ works, but slow
		b	__arm6_mmu_cache_off
		mov	pc, lr
@		b	__arm6_mmu_cache_on		@ untested
@		b	__arm6_mmu_cache_off
@		b	__armv3_mmu_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_mmu_cache_off
		b	__arm7_mmu_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		mov	pc, lr

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x00070000		@ ARMv6
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv6_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__arm6_mmu_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_mmu_cache_off

__arm7_mmu_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_mmu_cache_off

__armv3_mmu_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16			@ cache flush function
		b	call_cache_fn

__armv6_mmu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv4_mmu_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r6			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
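		@ e.g. a cache type word with size field 6 and len field 2
		@ yields r2 = 1024 << 6 = 64K (a 32K D-cache covered twice)
		@ and r11 = 8 << 2 = 32-byte lines, matching the defaults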
no_cache_id:
		bic	r1, pc, #63		@ align to longest cache line
		add	r2, r1, r2
1:		ldr	r3, [r1], r11		@ s/w flush D cache
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_mmu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b
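		@ (each nibble is converted with '0' + n, adding 7 to step
		@ over the seven ASCII characters between '9' and 'A' when
		@ n >= 10; phexbuf is filled from its rightmost digit back)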

puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b

memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

reloc_end:

		.align
		.section ".stack", "w"
user_stack:	.space	4096