/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/config.h>
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#include <asm/arch/debug-macro.S>

#if defined(CONFIG_DEBUG_ICEDCC)
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c1, 0
		.endm
#else
		.macro	writeb, ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_FOOTBRIDGE) || \
    defined(CONFIG_ARCH_RPC) || \
    defined(CONFIG_ARCH_INTEGRATOR) || \
    defined(CONFIG_ARCH_PXA) || \
    defined(CONFIG_ARCH_IXP4XX) || \
    defined(CONFIG_ARCH_IXP2000) || \
    defined(CONFIG_ARCH_LH7A40X) || \
    defined(CONFIG_ARCH_OMAP)
		.macro	loadsp, rb
		addruart \rb
		.endm
#elif defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb
		mov	\rb, #0x80000000	@ physical base address
# if defined(CONFIG_DEBUG_LL_SER3)
		add	\rb, \rb, #0x00050000	@ Ser3
# else
		add	\rb, \rb, #0x00010000	@ Ser1
# endif
		.endm
#elif defined(CONFIG_ARCH_IOP331)
		.macro	loadsp, rb
		mov	\rb, #0xff000000
		orr	\rb, \rb, #0x00ff0000
		orr	\rb, \rb, #0x0000f700	@ location of the UART
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro	loadsp, rb
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C2410_LOWLEVEL_UART_PORT
		.endm
#else
#error no serial architecture defined
#endif
#endif
#endif

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r8, 8		/* decompressed kernel end */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		mov	r8, #0			@ save r0

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
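		@ In the 32-bit modes, CPSR[4:0] is the mode field and only
		@ USR (0b10000) has bits 1:0 clear, so the "tst r2, #3"
		@ below is a cheap "are we in user mode?" test.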
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
		swi	0x123456		@ angel_SWI_ARM
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7 and r8.
		 */

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address
		 *   r6 - GOT start
		 *   ip - GOT end
		 */
		add	r5, r5, r0
		add	r6, r6, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
		 * we need to fix up pointers into the BSS region.
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * The C runtime environment should now be set up
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		cmp	r0, r5
		bls	wont_overwrite

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127
		bic	r0, r0, #127		@ align the kernel length
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
		stmia	r1!, {r8 - r13}
		ldmia	r2!, {r8 - r13}
		stmia	r1!, {r8 - r13}
		cmp	r2, r3
		blo	1b

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code
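		@ The loop above copies reloc_start..reloc_end to r5 + r0,
		@ just past the decompressed image, in 48-byte bursts (two
		@ 6-register ldm/stm pairs per pass), so this jump lands on
		@ the copied reloc_start.  cache_clean_flush first pushes
		@ the freshly written code out to memory so instruction
		@ fetch sees it.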
/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4 = kernel execution address
 * r7 = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	zreladdr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =params_phys
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = run-time address of "start"
 * On exit,
 *  r1, r2, r3, r8, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r8, r0, lsr #18
		mov	r8, r8, lsl #18		@ start of RAM
		add	r9, r8, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r8			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r9			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr

__armv4_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
		bl	__common_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mov	pc, r12

__arm6_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mov	pc, lr
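/*
 * A note on the CP15 c1 control register bits used above: bit 0
 * enables the MMU, bit 2 the data cache, bit 3 the write buffer,
 * bit 12 the I-cache and bit 14 round-robin replacement.  The 0x30
 * constant sets the P and D bits, keeping the CPU in 32-bit program
 * and data space on CPUs where those bits are writable.
 */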
/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		.align	5
reloc_start:	add	r8, r5, r0
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r9 - r13}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r9 - r13}
		.endr

		cmp	r5, r8
		blo	1b
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0
		mov	r1, r7			@ restore architecture number
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5
		b	1b

/*
 * Table for cache operations.  This is basically:
 *  - CPU ID match
 *  - CPU ID mask
 *  - 'cache on' method instruction
 *  - 'cache off' method instruction
 *  - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_cache_off	@ works, but slow
		b	__arm6_cache_off
		mov	pc, lr
@		b	__arm6_cache_on		@ untested
@		b	__arm6_cache_off
@		b	__armv3_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_cache_off
		b	__arm7_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_cache_on
		b	__armv4_cache_off
		mov	pc, lr

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		@ Everything from here on will be the new ID system.
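		@ New-style IDs carry the implementer in bits 31:24 and an
		@ architecture code in bits 19:16; the SA entries below
		@ match a full CPU ID, while the 0x000f0000-masked entries
		@ match only the architecture field.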
		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00070000		@ ARMv6
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv6_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__arm6_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_cache_off

__arm7_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_cache_off

__armv3_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv6_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
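/*
 * __armv4_cache_flush below sizes its flush loop from the cache type
 * register: the field at bit 18 gives the D-cache size, bit 14 the
 * "M" multiplier and bits 13:12 the line length.  On CPUs without a
 * cache type register the mrc returns the main ID instead, which is
 * what the "teq r3, r6" check catches, leaving the 32K/32-byte
 * defaults in place.
 */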
__armv4_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r6			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		bic	r1, pc, #63		@ align to longest cache line
		add	r2, r1, r2
1:		ldr	r3, [r1], r11		@ s/w flush D cache
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b

memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

reloc_end:

		.align
		.section ".stack", "w"
user_stack:	.space	4096
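@ LC0 points sp at user_stack+4096, the top of this area, giving the
@ decompressor 4K of descending stack.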