/*
 * This file contains miscellaneous low-level functions.
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
 * and Paul Mackerras.
 *
 * kexec bits:
 * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
 * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <linux/sys.h>
#include <asm/unistd.h>
#include <asm/errno.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/cache.h>
#include <asm/cputable.h>
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/processor.h>
#include <asm/kexec.h>

	.text

#ifdef CONFIG_IRQSTACKS
/*
 * call_do_softirq - run __do_softirq() on another stack.
 *
 * In:	r3 = base address of the stack area to run on
 *
 * The return address is parked at 4(r1) on the current stack, a frame is
 * created THREAD_SIZE-STACK_FRAME_OVERHEAD bytes above r3 whose first word
 * (the back chain) holds the old r1, and r1 is switched to it.  After the
 * call the back chain is followed to get the original stack back.
 */
_GLOBAL(call_do_softirq)
	mflr	r0
	stw	r0,4(r1)		/* return address -> 4(old stack) */
	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)	/* back chain = old r1, r3 = new frame */
	mr	r1,r3			/* switch stacks */
	bl	__do_softirq
	lwz	r1,0(r1)		/* follow back chain to the old stack */
	lwz	r0,4(r1)
	mtlr	r0
	blr

/*
 * call_handle_irq - call a handler through CTR on another stack.
 *
 * In:	r5 = base address of the stack area to run on
 *	r6 = address of the function to call
 *	r3, r4 are not touched before the call, so the callee sees them
 *	exactly as passed in.
 *
 * The stack switch is the same as in call_do_softirq above.
 */
_GLOBAL(call_handle_irq)
	mflr	r0
	stw	r0,4(r1)		/* return address -> 4(old stack) */
	mtctr	r6			/* callee address */
	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)	/* back chain = old r1, r5 = new frame */
	mr	r1,r5			/* switch stacks */
	bctrl
	lwz	r1,0(r1)		/* follow back chain to the old stack */
	lwz	r0,4(r1)
	mtlr	r0
	blr
#endif /* CONFIG_IRQSTACKS */

/*
 * This returns the high 64 bits of the product of two 64-bit numbers.
 *
 * In:	r3:r4 = A (r3 is the high word)
 *	r5:r6 = B (r5 is the high word)
 * Out:	r3:r4 = bits 64..127 of A * B
 * Scratch: r0, r7-r10, cr0, cr1
 *
 * cr0 remembers "B.lo == 0" and cr1 remembers "A.hi == 0" for the whole
 * routine; nothing in between is a record-form instruction.
 */
_GLOBAL(mulhdu)
	cmpwi	cr0,r6,0		/* cr0.eq <- (B.lo == 0) */
	cmpwi	cr1,r3,0		/* cr1.eq <- (A.hi == 0) */
	mr	r10,r4			/* r10 = A.lo, r4 becomes result low */
	mulhwu	r4,r4,r5		/* r4  = hi(A.lo * B.hi) */
	beq	1f			/* no A.lo * B.lo term to fold in */
	mulhwu	r0,r10,r6		/* r0  = hi(A.lo * B.lo) */
	mullw	r7,r10,r5		/* r7  = lo(A.lo * B.hi) */
	addc	r7,r0,r7		/* sum of product bits 32..63 */
	addze	r4,r4			/* carry into result low */
1:	beqlr	cr1			/* all done if high part of A is 0 */
	mr	r10,r3			/* r10 = A.hi */
	mullw	r9,r3,r5		/* r9  = lo(A.hi * B.hi) */
	mulhwu	r3,r3,r5		/* r3  = hi(A.hi * B.hi) */
	beq	2f			/* B.lo == 0: skip the A.hi * B.lo term */
	mullw	r0,r10,r6		/* r0  = lo(A.hi * B.lo) */
	mulhwu	r8,r10,r6		/* r8  = hi(A.hi * B.lo) */
	addc	r7,r0,r7		/* finish bits 32..63, only the carry matters */
	adde	r4,r4,r8
	addze	r3,r3
2:	addc	r4,r4,r9
	addze	r3,r3
	blr

/*
 * sub_reloc_offset(x) returns x - reloc_offset().
*/
_GLOBAL(sub_reloc_offset)
	mflr	r0			/* keep the caller's return address */
	bl	1f			/* LR <- run-time address of 1: */
1:	mflr	r5
	lis	r4,1b@ha
	addi	r4,r4,1b@l		/* r4 = link-time address of 1: */
	subf	r5,r4,r5		/* r5 = run-time - link-time address */
	subf	r3,r5,r3		/* r3 = x - that difference */
	mtlr	r0
	blr

/*
 * reloc_got2 runs through the .got2 section adding an offset
 * to each entry.
 *
 * In:	r3 = offset to add to every 32-bit entry
 * Scratch: r0, r4, r7, r8, r11, ctr
 *
 * Returns immediately when __got2_start..__got2_end holds no whole word.
 */
_GLOBAL(reloc_got2)
	mflr	r11			/* LR is clobbered by the bl below */
	lis	r7,__got2_start@ha
	addi	r7,r7,__got2_start@l
	lis	r8,__got2_end@ha
	addi	r8,r8,__got2_end@l
	subf	r8,r7,r8
	srwi.	r8,r8,2			/* number of words in the section */
	beqlr
	mtctr	r8
	bl	1f
1:	mflr	r0
	lis	r4,1b@ha
	addi	r4,r4,1b@l
	subf	r0,r4,r0		/* run-time - link-time address of 1: */
	add	r7,r0,r7		/* where the section really is right now */
2:	lwz	r0,0(r7)
	add	r0,r0,r3
	stw	r0,0(r7)
	addi	r7,r7,4
	bdnz	2b
	mtlr	r11
	blr

/*
 * call_setup_cpu - call the setup_cpu function for this cpu
 * r3 = data offset, r24 = cpu number
 * (r24 is not referenced by this routine itself.)
 *
 * Setup function is called with:
 *   r3 = data offset
 *   r4 = ptr to CPU spec (relocated)
 *
 * The setup function is entered with bctr, i.e. as a tail call that
 * returns straight to our caller.  If the CPU spec has no setup
 * function (null pointer) we simply return.
 */
_GLOBAL(call_setup_cpu)
	addis	r4,r3,cur_cpu_spec@ha
	addi	r4,r4,cur_cpu_spec@l
	lwz	r4,0(r4)
	add	r4,r4,r3		/* relocate the CPU spec pointer */
	lwz	r5,CPU_SPEC_SETUP(r4)
	cmpwi	0,r5,0			/* test the unrelocated pointer ... */
	add	r5,r5,r3		/* ... add does not disturb cr0 */
	beqlr
	mtctr	r5
	bctr

#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_6xx)

/* This gets called by via-pmu.c to switch the PLL selection
 * on 750fx CPU. This function should really be moved to some
 * other place (as most of the cpufreq code in via-pmu).
 *
 * In:	r3 = PLL selector; zero is treated as PLL0, non-zero as PLL1,
 *	     and its least significant bit becomes HID1:PS
 *
 * Runs with MSR:EE cleared and restores the caller's MSR on exit.
 */
_GLOBAL(low_choose_750fx_pll)
	/* Clear MSR:EE */
	mfmsr	r7
	rlwinm	r0,r7,0,17,15
	mtmsr	r0

	/* If switching to PLL1, disable HID0:BTIC */
	cmplwi	cr0,r3,0
	beq	1f
	mfspr	r5,SPRN_HID0
	rlwinm	r5,r5,0,27,25
	sync
	mtspr	SPRN_HID0,r5
	isync
	sync

1:
	/* Calc new HID1 value */
	mfspr	r4,SPRN_HID1
	/* Build a HID1:PS bit from parameter */
	rlwinm	r5,r3,16,15,15
	/* Clear out HID1:PS from value read */
	rlwinm	r4,r4,0,16,14
	/* Could have I used rlwimi here ? */
	or	r4,r4,r5
	mtspr	SPRN_HID1,r4

	/* Store new HID1 image in this CPU's nap_save_hid1 slot */
	rlwinm	r6,r1,0,0,(31-THREAD_SHIFT)
	lwz	r6,TI_CPU(r6)
	slwi	r6,r6,2
	addis	r6,r6,nap_save_hid1@ha
	stw	r4,nap_save_hid1@l(r6)

	/* If switching to PLL0, enable HID0:BTIC */
	cmplwi	cr0,r3,0
	bne	1f
	mfspr	r5,SPRN_HID0
	ori	r5,r5,HID0_BTIC
	sync
	mtspr	SPRN_HID0,r5
	isync
	sync

1:
	/* Return */
	mtmsr	r7
	blr

/*
 * low_choose_7447a_dfs - copy the low bit of r3 into bit 9 of HID1.
 *
 * Runs with MSR:EE cleared and restores the caller's MSR on exit.
 */
_GLOBAL(low_choose_7447a_dfs)
	/* Clear MSR:EE */
	mfmsr	r7
	rlwinm	r0,r7,0,17,15
	mtmsr	r0

	/* Calc new HID1 value */
	mfspr	r4,SPRN_HID1
	insrwi	r4,r3,1,9	/* insert parameter into bit 9 */
	sync
	mtspr	SPRN_HID1,r4
	sync
	isync

	/* Return */
	mtmsr	r7
	blr

#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_6xx */

/*
 * complement mask on the msr then "or" some values on.
 *     _nmask_and_or_msr(nmask, value_to_or)
 */
_GLOBAL(_nmask_and_or_msr)
	mfmsr	r0		/* Get current msr */
	andc	r0,r0,r3	/* And off the bits set in r3 (first parm) */
	or	r0,r0,r4	/* Or on the bits in r4 (second parm) */
	SYNC			/* Some chip revs have problems here... */
	mtmsr	r0		/* Update machine state */
	isync
	blr			/* Done */

#ifdef CONFIG_40x

/*
 * Do an IO access in real mode
 *
 * real_readb: r3 = address; the byte read is returned in r3.
 * MSR_DR is cleared around the access and the caller's MSR restored.
 */
_GLOBAL(real_readb)
	mfmsr	r7
	ori	r0,r7,MSR_DR
	xori	r0,r0,MSR_DR	/* set-then-toggle == clear MSR_DR */
	sync
	mtmsr	r0
	sync
	isync
	lbz	r3,0(r3)
	sync
	mtmsr	r7
	sync
	isync
	blr

/*
 * Do an IO access in real mode
 *
 * real_writeb: r3 = byte value, r4 = address.
 * MSR_DR is cleared around the access and the caller's MSR restored.
 */
_GLOBAL(real_writeb)
	mfmsr	r7
	ori	r0,r7,MSR_DR
	xori	r0,r0,MSR_DR	/* set-then-toggle == clear MSR_DR */
	sync
	mtmsr	r0
	sync
	isync
	stb	r3,0(r4)
	sync
	mtmsr	r7
	sync
	isync
	blr

#endif /* CONFIG_40x */

/*
 * Flush MMU TLB
 *
 * Everywhere except FSL BookE, _tlbil_all and _tlbil_pid are just
 * additional names for _tlbia.
 */
#ifndef CONFIG_FSL_BOOKE
_GLOBAL(_tlbil_all)
_GLOBAL(_tlbil_pid)
#endif
_GLOBAL(_tlbia)
#if defined(CONFIG_40x)
	sync			/* Flush to memory before changing mapping */
	tlbia
	isync			/* Flush shadow TLB */
#elif defined(CONFIG_44x)
	li	r3,0
	sync

	/* Load high watermark */
	lis	r4,tlb_44x_hwater@ha
	lwz	r5,tlb_44x_hwater@l(r4)

	/* Overwrite the PAGEID word of entries 0..hwater with the index */
1:	tlbwe	r3,r3,PPC44x_TLB_PAGEID
	addi	r3,r3,1
	cmpw	0,r3,r5
	ble	1b

	isync
#elif defined(CONFIG_FSL_BOOKE)
	/* Invalidate all entries in TLB0 */
	li	r3, 0x04
	tlbivax	0,r3
	/* Invalidate all entries in TLB1 */
	li	r3, 0x0c
	tlbivax	0,r3
	msync
#ifdef CONFIG_SMP
	tlbsync
#endif /* CONFIG_SMP */
#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
#if defined(CONFIG_SMP)
	/* Lock value: this CPU's number with 10 in the upper half-word */
	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT)
	lwz	r8,TI_CPU(r8)
	oris	r8,r8,10
	mfmsr	r10
	SYNC
	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
	rlwinm	r0,r0,0,28,26		/* clear DR */
	mtmsr	r0
	SYNC_601
	isync
	lis	r9,mmu_hash_lock@h
	ori	r9,r9,mmu_hash_lock@l
	tophys(r9,r9)
	/* Spin until mmu_hash_lock reads 0 and we manage to store r8 */
10:	lwarx	r7,0,r9
	cmpwi	0,r7,0
	bne-	10b
	stwcx.	r8,0,r9
	bne-	10b
	sync
	tlbia
	sync
	TLBSYNC
	li	r0,0
	stw	r0,0(r9)		/* clear mmu_hash_lock */
	mtmsr	r10
	SYNC_601
	isync
#else /* CONFIG_SMP */
	sync
	tlbia
	sync
#endif /* CONFIG_SMP */
#endif /* CONFIG_40x / CONFIG_44x / CONFIG_FSL_BOOKE / other */
	blr

/*
 * Flush MMU TLB for a particular address
 *
 * In:	r3 = address
 *	r4 = PID/TID to search under (read by the 40x and 44x variants only)
 *
 * Everywhere except FSL BookE, _tlbil_va is just another name for _tlbie.
 */
#ifndef CONFIG_FSL_BOOKE
_GLOBAL(_tlbil_va)
#endif
_GLOBAL(_tlbie)
#if defined(CONFIG_40x)
	/* We run the search with interrupts disabled because we have to change
	 * the PID and I don't want to preempt when that happens.
	 */
	mfmsr	r5
	mfspr	r6,SPRN_PID
	wrteei	0
	mtspr	SPRN_PID,r4
	tlbsx.	r3, 0, r3
	mtspr	SPRN_PID,r6
	wrtee	r5
	bne	10f
	sync
	/* There are only 64 TLB entries, so r3 < 64, which means bit 25 is clear.
	 * Since 25 is the V bit in the TLB_TAG, loading this value will invalidate
	 * the TLB entry. */
	tlbwe	r3, r3, TLB_TAG
	isync
10:

#elif defined(CONFIG_44x)
	mfspr	r5,SPRN_MMUCR
	rlwimi	r5,r4,0,24,31			/* Set TID */

	/* We have to run the search with interrupts disabled, even critical
	 * and debug interrupts (in fact the only critical exceptions we have
	 * are debug and machine check).  Otherwise an interrupt which causes
	 * a TLB miss can clobber the MMUCR between the mtspr and the tlbsx. */
	mfmsr	r4
	lis	r6,(MSR_EE|MSR_CE|MSR_ME|MSR_DE)@ha
	addi	r6,r6,(MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
	andc	r6,r4,r6
	mtmsr	r6
	mtspr	SPRN_MMUCR,r5
	tlbsx.	r3, 0, r3
	mtmsr	r4
	bne	10f
	sync
	/* There are only 64 TLB entries, so r3 < 64,
	 * which means bit 22, is clear.  Since 22 is
	 * the V bit in the TLB_PAGEID, loading this
	 * value will invalidate the TLB entry.
	 */
	tlbwe	r3, r3, PPC44x_TLB_PAGEID
	isync
10:
#elif defined(CONFIG_FSL_BOOKE)
	rlwinm	r4, r3, 0, 0, 19
	ori	r5, r4, 0x08	/* TLBSEL = 1 */
	tlbivax	0, r4
	tlbivax	0, r5
	msync
#if defined(CONFIG_SMP)
	tlbsync
#endif /* CONFIG_SMP */
#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
#if defined(CONFIG_SMP)
	/* Lock value: this CPU's number with 11 in the upper half-word */
	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT)
	lwz	r8,TI_CPU(r8)
	oris	r8,r8,11
	mfmsr	r10
	SYNC
	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
	rlwinm	r0,r0,0,28,26		/* clear DR */
	mtmsr	r0
	SYNC_601
	isync
	lis	r9,mmu_hash_lock@h
	ori	r9,r9,mmu_hash_lock@l
	tophys(r9,r9)
	/* Spin until mmu_hash_lock reads 0 and we manage to store r8 */
10:	lwarx	r7,0,r9
	cmpwi	0,r7,0
	bne-	10b
	stwcx.	r8,0,r9
	bne-	10b
	eieio
	tlbie	r3
	sync
	TLBSYNC
	li	r0,0
	stw	r0,0(r9)		/* clear mmu_hash_lock */
	mtmsr	r10
	SYNC_601
	isync
#else /* CONFIG_SMP */
	tlbie	r3
	sync
#endif /* CONFIG_SMP */
#endif /* CONFIG_40x / CONFIG_44x / CONFIG_FSL_BOOKE / other */
	blr

#if defined(CONFIG_FSL_BOOKE)

/* All four flash-invalidate request bits of MMUCSR0 */
#define MMUCSR0_TLBFI	(MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \
			 MMUCSR0_TLB2FI | MMUCSR0_TLB3FI)

/*
 * Flush MMU TLB, but only on the local processor (no broadcast)
 *
 * Sets the flash-invalidate bits in MMUCSR0, polls until they read back
 * as zero, then synchronises with msync/isync exactly as _tlbil_pid
 * does for the same operation.
 */
_GLOBAL(_tlbil_all)
	li	r3,(MMUCSR0_TLBFI)@l
	mtspr	SPRN_MMUCSR0, r3
1:
	mfspr	r3,SPRN_MMUCSR0
	andi.	r3,r3,MMUCSR0_TLBFI@l
	bne	1b
	msync
	isync
	blr

/*
 * Flush MMU TLB for a particular process id, but only on the local processor
 * (no broadcast)
 */
_GLOBAL(_tlbil_pid)
/* we currently do an invalidate all since we don't have per pid invalidate */
	li	r3,(MMUCSR0_TLBFI)@l
	mtspr	SPRN_MMUCSR0, r3
1:
	mfspr	r3,SPRN_MMUCSR0
	andi.	r3,r3,MMUCSR0_TLBFI@l
	bne	1b
	msync
	isync
	blr

/*
 * Flush MMU TLB for a particular address, but only on the local processor
 * (no broadcast)
 *
 * In:	r3 = address, r4 = PID (shifted into MAS6 for the search)
 *
 * The search and rewrite run with interrupts disabled (wrteei 0).  Every
 * exit path, including "no valid entry found", goes through the wrtee
 * that puts the caller's MSR[EE] back.
 */
_GLOBAL(_tlbil_va)
	mfmsr	r10
	wrteei	0
	slwi	r4,r4,16
	mtspr	SPRN_MAS6,r4		/* assume AS=0 for now */
	tlbsx	0,r3
	mfspr	r4,SPRN_MAS1		/* check valid */
	andis.	r3,r4,MAS1_VALID@h
	beq	1f			/* nothing cached for this address */
	rlwinm	r4,r4,0,1,31		/* clear the top (valid) bit */
	mtspr	SPRN_MAS1,r4
	tlbwe
	msync
	isync
1:	wrtee	r10			/* restore caller's MSR[EE] */
	blr
#endif /* CONFIG_FSL_BOOKE */


/*
 * Flush instruction cache.
 * This is a no-op on the 601.
 */
_GLOBAL(flush_instruction_cache)
#if defined(CONFIG_8xx)
	isync
	lis	r5, IDC_INVALL@h
	mtspr	SPRN_IC_CST, r5
#elif defined(CONFIG_4xx)
#ifdef CONFIG_403GCX
	/* 512 iccci operations, stepping 16 bytes from KERNELBASE */
	li	r3, 512
	mtctr	r3
	lis	r4, KERNELBASE@h
1:	iccci	0, r4
	addi	r4, r4, 16
	bdnz	1b
#else
	lis	r3, KERNELBASE@h
	iccci	0,r3
#endif
#elif defined(CONFIG_FSL_BOOKE)
BEGIN_FTR_SECTION
	/* Unified cache: flash-invalidate through L1CSR0 and return */
	mfspr	r3,SPRN_L1CSR0
	ori	r3,r3,L1CSR0_CFI|L1CSR0_CLFC
	/* msync; isync recommended here */
	mtspr	SPRN_L1CSR0,r3
	isync
	blr
END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
	mfspr	r3,SPRN_L1CSR1
	ori	r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
	mtspr	SPRN_L1CSR1,r3
#else
	mfspr	r3,SPRN_PVR
	rlwinm	r3,r3,16,16,31	/* upper half-word of the PVR */
	cmpwi	0,r3,1
	beqlr			/* for 601, do nothing */
	/* 603/604 processor - use invalidate-all bit in HID0 */
	mfspr	r3,SPRN_HID0
	ori	r3,r3,HID0_ICFI
	mtspr	SPRN_HID0,r3
#endif /* CONFIG_8xx/4xx */
	isync
	blr

/*
 * Write any modified data cache blocks out to memory
 * and invalidate the corresponding instruction cache blocks.
 * This is a no-op on the 601.
*
 * flush_icache_range(unsigned long start, unsigned long stop)
 */

/*
 * Common prologue of the three range routines that follow.
 *
 * In:	r3 = start, r4 = stop
 * Out:	r3 = start rounded down to a cache line boundary
 *	r4 = (stop - r3 + L1_CACHE_BYTES - 1) >> L1_CACHE_SHIFT,
 *	     i.e. the number of cache lines to walk
 *	cr0.eq set when that count is zero (follow with beqlr)
 * Scratch: r5
 */
#define RANGE_TO_CACHE_LINES			\
	li	r5,L1_CACHE_BYTES-1;		\
	andc	r3,r3,r5;			\
	subf	r4,r3,r4;			\
	add	r4,r4,r5;			\
	srwi.	r4,r4,L1_CACHE_SHIFT

_KPROBE(__flush_icache_range)
BEGIN_FTR_SECTION
	blr				/* for 601, do nothing */
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
	RANGE_TO_CACHE_LINES
	beqlr				/* empty range */
	mtctr	r4
	mr	r6,r3			/* second cursor for the icbi pass */
1:	dcbst	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbst's to get to ram */
	mtctr	r4			/* same line count again */
2:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	2b
	sync				/* additional sync needed on g4 */
	isync
	blr

/*
 * Write any modified data cache blocks out to memory.
 * Does not invalidate the corresponding cache lines (especially for
 * any corresponding instruction cache).
 *
 * clean_dcache_range(unsigned long start, unsigned long stop)
 */
_GLOBAL(clean_dcache_range)
	RANGE_TO_CACHE_LINES
	beqlr				/* empty range */
	mtctr	r4

1:	dcbst	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbst's to get to ram */
	blr

/*
 * Write any modified data cache blocks out to memory and invalidate them.
 * Does not invalidate the corresponding instruction cache blocks.
 *
 * flush_dcache_range(unsigned long start, unsigned long stop)
 */
_GLOBAL(flush_dcache_range)
	RANGE_TO_CACHE_LINES
	beqlr				/* empty range */
	mtctr	r4

1:	dcbf	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbf's to get to ram */
	blr

#undef RANGE_TO_CACHE_LINES

/*
 * Like above, but invalidate the D-cache.  This is used by the 8xx
 * to invalidate the cache so the PPC core doesn't get stale data
 * from the CPM (no cache snooping here :-).
*
 * invalidate_dcache_range(unsigned long start, unsigned long stop)
 *
 * In:	r3 = start, r4 = stop
 * start is rounded down to a cache line and every line up to stop gets
 * a dcbi; nothing is done when the resulting line count is zero.
 * Scratch: r5, ctr
 */
_GLOBAL(invalidate_dcache_range)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5		/* align start down to a line */
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT	/* number of lines, rounded up */
	beqlr
	mtctr	r4

1:	dcbi	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbi's to get to ram */
	blr

/*
 * Flush a particular page from the data cache to RAM.
 * Note: this is necessary because the instruction cache does *not*
 * snoop from the data cache.
 * This is a no-op on the 601 which has a unified cache.
 *
 *	void __flush_dcache_icache(void *page)
 *
 * The page geometry comes from PAGE_SHIFT/PAGE_SIZE rather than a
 * literal 4096, so the whole page is covered whatever the configured
 * page size is.
 */
_GLOBAL(__flush_dcache_icache)
BEGIN_FTR_SECTION
	blr
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
	rlwinm	r3,r3,0,0,31-PAGE_SHIFT		/* Get page base address */
	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
	mtctr	r4
	mr	r6,r3				/* second cursor for the icbi pass */
0:	dcbst	0,r3				/* Write line to ram */
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	0b
	sync
#ifndef CONFIG_44x
	/* We don't flush the icache on 44x. Those have a virtual icache
	 * and we don't have access to the virtual address here (it's
	 * not the page vaddr but where it's mapped in user space). The
	 * flushing of the icache on these is handled elsewhere, when
	 * a change in the address space occurs, before returning to
	 * user space
	 */
	mtctr	r4
1:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	1b
	sync
	isync
#endif /* CONFIG_44x */
	blr

/*
 * Flush a particular page from the data cache to RAM, identified
 * by its physical address.  We turn off the MMU so we can just use
 * the physical address (this may be a highmem page without a kernel
 * mapping).
*
 *	void __flush_dcache_icache_phys(unsigned long physaddr)
 *
 * Only MSR[DR] is cleared, so the dcbst/icbi operands are taken as
 * physical addresses; the caller's MSR is restored before returning.
 * The page geometry comes from PAGE_SHIFT/PAGE_SIZE rather than a
 * literal 4096.
 */
_GLOBAL(__flush_dcache_icache_phys)
BEGIN_FTR_SECTION
	blr					/* for 601, do nothing */
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
	mfmsr	r10
	rlwinm	r0,r10,0,28,26			/* clear DR */
	mtmsr	r0
	isync
	rlwinm	r3,r3,0,0,31-PAGE_SHIFT		/* Get page base address */
	li	r4,PAGE_SIZE/L1_CACHE_BYTES	/* Number of lines in a page */
	mtctr	r4
	mr	r6,r3				/* second cursor for the icbi pass */
0:	dcbst	0,r3				/* Write line to ram */
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	0b
	sync
	mtctr	r4
1:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	1b
	sync
	mtmsr	r10				/* restore DR */
	isync
	blr

/*
 * Clear pages using the dcbz instruction, which doesn't cause any
 * memory traffic (except to write out any cache lines which get
 * displaced).  This only works on cacheable memory.
 *
 * void clear_pages(void *page, int order) ;
 *
 * In:	r3 = page, r4 = order; (PAGE_SIZE/L1_CACHE_BYTES) << order cache
 *	lines are cleared.
 *
 * With CONFIG_8xx four word stores of zero are used per step instead of
 * dcbz.  NOTE(review): that clears 16 bytes per L1_CACHE_BYTES step, so
 * it presumably relies on 16-byte cache lines there - confirm.
 */
_GLOBAL(clear_pages)
	li	r0,PAGE_SIZE/L1_CACHE_BYTES
	slw	r0,r0,r4		/* lines per page << order */
	mtctr	r0
#ifdef CONFIG_8xx
	li	r4, 0
1:	stw	r4, 0(r3)
	stw	r4, 4(r3)
	stw	r4, 8(r3)
	stw	r4, 12(r3)
#else
1:	dcbz	0,r3
#endif
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	blr

/*
 * Copy a whole page.  We use the dcbz instruction on the destination
 * to reduce memory traffic (it eliminates the unnecessary reads of
 * the destination into cache).  This requires that the destination
 * is cacheable.
*/

/*
 * Copy 16 bytes from 4(r4).. to 4(r3).. and advance both pointers by 16
 * (the last load and the last store are update forms).  Both pointers
 * are therefore kept 4 bytes below the next word to copy.
 * Scratch: r6-r9
 */
#define COPY_16_BYTES		\
	lwz	r6,4(r4);	\
	lwz	r7,8(r4);	\
	lwz	r8,12(r4);	\
	lwzu	r9,16(r4);	\
	stw	r6,4(r3);	\
	stw	r7,8(r3);	\
	stw	r8,12(r3);	\
	stwu	r9,16(r3)

/*
 * copy_page
 *
 * In:	r3 = destination page, r4 = source page
 * Scratch: r0, r5-r9, r11, ctr, cr0
 *
 * The number of cache lines to copy is derived from PAGE_SIZE rather
 * than a literal 4096.
 *
 * Non-8xx flow: the first MAX_COPY_PREFETCH source lines are touched with
 * dcbt up front.  The main loop then runs twice: first for all but the
 * last MAX_COPY_PREFETCH lines with cr0.eq clear, then, after flipping
 * cr0.eq, for the remaining MAX_COPY_PREFETCH lines, after which beqlr
 * returns.  r11 is the prefetch offset relative to r4 and is reset to 4
 * for the second pass.
 */
_GLOBAL(copy_page)
	addi	r3,r3,-4		/* bias pointers for COPY_16_BYTES */
	addi	r4,r4,-4

#ifdef CONFIG_8xx
	/* don't use prefetch on 8xx */
	/* NOTE(review): one COPY_16_BYTES per counted line, so this
	 * presumably relies on 16-byte cache lines on 8xx - confirm. */
	li	r0,PAGE_SIZE/L1_CACHE_BYTES
	mtctr	r0
1:	COPY_16_BYTES
	bdnz	1b
	blr

#else	/* not 8xx, we can prefetch */
	li	r5,4			/* dcbz offset: r3 + 4 = current dest line */

#if MAX_COPY_PREFETCH > 1
	li	r0,MAX_COPY_PREFETCH
	li	r11,4
	mtctr	r0
11:	dcbt	r11,r4
	addi	r11,r11,L1_CACHE_BYTES
	bdnz	11b
#else /* MAX_COPY_PREFETCH == 1 */
	dcbt	r5,r4
	li	r11,L1_CACHE_BYTES+4
#endif /* MAX_COPY_PREFETCH */
	li	r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
	crclr	4*cr0+eq		/* first pass marker */
2:
	mtctr	r0
1:
	dcbt	r11,r4			/* prefetch ahead in the source */
	dcbz	r5,r3			/* establish dest line without reading it */
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	1b
	beqlr				/* second pass finished: done */
	crnot	4*cr0+eq,4*cr0+eq	/* mark second pass */
	li	r0,MAX_COPY_PREFETCH	/* lines still to copy */
	li	r11,4
	b	2b
#endif	/* CONFIG_8xx */

/*
 * void atomic_clear_mask(atomic_t mask, atomic_t *addr)
 * void atomic_set_mask(atomic_t mask, atomic_t *addr);
 *
 * In:	r3 = mask, r4 = addr
 * Both are lwarx/stwcx. retry loops: the word at addr is reloaded and the
 * update retried until the conditional store succeeds.
 * Scratch: r5, cr0
 * PPC405_ERR77 is a macro from the included headers (presumably an
 * erratum workaround - confirm there).
 */
_GLOBAL(atomic_clear_mask)
10:	lwarx	r5,0,r4
	andc	r5,r5,r3		/* *addr & ~mask */
	PPC405_ERR77(0,r4)
	stwcx.	r5,0,r4
	bne-	10b
	blr
_GLOBAL(atomic_set_mask)
10:	lwarx	r5,0,r4
	or	r5,r5,r3		/* *addr | mask */
	PPC405_ERR77(0,r4)
	stwcx.	r5,0,r4
	bne-	10b
	blr

/*
 * Extended precision shifts.
 *
 * Updated to be valid for shift counts from 0 to 63 inclusive.
*	-- Gabriel
 *
 * R3/R4 has 64 bit value
 * R5    has shift count
 * result in R3/R4
 *
 *  ashrdi3: arithmetic right shift (sign propagation)
 *  lshrdi3: logical right shift
 *  ashldi3: left shift
 *
 * R3 is the most significant word (MSW), R4 the least significant (LSW).
 * All three routines are branch free.  As the original notes on these
 * routines put it, slw/srw give 0 once the shift amount (taken modulo 64)
 * reaches 32; the comments below rely on that.
 * Scratch: r6, r7 (and r8 in __ashrdi3)
 */
_GLOBAL(__ashrdi3)
	subfic	r6,r5,32	/* r6 = 32 - count */
	srw	r4,r4,r5	/* LSW = LSW >> count, 0 once count >= 32 */
	addi	r7,r5,32	/* r7 = count + 32; could be xori, or addi with -32 */
	slw	r6,r3,r6	/* t1 = MSW << (32-count), 0 if count == 0 or count > 32 */
	rlwinm	r8,r7,0,26,26	/* t3 = (count < 32) ? 32 : 0   (mask 0x20) */
	sraw	r7,r3,r7	/* t2 = MSW >> (count-32), arithmetic */
	or	r4,r4,r6	/* LSW |= t1 */
	slw	r7,r7,r8	/* t2 = (count < 32) ? 0 : t2 */
	sraw	r3,r3,r5	/* MSW = MSW >> count, arithmetic */
	or	r4,r4,r7	/* LSW |= t2 */
	blr

_GLOBAL(__ashldi3)
	subfic	r6,r5,32	/* r6 = 32 - count */
	slw	r3,r3,r5	/* MSW = MSW << count, 0 once count >= 32 */
	addi	r7,r5,32	/* r7 = count + 32; could be xori, or addi with -32 */
	srw	r6,r4,r6	/* t1 = LSW >> (32-count), 0 if count == 0 or count > 32 */
	slw	r7,r4,r7	/* t2 = count < 32 ? 0 : LSW << (count-32) */
	or	r3,r3,r6	/* MSW |= t1 */
	slw	r4,r4,r5	/* LSW = LSW << count */
	or	r3,r3,r7	/* MSW |= t2 */
	blr

_GLOBAL(__lshrdi3)
	subfic	r6,r5,32	/* r6 = 32 - count */
	srw	r4,r4,r5	/* LSW = LSW >> count, 0 once count >= 32 */
	addi	r7,r5,32	/* r7 = count + 32; could be xori, or addi with -32 */
	slw	r6,r3,r6	/* t1 = MSW << (32-count), 0 if count == 0 or count > 32 */
	srw	r7,r3,r7	/* t2 = count < 32 ? 0 : MSW >> (count-32) */
	or	r4,r4,r6	/* LSW |= t1 */
	srw	r3,r3,r5	/* MSW = MSW >> count */
	or	r4,r4,r7	/* LSW |= t2 */
	blr

/*
 * 64-bit comparison: __ucmpdi2(u64 a, u64 b)
 * Returns 0 if a < b, 1 if a == b, 2 if a > b.
*/
/*
 * In:	r3:r4 = a (r3 high word), r5:r6 = b (r5 high word)
 * Out:	r3 = 0, 1 or 2 as described above
 * li does not touch cr0, so the bltlr below tests whichever unsigned
 * compare was executed last.
 */
_GLOBAL(__ucmpdi2)
	cmplw	r3,r5			/* high words first */
	li	r3,1			/* provisional answer: equal */
	bne	1f
	cmplw	r4,r6			/* high words equal: low words decide */
	beqlr
1:	li	r3,0			/* provisional answer: a < b */
	bltlr
	li	r3,2			/* otherwise a > b */
	blr

/*
 * abs: r3 = |r3|, computed as (x ^ m) - m where m is the sign of x
 * smeared over all 32 bits.  Scratch: r4
 */
_GLOBAL(abs)
	srawi	r4,r3,31		/* m = x < 0 ? -1 : 0 */
	xor	r3,r3,r4
	subf	r3,r4,r3		/* r3 = r3 - r4 */
	blr

/*
 * Create a kernel thread
 *   kernel_thread(fn, arg, flags)
 *
 * In:	r3 = fn, r4 = arg, r5 = flags
 * Out:	r3 = value returned by the clone system call, negated when the
 *	     system call flagged an error
 *
 * Issues clone with flags | CLONE_VM | CLONE_UNTRACED.  In the child
 * (clone returned 0) a top-level stack frame is pushed, fn(arg) is called
 * and, should it return, exit(0) is invoked.  r30/r31 carry fn/arg across
 * the system call and are saved/restored around it.
 */
_GLOBAL(kernel_thread)
	stwu	r1,-16(r1)
	stw	r30,8(r1)
	stw	r31,12(r1)
	mr	r30,r3		/* function */
	mr	r31,r4		/* argument */
	ori	r3,r5,CLONE_VM	/* flags */
	oris	r3,r3,CLONE_UNTRACED>>16
	li	r4,0		/* new sp (unused) */
	li	r0,__NR_clone
	sc
	bns+	1f		/* did system call indicate error? */
	neg	r3,r3		/* if so, make return code negative */
1:	cmpwi	r3,0		/* parent or child? */
	bne	2f		/* return if parent */
	li	r0,0		/* make top-level stack frame */
	stwu	r0,-16(r1)
	mtlr	r30		/* fn addr in lr */
	mr	r3,r31		/* load arg and call fn */
	PPC440EP_ERR42
	blrl
	li	r0,__NR_exit	/* exit if function returns */
	li	r3,0
	sc
2:	lwz	r30,8(r1)
	lwz	r31,12(r1)
	addi	r1,r1,16
	blr

/*
 * This routine is just here to keep GCC happy - sigh...
 */
_GLOBAL(__main)
	blr

#ifdef CONFIG_KEXEC
	/*
	 * Must be relocatable PIC code callable as a C function.
	 *
	 * Register use while walking the list:
	 *   r0 = current entry, r3 = pointer into the current indirection
	 *   page, r8 = destination cursor, r9 = source cursor,
	 *   r6 = running XOR of every word copied, r5 = entry point.
	 *
	 * Each entry is a page address with flag bits in its low bits; the
	 * low 12 bits are cleared to obtain the address.  The first entry
	 * examined is the page_list argument itself.
	 */
	.globl relocate_new_kernel
relocate_new_kernel:
	/* r3 = page_list   */
	/* r4 = reboot_code_buffer */
	/* r5 = start_address      */

	li	r0, 0

	/*
	 * Set Machine Status Register to a known status,
	 * switch the MMU off and jump to 1: in a single step.
	 */

	mr	r8, r0
	ori	r8, r8, MSR_RI|MSR_ME
	mtspr	SPRN_SRR1, r8
	/* address of 1: inside the copy located at reboot_code_buffer */
	addi	r8, r4, 1f - relocate_new_kernel
	mtspr	SPRN_SRR0, r8
	sync
	rfi

1:
	/* from this point address translation is turned off */
	/* and interrupts are disabled */

	/* set a new stack at the bottom of our page... */
	/* (not really needed now) */
	addi	r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
	stw	r0, 0(r1)

	/* Do the copies */
	li	r6, 0 /* checksum */
	mr	r0, r3
	b	1f

0:	/* top, read another word for the indirection page */
	lwzu	r0, 4(r3)

1:
	/* is it a destination page? (r8) */
	rlwinm.	r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
	beq	2f

	clrrwi	r8, r0, 12 /* clear kexec flags, page align */
	b	0b

2:	/* is it an indirection page? (r3) */
	rlwinm.	r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
	beq	2f

	clrrwi	r3, r0, 12 /* clear kexec flags, page align */
	addi	r3, r3, -4 /* pre-bias for the lwzu at 0: */
	b	0b

2:	/* are we done? */
	rlwinm.	r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
	beq	2f
	b	3f

2:	/* is it a source page? (r9) */
	rlwinm.	r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
	beq	0b

	clrrwi	r9, r0, 12 /* clear kexec flags, page align */

	li	r7, PAGE_SIZE / 4
	mtctr	r7
	addi	r9, r9, -4 /* pre-bias both cursors for the update forms */
	addi	r8, r8, -4
9:
	lwzu	r0, 4(r9)  /* do the copy */
	xor	r6, r6, r0
	stwu	r0, 4(r8)
	dcbst	0, r8
	sync
	icbi	0, r8
	bdnz	9b

	addi	r9, r9, 4 /* undo the bias: cursors now at the next page */
	addi	r8, r8, 4
	b	0b

3:

	/* To be certain of avoiding problems with self-modifying code
	 * execute a serializing instruction here.
	 */
	isync
	sync

	/* jump to the entry point, usually the setup routine */
	mtlr	r5
	blrl

	/* spin here if the entry point ever returns */
1:	b	1b

relocate_new_kernel_end:

	.globl relocate_new_kernel_size
relocate_new_kernel_size:
	.long relocate_new_kernel_end - relocate_new_kernel
#endif