/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

/*
 * memcpy - copy a block of bytes.
 *
 * In:   r3 = destination pointer
 *       r4 = source pointer
 *       r5 = number of bytes to copy
 * Out:  r3 = the destination pointer that was passed in
 *
 * Registers written by the code below: r0, r3-r12, ctr, cr0, cr1, cr6, cr7.
 *
 * Two builds exist:
 *  - __LITTLE_ENDIAN__: a plain byte-at-a-time loop that never modifies r3.
 *  - otherwise: a doubleword copy.  The incoming r3 is stored to a stack
 *    slot addressed relative to r1 on entry and reloaded just before every
 *    blr, because r3 is used as the moving store pointer.
 *
 * In the doubleword copy, cr7 always holds the low four bits of the byte
 * count currently being worked on:
 *     cr7*4+0 -> 8 bytes, cr7*4+1 -> 4 bytes,
 *     cr7*4+2 -> 2 bytes, cr7*4+3 -> 1 byte
 * so each "bf cr7*4+n" skips the copy of that power-of-two sized piece.
 *
 * NOTE(review): the exact selection rules of the *_FTR_SECTION macros live
 * in asm/feature-fixups.h, which is not visible here.  Going by the _IFCLR
 * suffix, the first alternative below is presumably kept when
 * CPU_FTR_VMX_COPY is clear and the branch to memcpy_power7 is used when it
 * is set - confirm against that header.
 */
	.align	7
_GLOBAL_TOC(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
	cmpdi	cr7,r5,0			/* cr7.eq = (length == 0), tested by beqlr below */
#else
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifdef CONFIG_PPC_BOOK3S_64
#ifndef SELFTEST
	b	memcpy_power7
#endif
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
	/* dumb little-endian memcpy that will get replaced at runtime */
	addi	r9,r3,-1			/* r9 = dest - 1: working pointer, r3 stays intact */
	addi	r4,r4,-1			/* src - 1, so the update-form load starts at src */
	beqlr	cr7				/* nothing to copy: return with r3 unchanged */
	mtctr	r5				/* one loop iteration per byte */
1:	lbzu	r10,1(r4)
	stbu	r10,1(r9)
	bdnz	1b
	blr
#else
	PPC_MTOCRF(0x01,r5)			/* cr7 = low 4 bits of the length */
	cmpldi	cr1,r5,16			/* cr1 = length compared (unsigned) with 16 */
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7				/* cr0.eq set when dest is already 8-byte aligned */
	dcbt	0,r4				/* cache touch hint for the start of the source */
	blt	cr1,.Lshort_copy		/* fewer than 16 bytes */
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)

/*
 * Main doubleword copy.  Entered with r5 = bytes left, cr7 = low 4 bits of
 * r5 and cr1 = (r5 compared with 16).  r3 is biased by -16 and r4 by -8 so
 * that the update-form ldu/stdu in the loop land on the right addresses.
 */
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7				/* r0 = source offset within its doubleword */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	srdi	r7,r5,4				/* r7 = number of 16-byte loop iterations */
	ld	r9,0(r4)			/* preload the first source doubleword */
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7				/* r5 = trailing bytes (0-7); cr0.eq if none */
	bf	cr7*4+0,2f			/* no odd doubleword: enter the loop at its midpoint */
	addi	r3,r3,8				/* odd doubleword: shift both pointers by 8 ... */
	addi	r4,r4,8
	mr	r8,r9				/* ... and treat the preloaded value as the pending store */
	blt	cr1,3f				/* under 16 bytes left: only that one doubleword */
	/* 16 bytes per iteration; r8 and r9 alternate as load/store registers */
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)			/* store the last full doubleword */
	beq	3f				/* cr0 from "andi. r5,r5,7": no tail, return */
	addi	r3,r3,16			/* r3 = next destination byte */
	/*
	 * Tail of 1-7 bytes.  The source pointer still trails by 8, hence the
	 * 8(r4) displacements.
	 */
.Ldo_tail:
	bf	cr7*4+1,1f
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination is 8-byte aligned but the source is not (r0 = r4 & 7, nonzero).
 * The source pointer is rounded down to a doubleword boundary and every
 * destination doubleword is assembled from two neighbouring source
 * doublewords:
 *     d[i] = (s[i] << r10) | (s[i+1] >> r11)
 * with r10 = 8 * r0 (bits) and r11 = 64 - r10.
 */
.Lsrc_unaligned:
	srdi	r6,r5,3				/* r6 = number of whole doublewords in the length */
	addi	r5,r5,-16
	subf	r4,r0,r4			/* round the source down to a doubleword boundary */
	srdi	r7,r5,4				/* loop count = (length - 16) / 16 */
	sldi	r10,r0,3			/* r10 = left shift count in bits */
	cmpdi	cr6,r6,3			/* cr6 = doubleword count compared with 3 */
	andi.	r5,r5,7				/* trailing bytes (0-7); cr0.eq if none */
	mtctr	r7
	subfic	r11,r10,64			/* r11 = right shift count in bits */
	add	r5,r5,r0			/* r5 = trailing bytes + source offset */

	bt	cr7*4+0,0f			/* odd number of doublewords: use the prologue at 0: */

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f				/* fewer than 3 doublewords: skip the loop */
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f				/* fewer than 3 doublewords: a single store remains */
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f				/* exactly 3 doublewords: skip the loop */

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

	/* drain the doublewords still held in registers */
3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	beq	4f				/* cr0 from "andi. r5,r5,7": no tail; 4: is the return in .Lshort_copy */
	cmpwi	cr1,r5,8			/* does the tail reach into the next source doubleword? */
	addi	r3,r3,32			/* r3 = next destination byte */
	sld	r9,r9,r10			/* drop the bytes of r9 that were already stored */
	ble	cr1,6f
	ld	r0,8(r4)			/* yes: merge in the following doubleword */
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	/*
	 * The remaining 1-7 bytes sit at the most-significant end of r9; each
	 * rotldi brings the next piece down into the low bits for the store.
	 */
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination is not 8-byte aligned.  r6 = number of bytes (1-7) needed to
 * reach the next 8-byte boundary.  Copy those as 1-, 2- and 4-byte pieces,
 * using r7 as the running offset, then rejoin the aligned path with the
 * length and both pointers adjusted.
 */
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5			/* r5 = bytes left after the alignment copy */
	li	r7,0
	cmpldi	cr1,r5,16			/* redo the 16-byte compare for .Ldst_aligned */
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)			/* cr7 = low 4 bits of the remaining length */
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

/*
 * Fewer than 16 bytes: copy 8 (as two words), 4, 2 and 1 bytes as selected
 * by the length bits in cr7.
 */
.Lshort_copy:
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr
#endif
EXPORT_SYMBOL(memcpy)