1 #include <linux/string.h> 2 #include <linux/export.h> 3 4 #undef memcpy 5 #undef memset 6 7 __visible void *memcpy(void *to, const void *from, size_t n) 8 { 9 #ifdef CONFIG_X86_USE_3DNOW 10 return __memcpy3d(to, from, n); 11 #else 12 return __memcpy(to, from, n); 13 #endif 14 } 15 EXPORT_SYMBOL(memcpy); 16 17 __visible void *memset(void *s, int c, size_t count) 18 { 19 return __memset(s, c, count); 20 } 21 EXPORT_SYMBOL(memset); 22 23 __visible void *memmove(void *dest, const void *src, size_t n) 24 { 25 int d0,d1,d2,d3,d4,d5; 26 char *ret = dest; 27 28 __asm__ __volatile__( 29 /* Handle more 16 bytes in loop */ 30 "cmp $0x10, %0\n\t" 31 "jb 1f\n\t" 32 33 /* Decide forward/backward copy mode */ 34 "cmp %2, %1\n\t" 35 "jb 2f\n\t" 36 37 /* 38 * movs instruction have many startup latency 39 * so we handle small size by general register. 40 */ 41 "cmp $680, %0\n\t" 42 "jb 3f\n\t" 43 /* 44 * movs instruction is only good for aligned case. 45 */ 46 "mov %1, %3\n\t" 47 "xor %2, %3\n\t" 48 "and $0xff, %3\n\t" 49 "jz 4f\n\t" 50 "3:\n\t" 51 "sub $0x10, %0\n\t" 52 53 /* 54 * We gobble 16 bytes forward in each loop. 55 */ 56 "3:\n\t" 57 "sub $0x10, %0\n\t" 58 "mov 0*4(%1), %3\n\t" 59 "mov 1*4(%1), %4\n\t" 60 "mov %3, 0*4(%2)\n\t" 61 "mov %4, 1*4(%2)\n\t" 62 "mov 2*4(%1), %3\n\t" 63 "mov 3*4(%1), %4\n\t" 64 "mov %3, 2*4(%2)\n\t" 65 "mov %4, 3*4(%2)\n\t" 66 "lea 0x10(%1), %1\n\t" 67 "lea 0x10(%2), %2\n\t" 68 "jae 3b\n\t" 69 "add $0x10, %0\n\t" 70 "jmp 1f\n\t" 71 72 /* 73 * Handle data forward by movs. 74 */ 75 ".p2align 4\n\t" 76 "4:\n\t" 77 "mov -4(%1, %0), %3\n\t" 78 "lea -4(%2, %0), %4\n\t" 79 "shr $2, %0\n\t" 80 "rep movsl\n\t" 81 "mov %3, (%4)\n\t" 82 "jmp 11f\n\t" 83 /* 84 * Handle data backward by movs. 85 */ 86 ".p2align 4\n\t" 87 "6:\n\t" 88 "mov (%1), %3\n\t" 89 "mov %2, %4\n\t" 90 "lea -4(%1, %0), %1\n\t" 91 "lea -4(%2, %0), %2\n\t" 92 "shr $2, %0\n\t" 93 "std\n\t" 94 "rep movsl\n\t" 95 "mov %3,(%4)\n\t" 96 "cld\n\t" 97 "jmp 11f\n\t" 98 99 /* 100 * Start to prepare for backward copy. 101 */ 102 ".p2align 4\n\t" 103 "2:\n\t" 104 "cmp $680, %0\n\t" 105 "jb 5f\n\t" 106 "mov %1, %3\n\t" 107 "xor %2, %3\n\t" 108 "and $0xff, %3\n\t" 109 "jz 6b\n\t" 110 111 /* 112 * Calculate copy position to tail. 113 */ 114 "5:\n\t" 115 "add %0, %1\n\t" 116 "add %0, %2\n\t" 117 "sub $0x10, %0\n\t" 118 119 /* 120 * We gobble 16 bytes backward in each loop. 121 */ 122 "7:\n\t" 123 "sub $0x10, %0\n\t" 124 125 "mov -1*4(%1), %3\n\t" 126 "mov -2*4(%1), %4\n\t" 127 "mov %3, -1*4(%2)\n\t" 128 "mov %4, -2*4(%2)\n\t" 129 "mov -3*4(%1), %3\n\t" 130 "mov -4*4(%1), %4\n\t" 131 "mov %3, -3*4(%2)\n\t" 132 "mov %4, -4*4(%2)\n\t" 133 "lea -0x10(%1), %1\n\t" 134 "lea -0x10(%2), %2\n\t" 135 "jae 7b\n\t" 136 /* 137 * Calculate copy position to head. 138 */ 139 "add $0x10, %0\n\t" 140 "sub %0, %1\n\t" 141 "sub %0, %2\n\t" 142 143 /* 144 * Move data from 8 bytes to 15 bytes. 145 */ 146 ".p2align 4\n\t" 147 "1:\n\t" 148 "cmp $8, %0\n\t" 149 "jb 8f\n\t" 150 "mov 0*4(%1), %3\n\t" 151 "mov 1*4(%1), %4\n\t" 152 "mov -2*4(%1, %0), %5\n\t" 153 "mov -1*4(%1, %0), %1\n\t" 154 155 "mov %3, 0*4(%2)\n\t" 156 "mov %4, 1*4(%2)\n\t" 157 "mov %5, -2*4(%2, %0)\n\t" 158 "mov %1, -1*4(%2, %0)\n\t" 159 "jmp 11f\n\t" 160 161 /* 162 * Move data from 4 bytes to 7 bytes. 163 */ 164 ".p2align 4\n\t" 165 "8:\n\t" 166 "cmp $4, %0\n\t" 167 "jb 9f\n\t" 168 "mov 0*4(%1), %3\n\t" 169 "mov -1*4(%1, %0), %4\n\t" 170 "mov %3, 0*4(%2)\n\t" 171 "mov %4, -1*4(%2, %0)\n\t" 172 "jmp 11f\n\t" 173 174 /* 175 * Move data from 2 bytes to 3 bytes. 176 */ 177 ".p2align 4\n\t" 178 "9:\n\t" 179 "cmp $2, %0\n\t" 180 "jb 10f\n\t" 181 "movw 0*2(%1), %%dx\n\t" 182 "movw -1*2(%1, %0), %%bx\n\t" 183 "movw %%dx, 0*2(%2)\n\t" 184 "movw %%bx, -1*2(%2, %0)\n\t" 185 "jmp 11f\n\t" 186 187 /* 188 * Move data for 1 byte. 189 */ 190 ".p2align 4\n\t" 191 "10:\n\t" 192 "cmp $1, %0\n\t" 193 "jb 11f\n\t" 194 "movb (%1), %%cl\n\t" 195 "movb %%cl, (%2)\n\t" 196 ".p2align 4\n\t" 197 "11:" 198 : "=&c" (d0), "=&S" (d1), "=&D" (d2), 199 "=r" (d3),"=r" (d4), "=r"(d5) 200 :"0" (n), 201 "1" (src), 202 "2" (dest) 203 :"memory"); 204 205 return ret; 206 207 } 208 EXPORT_SYMBOL(memmove); 209