Lines Matching +full:src +full:-
2 * memcpy - copy memory area
4 * Copyright (c) 2013-2022, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
9 This memcpy routine is optimised for Cortex-A15 cores and takes advantage
14 ARMv6 (ARMv7-a if using Neon)
29 .arch armv7-a
59 #define src r1 macro
69 #define A_l r2 /* Call-clobbered. */
70 #define A_h r3 /* Call-clobbered. */
79 /* Number of lines ahead to pre-fetch data. If you change this the code
87 vldr \vreg, [src, #\base]
89 vldr d0, [src, #\base + 8]
91 vldr d1, [src, #\base + 16]
93 vldr d2, [src, #\base + 24]
95 vldr \vreg, [src, #\base + prefetch_lines * 64 - 32]
97 vldr d0, [src, #\base + 40]
99 vldr d1, [src, #\base + 48]
101 vldr d2, [src, #\base + 56]
106 vldr \vreg, [src, #\base]
108 vldr d0, [src, #\base + 8]
110 vldr d1, [src, #\base + 16]
112 vldr d2, [src, #\base + 24]
115 vldr d0, [src, #\base + 40]
117 vldr d1, [src, #\base + 48]
119 vldr d2, [src, #\base + 56]
134 rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
136 vld1.8 {d0}, [src]! /* 14 words to go. */
138 vld1.8 {d0}, [src]! /* 12 words to go. */
140 vld1.8 {d0}, [src]! /* 10 words to go. */
142 vld1.8 {d0}, [src]! /* 8 words to go. */
144 vld1.8 {d0}, [src]! /* 6 words to go. */
146 vld1.8 {d0}, [src]! /* 4 words to go. */
148 vld1.8 {d0}, [src]! /* 2 words to go. */
152 ldrne tmp1, [src], #4
159 add src, src, tmp1
160 rsb tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2)
164 ldr tmp1, [src, #-60] /* 15 words to go. */
165 str tmp1, [dst, #-60]
167 ldr tmp1, [src, #-56] /* 14 words to go. */
168 str tmp1, [dst, #-56]
169 ldr tmp1, [src, #-52]
170 str tmp1, [dst, #-52]
172 ldr tmp1, [src, #-48] /* 12 words to go. */
173 str tmp1, [dst, #-48]
174 ldr tmp1, [src, #-44]
175 str tmp1, [dst, #-44]
177 ldr tmp1, [src, #-40] /* 10 words to go. */
178 str tmp1, [dst, #-40]
179 ldr tmp1, [src, #-36]
180 str tmp1, [dst, #-36]
182 ldr tmp1, [src, #-32] /* 8 words to go. */
183 str tmp1, [dst, #-32]
184 ldr tmp1, [src, #-28]
185 str tmp1, [dst, #-28]
187 ldr tmp1, [src, #-24] /* 6 words to go. */
188 str tmp1, [dst, #-24]
189 ldr tmp1, [src, #-20]
190 str tmp1, [dst, #-20]
192 ldr tmp1, [src, #-16] /* 4 words to go. */
193 str tmp1, [dst, #-16]
194 ldr tmp1, [src, #-12]
195 str tmp1, [dst, #-12]
197 ldr tmp1, [src, #-8] /* 2 words to go. */
198 str tmp1, [dst, #-8]
199 ldr tmp1, [src, #-4]
200 str tmp1, [dst, #-4]
204 ldrhcs tmp1, [src], #2
205 ldrbne src, [src] /* Src is dead, use as a scratch. */
207 strbne src, [dst]
212 str tmp2, [sp, #-FRAME_SIZE]!
213 and tmp2, src, #7
219 /* Magic dust alert! Force VFP on Cortex-A9. Experiments show
225 /* SRC and DST have the same mutual 64-bit alignment, but we may
226 still need to pre-copy some bytes to get to natural alignment.
227 We bring SRC and DST into full 64-bit alignment. */
232 ldrmi tmp1, [src], #4
235 ldrhcs tmp1, [src], #2
236 ldrbne tmp2, [src], #1
250 vldr d0, [src, #0]
252 vldr d1, [src, #8]
254 vldr d0, [src, #16]
256 vldr d1, [src, #24]
258 vldr d0, [src, #32]
260 vldr d1, [src, #40]
262 vldr d0, [src, #48]
264 vldr d1, [src, #56]
266 add src, src, #64
276 add src, src, tmp1
277 rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
280 vldr d0, [src, #-56] /* 14 words to go. */
281 vstr d0, [dst, #-56]
282 vldr d0, [src, #-48] /* 12 words to go. */
283 vstr d0, [dst, #-48]
284 vldr d0, [src, #-40] /* 10 words to go. */
285 vstr d0, [dst, #-40]
286 vldr d0, [src, #-32] /* 8 words to go. */
287 vstr d0, [dst, #-32]
288 vldr d0, [src, #-24] /* 6 words to go. */
289 vstr d0, [dst, #-24]
290 vldr d0, [src, #-16] /* 4 words to go. */
291 vstr d0, [dst, #-16]
292 vldr d0, [src, #-8] /* 2 words to go. */
293 vstr d0, [dst, #-8]
295 sub src, src, #8
298 ldrd A_l, A_h, [src, #8]
300 ldrd A_l, A_h, [src, #16]
302 ldrd A_l, A_h, [src, #24]
304 ldrd A_l, A_h, [src, #32]
306 ldrd A_l, A_h, [src, #40]
308 ldrd A_l, A_h, [src, #48]
310 ldrd A_l, A_h, [src, #56]
312 ldrd A_l, A_h, [src, #64]!
321 add src, src, #8
325 /* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but
326 we know that the src and dest are 64-bit aligned so we can use
333 add src, src, tmp1
334 rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
336 ldrd A_l, A_h, [src, #-56] /* 14 words to go. */
337 strd A_l, A_h, [dst, #-56]
338 ldrd A_l, A_h, [src, #-48] /* 12 words to go. */
339 strd A_l, A_h, [dst, #-48]
340 ldrd A_l, A_h, [src, #-40] /* 10 words to go. */
341 strd A_l, A_h, [dst, #-40]
342 ldrd A_l, A_h, [src, #-32] /* 8 words to go. */
343 strd A_l, A_h, [dst, #-32]
344 ldrd A_l, A_h, [src, #-24] /* 6 words to go. */
345 strd A_l, A_h, [dst, #-24]
346 ldrd A_l, A_h, [src, #-16] /* 4 words to go. */
347 strd A_l, A_h, [dst, #-16]
348 ldrd A_l, A_h, [src, #-8] /* 2 words to go. */
349 strd A_l, A_h, [dst, #-8]
353 ldrne tmp1, [src], #4
356 ldrhcs tmp1, [src], #2
357 ldrbne tmp2, [src]
374 vldr d3, [src, #0]
375 vldr d4, [src, #64]
376 vldr d5, [src, #128]
377 vldr d6, [src, #192]
378 vldr d7, [src, #256]
380 vldr d0, [src, #8]
381 vldr d1, [src, #16]
382 vldr d2, [src, #24]
383 add src, src, #32
392 add src, src, #3 * 64
396 add src, src, #2 * 64
404 add src, src, #3 * 64
408 vldr d7, [src, #64]
410 vldr d0, [src, #64 + 8]
412 vldr d1, [src, #64 + 16]
414 vldr d2, [src, #64 + 24]
416 add src, src, #96
427 /* Pre-bias src and dst. */
428 sub src, src, #8
430 pld [src, #8]
431 pld [src, #72]
433 pld [src, #136]
434 ldrd A_l, A_h, [src, #8]
436 ldrd B_l, B_h, [src, #16]
438 ldrd C_l, C_h, [src, #24]
440 pld [src, #200]
441 ldrd D_l, D_h, [src, #32]!
445 pld [src, #232]
447 ldrd A_l, A_h, [src, #40]
449 ldrd B_l, B_h, [src, #48]
451 ldrd C_l, C_h, [src, #56]
453 ldrd D_l, D_h, [src, #64]!
457 ldrd A_l, A_h, [src, #8]
459 ldrd B_l, B_h, [src, #16]
461 ldrd C_l, C_h, [src, #24]
463 ldrd D_l, D_h, [src, #32]
465 /* Save the remaining bytes and restore the callee-saved regs. */
467 add src, src, #40
482 pld [src]
483 pld [src, #64]
486 /* Bring DST to 64-bit alignment. */
488 pld [src, #(2 * 64)]
492 ldrmi tmp1, [src], #4
495 ldrbne tmp1, [src], #1
496 ldrhcs tmp2, [src], #2
500 pld [src, #(3 * 64)]
504 pld [src, #(4 * 64)]
507 vld1.8 {d0-d3}, [src]!
508 vld1.8 {d4-d7}, [src]!
512 pld [src, #(4 * 64)]
513 vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
514 vld1.8 {d0-d3}, [src]!
515 vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
516 vld1.8 {d4-d7}, [src]!
520 vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
521 vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
525 sub src, src, #4
528 ldr A_l, [src, #4]
529 ldr A_h, [src, #8]
531 ldr B_l, [src, #12]
532 ldr B_h, [src, #16]
534 ldr C_l, [src, #20]
535 ldr C_h, [src, #24]
537 ldr D_l, [src, #28]
538 ldr D_h, [src, #32]!
542 pld [src, #(5 * 64) - (32 - 4)]
544 ldr A_l, [src, #36]
545 ldr A_h, [src, #40]
547 ldr B_l, [src, #44]
548 ldr B_h, [src, #48]
550 ldr C_l, [src, #52]
551 ldr C_h, [src, #56]
553 ldr D_l, [src, #60]
554 ldr D_h, [src, #64]!
558 ldr A_l, [src, #4]
559 ldr A_h, [src, #8]
561 ldr B_l, [src, #12]
562 ldr B_h, [src, #16]
564 ldr C_l, [src, #20]
565 ldr C_h, [src, #24]
567 ldr D_l, [src, #28]
568 ldr D_h, [src, #32]
571 /* Save the remaining bytes and restore the callee-saved regs. */
573 add src, src, #36