#include <stdlib.h>

// WARNING: When building the scalar versions of these functions you need to
// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
// from recognising a loop idiom and planting calls to memcpy!

// Copies n bytes from src to dest one byte at a time, walking from the lowest
// address to the highest. Returns dest. A value of n == 0 copies nothing.
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  const unsigned char *srcp = (const unsigned char *)src;
  for (size_t i = 0; i < n; ++i)
    destp[i] = srcp[i];

  return dest;
}

// Copies n bytes from src to dest and returns dest.
//
// If dest and src overlap then behaviour is undefined, hence we can add the
// restrict keywords here. This also matches the definition of the libc memcpy
// according to the man page.
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
                      size_t n) __arm_streaming_compatible {
  return __arm_sc_memcpy_fwd(dest, src, n);
}

// Sets each of the first n bytes of dest to c converted to unsigned char.
// Returns dest.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  // Only the low byte of c is stored, as with the libc memset.
  unsigned char c8 = (unsigned char)c;
  for (size_t i = 0; i < n; ++i)
    destp[i] = c8;

  return dest;
}

// Copies n bytes from src to dest one byte at a time, walking from the highest
// address down to the lowest. Returns dest. A value of n == 0 copies nothing.
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  const unsigned char *srcp = (const unsigned char *)src;
  // TODO: Improve performance by copying larger chunks in reverse, or by
  // using SVE.
  while (n > 0) {
    // Decrement first so that n indexes the last byte not yet copied.
    --n;
    destp[n] = srcp[n];
  }
  return dest;
}

// Semantically a memmove is equivalent to the following:
//   1. Copy the entire contents of src to a temporary array that does not
//      overlap with src or dest.
//   2. Copy the contents of the temporary array into dest.
// Copies n bytes from src to dest and returns dest. The two regions may
// overlap; the copy direction is chosen so that no source byte is overwritten
// before it has been read.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  const unsigned char *srcp = (const unsigned char *)src;

  // If src and dest don't overlap then just invoke memcpy
  if ((srcp > (destp + n)) || (destp > (srcp + n)))
    return __arm_sc_memcpy_fwd(dest, src, n);

  // Overlap case 1:
  //     src: Low     |   ->   |     High
  //    dest: Low  |   ->   |        High
  // Here src is always ahead of dest at a higher address. If we first read a
  // chunk of data from src we can safely write the same chunk to dest without
  // corrupting future reads of src.
  if (srcp > destp)
    return __arm_sc_memcpy_fwd(dest, src, n);

  // Overlap case 2:
  //     src: Low  |   ->   |        High
  //    dest: Low     |   ->   |     High
  // While we're in the overlap region we're always corrupting future reads of
  // src when writing to dest. An efficient way to do this is to copy the data
  // in reverse by starting at the highest address.
  // (This path is also taken when src == dest, where the copy is a no-op.)
  return __arm_sc_memcpy_rev(dest, src, n);
}

// Scans the first n bytes of src for the first byte equal to c converted to
// unsigned char. Returns a pointer to that byte, or NULL if no byte matches
// (including when n == 0).
const void *__arm_sc_memchr(const void *src, int c,
                            size_t n) __arm_streaming_compatible {
  const unsigned char *srcp = (const unsigned char *)src;
  unsigned char c8 = (unsigned char)c;
  for (size_t i = 0; i < n; ++i)
    if (srcp[i] == c8)
      return &srcp[i];

  return NULL;
}