#include <stdlib.h>

// WARNING: When building the scalar versions of these functions you need to
// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
// from recognising a loop idiom and planting calls to memcpy!

// Copies n bytes from src to dest one byte at a time, starting at the lowest
// address and working upwards, then returns dest. Nothing is copied when n is
// zero. Because bytes are read and written in ascending order, this is also
// safe for overlapping buffers as long as src is at a higher address than dest.
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  const unsigned char *srcp = (const unsigned char *)src;
  for (size_t i = 0; i < n; ++i)
    destp[i] = srcp[i];

  return dest;
}

// If dest and src overlap then behaviour is undefined, hence we can add the
// restrict keywords here. This also matches the definition of the libc memcpy
// according to the man page.
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
                      size_t n) __arm_streaming_compatible {
  // Delegate to the ascending byte-wise copy and hand back its result.
  return __arm_sc_memcpy_fwd(dest, src, n);
}

// Stores the value of c, converted to unsigned char, into each of the first n
// bytes starting at dest, then returns dest. Nothing is written when n is
// zero.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  unsigned char c8 = (unsigned char)c;
  for (size_t i = 0; i < n; ++i)
    destp[i] = c8;

  return dest;
}

// Copies n bytes from src to dest one byte at a time, starting at the highest
// address and working downwards, then returns dest. Nothing is copied when n
// is zero.
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  const unsigned char *srcp = (const unsigned char *)src;
  // TODO: Improve performance by copying larger chunks in reverse, or by
  // using SVE.
  while (n > 0) {
    // Decrement first so that n indexes the last byte not yet copied.
    --n;
    destp[n] = srcp[n];
  }
  return dest;
}

// Semantically a memmove is equivalent to the following:
//   1. Copy the entire contents of src to a temporary array that does not
//      overlap with src or dest.
//   2. Copy the contents of the temporary array into dest.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  const unsigned char *srcp = (const unsigned char *)src;

  // If src and dest don't overlap then just invoke memcpy
  if ((srcp > (destp + n)) || (destp > (srcp + n)))
    return __arm_sc_memcpy_fwd(dest, src, n);

  // Overlap case 1:
  //   src:  Low     |   ->   |     High
  //   dest: Low  |   ->   |        High
  // Here src is always ahead of dest at a higher address. If we first read a
  // chunk of data from src we can safely write the same chunk to dest without
  // corrupting future reads of src.
  if (srcp > destp)
    return __arm_sc_memcpy_fwd(dest, src, n);

  // Overlap case 2:
  //   src:  Low  |   ->   |        High
  //   dest: Low     |   ->   |     High
  // While we're in the overlap region we're always corrupting future reads of
  // src when writing to dest. An efficient way to do this is to copy the data
  // in reverse by starting at the highest address.
  return __arm_sc_memcpy_rev(dest, src, n);
}

// Scans the first n bytes starting at src for the value of c converted to
// unsigned char. Returns a pointer to the first matching byte, or NULL when
// no byte in that range matches (including when n is zero).
const void *__arm_sc_memchr(const void *src, int c,
                            size_t n) __arm_streaming_compatible {
  const unsigned char *srcp = (const unsigned char *)src;
  unsigned char c8 = (unsigned char)c;
  for (size_t i = 0; i < n; ++i)
    if (srcp[i] == c8)
      return &srcp[i];

  return NULL;
}