xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c (revision f5f40dd63bc7acbb5312b26ac1ea1103c12352a6)
1 #include <stdlib.h>
2 
3 // WARNING: When building the scalar versions of these functions you need to
4 // use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
5 // from recognising a loop idiom and planting calls to memcpy!
6 
// Copies n bytes from src to dest one byte at a time, starting at the lowest
// address and working upwards. Returns dest.
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *out = (unsigned char *)dest;
  const unsigned char *in = (const unsigned char *)src;
  // The pointers only advance while bytes remain, so nothing is touched when
  // n is zero.
  for (size_t remaining = n; remaining != 0; --remaining)
    *out++ = *in++;

  return dest;
}
16 
// memcpy leaves the overlapping case undefined, which is why both pointers may
// carry the restrict qualifier. The libc memcpy declaration shown in the man
// page is qualified the same way.
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
                      size_t n) __arm_streaming_compatible {
  // The buffers are disjoint by contract, so the ascending byte copy suffices.
  void *const result = __arm_sc_memcpy_fwd(dest, src, n);
  return result;
}
24 
// Stores c, converted to unsigned char, into each of the first n bytes at
// dest. Returns dest.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  const unsigned char fill = (unsigned char)c;
  unsigned char *out = (unsigned char *)dest;
  // The pointer only advances while bytes remain, so nothing is touched when
  // n is zero.
  for (size_t remaining = n; remaining != 0; --remaining)
    *out++ = fill;

  return dest;
}
33 
// Copies n bytes from src to dest one byte at a time, starting at the highest
// address and working downwards. Returns dest.
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *out = (unsigned char *)dest;
  const unsigned char *in = (const unsigned char *)src;
  // TODO: Improve performance by copying larger chunks in reverse, or by
  // using SVE.
  // idx is always one past the byte being copied, which keeps the unsigned
  // counter from wrapping below zero.
  for (size_t idx = n; idx != 0; --idx)
    out[idx - 1] = in[idx - 1];
  return dest;
}
46 
// memmove behaves as though:
//   1. All n bytes of src were first copied into a temporary array that
//      overlaps neither src nor dest.
//   2. The temporary array was then copied into dest.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  unsigned char *const dest_lo = (unsigned char *)dest;
  const unsigned char *const src_lo = (const unsigned char *)src;

  // An ascending copy is correct in two situations:
  //   * The regions do not overlap, so this is just a memcpy.
  //   * The regions overlap with src at the higher address:
  //         src: Low     |   ->   |     High
  //        dest: Low  |   ->   |        High
  //     Each chunk is read from src before a write to dest can reach it, so
  //     future reads of src are never corrupted.
  if (src_lo > dest_lo + n || dest_lo > src_lo + n || src_lo > dest_lo)
    return __arm_sc_memcpy_fwd(dest, src, n);

  // Otherwise dest starts at or above src and within reach of it:
  //     src: Low  |   ->   |        High
  //    dest: Low     |   ->   |     High
  // An ascending copy would overwrite src bytes that have not been read yet,
  // so copy in reverse, starting from the highest address.
  return __arm_sc_memcpy_rev(dest, src, n);
}
77 
// Searches the first n bytes at src for a byte equal to c converted to
// unsigned char. Returns a pointer to the first such byte, or NULL when none
// of the n bytes match.
const void *__arm_sc_memchr(const void *src, int c,
                            size_t n) __arm_streaming_compatible {
  const unsigned char needle = (unsigned char)c;
  const unsigned char *cursor = (const unsigned char *)src;
  for (size_t remaining = n; remaining != 0; --remaining, ++cursor)
    if (*cursor == needle)
      return cursor;

  return NULL;
}
88