// xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/aarch64/lse.S (revision fe75646a0234a261c0013bf1840fdac4acaf0cec)
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
5#include "assembly.h"
6
// Out-of-line LSE atomics helpers. Ported from the libgcc library.
// N = {1, 2, 4, 8}
// M = {1, 2, 4, 8, 16}
// ORDER = {'relax', 'acq', 'rel', 'acq_rel', 'sync'}
// Routines implemented:
//
//  iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
//  iN __aarch64_swpN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
//
// Routines may modify temporary registers tmp0, tmp1, tmp2,
// the return value register x0 (x0 and x1 for the 16-byte cas)
// and the flags only.
22
23#ifdef __aarch64__
24
// Select the assembler target. When the toolchain can assemble LSE
// mnemonics (HAS_ASM_LSE) the LSE extension is enabled; otherwise the LSE
// instructions further down are emitted as raw .inst words.
#ifdef HAS_ASM_LSE
.arch armv8-a+lse
#else
.arch armv8-a
#endif

// Byte flag read by JUMP_IF_NOT_LSE. The Apple spelling carries one extra
// leading underscore. HIDDEN comes from assembly.h (presumably it gives the
// symbol hidden visibility -- confirm there).
#if !defined(__APPLE__)
HIDDEN(__aarch64_have_lse_atomics)
#else
HIDDEN(___aarch64_have_lse_atomics)
#endif
36
// Generate mnemonics for
// L_cas:                                 SIZE: 1,2,4,8,16 MODEL: 1,2,3,4,5
// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8    MODEL: 1,2,3,4,5
40
// Per-SIZE building blocks:
//   S   - size suffix pasted onto mnemonics (b, h, or empty).
//   UXT - instruction that copies the expected value into a temporary in
//         the cas fallback path; it zero-extends 1- and 2-byte operands.
//   B   - size-dependent constant (bits [31:30]) added to the hand-encoded
//         .inst words used when HAS_ASM_LSE is not defined.
#if SIZE == 1
#define S b
#define UXT uxtb
#define B 0x00000000
#elif SIZE == 2
#define S h
#define UXT uxth
#define B 0x40000000
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
#define S
#define UXT mov
#if SIZE == 4
#define B 0x80000000
#elif SIZE == 8
#define B 0xc0000000
#elif defined(L_swp) || defined(L_ldadd) || defined(L_ldclr) ||                \
    defined(L_ldeor) || defined(L_ldset)
// Only cas has a 16-byte form. Without this check a 16-byte swp/ld* request
// would assemble an 8-byte operation under a *16* routine name.
#error "SIZE == 16 is only supported for L_cas"
#endif
// B stays undefined for SIZE == 16: the CASP encoding does not use it.
#else
#error "SIZE must be one of 1, 2, 4, 8 or 16"
#endif // SIZE
60
// Per-MODEL building blocks:
//   SUFF    - suffix of the generated routine name.
//   A, L    - acquire / release letters pasted into the mnemonics
//             (empty when that ordering is not requested).
//   M       - constant added to the hand-encoded CAS / CASP .inst words.
//   N       - constant added to the hand-encoded SWP / LDOP .inst words.
//   BARRIER - emitted at the end of the LL/SC fallback path; empty for
//             every model except _sync.
#if MODEL == 1
#define SUFF _relax
#define A
#define L
#define M 0x000000
#define N 0x000000
#define BARRIER
#elif MODEL == 2
#define SUFF _acq
#define A a
#define L
#define M 0x400000
#define N 0x800000
#define BARRIER
#elif MODEL == 3
#define SUFF _rel
#define A
#define L l
#define M 0x008000
#define N 0x400000
#define BARRIER
#elif MODEL == 4
#define SUFF _acq_rel
#define A a
#define L l
#define M 0x408000
#define N 0xc00000
#define BARRIER
#elif MODEL == 5
#define SUFF _sync
#ifdef L_swp
// swp has _acq semantics: same A/L/M/N values as MODEL == 2.
#define A a
#define L
#define M 0x400000
#define N 0x800000
#else
// All other _sync functions have _seq semantics: the acquire+release
// forms, same A/L/M/N values as MODEL == 4.
#define A a
#define L l
#define M 0x408000
#define N 0xc00000
#endif
// _sync fallbacks finish with a full barrier.
#define BARRIER dmb ish
#else
#error "MODEL must be 1 (relax), 2 (acq), 3 (rel), 4 (acq_rel) or 5 (sync)"
#endif // MODEL
108
// Define register size.
// x(R) / w(R) paste register number R onto the 64-bit / 32-bit register
// prefix; s(R) selects whichever of the two matches SIZE.
// The parameter is called R rather than N so it cannot be confused with
// the object-like macro N defined for the memory model.
#define x(R) GLUE2(x, R)
#define w(R) GLUE2(w, R)
#if SIZE < 8
#define s(R) w(R)
#else
#define s(R) x(R)
#endif
117
// NAME(BASE) builds the routine symbol: __aarch64_ + BASE + SIZE + SUFF.
#define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF)

// Exclusive load / store mnemonics used by the fallback loops. STXR always
// carries the release letter L; LDXR carries the acquire letter A except
// for _sync.
#if MODEL == 5
// Drop A for _sync functions (presumably because their fallback path ends
// with the dmb ish BARRIER -- confirm against the libgcc original).
#define LDXR GLUE3(ld, xr, S)
#else
#define LDXR GLUE4(ld, A, xr, S)
#endif
#define STXR GLUE4(st, L, xr, S)
126
// Define temporary registers.
// These are bare register numbers; they become register names through
// x(), w() or s(), e.g. x(tmp0) is x16.
#define tmp0 16
#define tmp1 17
#define tmp2 15
131
// Macro for branch to label if no LSE available.
//   label - numeric local label reference (e.g. 8f) of the fallback path.
// Loads the one-byte have-LSE flag with a page-relative adrp + ldrb pair
// and branches to \label when it is zero.
// Clobbers tmp0 only; the flags are left untouched.
.macro JUMP_IF_NOT_LSE label
#if !defined(__APPLE__)
        adrp    x(tmp0), __aarch64_have_lse_atomics
        ldrb    w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
#else
        adrp    x(tmp0), ___aarch64_have_lse_atomics@page
        ldrb    w(tmp0), [x(tmp0), ___aarch64_have_lse_atomics@pageoff]
#endif
        cbz     w(tmp0), \label
.endm
143
#ifdef L_cas
// iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
//
// SIZE < 16:   In:  s(0) = expected, s(1) = desired, x2 = ptr
//              Out: s(0) = value read from *ptr
// SIZE == 16:  In:  x0, x1 = expected, x2, x3 = desired, x4 = ptr
//              Out: x0, x1 = value read from *ptr
// May clobber tmp0, tmp1 (tmp2 as well for SIZE == 16) and the flags.
//
// Local labels: 8 = start of the LL/SC fallback, 0 = retry loop,
//               1 = common exit of the fallback.
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
        JUMP_IF_NOT_LSE 8f
#if SIZE < 16
#ifdef HAS_ASM_LSE
#define CAS GLUE4(cas, A, L, S) s(0), s(1), [x2]
#else
#define CAS .inst 0x08a07c41 + B + M
#endif
        // LSE path: a single compare-and-swap instruction.
        CAS    // s(0), s(1), [x2]
        ret
8:
        // Fallback: keep the expected value in tmp0 (zero-extended for
        // 1- and 2-byte operands) because s(0) receives the loaded value.
        UXT    s(tmp0), s(0)
0:
        LDXR   s(0), [x2]
        cmp    s(0), s(tmp0)
        bne    1f                      // mismatch: return the loaded value
        STXR   w(tmp1), s(1), [x2]
        cbnz   w(tmp1), 0b             // exclusive store failed: retry
1:
        BARRIER
        ret
#else
#if MODEL == 5
// Drop A for _sync functions.
#define LDXP GLUE2(ld, xp)
#else
#define LDXP GLUE3(ld, A, xp)
#endif
#define STXP GLUE3(st, L, xp)
#ifdef HAS_ASM_LSE
#define CASP GLUE3(casp, A, L)  x0, x1, x2, x3, [x4]
#else
#define CASP .inst 0x48207c82 + M
#endif

        // LSE path: a single pair compare-and-swap instruction.
        CASP   // x0, x1, x2, x3, [x4]
        ret
8:
        // Fallback: keep both expected halves, x0/x1 receive the loaded pair.
        mov    x(tmp0), x0
        mov    x(tmp1), x1
0:
        LDXP   x0, x1, [x4]
        cmp    x0, x(tmp0)
        ccmp   x1, x(tmp1), #0, eq     // compare x1 only if x0 matched
        bne    1f                      // mismatch: return the loaded pair
        STXP   w(tmp2), x2, x3, [x4]
        cbnz   w(tmp2), 0b             // exclusive store failed: retry
1:
        BARRIER
        ret
#endif
END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas))
#endif // L_cas
198
#ifdef L_swp
#ifdef HAS_ASM_LSE
#define SWP GLUE4(swp, A, L, S)  s(0), s(0), [x1]
#else
#define SWP .inst 0x38208020 + B + N
#endif
// iN __aarch64_swpN_ORDER(iN val, iN *ptr)
//
// In:  s(0) = val, x1 = ptr
// Out: s(0) = value that was in *ptr before the store
// May clobber tmp0 and tmp1.
//
// Local labels: 8 = start of the LL/SC fallback, 0 = retry loop.
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp))
        JUMP_IF_NOT_LSE 8f
        // LSE path: a single swap instruction.
        SWP    // s(0), s(0), [x1]
        ret
8:
        // Fallback: keep val in tmp0 because s(0) receives the old value.
        mov    s(tmp0), s(0)
0:
        LDXR   s(0), [x1]
        STXR   w(tmp1), s(tmp0), [x1]
        cbnz   w(tmp1), 0b             // exclusive store failed: retry
        BARRIER
        ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp))
#endif // L_swp
219
#if defined(L_ldadd) || defined(L_ldclr) ||                                    \
    defined(L_ldeor) || defined(L_ldset)

// Per-operation building blocks:
//   LDNM - base name of the LSE mnemonic and of the routine.
//   OP   - data-processing instruction used by the fallback loop.
//   OPN  - constant added to the hand-encoded .inst word.
#ifdef L_ldadd
#define LDNM ldadd
#define OP add
#define OPN 0x0000
#elif defined(L_ldclr)
#define LDNM ldclr
#define OP bic
#define OPN 0x1000
#elif defined(L_ldeor)
#define LDNM ldeor
#define OP eor
#define OPN 0x2000
#elif defined(L_ldset)
#define LDNM ldset
#define OP orr
#define OPN 0x3000
#else
#error "one of L_ldadd, L_ldclr, L_ldeor or L_ldset must be defined"
#endif

#ifdef HAS_ASM_LSE
#define LDOP GLUE4(LDNM, A, L, S) s(0), s(0), [x1]
#else
#define LDOP .inst 0x38200020 + OPN + B + N
#endif

// iN __aarch64_<ldadd|ldclr|ldeor|ldset>N_ORDER(iN val, iN *ptr)
//
// In:  s(0) = val, x1 = ptr
// Out: s(0) = value that was in *ptr before the update
// May clobber tmp0, tmp1 and tmp2.
//
// Local labels: 8 = start of the LL/SC fallback, 0 = retry loop.
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM))
        JUMP_IF_NOT_LSE 8f
        // LSE path: a single atomic load-and-operate instruction.
        LDOP // s(0), s(0), [x1]
        ret
8:
        // Fallback: keep val in tmp0 because s(0) receives the old value.
        mov    s(tmp0), s(0)
0:
        LDXR   s(0), [x1]
        OP     s(tmp1), s(0), s(tmp0)  // tmp1 = old OP val
        STXR   w(tmp2), s(tmp1), [x1]
        cbnz   w(tmp2), 0b             // exclusive store failed: retry
        BARRIER
        ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM))
#endif // L_ldadd L_ldclr L_ldeor L_ldset
264
// File trailer. Both names below are macros expected from assembly.h.
NO_EXEC_STACK_DIRECTIVE

// GNU property note for BTI and PAC
GNU_PROPERTY_BTI_PAC
269
270#endif // __aarch64__
271