xref: /freebsd/contrib/llvm-project/clang/lib/Headers/arm_acle.h (revision 85868e8a1daeaae7a0e48effb2ea2310ae3b02c6)
1 /*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #ifndef __ARM_ACLE_H
11 #define __ARM_ACLE_H
12 
13 #ifndef __ARM_ACLE
14 #error "ACLE intrinsics support not enabled."
15 #endif
16 
17 #include <stdint.h>
18 
19 #if defined(__cplusplus)
20 extern "C" {
21 #endif
22 
23 /* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
24 /* 8.3 Memory barriers */
#if !defined(_MSC_VER)
/* Barrier macros: each one hands its argument unchanged to the builtin of the
 * same name. They are only defined when _MSC_VER is not set. */
#define __dmb(__opt) __builtin_arm_dmb(__opt)
#define __dsb(__opt) __builtin_arm_dsb(__opt)
#define __isb(__opt) __builtin_arm_isb(__opt)
#endif
30 
31 /* 8.4 Hints */
32 
#if !defined(_MSC_VER)
/* Hint wrappers. Each function takes no arguments, returns nothing, and
 * consists of a single call to the like-named __builtin_arm_* hint. The whole
 * group is skipped when _MSC_VER is defined. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__wfi(void) {
  __builtin_arm_wfi();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__))
__wfe(void) {
  __builtin_arm_wfe();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__))
__sev(void) {
  __builtin_arm_sev();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__))
__sevl(void) {
  __builtin_arm_sevl();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__))
__yield(void) {
  __builtin_arm_yield();
}
#endif
54 
/* __dbg is provided only when __ARM_32BIT_STATE is non-zero; its argument is
 * passed straight through to __builtin_arm_dbg. */
#if __ARM_32BIT_STATE
#define __dbg(__hint) __builtin_arm_dbg(__hint)
#endif
58 
59 /* 8.5 Swap */
/*
 * Store __x to *__p and return the value that was loaded from *__p.
 *
 * The value is loaded with __builtin_arm_ldrex and the store is attempted
 * with __builtin_arm_strex; the load/store pair is repeated for as long as
 * __builtin_arm_strex returns non-zero.
 *
 * The local is spelled with a reserved (double-underscore) name so that a
 * user macro called `v` defined before this header is included cannot
 * rewrite the body.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
  uint32_t __v;
  do {
    __v = __builtin_arm_ldrex(__p);
  } while (__builtin_arm_strex(__x, __p));
  return __v;
}
68 
69 /* 8.6 Memory prefetch intrinsics */
70 /* 8.6.1 Data prefetch */
/* __pld(addr) is __pldx with access kind, cache level and retention policy
 * all zero. */
#define __pld(__addr) __pldx(0, 0, 0, __addr)

/* In 32-bit state only the address and access kind reach the builtin (the
 * cache level and retention policy arguments are dropped); otherwise all four
 * are forwarded. The trailing literal 1 is passed in both forms. */
#if __ARM_32BIT_STATE
#define __pldx(__access_kind, __cache_level, __retention_policy, __addr) \
  __builtin_arm_prefetch(__addr, __access_kind, 1)
#else
#define __pldx(__access_kind, __cache_level, __retention_policy, __addr) \
  __builtin_arm_prefetch(__addr, __access_kind, __cache_level, __retention_policy, 1)
#endif
80 
81 /* 8.6.2 Instruction prefetch */
/* __pli(addr) is __plix with cache level and retention policy both zero. */
#define __pli(__addr) __plix(0, 0, __addr)

/* In 32-bit state the cache level and retention policy arguments are dropped
 * and the builtin receives (addr, 0, 0); otherwise they are forwarded, with a
 * literal 0 both before them and as the final argument. */
#if __ARM_32BIT_STATE
#define __plix(__cache_level, __retention_policy, __addr) \
  __builtin_arm_prefetch(__addr, 0, 0)
#else
#define __plix(__cache_level, __retention_policy, __addr) \
  __builtin_arm_prefetch(__addr, 0, __cache_level, __retention_policy, 0)
#endif
91 
92 /* 8.7 NOP */
/* Takes no arguments and returns nothing; the body is a single call to
 * __builtin_arm_nop. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__nop(void) {
  __builtin_arm_nop();
}
96 
97 /* 9 DATA-PROCESSING INTRINSICS */
98 /* 9.2 Miscellaneous data-processing intrinsics */
99 /* ROR */
/*
 * Rotate the 32-bit value __x right by __y bit positions.
 *
 * The rotation count is reduced modulo 32 first; a reduced count of zero
 * returns __x unchanged, which also avoids a shift by the full width.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
  const uint32_t __shift = __y & 31u;
  return __shift ? ((__x >> __shift) | (__x << (32u - __shift))) : __x;
}
107 
/*
 * Rotate the 64-bit value __x right by __y bit positions.
 *
 * The rotation count is reduced modulo 64 first; a reduced count of zero
 * returns __x unchanged, which also avoids a shift by the full width.
 */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rorll(uint64_t __x, uint32_t __y) {
  const uint32_t __shift = __y & 63u;
  return __shift ? ((__x >> __shift) | (__x << (64u - __shift))) : __x;
}
115 
/*
 * Rotate an unsigned long right by __y bits.
 *
 * Dispatches on the size of long: __ror when long is 4 bytes, __rorll
 * otherwise.
 */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rorl(unsigned long __x, uint32_t __y) {
#if __SIZEOF_LONG__ == 4
  const unsigned long __rotated = __ror(__x, __y);
#else
  const unsigned long __rotated = __rorll(__x, __y);
#endif
  return __rotated;
}
124 
125 
126 /* CLZ */
/*
 * Count the leading zero bits of the 32-bit value __t.
 *
 * Returns 32 when __t is zero. The zero case is handled explicitly because
 * the result of __builtin_clz is undefined for a zero argument.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clz(uint32_t __t) {
  return __t ? (uint32_t)__builtin_clz(__t) : 32u;
}
131 
/*
 * Count the leading zero bits of an unsigned long.
 *
 * Returns the width of unsigned long in bits (__SIZEOF_LONG__ * 8) when __t
 * is zero. The zero case is handled explicitly because the result of
 * __builtin_clzl is undefined for a zero argument.
 */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__clzl(unsigned long __t) {
  return __t ? (unsigned long)__builtin_clzl(__t)
             : (unsigned long)(__SIZEOF_LONG__ * 8);
}
136 
/*
 * Count the leading zero bits of the 64-bit value __t.
 *
 * Returns 64 when __t is zero. The zero case is handled explicitly because
 * the result of __builtin_clzll is undefined for a zero argument.
 */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__clzll(uint64_t __t) {
  return __t ? (uint64_t)__builtin_clzll(__t) : (uint64_t)64;
}
141 
142 /* REV */
/* Reverse the byte order of a 32-bit value using __builtin_bswap32. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t __t) {
  const uint32_t __swapped = __builtin_bswap32(__t);
  return __swapped;
}
147 
/*
 * Reverse the byte order of an unsigned long.
 *
 * Uses the 32-bit byte swap when long is 4 bytes and the 64-bit byte swap
 * otherwise.
 */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__revl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  const unsigned long __swapped = __builtin_bswap32((uint32_t)__t);
#else
  const unsigned long __swapped = __builtin_bswap64((uint64_t)__t);
#endif
  return __swapped;
}
156 
/* Reverse the byte order of a 64-bit value using __builtin_bswap64. */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__revll(uint64_t __t) {
  const uint64_t __swapped = __builtin_bswap64(__t);
  return __swapped;
}
161 
162 /* REV16 */
/*
 * Swap the two bytes inside each 16-bit half of a 32-bit value.
 *
 * Done in two steps: reverse all four bytes with __rev, then rotate the
 * result by 16 bits with __ror so the halves return to their original order.
 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev16(uint32_t __t) {
  const uint32_t __bytes_reversed = __rev(__t);
  return __ror(__bytes_reversed, 16);
}
167 
/*
 * Apply __rev16 independently to the upper and lower 32-bit words of a
 * 64-bit value and reassemble them in their original positions.
 */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t __t) {
  const uint32_t __hi = __rev16((uint32_t)(__t >> 32));
  const uint32_t __lo = __rev16((uint32_t)__t);
  return ((uint64_t)__hi << 32) | __lo;
}
172 
/*
 * unsigned long flavour of __rev16: dispatches to __rev16 when long is
 * 4 bytes and to __rev16ll otherwise.
 */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rev16l(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  const unsigned long __swapped = __rev16(__t);
#else
  const unsigned long __swapped = __rev16ll(__t);
#endif
  return __swapped;
}
181 
182 /* REVSH */
/*
 * Swap the two bytes of a 16-bit value with __builtin_bswap16 and return the
 * result as a signed 16-bit integer. The conversions to and from the
 * unsigned builtin type are spelled out as casts.
 */
static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
__revsh(int16_t __t) {
  const uint16_t __swapped = __builtin_bswap16((uint16_t)__t);
  return (int16_t)__swapped;
}
187 
188 /* RBIT */
/* 32-bit RBIT: returns the result of __builtin_arm_rbit on __t. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rbit(uint32_t __t) {
  const uint32_t __r = __builtin_arm_rbit(__t);
  return __r;
}
193 
/*
 * 64-bit RBIT.
 *
 * In 32-bit state the two 32-bit words are passed through
 * __builtin_arm_rbit separately and exchanged: the processed low word
 * becomes the high word of the result and vice versa. Otherwise the 64-bit
 * builtin is used directly.
 */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rbitll(uint64_t __t) {
#if __ARM_32BIT_STATE
  const uint32_t __from_lo = __builtin_arm_rbit((uint32_t)__t);
  const uint32_t __from_hi = __builtin_arm_rbit((uint32_t)(__t >> 32));
  return ((uint64_t)__from_lo << 32) | __from_hi;
#else
  const uint64_t __r = __builtin_arm_rbit64(__t);
  return __r;
#endif
}
203 
/*
 * unsigned long flavour of RBIT: dispatches to __rbit when long is 4 bytes
 * and to __rbitll otherwise.
 */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rbitl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  const unsigned long __r = __rbit(__t);
#else
  const unsigned long __r = __rbitll(__t);
#endif
  return __r;
}
212 
213 /*
214  * 9.3 16-bit multiplications
215  */
#if __ARM_FEATURE_DSP
/* Each function in this group passes its two int32_t operands, in order, to
 * the __builtin_arm_smul* builtin of the same name and returns the int32_t
 * result. */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smulbb(int32_t __a, int32_t __b) {
  const int32_t __r = __builtin_arm_smulbb(__a, __b);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smulbt(int32_t __a, int32_t __b) {
  const int32_t __r = __builtin_arm_smulbt(__a, __b);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smultb(int32_t __a, int32_t __b) {
  const int32_t __r = __builtin_arm_smultb(__a, __b);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smultt(int32_t __a, int32_t __b) {
  const int32_t __r = __builtin_arm_smultt(__a, __b);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smulwb(int32_t __a, int32_t __b) {
  const int32_t __r = __builtin_arm_smulwb(__a, __b);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smulwt(int32_t __a, int32_t __b) {
  const int32_t __r = __builtin_arm_smulwt(__a, __b);
  return __r;
}
#endif
242 
243 /*
244  * 9.4 Saturating intrinsics
245  *
 * FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag
247  * intrinsics are implemented and the flag is enabled.
248  */
249 /* 9.4.1 Width-specified saturation intrinsics */
#if __ARM_FEATURE_SAT
/* Both macros forward their two arguments, in order, to the builtin of the
 * same name. */
#define __ssat(__val, __bits) __builtin_arm_ssat(__val, __bits)
#define __usat(__val, __bits) __builtin_arm_usat(__val, __bits)
#endif
254 
255 /* 9.4.2 Saturating addition and subtraction intrinsics */
#if __ARM_FEATURE_DSP
/* __qadd and __qsub forward both operands to __builtin_arm_qadd and
 * __builtin_arm_qsub respectively. */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
  const int32_t __r = __builtin_arm_qadd(__t, __v);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qsub(int32_t __t, int32_t __v) {
  const int32_t __r = __builtin_arm_qsub(__t, __v);
  return __r;
}

/* __qdbl is expressed as __builtin_arm_qadd of the operand with itself. */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qdbl(int32_t __t) {
  const int32_t __r = __builtin_arm_qadd(__t, __t);
  return __r;
}
#endif
272 
/* 9.4.3 Accumulating multiplications */
#if __ARM_FEATURE_DSP
/* Each function in this group passes its three int32_t operands, in order,
 * to the __builtin_arm_smla* builtin of the same name and returns the
 * int32_t result. */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
  const int32_t __r = __builtin_arm_smlabb(__a, __b, __c);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
  const int32_t __r = __builtin_arm_smlabt(__a, __b, __c);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
  const int32_t __r = __builtin_arm_smlatb(__a, __b, __c);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
  const int32_t __r = __builtin_arm_smlatt(__a, __b, __c);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
  const int32_t __r = __builtin_arm_smlawb(__a, __b, __c);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
  const int32_t __r = __builtin_arm_smlawt(__a, __b, __c);
  return __r;
}
#endif
300 
301 
302 /* 9.5.4 Parallel 16-bit saturation */
#if __ARM_FEATURE_SIMD32
/* Both macros forward their two arguments, in order, to the builtin of the
 * same name. */
#define __ssat16(__val, __bits) __builtin_arm_ssat16(__val, __bits)
#define __usat16(__val, __bits) __builtin_arm_usat16(__val, __bits)
#endif
307 
308 /* 9.5.5 Packing and unpacking */
#if __ARM_FEATURE_SIMD32
/* Packed-lane type names: all four are plain 32-bit integer typedefs, signed
 * for the int* names and unsigned for the uint* names. */
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
typedef uint32_t uint8x4_t;
typedef uint32_t uint16x2_t;

/* Each function below forwards its operands, in order, to the
 * __builtin_arm_* builtin of the same name. */
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtab16(int16x2_t __a, int8x4_t __b) {
  const int16x2_t __r = __builtin_arm_sxtab16(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtb16(int8x4_t __a) {
  const int16x2_t __r = __builtin_arm_sxtb16(__a);
  return __r;
}

/* NOTE(review): the two unsigned-extend wrappers are declared with the
 * signed packed types; the ACLE document appears to use the unsigned ones.
 * Signatures are kept as they are -- confirm before changing. */
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtab16(int16x2_t __a, int8x4_t __b) {
  const int16x2_t __r = __builtin_arm_uxtab16(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtb16(int8x4_t __a) {
  const int16x2_t __r = __builtin_arm_uxtb16(__a);
  return __r;
}
#endif
332 
333 /* 9.5.6 Parallel selection */
#if __ARM_FEATURE_SIMD32
/* Forwards both uint8x4_t operands to __builtin_arm_sel and returns its
 * result. */
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
  const uint8x4_t __r = __builtin_arm_sel(__a, __b);
  return __r;
}
#endif
340 
341 /* 9.5.7 Parallel 8-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
/* Each function in this group forwards its two operands, in order, to the
 * __builtin_arm_* builtin of the same name. The first six use int8x4_t, the
 * remaining six use uint8x4_t. */
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
  const int8x4_t __r = __builtin_arm_qadd8(__a, __b);
  return __r;
}

static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qsub8(int8x4_t __a, int8x4_t __b) {
  const int8x4_t __r = __builtin_arm_qsub8(__a, __b);
  return __r;
}

static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__sadd8(int8x4_t __a, int8x4_t __b) {
  const int8x4_t __r = __builtin_arm_sadd8(__a, __b);
  return __r;
}

static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shadd8(int8x4_t __a, int8x4_t __b) {
  const int8x4_t __r = __builtin_arm_shadd8(__a, __b);
  return __r;
}

static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shsub8(int8x4_t __a, int8x4_t __b) {
  const int8x4_t __r = __builtin_arm_shsub8(__a, __b);
  return __r;
}

static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__ssub8(int8x4_t __a, int8x4_t __b) {
  const int8x4_t __r = __builtin_arm_ssub8(__a, __b);
  return __r;
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uadd8(uint8x4_t __a, uint8x4_t __b) {
  const uint8x4_t __r = __builtin_arm_uadd8(__a, __b);
  return __r;
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhadd8(uint8x4_t __a, uint8x4_t __b) {
  const uint8x4_t __r = __builtin_arm_uhadd8(__a, __b);
  return __r;
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhsub8(uint8x4_t __a, uint8x4_t __b) {
  const uint8x4_t __r = __builtin_arm_uhsub8(__a, __b);
  return __r;
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqadd8(uint8x4_t __a, uint8x4_t __b) {
  const uint8x4_t __r = __builtin_arm_uqadd8(__a, __b);
  return __r;
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqsub8(uint8x4_t __a, uint8x4_t __b) {
  const uint8x4_t __r = __builtin_arm_uqsub8(__a, __b);
  return __r;
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__usub8(uint8x4_t __a, uint8x4_t __b) {
  const uint8x4_t __r = __builtin_arm_usub8(__a, __b);
  return __r;
}
#endif
392 
393 /* 9.5.8 Sum of 8-bit absolute differences */
#if __ARM_FEATURE_SIMD32
/* __usad8 forwards two uint8x4_t operands to __builtin_arm_usad8; __usada8
 * additionally forwards a uint32_t third operand to __builtin_arm_usada8.
 * Both return uint32_t. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
  const uint32_t __r = __builtin_arm_usad8(__a, __b);
  return __r;
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
  const uint32_t __r = __builtin_arm_usada8(__a, __b, __c);
  return __r;
}
#endif
404 
405 /* 9.5.9 Parallel 16-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
/* Each function in this group forwards its two operands, in order, to the
 * __builtin_arm_* builtin of the same name. The first twelve use int16x2_t,
 * the remaining twelve use uint16x2_t. */
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_qadd16(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qasx(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_qasx(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsax(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_qsax(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsub16(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_qsub16(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sadd16(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_sadd16(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sasx(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_sasx(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shadd16(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_shadd16(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shasx(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_shasx(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsax(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_shsax(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsub16(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_shsub16(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssax(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_ssax(__a, __b);
  return __r;
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssub16(int16x2_t __a, int16x2_t __b) {
  const int16x2_t __r = __builtin_arm_ssub16(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uadd16(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_uadd16(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uasx(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_uasx(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhadd16(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_uhadd16(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhasx(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_uhasx(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsax(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_uhsax(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsub16(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_uhsub16(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqadd16(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_uqadd16(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqasx(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_uqasx(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsax(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_uqsax(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsub16(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_uqsub16(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usax(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_usax(__a, __b);
  return __r;
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usub16(uint16x2_t __a, uint16x2_t __b) {
  const uint16x2_t __r = __builtin_arm_usub16(__a, __b);
  return __r;
}
#endif
504 
505 /* 9.5.10 Parallel 16-bit multiplications */
#if __ARM_FEATURE_SIMD32
/* Each function in this group forwards its operands, in order, to the
 * __builtin_arm_* builtin of the same name. The three-operand forms take an
 * accumulator __c whose type (int32_t or int64_t) matches the return type;
 * the two-operand forms return int32_t. */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
  const int32_t __r = __builtin_arm_smlad(__a, __b, __c);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  const int32_t __r = __builtin_arm_smladx(__a, __b, __c);
  return __r;
}

static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {
  const int64_t __r = __builtin_arm_smlald(__a, __b, __c);
  return __r;
}

static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  const int64_t __r = __builtin_arm_smlaldx(__a, __b, __c);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {
  const int32_t __r = __builtin_arm_smlsd(__a, __b, __c);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  const int32_t __r = __builtin_arm_smlsdx(__a, __b, __c);
  return __r;
}

static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {
  const int64_t __r = __builtin_arm_smlsld(__a, __b, __c);
  return __r;
}

static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  const int64_t __r = __builtin_arm_smlsldx(__a, __b, __c);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuad(int16x2_t __a, int16x2_t __b) {
  const int32_t __r = __builtin_arm_smuad(__a, __b);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuadx(int16x2_t __a, int16x2_t __b) {
  const int32_t __r = __builtin_arm_smuadx(__a, __b);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusd(int16x2_t __a, int16x2_t __b) {
  const int32_t __r = __builtin_arm_smusd(__a, __b);
  return __r;
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusdx(int16x2_t __a, int16x2_t __b) {
  const int32_t __r = __builtin_arm_smusdx(__a, __b);
  return __r;
}
#endif
556 
557 /* 9.7 CRC32 intrinsics */
#if __ARM_FEATURE_CRC32
/* Each function in this group takes a uint32_t first operand __a and a data
 * operand __b of 8, 16, 32 or 64 bits, forwards both to the
 * __builtin_arm_crc32* builtin of the same name, and returns its uint32_t
 * result. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32b(uint32_t __a, uint8_t __b) {
  const uint32_t __r = __builtin_arm_crc32b(__a, __b);
  return __r;
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32h(uint32_t __a, uint16_t __b) {
  const uint32_t __r = __builtin_arm_crc32h(__a, __b);
  return __r;
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32w(uint32_t __a, uint32_t __b) {
  const uint32_t __r = __builtin_arm_crc32w(__a, __b);
  return __r;
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32d(uint32_t __a, uint64_t __b) {
  const uint32_t __r = __builtin_arm_crc32d(__a, __b);
  return __r;
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cb(uint32_t __a, uint8_t __b) {
  const uint32_t __r = __builtin_arm_crc32cb(__a, __b);
  return __r;
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32ch(uint32_t __a, uint16_t __b) {
  const uint32_t __r = __builtin_arm_crc32ch(__a, __b);
  return __r;
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cw(uint32_t __a, uint32_t __b) {
  const uint32_t __r = __builtin_arm_crc32cw(__a, __b);
  return __r;
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cd(uint32_t __a, uint64_t __b) {
  const uint32_t __r = __builtin_arm_crc32cd(__a, __b);
  return __r;
}
#endif
599 
600 /* Armv8.3-A Javascript conversion intrinsic */
#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT)
/* Converts a double to int32_t by forwarding it to __builtin_arm_jcvt. Only
 * available in 64-bit state when __ARM_FEATURE_JCVT is defined. */
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__jcvt(double __a) {
  const int32_t __r = __builtin_arm_jcvt(__a);
  return __r;
}
#endif
607 
608 /* 10.1 Special register intrinsics */
/* Read forms take the register designator only; write forms take the
 * designator followed by the value. Every macro forwards its arguments
 * unchanged to the builtin of the same name. */
#define __arm_rsr(__sysreg) __builtin_arm_rsr(__sysreg)
#define __arm_rsr64(__sysreg) __builtin_arm_rsr64(__sysreg)
#define __arm_rsrp(__sysreg) __builtin_arm_rsrp(__sysreg)
#define __arm_wsr(__sysreg, __val) __builtin_arm_wsr(__sysreg, __val)
#define __arm_wsr64(__sysreg, __val) __builtin_arm_wsr64(__sysreg, __val)
#define __arm_wsrp(__sysreg, __val) __builtin_arm_wsrp(__sysreg, __val)
615 
616 /* Memory Tagging Extensions (MTE) Intrinsics */
#if __ARM_FEATURE_MEMORY_TAGGING
/* Each __arm_mte_* macro forwards its arguments, in order, to one builtin:
 * create_random_tag -> irg, increment_tag -> addg, exclude_tag -> gmi,
 * get_tag -> ldg, set_tag -> stg, ptrdiff -> subp. */
#define __arm_mte_create_random_tag(__p, __m)  __builtin_arm_irg(__p, __m)
#define __arm_mte_increment_tag(__p, __off)  __builtin_arm_addg(__p, __off)
#define __arm_mte_exclude_tag(__p, __excl)  __builtin_arm_gmi(__p, __excl)
#define __arm_mte_get_tag(__p) __builtin_arm_ldg(__p)
#define __arm_mte_set_tag(__p) __builtin_arm_stg(__p)
#define __arm_mte_ptrdiff(__pa, __pb) __builtin_arm_subp(__pa, __pb)
#endif
625 
626 /* Transactional Memory Extension (TME) Intrinsics */
#if __ARM_FEATURE_TME

/* Unsigned bit masks named _TMFAILURE_*. _TMFAILURE_REASON covers bits 0-14;
 * every other constant is a single-bit mask, running from bit 15
 * (_TMFAILURE_RTRY) up to bit 24 (_TMFAILURE_TRIVIAL).
 * NOTE(review): presumably meant for decoding the value produced by
 * __tstart() -- confirm against the ACLE TME section. */
#define _TMFAILURE_REASON  0x00007fffu
#define _TMFAILURE_RTRY    0x00008000u
#define _TMFAILURE_CNCL    0x00010000u
#define _TMFAILURE_MEM     0x00020000u
#define _TMFAILURE_IMP     0x00040000u
#define _TMFAILURE_ERR     0x00080000u
#define _TMFAILURE_SIZE    0x00100000u
#define _TMFAILURE_NEST    0x00200000u
#define _TMFAILURE_DBG     0x00400000u
#define _TMFAILURE_INT     0x00800000u
#define _TMFAILURE_TRIVIAL 0x01000000u

/* Transaction macros: each expands to the __builtin_arm_* call of the same
 * name; only __tcancel takes an argument, which is passed through as-is. */
#define __tstart()        __builtin_arm_tstart()
#define __tcommit()       __builtin_arm_tcommit()
#define __tcancel(__arg)  __builtin_arm_tcancel(__arg)
#define __ttest()         __builtin_arm_ttest()

#endif /* __ARM_FEATURE_TME */
647 
648 #if defined(__cplusplus)
649 }
650 #endif
651 
652 #endif /* __ARM_ACLE_H */
653