xref: /freebsd/crypto/openssl/crypto/armcap.c (revision f25b8c9fb4f58cf61adb47d7570abe7caa6d385d)
1 /*
2  * Copyright 2011-2025 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <openssl/crypto.h>
14 #ifdef __APPLE__
15 #include <sys/sysctl.h>
16 #else
17 #include <setjmp.h>
18 #include <signal.h>
19 #endif
20 #include "internal/cryptlib.h"
21 #ifdef _WIN32
22 #include <windows.h>
23 #else
24 #include <unistd.h>
25 #endif
26 #include "arm_arch.h"
27 
28 unsigned int OPENSSL_armcap_P = 0;
29 unsigned int OPENSSL_arm_midr = 0;
30 unsigned int OPENSSL_armv8_rsa_neonized = 0;
31 
32 #ifdef _WIN32
OPENSSL_cpuid_setup(void)33 void OPENSSL_cpuid_setup(void)
34 {
35     OPENSSL_armcap_P |= ARMV7_NEON;
36     OPENSSL_armv8_rsa_neonized = 1;
37     if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
38         /* These are all covered by one call in Windows */
39         OPENSSL_armcap_P |= ARMV8_AES;
40         OPENSSL_armcap_P |= ARMV8_PMULL;
41         OPENSSL_armcap_P |= ARMV8_SHA1;
42         OPENSSL_armcap_P |= ARMV8_SHA256;
43     }
44 }
45 
/* This build variant has no tick source wired up; the result is always 0. */
uint32_t OPENSSL_rdtsc(void)
{
    const uint32_t no_ticks = 0;

    return no_ticks;
}
50 #elif __ARM_MAX_ARCH__ < 7
/* __ARM_MAX_ARCH__ < 7: no capability detection is performed. */
void OPENSSL_cpuid_setup(void)
{
    return;
}
54 
/* __ARM_MAX_ARCH__ < 7: no tick source is used; the result is always 0. */
uint32_t OPENSSL_rdtsc(void)
{
    const uint32_t no_ticks = 0;

    return no_ticks;
}
59 #else /* !_WIN32 && __ARM_MAX_ARCH__ >= 7 */
60 
61 /* 3 ways of handling things here: __APPLE__,  getauxval() or SIGILL detect */
62 
63 /* First determine if getauxval() is available (OSSL_IMPLEMENT_GETAUXVAL) */
64 
65 #if defined(__GNUC__) && __GNUC__ >= 2
66 void OPENSSL_cpuid_setup(void) __attribute__((constructor));
67 #endif
68 
69 #if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
70 #if __GLIBC_PREREQ(2, 16)
71 #include <sys/auxv.h>
72 #define OSSL_IMPLEMENT_GETAUXVAL
73 #endif
74 #elif defined(__ANDROID_API__)
75 /* see https://developer.android.google.cn/ndk/guides/cpu-features */
76 #if __ANDROID_API__ >= 18
77 #include <sys/auxv.h>
78 #define OSSL_IMPLEMENT_GETAUXVAL
79 #endif
80 #endif
81 #if defined(__FreeBSD__) || defined(__OpenBSD__)
82 #include <sys/param.h>
83 #if (defined(__FreeBSD__) && __FreeBSD_version >= 1200000) || (defined(__OpenBSD__) && OpenBSD >= 202409)
84 #include <sys/auxv.h>
85 #define OSSL_IMPLEMENT_GETAUXVAL
86 
/*
 * getauxval()-shaped shim implemented with elf_aux_info().
 *
 * Returns the value stored for `key`, or 0 when elf_aux_info() reports
 * a non-zero (failure) status.
 */
static unsigned long getauxval(unsigned long key)
{
    unsigned long result = 0ul;
    const int status = elf_aux_info((int)key, &result, sizeof(result));

    return (status == 0) ? result : 0ul;
}
96 #endif
97 #endif
98 
99 /*
100  * Android: according to https://developer.android.com/ndk/guides/cpu-features,
101  * getauxval is supported starting with API level 18
102  */
103 #if defined(__ANDROID__) && defined(__ANDROID_API__) && __ANDROID_API__ >= 18
104 #include <sys/auxv.h>
105 #define OSSL_IMPLEMENT_GETAUXVAL
106 #endif
107 
/*
 * Auxiliary-vector keys and capability bit masks.
 *
 * ARM puts the feature bits for Crypto Extensions in AT_HWCAP2, whereas
 * AArch64 uses AT_HWCAP. The numeric values for AT_HWCAP / AT_HWCAP2 below
 * are fallbacks, used only when no previously included header defined them.
 */
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
#ifndef AT_HWCAP2
#define AT_HWCAP2 26
#endif
#if defined(__arm__) || defined(__arm)
#define OSSL_HWCAP AT_HWCAP
#define OSSL_HWCAP_NEON (1 << 12)

#define OSSL_HWCAP_CE AT_HWCAP2
#define OSSL_HWCAP_CE_AES (1 << 0)
#define OSSL_HWCAP_CE_PMULL (1 << 1)
#define OSSL_HWCAP_CE_SHA1 (1 << 2)
#define OSSL_HWCAP_CE_SHA256 (1 << 3)
#elif defined(__aarch64__)
#define OSSL_HWCAP AT_HWCAP
#define OSSL_HWCAP_NEON (1 << 1)

#define OSSL_HWCAP_CE AT_HWCAP
#define OSSL_HWCAP_CE_AES (1 << 3)
#define OSSL_HWCAP_CE_PMULL (1 << 4)
#define OSSL_HWCAP_CE_SHA1 (1 << 5)
#define OSSL_HWCAP_CE_SHA256 (1 << 6)
#define OSSL_HWCAP_CPUID (1 << 11)
#define OSSL_HWCAP_SHA3 (1 << 17)
#define OSSL_HWCAP_CE_SM3 (1 << 18)
#define OSSL_HWCAP_CE_SM4 (1 << 19)
#define OSSL_HWCAP_CE_SHA512 (1 << 21)
#define OSSL_HWCAP_SVE (1 << 22)
/*
 * Second capability word. Keyed off AT_HWCAP2 (guaranteed to be defined
 * above) rather than repeating its number, matching OSSL_HWCAP/OSSL_HWCAP_CE.
 */
#define OSSL_HWCAP2 AT_HWCAP2
#define OSSL_HWCAP2_SVE2 (1 << 1)
#define OSSL_HWCAP2_RNG (1 << 16)
#endif
147 
148 uint32_t _armv7_tick(void);
149 
OPENSSL_rdtsc(void)150 uint32_t OPENSSL_rdtsc(void)
151 {
152     if (OPENSSL_armcap_P & ARMV7_TICK)
153         return _armv7_tick();
154     else
155         return 0;
156 }
157 
158 #ifdef __aarch64__
159 size_t OPENSSL_rndr_asm(unsigned char *buf, size_t len);
160 size_t OPENSSL_rndrrs_asm(unsigned char *buf, size_t len);
161 
162 size_t OPENSSL_rndr_bytes(unsigned char *buf, size_t len);
163 size_t OPENSSL_rndrrs_bytes(unsigned char *buf, size_t len);
164 
/*
 * Calls `func(buf, len)` until it reports that all `len` bytes were
 * produced, giving up after a fixed number of attempts.
 *
 * func: generator to invoke; its return value is compared against `len`.
 * buf:  destination buffer, passed through to `func` unchanged.
 * len:  number of bytes requested.
 *
 * Returns the value from the last call to `func`: `len` on success,
 * anything else if every attempt fell short.
 */
static size_t OPENSSL_rndr_wrapper(size_t (*func)(unsigned char *, size_t), unsigned char *buf, size_t len)
{
    enum {
        RNDR_MAX_ATTEMPTS = 8,
        RNDR_RETRY_DELAY_US = 5000 /* 5000 microseconds (5 milliseconds) */
    };
    size_t buffer_size = 0;
    int attempt;

    for (attempt = 0; attempt < RNDR_MAX_ATTEMPTS; attempt++) {
        buffer_size = func(buf, len);
        if (buffer_size == len)
            break;
        /*
         * Back off only when another attempt will follow; sleeping after
         * the final failure would just delay the error return.
         */
        if (attempt + 1 < RNDR_MAX_ATTEMPTS)
            usleep(RNDR_RETRY_DELAY_US);
    }
    return buffer_size;
}
178 
OPENSSL_rndr_bytes(unsigned char * buf,size_t len)179 size_t OPENSSL_rndr_bytes(unsigned char *buf, size_t len)
180 {
181     return OPENSSL_rndr_wrapper(OPENSSL_rndr_asm, buf, len);
182 }
183 
OPENSSL_rndrrs_bytes(unsigned char * buf,size_t len)184 size_t OPENSSL_rndrrs_bytes(unsigned char *buf, size_t len)
185 {
186     return OPENSSL_rndr_wrapper(OPENSSL_rndrrs_asm, buf, len);
187 }
188 #endif
189 
190 #if !defined(__APPLE__) && !defined(OSSL_IMPLEMENT_GETAUXVAL)
191 static sigset_t all_masked;
192 
193 static sigjmp_buf ill_jmp;
ill_handler(int sig)194 static void ill_handler(int sig)
195 {
196     siglongjmp(ill_jmp, sig);
197 }
198 
199 /*
200  * Following subroutines could have been inlined, but not all
201  * ARM compilers support inline assembler, and we'd then have to
202  * worry about the compiler optimising out the detection code...
203  */
204 void _armv7_neon_probe(void);
205 void _armv8_aes_probe(void);
206 void _armv8_sha1_probe(void);
207 void _armv8_sha256_probe(void);
208 void _armv8_pmull_probe(void);
209 #ifdef __aarch64__
210 void _armv8_sm3_probe(void);
211 void _armv8_sm4_probe(void);
212 void _armv8_sha512_probe(void);
213 void _armv8_eor3_probe(void);
214 void _armv8_sve_probe(void);
215 void _armv8_sve2_probe(void);
216 void _armv8_rng_probe(void);
217 #endif
218 #endif /* !__APPLE__ && !OSSL_IMPLEMENT_GETAUXVAL */
219 
220 /* We only call _armv8_cpuid_probe() if (OPENSSL_armcap_P & ARMV8_CPUID) != 0 */
221 unsigned int _armv8_cpuid_probe(void);
222 
223 #if defined(__APPLE__)
/*
 * Reads the integer sysctl called `name`.
 * Yields `value` only when the lookup succeeds and the sysctl reads as 1;
 * every other outcome yields 0.
 */
static unsigned int sysctl_query(const char *name, unsigned int value)
{
    unsigned int flag = 0;
    size_t flag_size = sizeof(flag);

    if (sysctlbyname(name, &flag, &flag_size, NULL, 0) != 0)
        return 0;
    if (flag != 1)
        return 0;
    return value;
}
234 #elif !defined(OSSL_IMPLEMENT_GETAUXVAL)
235 /*
236  * Calls a provided probe function, which may SIGILL. If it doesn't, return `value`, otherwise return 0.
237  */
arm_probe_for(void (* probe)(void),volatile unsigned int value)238 static unsigned int arm_probe_for(void (*probe)(void), volatile unsigned int value)
239 {
240     if (sigsetjmp(ill_jmp, 1) == 0) {
241         probe();
242         return value;
243     } else {
244         /* The probe function gave us SIGILL */
245         return 0;
246     }
247 }
248 #endif
249 
OPENSSL_cpuid_setup(void)250 void OPENSSL_cpuid_setup(void)
251 {
252     const char *e;
253 #if !defined(__APPLE__) && !defined(OSSL_IMPLEMENT_GETAUXVAL)
254     struct sigaction ill_oact, ill_act;
255     sigset_t oset;
256 #endif
257     static int trigger = 0;
258 
259     if (trigger)
260         return;
261     trigger = 1;
262 
263     OPENSSL_armcap_P = 0;
264 
265     if ((e = getenv("OPENSSL_armcap"))) {
266         OPENSSL_armcap_P = (unsigned int)strtoul(e, NULL, 0);
267         return;
268     }
269 
270 #if defined(__APPLE__)
271 #if !defined(__aarch64__)
272     /*
273      * Capability probing by catching SIGILL appears to be problematic
274      * on iOS. But since Apple universe is "monocultural", it's actually
275      * possible to simply set pre-defined processor capability mask.
276      */
277     if (1) {
278         OPENSSL_armcap_P = ARMV7_NEON;
279         return;
280     }
281 #else
282     {
283         /*
284          * From
285          * https://github.com/llvm/llvm-project/blob/412237dcd07e5a2afbb1767858262a5f037149a3/llvm/lib/Target/AArch64/AArch64.td#L719
286          * all of these have been available on 64-bit Apple Silicon from the
287          * beginning (the A7).
288          */
289         OPENSSL_armcap_P |= ARMV7_NEON | ARMV8_PMULL | ARMV8_AES | ARMV8_SHA1 | ARMV8_SHA256;
290 
291         /* More recent extensions are indicated by sysctls */
292         OPENSSL_armcap_P |= sysctl_query("hw.optional.armv8_2_sha512", ARMV8_SHA512);
293         OPENSSL_armcap_P |= sysctl_query("hw.optional.armv8_2_sha3", ARMV8_SHA3);
294 
295         if (OPENSSL_armcap_P & ARMV8_SHA3) {
296             char uarch[64];
297 
298             size_t len = sizeof(uarch);
299             if ((sysctlbyname("machdep.cpu.brand_string", uarch, &len, NULL, 0) == 0) && ((strncmp(uarch, "Apple M1", 8) == 0) || (strncmp(uarch, "Apple M2", 8) == 0) || (strncmp(uarch, "Apple M3", 8) == 0) || (strncmp(uarch, "Apple M4", 8) == 0))) {
300                 OPENSSL_armcap_P |= ARMV8_UNROLL8_EOR3;
301                 OPENSSL_armcap_P |= ARMV8_HAVE_SHA3_AND_WORTH_USING;
302             }
303         }
304     }
305 #endif /* __aarch64__ */
306 
307 #elif defined(OSSL_IMPLEMENT_GETAUXVAL)
308 
309     if (getauxval(OSSL_HWCAP) & OSSL_HWCAP_NEON) {
310         unsigned long hwcap = getauxval(OSSL_HWCAP_CE);
311 
312         OPENSSL_armcap_P |= ARMV7_NEON;
313 
314         if (hwcap & OSSL_HWCAP_CE_AES)
315             OPENSSL_armcap_P |= ARMV8_AES;
316 
317         if (hwcap & OSSL_HWCAP_CE_PMULL)
318             OPENSSL_armcap_P |= ARMV8_PMULL;
319 
320         if (hwcap & OSSL_HWCAP_CE_SHA1)
321             OPENSSL_armcap_P |= ARMV8_SHA1;
322 
323         if (hwcap & OSSL_HWCAP_CE_SHA256)
324             OPENSSL_armcap_P |= ARMV8_SHA256;
325 
326 #ifdef __aarch64__
327         if (hwcap & OSSL_HWCAP_CE_SM4)
328             OPENSSL_armcap_P |= ARMV8_SM4;
329 
330         if (hwcap & OSSL_HWCAP_CE_SHA512)
331             OPENSSL_armcap_P |= ARMV8_SHA512;
332 
333         if (hwcap & OSSL_HWCAP_CPUID)
334             OPENSSL_armcap_P |= ARMV8_CPUID;
335 
336         if (hwcap & OSSL_HWCAP_CE_SM3)
337             OPENSSL_armcap_P |= ARMV8_SM3;
338         if (hwcap & OSSL_HWCAP_SHA3)
339             OPENSSL_armcap_P |= ARMV8_SHA3;
340 #endif
341     }
342 #ifdef __aarch64__
343     if (getauxval(OSSL_HWCAP) & OSSL_HWCAP_SVE)
344         OPENSSL_armcap_P |= ARMV8_SVE;
345 
346     if (getauxval(OSSL_HWCAP2) & OSSL_HWCAP2_SVE2)
347         OPENSSL_armcap_P |= ARMV8_SVE2;
348 
349     if (getauxval(OSSL_HWCAP2) & OSSL_HWCAP2_RNG)
350         OPENSSL_armcap_P |= ARMV8_RNG;
351 #endif
352 
353 #else /* !__APPLE__ && !OSSL_IMPLEMENT_GETAUXVAL */
354 
355     /* If all else fails, do brute force SIGILL-based feature detection */
356 
357     sigfillset(&all_masked);
358     sigdelset(&all_masked, SIGILL);
359     sigdelset(&all_masked, SIGTRAP);
360     sigdelset(&all_masked, SIGFPE);
361     sigdelset(&all_masked, SIGBUS);
362     sigdelset(&all_masked, SIGSEGV);
363 
364     memset(&ill_act, 0, sizeof(ill_act));
365     ill_act.sa_handler = ill_handler;
366     ill_act.sa_mask = all_masked;
367 
368     sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
369     sigaction(SIGILL, &ill_act, &ill_oact);
370 
371     OPENSSL_armcap_P |= arm_probe_for(_armv7_neon_probe, ARMV7_NEON);
372 
373     if (OPENSSL_armcap_P & ARMV7_NEON) {
374 
375         OPENSSL_armcap_P |= arm_probe_for(_armv8_pmull_probe, ARMV8_PMULL | ARMV8_AES);
376         if (!(OPENSSL_armcap_P & ARMV8_AES)) {
377             OPENSSL_armcap_P |= arm_probe_for(_armv8_aes_probe, ARMV8_AES);
378         }
379 
380         OPENSSL_armcap_P |= arm_probe_for(_armv8_sha1_probe, ARMV8_SHA1);
381         OPENSSL_armcap_P |= arm_probe_for(_armv8_sha256_probe, ARMV8_SHA256);
382 
383 #if defined(__aarch64__)
384         OPENSSL_armcap_P |= arm_probe_for(_armv8_sm3_probe, ARMV8_SM3);
385         OPENSSL_armcap_P |= arm_probe_for(_armv8_sm4_probe, ARMV8_SM4);
386         OPENSSL_armcap_P |= arm_probe_for(_armv8_sha512_probe, ARMV8_SHA512);
387         OPENSSL_armcap_P |= arm_probe_for(_armv8_eor3_probe, ARMV8_SHA3);
388 #endif
389     }
390 #ifdef __aarch64__
391     OPENSSL_armcap_P |= arm_probe_for(_armv8_sve_probe, ARMV8_SVE);
392     OPENSSL_armcap_P |= arm_probe_for(_armv8_sve2_probe, ARMV8_SVE2);
393     OPENSSL_armcap_P |= arm_probe_for(_armv8_rng_probe, ARMV8_RNG);
394 #endif
395 
396     /*
397      * Probing for ARMV7_TICK is known to produce unreliable results,
398      * so we only use the feature when the user explicitly enables it
399      * with OPENSSL_armcap.
400      */
401 
402     sigaction(SIGILL, &ill_oact, NULL);
403     sigprocmask(SIG_SETMASK, &oset, NULL);
404 
405 #endif /* __APPLE__, OSSL_IMPLEMENT_GETAUXVAL */
406 
407 #ifdef __aarch64__
408     if (OPENSSL_armcap_P & ARMV8_CPUID)
409         OPENSSL_arm_midr = _armv8_cpuid_probe();
410 
411     if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N1)) && (OPENSSL_armcap_P & ARMV7_NEON)) {
412         OPENSSL_armv8_rsa_neonized = 1;
413     }
414     if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N2) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_COBALT_100) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2) || MIDR_IMPLEMENTER(OPENSSL_arm_midr) == ARM_CPU_IMP_AMPERE) && (OPENSSL_armcap_P & ARMV8_SHA3))
415         OPENSSL_armcap_P |= ARMV8_UNROLL8_EOR3;
416     if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2) || MIDR_IMPLEMENTER(OPENSSL_arm_midr) == ARM_CPU_IMP_AMPERE) && (OPENSSL_armcap_P & ARMV8_SHA3))
417         OPENSSL_armcap_P |= ARMV8_UNROLL12_EOR3;
418     if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) || MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)) && (OPENSSL_armcap_P & ARMV8_SHA3))
419         OPENSSL_armcap_P |= ARMV8_HAVE_SHA3_AND_WORTH_USING;
420 #endif
421 }
422 #endif /* _WIN32, __ARM_MAX_ARCH__ >= 7 */
423