1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License, Version 1.0 only
7 * (the "License"). You may not use this file except in compliance
8 * with the License.
9 *
10 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11 * or https://opensource.org/licenses/CDDL-1.0.
12 * See the License for the specific language governing permissions
13 * and limitations under the License.
14 *
15 * When distributing Covered Code, include this CDDL HEADER in each
16 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17 * If applicable, add the following below this CDDL HEADER, with the
18 * fields enclosed by brackets "[]" replaced with your own identifying
19 * information: Portions Copyright [yyyy] [name of copyright owner]
20 *
21 * CDDL HEADER END
22 */
23 /*
24 * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
25 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
26 */
27
28 #ifndef _LIBSPL_SYS_SIMD_H
29 #define _LIBSPL_SYS_SIMD_H
30
31 #include <sys/isa_defs.h>
32 #include <sys/types.h>
33
34 /* including <sys/auxv.h> clashes with AT_UID and others */
35 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__)
36 #if defined(__FreeBSD__)
37 #define AT_HWCAP 25
38 #define AT_HWCAP2 26
39 extern int elf_aux_info(int aux, void *buf, int buflen);
/*
 * FreeBSD has no getauxval(3); emulate it with elf_aux_info(3),
 * returning 0 when the requested entry cannot be retrieved (which
 * matches the glibc behavior for a missing aux vector entry).
 */
static inline unsigned long
getauxval(unsigned long key)
{
	unsigned long value = 0UL;

	if (elf_aux_info((int)key, &value, sizeof (value)) == 0)
		return (value);

	return (0UL);
}
49 #elif defined(__linux__)
50 #define AT_HWCAP 16
51 #define AT_HWCAP2 26
52 extern unsigned long getauxval(unsigned long type);
53 #endif /* __linux__ */
54 #endif /* arm || aarch64 || powerpc */
55
56 #if defined(__x86)
57 #include <cpuid.h>
58
/*
 * In user-space the SIMD registers may be used freely, so the
 * kernel-style FPU begin/end bracketing degenerates to no-ops.
 */
#define	kfpu_allowed()		1
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)
#define	kfpu_init()		0
#define	kfpu_fini()		((void) 0)
64
65 /*
66 * CPUID feature tests for user-space.
67 *
68 * x86 registers used implicitly by CPUID
69 */
typedef enum cpuid_regs {
	EAX = 0,
	EBX,
	ECX,
	EDX,
	CPUID_REG_CNT = 4	/* number of registers CPUID returns */
} cpuid_regs_t;
77
78 /*
79 * List of instruction sets identified by CPUID
80 */
/* Each value is also the designated index into cpuid_features[] below. */
typedef enum cpuid_inst_sets {
	SSE = 0,
	SSE2,
	SSE3,
	SSSE3,
	SSE4_1,
	SSE4_2,
	OSXSAVE,
	AVX,
	AVX2,
	BMI1,
	BMI2,
	AVX512F,
	AVX512CD,
	AVX512DQ,
	AVX512BW,
	AVX512IFMA,
	AVX512VBMI,
	AVX512PF,
	AVX512ER,
	AVX512VL,
	AES,
	PCLMULQDQ,
	MOVBE,
	SHA_NI,
	VAES,
	VPCLMULQDQ
} cpuid_inst_sets_t;
109
110 /*
111 * Instruction set descriptor.
112 */
/* Describes one CPUID feature test: where to look and which bits to check. */
typedef struct cpuid_feature_desc {
	uint32_t leaf;		/* CPUID leaf */
	uint32_t subleaf;	/* CPUID sub-leaf */
	uint32_t flag;		/* bit mask of the feature */
	cpuid_regs_t reg;	/* which CPUID return register to test */
} cpuid_feature_desc_t;
119
/*
 * Feature bit masks for cpuid_features[].  The AVX-512 sub-feature
 * masks on leaf 7/EBX are OR-ed with the AVX512F bit so a single mask
 * comparison confirms both the foundation and the sub-feature.
 */
#define	_AVX512F_BIT		(1U << 16)
#define	_AVX512CD_BIT		(_AVX512F_BIT | (1U << 28))
#define	_AVX512DQ_BIT		(_AVX512F_BIT | (1U << 17))
#define	_AVX512BW_BIT		(_AVX512F_BIT | (1U << 30))
#define	_AVX512IFMA_BIT		(_AVX512F_BIT | (1U << 21))
#define	_AVX512VBMI_BIT		(1U << 1) /* AVX512F_BIT is on another leaf */
#define	_AVX512PF_BIT		(_AVX512F_BIT | (1U << 26))
#define	_AVX512ER_BIT		(_AVX512F_BIT | (1U << 27))
#define	_AVX512VL_BIT		(1U << 31) /* if used also check other levels */
#define	_AES_BIT		(1U << 25)
#define	_PCLMULQDQ_BIT		(1U << 1)
#define	_MOVBE_BIT		(1U << 22)
#define	_VAES_BIT		(1U << 9)
#define	_VPCLMULQDQ_BIT		(1U << 10)
#define	_SHA_NI_BIT		(1U << 29)
135
136 /*
137 * Descriptions of supported instruction sets
138 */
139 static const cpuid_feature_desc_t cpuid_features[] = {
140 [SSE] = {1U, 0U, 1U << 25, EDX },
141 [SSE2] = {1U, 0U, 1U << 26, EDX },
142 [SSE3] = {1U, 0U, 1U << 0, ECX },
143 [SSSE3] = {1U, 0U, 1U << 9, ECX },
144 [SSE4_1] = {1U, 0U, 1U << 19, ECX },
145 [SSE4_2] = {1U, 0U, 1U << 20, ECX },
146 [OSXSAVE] = {1U, 0U, 1U << 27, ECX },
147 [AVX] = {1U, 0U, 1U << 28, ECX },
148 [AVX2] = {7U, 0U, 1U << 5, EBX },
149 [BMI1] = {7U, 0U, 1U << 3, EBX },
150 [BMI2] = {7U, 0U, 1U << 8, EBX },
151 [AVX512F] = {7U, 0U, _AVX512F_BIT, EBX },
152 [AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX },
153 [AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX },
154 [AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX },
155 [AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX },
156 [AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX },
157 [AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX },
158 [AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX },
159 [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX },
160 [AES] = {1U, 0U, _AES_BIT, ECX },
161 [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX },
162 [MOVBE] = {1U, 0U, _MOVBE_BIT, ECX },
163 [SHA_NI] = {7U, 0U, _SHA_NI_BIT, EBX },
164 [VAES] = {7U, 0U, _VAES_BIT, ECX },
165 [VPCLMULQDQ] = {7U, 0U, _VPCLMULQDQ_BIT, ECX },
166 };
167
168 /*
169 * Check if OS supports AVX and AVX2 by checking XCR0
170 * Only call this function if CPUID indicates that AVX feature is
171 * supported by the CPU, otherwise it might be an illegal instruction.
172 */
static inline uint64_t
xgetbv(uint32_t index)
{
	uint32_t eax, edx;
	/*
	 * xgetbv - instruction byte code (0f 01 d0), emitted as raw bytes
	 * presumably so it assembles with toolchains lacking the mnemonic.
	 * ECX selects which XCR to read; the result is returned in EDX:EAX.
	 */
	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
	    : "=a" (eax), "=d" (edx)
	    : "c" (index));

	/* Reassemble the 64-bit XCR value from the two halves. */
	return ((((uint64_t)edx)<<32) | (uint64_t)eax);
}
184
185 /*
186 * Check if CPU supports a feature
187 */
188 static inline boolean_t
__cpuid_check_feature(const cpuid_feature_desc_t * desc)189 __cpuid_check_feature(const cpuid_feature_desc_t *desc)
190 {
191 uint32_t r[CPUID_REG_CNT];
192
193 if (__get_cpuid_max(0, NULL) >= desc->leaf) {
194 /*
195 * __cpuid_count is needed to properly check
196 * for AVX2. It is a macro, so return parameters
197 * are passed by value.
198 */
199 __cpuid_count(desc->leaf, desc->subleaf,
200 r[EAX], r[EBX], r[ECX], r[EDX]);
201 return ((r[desc->reg] & desc->flag) == desc->flag);
202 }
203 return (B_FALSE);
204 }
205
/*
 * Expands to a static inline __cpuid_has_<name>() predicate wired to
 * one entry of cpuid_features[].
 */
#define	CPUID_FEATURE_CHECK(name, id)				\
static inline boolean_t					\
__cpuid_has_ ## name(void)					\
{								\
	return (__cpuid_check_feature(&cpuid_features[id]));	\
}
212
/*
 * Define functions for user-space CPUID features testing.  Each line
 * instantiates a __cpuid_has_<name>() predicate via CPUID_FEATURE_CHECK.
 */
CPUID_FEATURE_CHECK(sse, SSE);
CPUID_FEATURE_CHECK(sse2, SSE2);
CPUID_FEATURE_CHECK(sse3, SSE3);
CPUID_FEATURE_CHECK(ssse3, SSSE3);
CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
CPUID_FEATURE_CHECK(avx, AVX);
CPUID_FEATURE_CHECK(avx2, AVX2);
CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
CPUID_FEATURE_CHECK(bmi1, BMI1);
CPUID_FEATURE_CHECK(bmi2, BMI2);
CPUID_FEATURE_CHECK(avx512f, AVX512F);
CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
CPUID_FEATURE_CHECK(aes, AES);
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
CPUID_FEATURE_CHECK(movbe, MOVBE);
CPUID_FEATURE_CHECK(shani, SHA_NI);
CPUID_FEATURE_CHECK(vaes, VAES);
CPUID_FEATURE_CHECK(vpclmulqdq, VPCLMULQDQ);
242
243 /*
244 * Detect register set support
245 */
246 static inline boolean_t
__simd_state_enabled(const uint64_t state)247 __simd_state_enabled(const uint64_t state)
248 {
249 boolean_t has_osxsave;
250 uint64_t xcr0;
251
252 has_osxsave = __cpuid_has_osxsave();
253 if (!has_osxsave)
254 return (B_FALSE);
255
256 xcr0 = xgetbv(0);
257 return ((xcr0 & state) == state);
258 }
259
/* XCR0 masks: bits 1-2 cover XMM/YMM state; bits 5-7 add AVX-512 state */
#define	_XSTATE_SSE_AVX	(0x2 | 0x4)
#define	_XSTATE_AVX512	(0xE0 | _XSTATE_SSE_AVX)

#define	__ymm_enabled()	__simd_state_enabled(_XSTATE_SSE_AVX)
#define	__zmm_enabled()	__simd_state_enabled(_XSTATE_AVX512)
265
266 /*
267 * Check if SSE instruction set is available
268 */
269 static inline boolean_t
zfs_sse_available(void)270 zfs_sse_available(void)
271 {
272 return (__cpuid_has_sse());
273 }
274
275 /*
276 * Check if SSE2 instruction set is available
277 */
278 static inline boolean_t
zfs_sse2_available(void)279 zfs_sse2_available(void)
280 {
281 return (__cpuid_has_sse2());
282 }
283
284 /*
285 * Check if SSE3 instruction set is available
286 */
287 static inline boolean_t
zfs_sse3_available(void)288 zfs_sse3_available(void)
289 {
290 return (__cpuid_has_sse3());
291 }
292
293 /*
294 * Check if SSSE3 instruction set is available
295 */
296 static inline boolean_t
zfs_ssse3_available(void)297 zfs_ssse3_available(void)
298 {
299 return (__cpuid_has_ssse3());
300 }
301
302 /*
303 * Check if SSE4.1 instruction set is available
304 */
305 static inline boolean_t
zfs_sse4_1_available(void)306 zfs_sse4_1_available(void)
307 {
308 return (__cpuid_has_sse4_1());
309 }
310
311 /*
312 * Check if SSE4.2 instruction set is available
313 */
314 static inline boolean_t
zfs_sse4_2_available(void)315 zfs_sse4_2_available(void)
316 {
317 return (__cpuid_has_sse4_2());
318 }
319
320 /*
321 * Check if AVX instruction set is available
322 */
323 static inline boolean_t
zfs_avx_available(void)324 zfs_avx_available(void)
325 {
326 return (__cpuid_has_avx() && __ymm_enabled());
327 }
328
329 /*
330 * Check if AVX2 instruction set is available
331 */
332 static inline boolean_t
zfs_avx2_available(void)333 zfs_avx2_available(void)
334 {
335 return (__cpuid_has_avx2() && __ymm_enabled());
336 }
337
338 /*
339 * Check if BMI1 instruction set is available
340 */
341 static inline boolean_t
zfs_bmi1_available(void)342 zfs_bmi1_available(void)
343 {
344 return (__cpuid_has_bmi1());
345 }
346
347 /*
348 * Check if BMI2 instruction set is available
349 */
350 static inline boolean_t
zfs_bmi2_available(void)351 zfs_bmi2_available(void)
352 {
353 return (__cpuid_has_bmi2());
354 }
355
356 /*
357 * Check if AES instruction set is available
358 */
359 static inline boolean_t
zfs_aes_available(void)360 zfs_aes_available(void)
361 {
362 return (__cpuid_has_aes());
363 }
364
365 /*
366 * Check if PCLMULQDQ instruction set is available
367 */
368 static inline boolean_t
zfs_pclmulqdq_available(void)369 zfs_pclmulqdq_available(void)
370 {
371 return (__cpuid_has_pclmulqdq());
372 }
373
374 /*
375 * Check if MOVBE instruction is available
376 */
377 static inline boolean_t
zfs_movbe_available(void)378 zfs_movbe_available(void)
379 {
380 return (__cpuid_has_movbe());
381 }
382
383 /*
384 * Check if SHA_NI instruction is available
385 */
386 static inline boolean_t
zfs_shani_available(void)387 zfs_shani_available(void)
388 {
389 return (__cpuid_has_shani());
390 }
391
392 /*
393 * Check if VAES instruction is available
394 */
395 static inline boolean_t
zfs_vaes_available(void)396 zfs_vaes_available(void)
397 {
398 return (__cpuid_has_vaes());
399 }
400
401 /*
402 * Check if VPCLMULQDQ instruction is available
403 */
404 static inline boolean_t
zfs_vpclmulqdq_available(void)405 zfs_vpclmulqdq_available(void)
406 {
407 return (__cpuid_has_vpclmulqdq());
408 }
409
410 /*
411 * AVX-512 family of instruction sets:
412 *
413 * AVX512F Foundation
414 * AVX512CD Conflict Detection Instructions
415 * AVX512ER Exponential and Reciprocal Instructions
416 * AVX512PF Prefetch Instructions
417 *
418 * AVX512BW Byte and Word Instructions
419 * AVX512DQ Double-word and Quadword Instructions
420 * AVX512VL Vector Length Extensions
421 *
422 * AVX512IFMA Integer Fused Multiply Add (Not supported by kernel 4.4)
423 * AVX512VBMI Vector Byte Manipulation Instructions
424 */
425
426 /*
427 * Check if AVX512F instruction set is available
428 */
429 static inline boolean_t
zfs_avx512f_available(void)430 zfs_avx512f_available(void)
431 {
432 return (__cpuid_has_avx512f() && __zmm_enabled());
433 }
434
435 /*
436 * Check if AVX512CD instruction set is available
437 */
438 static inline boolean_t
zfs_avx512cd_available(void)439 zfs_avx512cd_available(void)
440 {
441 return (__cpuid_has_avx512cd() && __zmm_enabled());
442 }
443
444 /*
445 * Check if AVX512ER instruction set is available
446 */
447 static inline boolean_t
zfs_avx512er_available(void)448 zfs_avx512er_available(void)
449 {
450 return (__cpuid_has_avx512er() && __zmm_enabled());
451 }
452
453 /*
454 * Check if AVX512PF instruction set is available
455 */
456 static inline boolean_t
zfs_avx512pf_available(void)457 zfs_avx512pf_available(void)
458 {
459 return (__cpuid_has_avx512pf() && __zmm_enabled());
460 }
461
462 /*
463 * Check if AVX512BW instruction set is available
464 */
465 static inline boolean_t
zfs_avx512bw_available(void)466 zfs_avx512bw_available(void)
467 {
468 return (__cpuid_has_avx512bw() && __zmm_enabled());
469 }
470
471 /*
472 * Check if AVX512DQ instruction set is available
473 */
474 static inline boolean_t
zfs_avx512dq_available(void)475 zfs_avx512dq_available(void)
476 {
477 return (__cpuid_has_avx512dq() && __zmm_enabled());
478 }
479
480 /*
481 * Check if AVX512VL instruction set is available
482 */
483 static inline boolean_t
zfs_avx512vl_available(void)484 zfs_avx512vl_available(void)
485 {
486 return (__cpuid_has_avx512vl() && __zmm_enabled());
487 }
488
489 /*
490 * Check if AVX512IFMA instruction set is available
491 */
492 static inline boolean_t
zfs_avx512ifma_available(void)493 zfs_avx512ifma_available(void)
494 {
495 return (__cpuid_has_avx512ifma() && __zmm_enabled());
496 }
497
498 /*
499 * Check if AVX512VBMI instruction set is available
500 */
501 static inline boolean_t
zfs_avx512vbmi_available(void)502 zfs_avx512vbmi_available(void)
503 {
504 return (__cpuid_has_avx512f() && __cpuid_has_avx512vbmi() &&
505 __zmm_enabled());
506 }
507
508 #elif defined(__arm__)
509
/* User-space may use the SIMD registers freely; kfpu hooks are no-ops. */
#define	kfpu_allowed()		1
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()	do {} while (0)
#define	kfpu_end()	do {} while (0)

/* NEON is reported in AT_HWCAP; the SHA2 crypto extension in AT_HWCAP2 */
#define	HWCAP_NEON	0x00001000
#define	HWCAP2_SHA2	0x00000008
517
518 /*
519 * Check if NEON is available
520 */
521 static inline boolean_t
zfs_neon_available(void)522 zfs_neon_available(void)
523 {
524 unsigned long hwcap = getauxval(AT_HWCAP);
525 return (hwcap & HWCAP_NEON);
526 }
527
528 /*
529 * Check if SHA2 is available
530 */
531 static inline boolean_t
zfs_sha256_available(void)532 zfs_sha256_available(void)
533 {
534 unsigned long hwcap = getauxval(AT_HWCAP);
535 return (hwcap & HWCAP2_SHA2);
536 }
537
538 #elif defined(__aarch64__)
539
/* User-space may use the SIMD registers freely; kfpu hooks are no-ops. */
#define	kfpu_allowed()		1
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()	do {} while (0)
#define	kfpu_end()	do {} while (0)

/* AT_HWCAP bits (aarch64 ELF hwcaps) */
#define	HWCAP_FP		0x00000001
#define	HWCAP_SHA2		0x00000040
#define	HWCAP_SHA512		0x00200000
548
549 /*
550 * Check if NEON is available
551 */
552 static inline boolean_t
zfs_neon_available(void)553 zfs_neon_available(void)
554 {
555 unsigned long hwcap = getauxval(AT_HWCAP);
556 return (hwcap & HWCAP_FP);
557 }
558
559 /*
560 * Check if SHA2 is available
561 */
562 static inline boolean_t
zfs_sha256_available(void)563 zfs_sha256_available(void)
564 {
565 unsigned long hwcap = getauxval(AT_HWCAP);
566 return (hwcap & HWCAP_SHA2);
567 }
568
569 /*
570 * Check if SHA512 is available
571 */
572 static inline boolean_t
zfs_sha512_available(void)573 zfs_sha512_available(void)
574 {
575 unsigned long hwcap = getauxval(AT_HWCAP);
576 return (hwcap & HWCAP_SHA512);
577 }
578
579 #elif defined(__powerpc__)
580
/* Note: unlike the other architectures, kfpu use is disallowed here. */
#define	kfpu_allowed()		0
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()	do {} while (0)
#define	kfpu_end()	do {} while (0)

/* PowerPC AT_HWCAP / AT_HWCAP2 feature bits */
#define	PPC_FEATURE_HAS_ALTIVEC	0x10000000
#define	PPC_FEATURE_HAS_VSX	0x00000080
#define	PPC_FEATURE2_ARCH_2_07	0x80000000
589
590 static inline boolean_t
zfs_altivec_available(void)591 zfs_altivec_available(void)
592 {
593 unsigned long hwcap = getauxval(AT_HWCAP);
594 return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
595 }
596
597 static inline boolean_t
zfs_vsx_available(void)598 zfs_vsx_available(void)
599 {
600 unsigned long hwcap = getauxval(AT_HWCAP);
601 return (hwcap & PPC_FEATURE_HAS_VSX);
602 }
603
604 static inline boolean_t
zfs_isa207_available(void)605 zfs_isa207_available(void)
606 {
607 unsigned long hwcap = getauxval(AT_HWCAP);
608 unsigned long hwcap2 = getauxval(AT_HWCAP2);
609 return ((hwcap & PPC_FEATURE_HAS_VSX) &&
610 (hwcap2 & PPC_FEATURE2_ARCH_2_07));
611 }
612
613 #else
614
/* Unknown architecture: no SIMD support; kfpu hooks are no-ops. */
#define	kfpu_allowed()		0
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()	do {} while (0)
#define	kfpu_end()	do {} while (0)
619
620 #endif
621
622 extern void simd_stat_init(void);
623 extern void simd_stat_fini(void);
624
625 #endif /* _LIBSPL_SYS_SIMD_H */
626