// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
 */

#ifndef _LIBSPL_SYS_SIMD_H
#define _LIBSPL_SYS_SIMD_H

#include <sys/isa_defs.h>
#include <sys/types.h>

/* including <sys/auxv.h> clashes with AT_UID and others */
#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__)
#if defined(__FreeBSD__)
#define AT_HWCAP 25
#define AT_HWCAP2 26
extern int elf_aux_info(int aux, void *buf, int buflen);
static inline unsigned long getauxval(unsigned long key)
{
	unsigned long val = 0UL;

	if (elf_aux_info((int)key, &val, sizeof (val)) != 0)
		return (0UL);

	return (val);
}
#elif defined(__linux__)
#define AT_HWCAP 16
#define AT_HWCAP2 26
extern unsigned long getauxval(unsigned long type);
#endif /* __linux__ */
#endif /* arm || aarch64 || powerpc */
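
/*
 * On these platforms the feature checks below read the ELF auxiliary
 * vector. The general pattern looks like this (illustrative only;
 * SOME_HWCAP_BIT is a placeholder, the real bits are defined per
 * architecture further down):
 *
 *	unsigned long hwcap = getauxval(AT_HWCAP);
 *	if (hwcap & SOME_HWCAP_BIT)
 *		... use the SIMD implementation ...
 */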

#if defined(__x86)
#include <cpuid.h>

#define kfpu_allowed() 1
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)
#define kfpu_init() 0
#define kfpu_fini() ((void) 0)

/*
 * CPUID feature tests for user-space.
 *
 * x86 registers used implicitly by CPUID
 */
typedef enum cpuid_regs {
	EAX = 0,
	EBX,
	ECX,
	EDX,
	CPUID_REG_CNT = 4
} cpuid_regs_t;
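
/*
 * The enum order matches the output-argument order of __cpuid_count()
 * (EAX, EBX, ECX, EDX), so a cpuid_regs_t value can be used directly
 * as an index into the result array in __cpuid_check_feature() below.
 */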

/*
 * List of instruction sets identified by CPUID
 */
typedef enum cpuid_inst_sets {
	SSE = 0,
	SSE2,
	SSE3,
	SSSE3,
	SSE4_1,
	SSE4_2,
	OSXSAVE,
	AVX,
	AVX2,
	BMI1,
	BMI2,
	AVX512F,
	AVX512CD,
	AVX512DQ,
	AVX512BW,
	AVX512IFMA,
	AVX512VBMI,
	AVX512PF,
	AVX512ER,
	AVX512VL,
	AES,
	PCLMULQDQ,
	MOVBE,
	SHA_NI
} cpuid_inst_sets_t;

/*
 * Instruction set descriptor.
 */
typedef struct cpuid_feature_desc {
	uint32_t leaf;		/* CPUID leaf */
	uint32_t subleaf;	/* CPUID sub-leaf */
	uint32_t flag;		/* bit mask of the feature */
	cpuid_regs_t reg;	/* which CPUID return register to test */
} cpuid_feature_desc_t;
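
/*
 * For example, AVX2 support is reported in CPUID leaf 7, sub-leaf 0,
 * bit 5 of EBX, which corresponds to the cpuid_features[AVX2] entry
 * {7U, 0U, 1U << 5, EBX} in the table below.
 */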

#define _AVX512F_BIT (1U << 16)
#define _AVX512CD_BIT (_AVX512F_BIT | (1U << 28))
#define _AVX512DQ_BIT (_AVX512F_BIT | (1U << 17))
#define _AVX512BW_BIT (_AVX512F_BIT | (1U << 30))
#define _AVX512IFMA_BIT (_AVX512F_BIT | (1U << 21))
#define _AVX512VBMI_BIT (1U << 1) /* AVX512F_BIT is in another register */
#define _AVX512PF_BIT (_AVX512F_BIT | (1U << 26))
#define _AVX512ER_BIT (_AVX512F_BIT | (1U << 27))
#define _AVX512VL_BIT (1U << 31) /* if used also check other levels */
#define _AES_BIT (1U << 25)
#define _PCLMULQDQ_BIT (1U << 1)
#define _MOVBE_BIT (1U << 22)
#define _SHA_NI_BIT (1U << 29)
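
/*
 * Most AVX-512 masks above OR in _AVX512F_BIT. Since the test in
 * __cpuid_check_feature() requires every bit of the mask to be set,
 * a sub-feature is only reported when the AVX512F foundation bit is
 * present as well.
 */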

/*
 * Descriptions of supported instruction sets
 */
static const cpuid_feature_desc_t cpuid_features[] = {
	[SSE] = {1U, 0U, 1U << 25, EDX },
	[SSE2] = {1U, 0U, 1U << 26, EDX },
	[SSE3] = {1U, 0U, 1U << 0, ECX },
	[SSSE3] = {1U, 0U, 1U << 9, ECX },
	[SSE4_1] = {1U, 0U, 1U << 19, ECX },
	[SSE4_2] = {1U, 0U, 1U << 20, ECX },
	[OSXSAVE] = {1U, 0U, 1U << 27, ECX },
	[AVX] = {1U, 0U, 1U << 28, ECX },
	[AVX2] = {7U, 0U, 1U << 5, EBX },
	[BMI1] = {7U, 0U, 1U << 3, EBX },
	[BMI2] = {7U, 0U, 1U << 8, EBX },
	[AVX512F] = {7U, 0U, _AVX512F_BIT, EBX },
	[AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX },
	[AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX },
	[AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX },
	[AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX },
	[AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX },
	[AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX },
	[AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX },
	[AVX512VL] = {7U, 0U, _AVX512VL_BIT, EBX },
	[AES] = {1U, 0U, _AES_BIT, ECX },
	[PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX },
	[MOVBE] = {1U, 0U, _MOVBE_BIT, ECX },
	[SHA_NI] = {7U, 0U, _SHA_NI_BIT, EBX },
};

/*
 * Check if the OS supports AVX and AVX2 by checking XCR0.
 * Only call this function if CPUID indicates that the AVX feature is
 * supported by the CPU; otherwise it might raise an illegal-instruction
 * fault.
 */
static inline uint64_t
xgetbv(uint32_t index)
{
	uint32_t eax, edx;
	/* xgetbv - instruction byte code */
	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
	    : "=a" (eax), "=d" (edx)
	    : "c" (index));

	return ((((uint64_t)edx) << 32) | (uint64_t)eax);
}
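
/*
 * xgetbv(0) reads XCR0, whose bits describe which register state the
 * OS saves and restores on context switch. For example (illustrative):
 *
 *	uint64_t xcr0 = xgetbv(0);
 *	// bit 1: XMM state, bit 2: YMM state, bits 5-7: AVX-512 state
 *
 * __simd_state_enabled() below performs this test.
 */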

/*
 * Check if CPU supports a feature
 */
static inline boolean_t
__cpuid_check_feature(const cpuid_feature_desc_t *desc)
{
	uint32_t r[CPUID_REG_CNT];

	if (__get_cpuid_max(0, NULL) >= desc->leaf) {
		/*
		 * __cpuid_count is needed to properly check
		 * for AVX2. It is a macro, so return parameters
		 * are passed by value.
		 */
		__cpuid_count(desc->leaf, desc->subleaf,
		    r[EAX], r[EBX], r[ECX], r[EDX]);
		return ((r[desc->reg] & desc->flag) == desc->flag);
	}
	return (B_FALSE);
}

#define CPUID_FEATURE_CHECK(name, id) \
static inline boolean_t \
__cpuid_has_ ## name(void) \
{ \
	return (__cpuid_check_feature(&cpuid_features[id])); \
}
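
/*
 * For example, CPUID_FEATURE_CHECK(sse, SSE) expands to:
 *
 *	static inline boolean_t
 *	__cpuid_has_sse(void)
 *	{
 *		return (__cpuid_check_feature(&cpuid_features[SSE]));
 *	}
 */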

/*
 * Define functions for user-space CPUID features testing
 */
CPUID_FEATURE_CHECK(sse, SSE);
CPUID_FEATURE_CHECK(sse2, SSE2);
CPUID_FEATURE_CHECK(sse3, SSE3);
CPUID_FEATURE_CHECK(ssse3, SSSE3);
CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
CPUID_FEATURE_CHECK(avx, AVX);
CPUID_FEATURE_CHECK(avx2, AVX2);
CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
CPUID_FEATURE_CHECK(bmi1, BMI1);
CPUID_FEATURE_CHECK(bmi2, BMI2);
CPUID_FEATURE_CHECK(avx512f, AVX512F);
CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
CPUID_FEATURE_CHECK(aes, AES);
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
CPUID_FEATURE_CHECK(movbe, MOVBE);
CPUID_FEATURE_CHECK(shani, SHA_NI);

/*
 * Detect register set support
 */
static inline boolean_t
__simd_state_enabled(const uint64_t state)
{
	boolean_t has_osxsave;
	uint64_t xcr0;

	has_osxsave = __cpuid_has_osxsave();
	if (!has_osxsave)
		return (B_FALSE);

	xcr0 = xgetbv(0);
	return ((xcr0 & state) == state);
}

#define _XSTATE_SSE_AVX (0x2 | 0x4)
#define _XSTATE_AVX512 (0xE0 | _XSTATE_SSE_AVX)

#define __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
#define __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
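
/*
 * In XCR0, 0x2 covers the XMM (SSE) state and 0x4 the upper YMM (AVX)
 * state; 0xE0 covers the AVX-512 opmask, ZMM_Hi256 and Hi16_ZMM state.
 * __zmm_enabled() therefore requires the OS to save the full
 * SSE/AVX/AVX-512 register state.
 */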

/*
 * Check if SSE instruction set is available
 */
static inline boolean_t
zfs_sse_available(void)
{
	return (__cpuid_has_sse());
}

/*
 * Check if SSE2 instruction set is available
 */
static inline boolean_t
zfs_sse2_available(void)
{
	return (__cpuid_has_sse2());
}

/*
 * Check if SSE3 instruction set is available
 */
static inline boolean_t
zfs_sse3_available(void)
{
	return (__cpuid_has_sse3());
}

/*
 * Check if SSSE3 instruction set is available
 */
static inline boolean_t
zfs_ssse3_available(void)
{
	return (__cpuid_has_ssse3());
}

/*
 * Check if SSE4.1 instruction set is available
 */
static inline boolean_t
zfs_sse4_1_available(void)
{
	return (__cpuid_has_sse4_1());
}

/*
 * Check if SSE4.2 instruction set is available
 */
static inline boolean_t
zfs_sse4_2_available(void)
{
	return (__cpuid_has_sse4_2());
}

/*
 * Check if AVX instruction set is available
 */
static inline boolean_t
zfs_avx_available(void)
{
	return (__cpuid_has_avx() && __ymm_enabled());
}

/*
 * Check if AVX2 instruction set is available
 */
static inline boolean_t
zfs_avx2_available(void)
{
	return (__cpuid_has_avx2() && __ymm_enabled());
}
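
/*
 * A typical caller selects an implementation at runtime, e.g.
 * (illustrative only; the function names are placeholders):
 *
 *	if (zfs_avx2_available())
 *		impl = checksum_avx2;
 *	else
 *		impl = checksum_generic;
 */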

/*
 * Check if BMI1 instruction set is available
 */
static inline boolean_t
zfs_bmi1_available(void)
{
	return (__cpuid_has_bmi1());
}

/*
 * Check if BMI2 instruction set is available
 */
static inline boolean_t
zfs_bmi2_available(void)
{
	return (__cpuid_has_bmi2());
}

/*
 * Check if AES instruction set is available
 */
static inline boolean_t
zfs_aes_available(void)
{
	return (__cpuid_has_aes());
}

/*
 * Check if PCLMULQDQ instruction set is available
 */
static inline boolean_t
zfs_pclmulqdq_available(void)
{
	return (__cpuid_has_pclmulqdq());
}

/*
 * Check if MOVBE instruction is available
 */
static inline boolean_t
zfs_movbe_available(void)
{
	return (__cpuid_has_movbe());
}

/*
 * Check if SHA_NI instruction is available
 */
static inline boolean_t
zfs_shani_available(void)
{
	return (__cpuid_has_shani());
}

/*
 * AVX-512 family of instruction sets:
 *
 * AVX512F	Foundation
 * AVX512CD	Conflict Detection Instructions
 * AVX512ER	Exponential and Reciprocal Instructions
 * AVX512PF	Prefetch Instructions
 *
 * AVX512BW	Byte and Word Instructions
 * AVX512DQ	Double-word and Quadword Instructions
 * AVX512VL	Vector Length Extensions
 *
 * AVX512IFMA	Integer Fused Multiply Add (Not supported by kernel 4.4)
 * AVX512VBMI	Vector Byte Manipulation Instructions
 */

/*
 * Check if AVX512F instruction set is available
 */
static inline boolean_t
zfs_avx512f_available(void)
{
	return (__cpuid_has_avx512f() && __zmm_enabled());
}

/*
 * Check if AVX512CD instruction set is available
 */
static inline boolean_t
zfs_avx512cd_available(void)
{
	return (__cpuid_has_avx512cd() && __zmm_enabled());
}

/*
 * Check if AVX512ER instruction set is available
 */
static inline boolean_t
zfs_avx512er_available(void)
{
	return (__cpuid_has_avx512er() && __zmm_enabled());
}

/*
 * Check if AVX512PF instruction set is available
 */
static inline boolean_t
zfs_avx512pf_available(void)
{
	return (__cpuid_has_avx512pf() && __zmm_enabled());
}

/*
 * Check if AVX512BW instruction set is available
 */
static inline boolean_t
zfs_avx512bw_available(void)
{
	return (__cpuid_has_avx512bw() && __zmm_enabled());
}

/*
 * Check if AVX512DQ instruction set is available
 */
static inline boolean_t
zfs_avx512dq_available(void)
{
	return (__cpuid_has_avx512dq() && __zmm_enabled());
}

/*
 * Check if AVX512VL instruction set is available
 */
static inline boolean_t
zfs_avx512vl_available(void)
{
	return (__cpuid_has_avx512vl() && __zmm_enabled());
}

/*
 * Check if AVX512IFMA instruction set is available
 */
static inline boolean_t
zfs_avx512ifma_available(void)
{
	return (__cpuid_has_avx512ifma() && __zmm_enabled());
}

/*
 * Check if AVX512VBMI instruction set is available
 */
static inline boolean_t
zfs_avx512vbmi_available(void)
{
	return (__cpuid_has_avx512f() && __cpuid_has_avx512vbmi() &&
	    __zmm_enabled());
}

#elif defined(__arm__)

#define kfpu_allowed() 1
#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)

#define HWCAP_NEON 0x00001000
#define HWCAP2_SHA2 0x00000008

/*
 * Check if NEON is available
 */
static inline boolean_t
zfs_neon_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & HWCAP_NEON);
}

/*
 * Check if SHA2 is available
 */
static inline boolean_t
zfs_sha256_available(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);
	return (hwcap2 & HWCAP2_SHA2);
}

#elif defined(__aarch64__)

#define kfpu_allowed() 1
#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)

#define HWCAP_FP 0x00000001
#define HWCAP_SHA2 0x00000040
#define HWCAP_SHA512 0x00200000
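
/*
 * Note: zfs_neon_available() below tests HWCAP_FP rather than a
 * dedicated NEON bit. On AArch64, the FP and Advanced SIMD (NEON)
 * extensions can only be implemented or omitted together, so FP
 * support implies NEON support.
 */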

/*
 * Check if NEON is available
 */
static inline boolean_t
zfs_neon_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & HWCAP_FP);
}

/*
 * Check if SHA2 is available
 */
static inline boolean_t
zfs_sha256_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & HWCAP_SHA2);
}

/*
 * Check if SHA512 is available
 */
static inline boolean_t
zfs_sha512_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & HWCAP_SHA512);
}

#elif defined(__powerpc__)

#define kfpu_allowed() 0
#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)

#define PPC_FEATURE_HAS_ALTIVEC 0x10000000
#define PPC_FEATURE_HAS_VSX 0x00000080
#define PPC_FEATURE2_ARCH_2_07 0x80000000
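
/*
 * PPC_FEATURE_* bits are reported in AT_HWCAP, while PPC_FEATURE2_*
 * bits (such as ARCH_2_07, i.e. POWER8-level ISA 2.07) are reported
 * in AT_HWCAP2, so zfs_isa207_available() has to query both words.
 */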

static inline boolean_t
zfs_altivec_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
}

static inline boolean_t
zfs_vsx_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & PPC_FEATURE_HAS_VSX);
}

static inline boolean_t
zfs_isa207_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	unsigned long hwcap2 = getauxval(AT_HWCAP2);
	return ((hwcap & PPC_FEATURE_HAS_VSX) &&
	    (hwcap2 & PPC_FEATURE2_ARCH_2_07));
}

#else

#define kfpu_allowed() 0
#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)

#endif

extern void simd_stat_init(void);
extern void simd_stat_fini(void);

#endif /* _LIBSPL_SYS_SIMD_H */