1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
24 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
25 */
26
27 #ifndef _LIBSPL_SYS_SIMD_H
28 #define _LIBSPL_SYS_SIMD_H
29
30 #include <sys/isa_defs.h>
31 #include <sys/types.h>
32
33 /* including <sys/auxv.h> clashes with AT_UID and others */
34 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__)
35 #if defined(__FreeBSD__)
#define AT_HWCAP 25
#define AT_HWCAP2 26
extern int elf_aux_info(int aux, void *buf, int buflen);
/*
 * FreeBSD has no getauxval(); emulate it on top of elf_aux_info(3).
 * Returns the aux vector value for `key`, or 0 when the entry cannot
 * be retrieved.
 */
static inline unsigned long getauxval(unsigned long key)
{
	unsigned long val = 0UL;

	if (elf_aux_info((int)key, &val, sizeof (val)) == 0)
		return (val);

	return (0UL);
}
48 #elif defined(__linux__)
49 #define AT_HWCAP 16
50 #define AT_HWCAP2 26
51 extern unsigned long getauxval(unsigned long type);
52 #endif /* __linux__ */
53 #endif /* arm || aarch64 || powerpc */
54
55 #if defined(__x86)
56 #include <cpuid.h>
57
/*
 * In user-space the FPU/SIMD registers are always usable, so the
 * kernel-style FPU enable/save/restore hooks collapse to no-ops.
 */
#define kfpu_allowed() 1
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)
#define kfpu_init() 0
#define kfpu_fini() ((void) 0)
63
/*
 * CPUID feature tests for user-space.
 *
 * x86 registers used implicitly by CPUID; the enumerators index the
 * output array handed to __cpuid_count() below.
 */
typedef enum cpuid_regs {
	EAX = 0,
	EBX,
	ECX,
	EDX,
	CPUID_REG_CNT = 4
} cpuid_regs_t;

/*
 * List of instruction sets identified by CPUID.
 * Each enumerator is a designated-initializer index into the
 * cpuid_features[] table below, so the two must stay in sync.
 */
typedef enum cpuid_inst_sets {
	SSE = 0,
	SSE2,
	SSE3,
	SSSE3,
	SSE4_1,
	SSE4_2,
	OSXSAVE,
	AVX,
	AVX2,
	BMI1,
	BMI2,
	AVX512F,
	AVX512CD,
	AVX512DQ,
	AVX512BW,
	AVX512IFMA,
	AVX512VBMI,
	AVX512PF,
	AVX512ER,
	AVX512VL,
	AES,
	PCLMULQDQ,
	MOVBE,
	SHA_NI
} cpuid_inst_sets_t;

/*
 * Instruction set descriptor: where one feature flag lives in the
 * CPUID output — which leaf/sub-leaf to query and which register and
 * bit mask to test in the result.
 */
typedef struct cpuid_feature_desc {
	uint32_t leaf; /* CPUID leaf */
	uint32_t subleaf; /* CPUID sub-leaf */
	uint32_t flag; /* bit mask of the feature */
	cpuid_regs_t reg; /* which CPUID return register to test */
} cpuid_feature_desc_t;
116
117 #define _AVX512F_BIT (1U << 16)
118 #define _AVX512CD_BIT (_AVX512F_BIT | (1U << 28))
119 #define _AVX512DQ_BIT (_AVX512F_BIT | (1U << 17))
120 #define _AVX512BW_BIT (_AVX512F_BIT | (1U << 30))
121 #define _AVX512IFMA_BIT (_AVX512F_BIT | (1U << 21))
122 #define _AVX512VBMI_BIT (1U << 1) /* AVX512F_BIT is on another leaf */
123 #define _AVX512PF_BIT (_AVX512F_BIT | (1U << 26))
124 #define _AVX512ER_BIT (_AVX512F_BIT | (1U << 27))
125 #define _AVX512VL_BIT (1U << 31) /* if used also check other levels */
126 #define _AES_BIT (1U << 25)
127 #define _PCLMULQDQ_BIT (1U << 1)
128 #define _MOVBE_BIT (1U << 22)
129 #define _SHA_NI_BIT (1U << 29)
130
131 /*
132 * Descriptions of supported instruction sets
133 */
134 static const cpuid_feature_desc_t cpuid_features[] = {
135 [SSE] = {1U, 0U, 1U << 25, EDX },
136 [SSE2] = {1U, 0U, 1U << 26, EDX },
137 [SSE3] = {1U, 0U, 1U << 0, ECX },
138 [SSSE3] = {1U, 0U, 1U << 9, ECX },
139 [SSE4_1] = {1U, 0U, 1U << 19, ECX },
140 [SSE4_2] = {1U, 0U, 1U << 20, ECX },
141 [OSXSAVE] = {1U, 0U, 1U << 27, ECX },
142 [AVX] = {1U, 0U, 1U << 28, ECX },
143 [AVX2] = {7U, 0U, 1U << 5, EBX },
144 [BMI1] = {7U, 0U, 1U << 3, EBX },
145 [BMI2] = {7U, 0U, 1U << 8, EBX },
146 [AVX512F] = {7U, 0U, _AVX512F_BIT, EBX },
147 [AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX },
148 [AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX },
149 [AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX },
150 [AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX },
151 [AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX },
152 [AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX },
153 [AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX },
154 [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX },
155 [AES] = {1U, 0U, _AES_BIT, ECX },
156 [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX },
157 [MOVBE] = {1U, 0U, _MOVBE_BIT, ECX },
158 [SHA_NI] = {7U, 0U, _SHA_NI_BIT, EBX },
159 };
160
/*
 * Check if OS supports AVX and AVX2 by checking XCR0
 * Only call this function if CPUID indicates that AVX feature is
 * supported by the CPU, otherwise it might be an illegal instruction.
 *
 * Returns the 64-bit extended control register selected by `index`
 * (EDX:EAX reassembled). The instruction is emitted as raw opcode
 * bytes — presumably so assemblers lacking the xgetbv mnemonic can
 * still build this; confirm before replacing with the mnemonic.
 */
static inline uint64_t
xgetbv(uint32_t index)
{
	uint32_t eax, edx;
	/* xgetbv - instruction byte code */
	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
	    : "=a" (eax), "=d" (edx)
	    : "c" (index));

	return ((((uint64_t)edx)<<32) | (uint64_t)eax);
}
177
178 /*
179 * Check if CPU supports a feature
180 */
181 static inline boolean_t
__cpuid_check_feature(const cpuid_feature_desc_t * desc)182 __cpuid_check_feature(const cpuid_feature_desc_t *desc)
183 {
184 uint32_t r[CPUID_REG_CNT];
185
186 if (__get_cpuid_max(0, NULL) >= desc->leaf) {
187 /*
188 * __cpuid_count is needed to properly check
189 * for AVX2. It is a macro, so return parameters
190 * are passed by value.
191 */
192 __cpuid_count(desc->leaf, desc->subleaf,
193 r[EAX], r[EBX], r[ECX], r[EDX]);
194 return ((r[desc->reg] & desc->flag) == desc->flag);
195 }
196 return (B_FALSE);
197 }
198
/*
 * Generate a __cpuid_has_<name>() predicate that tests the
 * cpuid_features[] entry for instruction set `id`.
 */
#define CPUID_FEATURE_CHECK(name, id) \
static inline boolean_t \
__cpuid_has_ ## name(void) \
{ \
	return (__cpuid_check_feature(&cpuid_features[id])); \
}

/*
 * Define functions for user-space CPUID features testing
 * (one __cpuid_has_*() per cpuid_inst_sets_t entry).
 */
CPUID_FEATURE_CHECK(sse, SSE);
CPUID_FEATURE_CHECK(sse2, SSE2);
CPUID_FEATURE_CHECK(sse3, SSE3);
CPUID_FEATURE_CHECK(ssse3, SSSE3);
CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
CPUID_FEATURE_CHECK(avx, AVX);
CPUID_FEATURE_CHECK(avx2, AVX2);
CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
CPUID_FEATURE_CHECK(bmi1, BMI1);
CPUID_FEATURE_CHECK(bmi2, BMI2);
CPUID_FEATURE_CHECK(avx512f, AVX512F);
CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
CPUID_FEATURE_CHECK(aes, AES);
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
CPUID_FEATURE_CHECK(movbe, MOVBE);
CPUID_FEATURE_CHECK(shani, SHA_NI);
233
234 /*
235 * Detect register set support
236 */
237 static inline boolean_t
__simd_state_enabled(const uint64_t state)238 __simd_state_enabled(const uint64_t state)
239 {
240 boolean_t has_osxsave;
241 uint64_t xcr0;
242
243 has_osxsave = __cpuid_has_osxsave();
244 if (!has_osxsave)
245 return (B_FALSE);
246
247 xcr0 = xgetbv(0);
248 return ((xcr0 & state) == state);
249 }
250
/*
 * XCR0 state-component bits: 0x2 = SSE (XMM registers), 0x4 = AVX
 * (upper YMM halves), 0xE0 = AVX-512 (opmask, upper ZMM halves and
 * high ZMM registers). All listed components must be OS-enabled for
 * the corresponding register set to be usable.
 */
#define _XSTATE_SSE_AVX (0x2 | 0x4)
#define _XSTATE_AVX512 (0xE0 | _XSTATE_SSE_AVX)

#define __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
#define __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
256
/*
 * Check if SSE instruction set is available
 */
static inline boolean_t
zfs_sse_available(void)
{
	return (__cpuid_has_sse());
}

/*
 * Check if SSE2 instruction set is available
 */
static inline boolean_t
zfs_sse2_available(void)
{
	return (__cpuid_has_sse2());
}

/*
 * Check if SSE3 instruction set is available
 */
static inline boolean_t
zfs_sse3_available(void)
{
	return (__cpuid_has_sse3());
}

/*
 * Check if SSSE3 instruction set is available
 */
static inline boolean_t
zfs_ssse3_available(void)
{
	return (__cpuid_has_ssse3());
}

/*
 * Check if SSE4.1 instruction set is available
 */
static inline boolean_t
zfs_sse4_1_available(void)
{
	return (__cpuid_has_sse4_1());
}

/*
 * Check if SSE4.2 instruction set is available
 */
static inline boolean_t
zfs_sse4_2_available(void)
{
	return (__cpuid_has_sse4_2());
}

/*
 * Check if AVX instruction set is available
 * (requires both the CPU flag and OS-enabled YMM state in XCR0)
 */
static inline boolean_t
zfs_avx_available(void)
{
	return (__cpuid_has_avx() && __ymm_enabled());
}

/*
 * Check if AVX2 instruction set is available
 * (requires both the CPU flag and OS-enabled YMM state in XCR0)
 */
static inline boolean_t
zfs_avx2_available(void)
{
	return (__cpuid_has_avx2() && __ymm_enabled());
}

/*
 * Check if BMI1 instruction set is available
 */
static inline boolean_t
zfs_bmi1_available(void)
{
	return (__cpuid_has_bmi1());
}

/*
 * Check if BMI2 instruction set is available
 */
static inline boolean_t
zfs_bmi2_available(void)
{
	return (__cpuid_has_bmi2());
}

/*
 * Check if AES instruction set is available
 */
static inline boolean_t
zfs_aes_available(void)
{
	return (__cpuid_has_aes());
}

/*
 * Check if PCLMULQDQ instruction set is available
 */
static inline boolean_t
zfs_pclmulqdq_available(void)
{
	return (__cpuid_has_pclmulqdq());
}

/*
 * Check if MOVBE instruction is available
 */
static inline boolean_t
zfs_movbe_available(void)
{
	return (__cpuid_has_movbe());
}

/*
 * Check if SHA_NI instruction is available
 */
static inline boolean_t
zfs_shani_available(void)
{
	return (__cpuid_has_shani());
}
382
/*
 * AVX-512 family of instruction sets:
 *
 * AVX512F	Foundation
 * AVX512CD	Conflict Detection Instructions
 * AVX512ER	Exponential and Reciprocal Instructions
 * AVX512PF	Prefetch Instructions
 *
 * AVX512BW	Byte and Word Instructions
 * AVX512DQ	Double-word and Quadword Instructions
 * AVX512VL	Vector Length Extensions
 *
 * AVX512IFMA	Integer Fused Multiply Add (Not supported by kernel 4.4)
 * AVX512VBMI	Vector Byte Manipulation Instructions
 *
 * Every predicate below also requires OS-enabled ZMM/opmask state
 * (__zmm_enabled()), since the CPU flag alone does not guarantee the
 * OS saves/restores the AVX-512 registers.
 */

/*
 * Check if AVX512F instruction set is available
 */
static inline boolean_t
zfs_avx512f_available(void)
{
	return (__cpuid_has_avx512f() && __zmm_enabled());
}

/*
 * Check if AVX512CD instruction set is available
 */
static inline boolean_t
zfs_avx512cd_available(void)
{
	return (__cpuid_has_avx512cd() && __zmm_enabled());
}

/*
 * Check if AVX512ER instruction set is available
 */
static inline boolean_t
zfs_avx512er_available(void)
{
	return (__cpuid_has_avx512er() && __zmm_enabled());
}

/*
 * Check if AVX512PF instruction set is available
 */
static inline boolean_t
zfs_avx512pf_available(void)
{
	return (__cpuid_has_avx512pf() && __zmm_enabled());
}

/*
 * Check if AVX512BW instruction set is available
 */
static inline boolean_t
zfs_avx512bw_available(void)
{
	return (__cpuid_has_avx512bw() && __zmm_enabled());
}

/*
 * Check if AVX512DQ instruction set is available
 */
static inline boolean_t
zfs_avx512dq_available(void)
{
	return (__cpuid_has_avx512dq() && __zmm_enabled());
}

/*
 * Check if AVX512VL instruction set is available
 */
static inline boolean_t
zfs_avx512vl_available(void)
{
	return (__cpuid_has_avx512vl() && __zmm_enabled());
}

/*
 * Check if AVX512IFMA instruction set is available
 */
static inline boolean_t
zfs_avx512ifma_available(void)
{
	return (__cpuid_has_avx512ifma() && __zmm_enabled());
}

/*
 * Check if AVX512VBMI instruction set is available
 *
 * The VBMI CPUID bit does not fold in the AVX512F bit (it lives in a
 * different output register), so AVX512F is tested explicitly here.
 */
static inline boolean_t
zfs_avx512vbmi_available(void)
{
	return (__cpuid_has_avx512f() && __cpuid_has_avx512vbmi() &&
	    __zmm_enabled());
}
480
481 #elif defined(__arm__)
482
483 #define kfpu_allowed() 1
484 #define kfpu_initialize(tsk) do {} while (0)
485 #define kfpu_begin() do {} while (0)
486 #define kfpu_end() do {} while (0)
487
488 #define HWCAP_NEON 0x00001000
489 #define HWCAP2_SHA2 0x00000008
490
491 /*
492 * Check if NEON is available
493 */
494 static inline boolean_t
zfs_neon_available(void)495 zfs_neon_available(void)
496 {
497 unsigned long hwcap = getauxval(AT_HWCAP);
498 return (hwcap & HWCAP_NEON);
499 }
500
501 /*
502 * Check if SHA2 is available
503 */
504 static inline boolean_t
zfs_sha256_available(void)505 zfs_sha256_available(void)
506 {
507 unsigned long hwcap = getauxval(AT_HWCAP);
508 return (hwcap & HWCAP2_SHA2);
509 }
510
511 #elif defined(__aarch64__)
512
#define kfpu_allowed() 1
#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)

/*
 * Hardcoded AT_HWCAP bits (see the note at the top of this header on
 * why <sys/auxv.h> cannot be included).
 */
#define HWCAP_FP 0x00000001
#define HWCAP_SHA2 0x00000040
#define HWCAP_SHA512 0x00200000

/*
 * Check if NEON is available
 *
 * NOTE(review): this tests HWCAP_FP rather than an ASIMD flag; on
 * aarch64 FP and Advanced SIMD are normally implemented together,
 * presumably making FP a sufficient proxy — confirm.
 */
static inline boolean_t
zfs_neon_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & HWCAP_FP);
}

/*
 * Check if SHA2 is available
 */
static inline boolean_t
zfs_sha256_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & HWCAP_SHA2);
}

/*
 * Check if SHA512 is available
 */
static inline boolean_t
zfs_sha512_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & HWCAP_SHA512);
}
551
552 #elif defined(__powerpc__)
553
/* Note: kfpu_allowed() is 0 here, unlike the x86/arm sections above. */
#define kfpu_allowed() 0
#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)

/*
 * Hardcoded AT_HWCAP/AT_HWCAP2 bits (see the note at the top of this
 * header on why <sys/auxv.h> cannot be included).
 */
#define PPC_FEATURE_HAS_ALTIVEC 0x10000000
#define PPC_FEATURE_HAS_VSX 0x00000080
#define PPC_FEATURE2_ARCH_2_07 0x80000000

/* Check if AltiVec is available */
static inline boolean_t
zfs_altivec_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
}

/* Check if VSX is available */
static inline boolean_t
zfs_vsx_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & PPC_FEATURE_HAS_VSX);
}

/* Check if POWER ISA 2.07 is available (requires VSX as well) */
static inline boolean_t
zfs_isa207_available(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	unsigned long hwcap2 = getauxval(AT_HWCAP2);
	return ((hwcap & PPC_FEATURE_HAS_VSX) &&
	    (hwcap2 & PPC_FEATURE2_ARCH_2_07));
}
585
586 #else
587
/* Fallback for architectures without user-space SIMD feature checks. */
#define kfpu_allowed() 0
#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)
592
593 #endif
594
595 extern void simd_stat_init(void);
596 extern void simd_stat_fini(void);
597
598 #endif /* _LIBSPL_SYS_SIMD_H */
599