/*
 * This file is in public domain.
 * Written by Dmitry Chagin <dchagin@FreeBSD.org>
 */

/*
 * Syntax: GNU as, AT&T operand order, run through the C preprocessor.
 * Target: x86-64.  Every routine below takes its only argument in %rdi
 * and returns with retq.
 */

#if defined(__FreeBSD__)
#include <machine/specialreg.h>
#else
/* Fallback definitions for systems without <machine/specialreg.h>. */
#define	CPUID2_OSXSAVE		0x08000000	/* CPUID.1:ECX bit 27 */
#define	CPUID2_AVX		0x10000000	/* CPUID.1:ECX bit 28 */
#define	XFEATURE_ENABLED_X87	0x00000001	/* XCR0 bit 0 */
#define	XFEATURE_ENABLED_SSE	0x00000002	/* XCR0 bit 1 */
#define	XFEATURE_ENABLED_AVX	0x00000004	/* XCR0 bit 2 */
#define	XFEATURE_AVX					\
	(XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
#endif

	.text

/*
 * xregs_banks_max
 *
 * Out:   %eax = 1 when CPUID leaf 1 reports both AVX and OSXSAVE and
 *               XCR0 has the x87, SSE and AVX state bits all set;
 *        %eax = 0 otherwise.
 *        (Presumably the index of the widest usable register bank,
 *        0 = XMM, 1 = YMM -- confirm against the callers.)
 * Clobb: %rax, %rcx, %rdx, flags.  %rbx is saved and restored because
 *        cpuid overwrites it.
 */
	.globl	xregs_banks_max
	.type	xregs_banks_max, @function
xregs_banks_max:
	pushq	%rbx			/* cpuid writes %ebx */
	movl	$1, %eax		/* CPUID leaf 1: feature flags */
	cpuid
	/* Both bits are required, hence mask-then-compare, not testl. */
	andl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
	cmpl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
	jne	.Lsse
	xorl	%ecx, %ecx		/* select XCR0 */
	xgetbv				/* %edx:%eax = XCR0 */
	andl	$XFEATURE_AVX, %eax
	cmpl	$XFEATURE_AVX, %eax
	jne	.Lsse
	movl	$1, %eax
	jmp	.Lout
.Lsse:
	xorl	%eax, %eax
.Lout:
	popq	%rbx
	retq

	.size	xregs_banks_max, . - xregs_banks_max


/*
 * cpu_to_xmm
 *
 * In:    %rdi = destination buffer, 16 * 16 bytes are written.
 * Stores %xmm0..%xmm15 to consecutive 16-byte slots, %xmm0 first.
 * Unaligned stores are used, so the buffer needs no special alignment.
 * No register is modified.
 */
	.globl	cpu_to_xmm
	.type	cpu_to_xmm, @function
cpu_to_xmm:
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm1, 1 * 16(%rdi)
	movdqu	%xmm2, 2 * 16(%rdi)
	movdqu	%xmm3, 3 * 16(%rdi)
	movdqu	%xmm4, 4 * 16(%rdi)
	movdqu	%xmm5, 5 * 16(%rdi)
	movdqu	%xmm6, 6 * 16(%rdi)
	movdqu	%xmm7, 7 * 16(%rdi)
	movdqu	%xmm8, 8 * 16(%rdi)
	movdqu	%xmm9, 9 * 16(%rdi)
	movdqu	%xmm10, 10 * 16(%rdi)
	movdqu	%xmm11, 11 * 16(%rdi)
	movdqu	%xmm12, 12 * 16(%rdi)
	movdqu	%xmm13, 13 * 16(%rdi)
	movdqu	%xmm14, 14 * 16(%rdi)
	movdqu	%xmm15, 15 * 16(%rdi)
	retq

	.size	cpu_to_xmm, . - cpu_to_xmm


/*
 * xmm_to_cpu
 *
 * In:    %rdi = source buffer, 16 * 16 bytes are read.
 * Loads %xmm0..%xmm15 from consecutive 16-byte slots, %xmm0 first.
 * Unaligned loads are used, so the buffer needs no special alignment.
 * Clobb: %xmm0..%xmm15 (that is the purpose of the routine).
 */
	.globl	xmm_to_cpu
	.type	xmm_to_cpu, @function
xmm_to_cpu:
	movdqu	(%rdi), %xmm0
	movdqu	1 * 16(%rdi), %xmm1
	movdqu	2 * 16(%rdi), %xmm2
	movdqu	3 * 16(%rdi), %xmm3
	movdqu	4 * 16(%rdi), %xmm4
	movdqu	5 * 16(%rdi), %xmm5
	movdqu	6 * 16(%rdi), %xmm6
	movdqu	7 * 16(%rdi), %xmm7
	movdqu	8 * 16(%rdi), %xmm8
	movdqu	9 * 16(%rdi), %xmm9
	movdqu	10 * 16(%rdi), %xmm10
	movdqu	11 * 16(%rdi), %xmm11
	movdqu	12 * 16(%rdi), %xmm12
	movdqu	13 * 16(%rdi), %xmm13
	movdqu	14 * 16(%rdi), %xmm14
	movdqu	15 * 16(%rdi), %xmm15
	retq

	.size	xmm_to_cpu, . - xmm_to_cpu


/*
 * cpu_to_avx
 *
 * In:    %rdi = destination buffer, 16 * 32 bytes are written.
 * Stores %ymm0..%ymm15 to consecutive 32-byte slots, %ymm0 first.
 * Unaligned stores are used, so the buffer needs no special alignment.
 * Uses VEX-encoded instructions; this routine does not itself check
 * for AVX support.
 */
	.globl	cpu_to_avx
	.type	cpu_to_avx, @function
cpu_to_avx:
	vmovdqu	%ymm0, (%rdi)
	vmovdqu	%ymm1, 1 * 32(%rdi)
	vmovdqu	%ymm2, 2 * 32(%rdi)
	vmovdqu	%ymm3, 3 * 32(%rdi)
	vmovdqu	%ymm4, 4 * 32(%rdi)
	vmovdqu	%ymm5, 5 * 32(%rdi)
	vmovdqu	%ymm6, 6 * 32(%rdi)
	vmovdqu	%ymm7, 7 * 32(%rdi)
	vmovdqu	%ymm8, 8 * 32(%rdi)
	vmovdqu	%ymm9, 9 * 32(%rdi)
	vmovdqu	%ymm10, 10 * 32(%rdi)
	vmovdqu	%ymm11, 11 * 32(%rdi)
	vmovdqu	%ymm12, 12 * 32(%rdi)
	vmovdqu	%ymm13, 13 * 32(%rdi)
	vmovdqu	%ymm14, 14 * 32(%rdi)
	vmovdqu	%ymm15, 15 * 32(%rdi)
	retq

	.size	cpu_to_avx, . - cpu_to_avx


/*
 * avx_to_cpu
 *
 * In:    %rdi = source buffer, 16 * 32 bytes are read.
 * Loads %ymm0..%ymm15 from consecutive 32-byte slots, %ymm0 first.
 * Unaligned loads are used, so the buffer needs no special alignment.
 * Clobb: %ymm0..%ymm15 (that is the purpose of the routine).
 * NOTE(review): no vzeroupper is issued before returning; presumably
 * intentional so the loaded upper halves stay live -- confirm.
 */
	.globl	avx_to_cpu
	.type	avx_to_cpu, @function
avx_to_cpu:
	vmovdqu	(%rdi), %ymm0
	vmovdqu	1 * 32(%rdi), %ymm1
	vmovdqu	2 * 32(%rdi), %ymm2
	vmovdqu	3 * 32(%rdi), %ymm3
	vmovdqu	4 * 32(%rdi), %ymm4
	vmovdqu	5 * 32(%rdi), %ymm5
	vmovdqu	6 * 32(%rdi), %ymm6
	vmovdqu	7 * 32(%rdi), %ymm7
	vmovdqu	8 * 32(%rdi), %ymm8
	vmovdqu	9 * 32(%rdi), %ymm9
	vmovdqu	10 * 32(%rdi), %ymm10
	vmovdqu	11 * 32(%rdi), %ymm11
	vmovdqu	12 * 32(%rdi), %ymm12
	vmovdqu	13 * 32(%rdi), %ymm13
	vmovdqu	14 * 32(%rdi), %ymm14
	vmovdqu	15 * 32(%rdi), %ymm15
	retq

	.size	avx_to_cpu, . - avx_to_cpu

	.section	.note.GNU-stack,"",@progbits