xref: /freebsd/tools/test/xregs_sig/c2x2c_amd64.S (revision b3e7694832e81d7a904a10f525f8797b753bf0d3)
/*
 * This file is in public domain.
 * Written by Dmitry Chagin <dchagin@FreeBSD.org>
 */
58fdc9ce9SDmitry Chagin
/*
 * CPUID and XCR0 feature bits tested by xregs_banks_max.  On FreeBSD they
 * are taken from the system header; on other systems the subset used in
 * this file is defined locally.
 */
#ifdef __FreeBSD__
#include <machine/specialreg.h>
#else
/* Bits tested in %ecx after CPUID leaf 1. */
#define	CPUID2_OSXSAVE			0x08000000
#define	CPUID2_AVX			0x10000000
/* Bits tested in %eax after XGETBV with %ecx = 0. */
#define	XFEATURE_ENABLED_X87		0x00000001
#define	XFEATURE_ENABLED_SSE		0x00000002
#define	XFEATURE_ENABLED_AVX		0x00000004
/* Every state bit that must be enabled together for the AVX check to pass. */
#define	XFEATURE_AVX	(XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
#endif
17*c8dbef44SDmitry Chagin
180be13a45SDmitry Chagin	.text
190be13a45SDmitry Chagin
/*
 * xregs_banks_max: probe whether AVX state is available and enabled.
 *
 * Takes no arguments.  Result in %eax:
 *   1  CPUID leaf 1 reports both AVX and OSXSAVE in %ecx, and XCR0 has
 *      the x87, SSE and AVX state bits all set;
 *   0  otherwise.
 *
 * Clobbers %ecx, %edx and the flags.  %rbx is saved and restored.
 * Presumably declared to C callers as `int xregs_banks_max(void)` -
 * confirm against the C side of the test.
 */
	.globl xregs_banks_max
	.type xregs_banks_max, @function
xregs_banks_max:
	pushq	%rbx			/* cpuid overwrites callee-saved %rbx */
	movl	$1, %eax		/* CPUID leaf 1: feature flags */
	cpuid
	/* Both bits are required: xgetbv is only usable with OSXSAVE set. */
	andl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
	cmpl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
	jne	.Lxregs_sse
	xorl	%ecx, %ecx		/* select XCR0 */
	xgetbv				/* %edx:%eax = XCR0 */
	andl	$XFEATURE_AVX, %eax
	cmpl	$XFEATURE_AVX, %eax
	jne	.Lxregs_sse
	movl	$1, %eax
	jmp	.Lxregs_out
	/*
	 * .L-prefixed labels stay local to the assembler, so the branch
	 * targets do not appear as symbols in the object file.
	 */
.Lxregs_sse:
	xorl	%eax, %eax
.Lxregs_out:
	popq	%rbx
	retq

	.size xregs_banks_max, . - xregs_banks_max
43*c8dbef44SDmitry Chagin
44*c8dbef44SDmitry Chagin
/*
 * cpu_to_xmm: store %xmm0-%xmm15 into the buffer addressed by %rdi.
 *
 * Register N is written at byte offset N * 16, so 256 bytes are written
 * in total.  The stores use movdqu, which does not require the buffer
 * to be aligned.  No registers are modified.
 */
	.globl cpu_to_xmm
	.type cpu_to_xmm, @function
cpu_to_xmm:
	/* Expands to one movdqu store per register, in ascending order. */
	.irp	n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	movdqu	%xmm\n, \n * 16(%rdi)
	.endr
	retq

	.size cpu_to_xmm, . - cpu_to_xmm
678fdc9ce9SDmitry Chagin
688fdc9ce9SDmitry Chagin
/*
 * xmm_to_cpu: load %xmm0-%xmm15 from the buffer addressed by %rdi.
 *
 * Register N is read from byte offset N * 16, so 256 bytes are read in
 * total.  The loads use movdqu, which does not require the buffer to be
 * aligned.  All sixteen %xmm registers are overwritten on return.
 */
	.globl xmm_to_cpu
	.type xmm_to_cpu, @function
xmm_to_cpu:
	/* Expands to one movdqu load per register, in ascending order. */
	.irp	n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	movdqu	\n * 16(%rdi), %xmm\n
	.endr
	retq

	.size xmm_to_cpu, . - xmm_to_cpu
918fdc9ce9SDmitry Chagin
92*c8dbef44SDmitry Chagin
/*
 * cpu_to_avx: store %ymm0-%ymm15 into the buffer addressed by %rdi.
 *
 * Register N is written at byte offset N * 32, so 512 bytes are written
 * in total.  The stores use vmovdqu, which does not require the buffer
 * to be aligned.  No registers are modified.
 */
	.globl cpu_to_avx
	.type cpu_to_avx, @function
cpu_to_avx:
	/* Expands to one vmovdqu store per register, in ascending order. */
	.irp	n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	vmovdqu	%ymm\n, \n * 32(%rdi)
	.endr
	retq

	.size cpu_to_avx, . - cpu_to_avx
115*c8dbef44SDmitry Chagin
116*c8dbef44SDmitry Chagin
/*
 * avx_to_cpu: load %ymm0-%ymm15 from the buffer addressed by %rdi.
 *
 * Register N is read from byte offset N * 32, so 512 bytes are read in
 * total.  The loads use vmovdqu, which does not require the buffer to be
 * aligned.  All sixteen %ymm registers are overwritten on return.
 */
	.globl avx_to_cpu
	.type avx_to_cpu, @function
avx_to_cpu:
	/* Expands to one vmovdqu load per register, in ascending order. */
	.irp	n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	vmovdqu	\n * 32(%rdi), %ymm\n
	.endr
	retq

	.size avx_to_cpu, . - avx_to_cpu
139*c8dbef44SDmitry Chagin
	/* Empty .note.GNU-stack section: tells the linker this object does not need an executable stack. */
	.section        .note.GNU-stack,"",@progbits
141