xref: /freebsd/tools/test/xregs_sig/c2x2c_amd64.S (revision 5e3190f700637fcfc1a52daeaa4a031fdd2557c7)
1/*
2 * This file is in public domain.
3 * Written by Dmitry Chagin <dchagin@FreeBSD.org>
4 */
5
/*
 * Feature-bit constants consumed by xregs_banks_max.  On FreeBSD they come
 * from <machine/specialreg.h>; on any other system the handful that this
 * file needs are defined locally.
 */
#if !defined(__FreeBSD__)
/* Flags tested in %ecx after CPUID leaf 1. */
#define	CPUID2_OSXSAVE			0x08000000
#define	CPUID2_AVX			0x10000000
/* Flags tested in the low half of XCR0 (as returned by xgetbv). */
#define	XFEATURE_ENABLED_X87		0x00000001
#define	XFEATURE_ENABLED_SSE		0x00000002
#define	XFEATURE_ENABLED_AVX		0x00000004
/* The complete set of XCR0 bits xregs_banks_max insists on for AVX. */
#define	XFEATURE_AVX					\
    (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
#else
#include <machine/specialreg.h>
#endif
17
18	.text
19
/*
 * xregs_banks_max
 *
 * Reports whether AVX register state is usable.  Two conditions must both
 * hold: CPUID leaf 1 sets CPUID2_AVX and CPUID2_OSXSAVE in %ecx, and XCR0
 * (read with xgetbv, %ecx = 0) has every bit of XFEATURE_AVX set.
 *
 * In:    nothing
 * Out:   %eax = 1 when both conditions hold, 0 otherwise
 *        (presumably the index of the highest usable register bank,
 *        0 = XMM only, 1 = YMM -- confirm against the caller)
 * Clobb: %rcx, %rdx, flags.  %rbx is saved and restored.
 */
	.globl	xregs_banks_max
	.type	xregs_banks_max, @function
xregs_banks_max:
	pushq	%rbx			/* cpuid overwrites %ebx */

	/* Step 1: the CPU must advertise AVX and OSXSAVE. */
	movl	$1, %eax		/* CPUID leaf 1 */
	cpuid
	andl	$(CPUID2_OSXSAVE | CPUID2_AVX), %ecx
	cmpl	$(CPUID2_OSXSAVE | CPUID2_AVX), %ecx
	jne	.Lxbm_sse_only

	/* Step 2: XCR0 must have all of the XFEATURE_AVX bits set. */
	xorl	%ecx, %ecx		/* select XCR0 */
	xgetbv				/* %edx:%eax = XCR0 */
	andl	$XFEATURE_AVX, %eax
	cmpl	$XFEATURE_AVX, %eax
	jne	.Lxbm_sse_only

	/* Both checks passed. */
	movl	$1, %eax
	popq	%rbx
	retq

.Lxbm_sse_only:
	/* Either check failed. */
	xorl	%eax, %eax
	popq	%rbx
	retq

	.size	xregs_banks_max, . - xregs_banks_max
43
44
/*
 * cpu_to_xmm
 *
 * Stores %xmm0-%xmm15 into the buffer addressed by %rdi.  Register N is
 * written at byte offset N * 16, so exactly 16 * 16 = 256 bytes are
 * written.  movdqu is an unaligned store, so the buffer needs no
 * particular alignment.
 *
 * In:    %rdi = destination buffer (at least 256 bytes)
 * Out:   nothing
 * Clobb: no registers; only the 256 bytes starting at (%rdi)
 */
	.globl	cpu_to_xmm
	.type	cpu_to_xmm, @function
cpu_to_xmm:
	/* One store per register, emitted in ascending register order. */
	.irp	idx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	movdqu	%xmm\idx, \idx * 16(%rdi)
	.endr
	retq

	.size	cpu_to_xmm, . - cpu_to_xmm
67
68
/*
 * xmm_to_cpu
 *
 * Loads %xmm0-%xmm15 from the buffer addressed by %rdi.  Register N is
 * read from byte offset N * 16, so 16 * 16 = 256 bytes are consumed.
 * movdqu is an unaligned load, so the buffer needs no particular
 * alignment.
 *
 * In:    %rdi = source buffer (at least 256 bytes)
 * Out:   %xmm0-%xmm15 hold the buffer contents
 * Clobb: %xmm0-%xmm15 (by design); memory is only read
 */
	.globl	xmm_to_cpu
	.type	xmm_to_cpu, @function
xmm_to_cpu:
	/* One load per register, emitted in ascending register order. */
	.irp	idx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	movdqu	\idx * 16(%rdi), %xmm\idx
	.endr
	retq

	.size	xmm_to_cpu, . - xmm_to_cpu
91
92
/*
 * cpu_to_avx
 *
 * Stores %ymm0-%ymm15 into the buffer addressed by %rdi.  Register N is
 * written at byte offset N * 32, so exactly 16 * 32 = 512 bytes are
 * written.  vmovdqu is an unaligned store, so the buffer needs no
 * particular alignment.
 *
 * In:    %rdi = destination buffer (at least 512 bytes)
 * Out:   nothing
 * Clobb: no registers; only the 512 bytes starting at (%rdi)
 */
	.globl	cpu_to_avx
	.type	cpu_to_avx, @function
cpu_to_avx:
	/* One store per register, emitted in ascending register order. */
	.irp	idx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	vmovdqu	%ymm\idx, \idx * 32(%rdi)
	.endr
	retq

	.size	cpu_to_avx, . - cpu_to_avx
115
116
/*
 * avx_to_cpu
 *
 * Loads %ymm0-%ymm15 from the buffer addressed by %rdi.  Register N is
 * read from byte offset N * 32, so 16 * 32 = 512 bytes are consumed.
 * vmovdqu is an unaligned load, so the buffer needs no particular
 * alignment.  No vzeroupper is issued: the full 256-bit contents stay
 * in the registers on return.
 *
 * In:    %rdi = source buffer (at least 512 bytes)
 * Out:   %ymm0-%ymm15 hold the buffer contents
 * Clobb: %ymm0-%ymm15 (by design); memory is only read
 */
	.globl	avx_to_cpu
	.type	avx_to_cpu, @function
avx_to_cpu:
	/* One load per register, emitted in ascending register order. */
	.irp	idx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	vmovdqu	\idx * 32(%rdi), %ymm\idx
	.endr
	retq

	.size	avx_to_cpu, . - avx_to_cpu
139
	/*
	 * Empty .note.GNU-stack section with no flags: by GNU toolchain
	 * convention this marks the object as not needing an executable stack.
	 */
	.section        .note.GNU-stack,"",@progbits
141