xref: /illumos-gate/usr/src/test/util-tests/tests/dis/i386/64.bf16.s (revision f334afcfaebea1b7dc3430015651d8d748fa8a3e)
1/*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source.  A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12/*
13 * Copyright 2024 Oxide Computer Company
14 */
15
16/*
17 * Brain floating point (BF16) instructions; currently AVX only (i.e. no AMX).
18 *
19 * For the vcvtneps2bf16 instruction, gas sometimes has a variant with 'x' or
20 * 'y' appended, which appears to indicate the size of the memory operand,
21 * particularly for broadcasts and related. While we use those (as there's no
22 * other way to get that), dis currently does not break these apart.
23 */
24
/*
 * libdis_test: a straight-line list of vcvtne2ps2bf16, vcvtneps2bf16 and
 * vdpbf16ps instruction forms placed under a single global function symbol.
 * There is no control flow and no return instruction in this block;
 * presumably it is only assembled and then disassembled by the dis tests and
 * never executed -- TODO confirm against the test harness.
 *
 * Each group below walks the same pattern for one vector width: register
 * operands (plain, with an opmask {%kN}, and with an opmask plus {z}), then
 * memory operands (base, base + displacement, base + index * scale +
 * displacement), then embedded-broadcast ({1toN}) memory operands.
 */
25.text
26.align 16
27.globl libdis_test
28.type libdis_test, @function
29libdis_test:
	/* vcvtne2ps2bf16, %xmm operands; broadcasts use {1to4}. */
30	vcvtne2ps2bf16	%xmm0, %xmm1, %xmm2
31	vcvtne2ps2bf16	%xmm3, %xmm4, %xmm5{%k1}
32	vcvtne2ps2bf16	%xmm6, %xmm7, %xmm8{%k2}{z}
33	vcvtne2ps2bf16	(%rax), %xmm1, %xmm2
34	vcvtne2ps2bf16	0x77(%rbx), %xmm1, %xmm2
35	vcvtne2ps2bf16	0x77(%rcx,%rdx,4), %xmm1, %xmm2
36	vcvtne2ps2bf16	0x7777(%r10){1to4}, %xmm1, %xmm2
37	vcvtne2ps2bf16	0x7777(%r10){1to4}, %xmm1, %xmm2{%k3}
38	vcvtne2ps2bf16	0x7777(%r10){1to4}, %xmm1, %xmm2{%k3}{z}
39
	/* vcvtne2ps2bf16, %ymm operands; the broadcast uses {1to8}. */
40	vcvtne2ps2bf16	%ymm0, %ymm1, %ymm2
41	vcvtne2ps2bf16	%ymm3, %ymm4, %ymm5{%k1}
42	vcvtne2ps2bf16	%ymm6, %ymm7, %ymm8{%k2}{z}
43	vcvtne2ps2bf16	(%rax), %ymm1, %ymm2
44	vcvtne2ps2bf16	0x77(%rbx), %ymm1, %ymm2
45	vcvtne2ps2bf16	0x77(%rcx,%rdx,4), %ymm1, %ymm2
46	vcvtne2ps2bf16	0x7777(%r10){1to8}, %ymm1, %ymm2
47
	/* vcvtne2ps2bf16, %zmm operands; the broadcast uses {1to16}. */
48	vcvtne2ps2bf16	%zmm0, %zmm1, %zmm2
49	vcvtne2ps2bf16	%zmm3, %zmm4, %zmm5{%k1}
50	vcvtne2ps2bf16	%zmm6, %zmm7, %zmm8{%k2}{z}
51	vcvtne2ps2bf16	(%rax), %zmm1, %zmm2
52	vcvtne2ps2bf16	0x77(%rbx), %zmm1, %zmm2
53	vcvtne2ps2bf16	0x77(%rcx,%rdx,4), %zmm1, %zmm2
54	vcvtne2ps2bf16	0x7777(%r10){1to16}, %zmm1, %zmm2
55
	/*
	 * vcvtneps2bf16, %xmm source to %xmm destination. The non-broadcast
	 * memory forms carry an explicit 'x' suffix; the register and
	 * broadcast forms do not.
	 *
	 * NOTE(review): the masked memory form here uses a positive 0x66
	 * displacement, while the corresponding lines in the next two groups
	 * use -0x66 -- confirm this is intentional.
	 *
	 * NOTE(review): the last line of this group is a {1to8} broadcast
	 * with an %xmm destination, the same shape as the broadcasts in the
	 * following group rather than the {1to4} ones here -- confirm the
	 * placement is intentional.
	 */
56	vcvtneps2bf16	%xmm0, %xmm1
57	vcvtneps2bf16	%xmm2, %xmm3{%k4}
58	vcvtneps2bf16	%xmm5, %xmm6{%k7}{z}
59	vcvtneps2bf16x	(%r10), %xmm27
60	vcvtneps2bf16x	0x88(%rbx), %xmm6
61	vcvtneps2bf16x	0x88(%rbx,%rcx,4), %xmm5
62	vcvtneps2bf16x	0x66(%rbx,%rcx,4), %xmm5{%k3}
63	vcvtneps2bf16	(%r11){1to4}, %xmm16
64	vcvtneps2bf16	(%r11){1to4}, %xmm16{%k6}
65	vcvtneps2bf16	(%r10){1to8}, %xmm16
66
	/*
	 * vcvtneps2bf16, %ymm source to %xmm destination. The non-broadcast
	 * memory forms carry an explicit 'y' suffix; broadcasts use {1to8}.
	 */
67	vcvtneps2bf16	%ymm0, %xmm1
68	vcvtneps2bf16	%ymm2, %xmm3{%k4}
69	vcvtneps2bf16	%ymm5, %xmm6{%k7}{z}
70	vcvtneps2bf16y	(%r10), %xmm27
71	vcvtneps2bf16y	0x88(%rbx), %xmm6
72	vcvtneps2bf16y	0x88(%rbx,%rcx,4), %xmm5
73	vcvtneps2bf16y	-0x66(%rbx,%rcx,4), %xmm5{%k3}
74	vcvtneps2bf16	(%r11){1to8}, %xmm16
75	vcvtneps2bf16	(%r11){1to8}, %xmm16{%k6}
76
	/*
	 * vcvtneps2bf16, %zmm source to %ymm destination. No mnemonic suffix
	 * is used on the memory forms here; broadcasts use {1to16}.
	 */
77	vcvtneps2bf16	%zmm0, %ymm1
78	vcvtneps2bf16	%zmm2, %ymm3{%k4}
79	vcvtneps2bf16	%zmm5, %ymm6{%k7}{z}
80	vcvtneps2bf16	(%r10), %ymm27
81	vcvtneps2bf16	0x88(%rbx), %ymm6
82	vcvtneps2bf16	0x88(%rbx,%rcx,4), %ymm5
83	vcvtneps2bf16	-0x66(%rbx,%rcx,4), %ymm5{%k3}
84	vcvtneps2bf16	(%r11){1to16}, %ymm16
85	vcvtneps2bf16	(%r11){1to16}, %ymm16{%k6}
86
	/* vdpbf16ps, %xmm operands; broadcasts use {1to4}. */
87	vdpbf16ps	%xmm0, %xmm1, %xmm2
88	vdpbf16ps	%xmm3, %xmm4, %xmm5{%k1}
89	vdpbf16ps	%xmm6, %xmm7, %xmm8{%k2}{z}
90	vdpbf16ps	(%rax), %xmm1, %xmm2
91	vdpbf16ps	0x34(%rbx), %xmm1, %xmm2
92	vdpbf16ps	0x43(%rcx,%rdx,4), %xmm1, %xmm2
93	vdpbf16ps	0x7777(%r10){1to4}, %xmm1, %xmm2
94	vdpbf16ps	0x5555(%r10){1to4}, %xmm1, %xmm2{%k3}
95	vdpbf16ps	0x7777(%r10){1to4}, %xmm1, %xmm2{%k3}{z}
96
	/* vdpbf16ps, %ymm operands; the broadcast uses {1to8}. */
97	vdpbf16ps	%ymm0, %ymm1, %ymm2
98	vdpbf16ps	%ymm3, %ymm4, %ymm5{%k1}
99	vdpbf16ps	%ymm6, %ymm7, %ymm8{%k2}{z}
100	vdpbf16ps	(%rax), %ymm1, %ymm2
101	vdpbf16ps	0x43(%rbx), %ymm1, %ymm2
102	vdpbf16ps	0x34(%rcx,%rdx,4), %ymm1, %ymm2
103	vdpbf16ps	0x7777(%r10){1to8}, %ymm1, %ymm2
104
	/* vdpbf16ps, %zmm operands; the broadcast uses {1to16}. */
105	vdpbf16ps	%zmm0, %zmm1, %zmm2
106	vdpbf16ps	%zmm3, %zmm4, %zmm5{%k1}
107	vdpbf16ps	%zmm6, %zmm7, %zmm8{%k2}{z}
108	vdpbf16ps	(%rax), %zmm1, %zmm2
109	vdpbf16ps	0x43(%rbx), %zmm1, %zmm2
110	vdpbf16ps	0x34(%rcx,%rdx,4), %zmm1, %zmm2
111	vdpbf16ps	0x6666(%r10){1to16}, %zmm1, %zmm2
/* Set the symbol size to the distance from libdis_test to this point. */
112.size libdis_test, [.-libdis_test]
113