/*
 * This file is in public domain.
 * Written by Dmitry Chagin <dchagin@FreeBSD.org>
 */

/*
 * Helpers for moving the SSE/AVX register file to and from memory.
 *
 * Syntax: GNU as, AT&T operand order, preprocessed by cpp (use C-style
 * comments only).  All routines take their single pointer argument in
 * %rdi and return with retq; none of them touches the stack except
 * xregs_banks_max, which saves and restores %rbx.
 */

#if defined(__FreeBSD__)
#include <machine/specialreg.h>
#else
/* CPUID leaf 1, %ecx feature bits. */
#define	CPUID2_OSXSAVE		0x08000000
#define	CPUID2_AVX		0x10000000
/* XCR0 state-component bits as read by xgetbv with %ecx = 0. */
#define	XFEATURE_ENABLED_X87	0x00000001
#define	XFEATURE_ENABLED_SSE	0x00000002
#define	XFEATURE_ENABLED_AVX	0x00000004
#define	XFEATURE_AVX					\
	(XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
#endif

	.text

/*
 * xregs_banks_max
 *
 * Out:   %eax = 1 if CPUID leaf 1 reports both AVX and OSXSAVE and XCR0
 *               has every bit of XFEATURE_AVX set; 0 otherwise.
 * Clobb: %ecx, %edx, flags.  %rbx is overwritten by cpuid and therefore
 *        saved/restored around it.
 */
	.globl	xregs_banks_max
	.type	xregs_banks_max, @function
xregs_banks_max:
	pushq	%rbx				/* cpuid overwrites %ebx */
	movl	$1, %eax			/* CPUID leaf 1: feature flags */
	cpuid
	/* Both bits are required: xgetbv is only usable with OSXSAVE set. */
	andl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
	cmpl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
	jne	.Lno_avx
	xorl	%ecx, %ecx			/* select XCR0 */
	xgetbv					/* %edx:%eax = XCR0 */
	andl	$XFEATURE_AVX, %eax
	cmpl	$XFEATURE_AVX, %eax		/* all required state enabled? */
	jne	.Lno_avx
	movl	$1, %eax
	jmp	.Ldone
	/*
	 * .L-prefixed labels stay assembler-local, so these branch targets
	 * do not show up as symbols inside xregs_banks_max.
	 */
.Lno_avx:
	xorl	%eax, %eax
.Ldone:
	popq	%rbx
	retq

	.size	xregs_banks_max, . - xregs_banks_max


/*
 * cpu_to_xmm
 *
 * In:    %rdi = destination buffer, 16 * 16 = 256 bytes, any alignment
 *               (movdqu is used).
 * Out:   buffer[i * 16 .. i * 16 + 15] = %xmm<i> for i = 0..15.
 * Clobb: none (registers and flags are left unchanged).
 */
	.globl	cpu_to_xmm
	.type	cpu_to_xmm, @function
cpu_to_xmm:
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm1, 1 * 16(%rdi)
	movdqu	%xmm2, 2 * 16(%rdi)
	movdqu	%xmm3, 3 * 16(%rdi)
	movdqu	%xmm4, 4 * 16(%rdi)
	movdqu	%xmm5, 5 * 16(%rdi)
	movdqu	%xmm6, 6 * 16(%rdi)
	movdqu	%xmm7, 7 * 16(%rdi)
	movdqu	%xmm8, 8 * 16(%rdi)
	movdqu	%xmm9, 9 * 16(%rdi)
	movdqu	%xmm10, 10 * 16(%rdi)
	movdqu	%xmm11, 11 * 16(%rdi)
	movdqu	%xmm12, 12 * 16(%rdi)
	movdqu	%xmm13, 13 * 16(%rdi)
	movdqu	%xmm14, 14 * 16(%rdi)
	movdqu	%xmm15, 15 * 16(%rdi)
	retq

	.size	cpu_to_xmm, . - cpu_to_xmm


/*
 * xmm_to_cpu
 *
 * In:    %rdi = source buffer, 16 * 16 = 256 bytes, any alignment.
 * Out:   %xmm<i> = buffer[i * 16 .. i * 16 + 15] for i = 0..15.
 * Clobb: %xmm0-%xmm15 (that is the purpose of the routine).
 */
	.globl	xmm_to_cpu
	.type	xmm_to_cpu, @function
xmm_to_cpu:
	movdqu	(%rdi), %xmm0
	movdqu	1 * 16(%rdi), %xmm1
	movdqu	2 * 16(%rdi), %xmm2
	movdqu	3 * 16(%rdi), %xmm3
	movdqu	4 * 16(%rdi), %xmm4
	movdqu	5 * 16(%rdi), %xmm5
	movdqu	6 * 16(%rdi), %xmm6
	movdqu	7 * 16(%rdi), %xmm7
	movdqu	8 * 16(%rdi), %xmm8
	movdqu	9 * 16(%rdi), %xmm9
	movdqu	10 * 16(%rdi), %xmm10
	movdqu	11 * 16(%rdi), %xmm11
	movdqu	12 * 16(%rdi), %xmm12
	movdqu	13 * 16(%rdi), %xmm13
	movdqu	14 * 16(%rdi), %xmm14
	movdqu	15 * 16(%rdi), %xmm15
	retq

	.size	xmm_to_cpu, . - xmm_to_cpu


/*
 * cpu_to_avx
 *
 * In:    %rdi = destination buffer, 16 * 32 = 512 bytes, any alignment
 *               (vmovdqu is used).
 * Out:   buffer[i * 32 .. i * 32 + 31] = %ymm<i> for i = 0..15.
 * Clobb: none.
 *
 * Uses VEX-encoded instructions; NOTE(review): callers presumably gate
 * this on xregs_banks_max() returning 1 -- confirm at the call sites.
 */
	.globl	cpu_to_avx
	.type	cpu_to_avx, @function
cpu_to_avx:
	vmovdqu	%ymm0, (%rdi)
	vmovdqu	%ymm1, 1 * 32(%rdi)
	vmovdqu	%ymm2, 2 * 32(%rdi)
	vmovdqu	%ymm3, 3 * 32(%rdi)
	vmovdqu	%ymm4, 4 * 32(%rdi)
	vmovdqu	%ymm5, 5 * 32(%rdi)
	vmovdqu	%ymm6, 6 * 32(%rdi)
	vmovdqu	%ymm7, 7 * 32(%rdi)
	vmovdqu	%ymm8, 8 * 32(%rdi)
	vmovdqu	%ymm9, 9 * 32(%rdi)
	vmovdqu	%ymm10, 10 * 32(%rdi)
	vmovdqu	%ymm11, 11 * 32(%rdi)
	vmovdqu	%ymm12, 12 * 32(%rdi)
	vmovdqu	%ymm13, 13 * 32(%rdi)
	vmovdqu	%ymm14, 14 * 32(%rdi)
	vmovdqu	%ymm15, 15 * 32(%rdi)
	retq

	.size	cpu_to_avx, . - cpu_to_avx


/*
 * avx_to_cpu
 *
 * In:    %rdi = source buffer, 16 * 32 = 512 bytes, any alignment.
 * Out:   %ymm<i> = buffer[i * 32 .. i * 32 + 31] for i = 0..15.
 * Clobb: %ymm0-%ymm15 (that is the purpose of the routine).
 *
 * No vzeroupper is issued before returning, so the upper 128 bits that
 * were just loaded are still live in the registers when the caller
 * resumes.
 */
	.globl	avx_to_cpu
	.type	avx_to_cpu, @function
avx_to_cpu:
	vmovdqu	(%rdi), %ymm0
	vmovdqu	1 * 32(%rdi), %ymm1
	vmovdqu	2 * 32(%rdi), %ymm2
	vmovdqu	3 * 32(%rdi), %ymm3
	vmovdqu	4 * 32(%rdi), %ymm4
	vmovdqu	5 * 32(%rdi), %ymm5
	vmovdqu	6 * 32(%rdi), %ymm6
	vmovdqu	7 * 32(%rdi), %ymm7
	vmovdqu	8 * 32(%rdi), %ymm8
	vmovdqu	9 * 32(%rdi), %ymm9
	vmovdqu	10 * 32(%rdi), %ymm10
	vmovdqu	11 * 32(%rdi), %ymm11
	vmovdqu	12 * 32(%rdi), %ymm12
	vmovdqu	13 * 32(%rdi), %ymm13
	vmovdqu	14 * 32(%rdi), %ymm14
	vmovdqu	15 * 32(%rdi), %ymm15
	retq

	.size	avx_to_cpu, . - avx_to_cpu

	/* Mark the object as not requiring an executable stack. */
	.section	.note.GNU-stack,"",@progbits