1/*- 2* The white paper of AES-NI instructions can be downloaded from: 3 * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf 4 * 5 * Copyright (C) 2008-2010, Intel Corporation 6 * Author: Huang Ying <ying.huang@intel.com> 7 * Vinodh Gopal <vinodh.gopal@intel.com> 8 * Kahraman Akdemir 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 17 * - Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the 20 * distribution. 21 * 22 * - Neither the name of Intel Corporation nor the names of its 23 * contributors may be used to endorse or promote products 24 * derived from this software without specific prior written 25 * permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 31 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 32 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 33 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 34 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 35 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include <machine/asmacros.h>

/*
 * AES key-schedule routines for i386 using AES-NI (GNU as, AT&T syntax,
 * arguments passed on the stack).  AESKEYGENASSIST and AESIMC are emitted as
 * raw .byte sequences; the intended mnemonic is given in the comment directly
 * above each encoding.
 *
 * Lane notation used below: a 128-bit register is written (d0, d1, d2, d3)
 * with d0 the least significant dword.
 */

	.text

/*
 * _key_expansion_128 / _key_expansion_256a
 *
 * In:    %xmm0 = previous round key (w0, w1, w2, w3)
 *        %xmm1 = AESKEYGENASSIST result for this round
 *        %xmm4 = scratch; dword 0 must be zero on entry and is still zero
 *                on exit, so back-to-back calls need no re-initialisation
 *        %edx  = destination slot (movaps store: must be 16-byte aligned)
 * Out:   %xmm0 = new round key, also stored at (%edx)
 *        %edx  = advanced by 0x10
 * Clobb: %xmm1, %xmm4, flags
 */
ENTRY(_key_expansion_128)
_key_expansion_256a:
	.cfi_startproc
	pshufd	$0b11111111,%xmm1,%xmm1	/* broadcast dword 3 of xmm1 */
	shufps	$0b00010000,%xmm0,%xmm4	/* xmm4 = (0, 0, w1, w0) */
	pxor	%xmm4,%xmm0		/* xmm0 = (w0, w1, w1^w2, w0^w3) */
	shufps	$0b10001100,%xmm0,%xmm4	/* xmm4 = (0, w0, w0, w1^w2) */
	pxor	%xmm4,%xmm0		/* xmm0 = running XOR of w0..w3 */
	pxor	%xmm1,%xmm0		/* fold in the broadcast assist word */
	movaps	%xmm0,(%edx)
	addl	$0x10,%edx
	retl
	.cfi_endproc
END(_key_expansion_128)

/*
 * _key_expansion_192a
 *
 * In:    %xmm0 = low 128 bits of the previous key state
 *        %xmm2 = remaining 64 bits of the key state (in dwords 0-1)
 *        %xmm1 = AESKEYGENASSIST result for this round
 *        %xmm4 = scratch; dword 0 must be zero on entry (unchanged on exit)
 *        %edx  = destination (two movaps stores: must be 16-byte aligned)
 * Out:   %xmm0, %xmm2 = updated key state
 *        32 bytes stored at (%edx) and 0x10(%edx); %edx advanced by 0x20
 * Clobb: %xmm1, %xmm3, %xmm4, %xmm5, %xmm6, flags
 */
ENTRY(_key_expansion_192a)
	.cfi_startproc
	pshufd	$0b01010101,%xmm1,%xmm1	/* broadcast dword 1 of xmm1 */
	shufps	$0b00010000,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	shufps	$0b10001100,%xmm0,%xmm4
	pxor	%xmm4,%xmm0		/* xmm0 = running XOR of its dwords */
	pxor	%xmm1,%xmm0		/* xmm0 = new low 128 bits */

	movaps	%xmm2,%xmm5
	movaps	%xmm2,%xmm6		/* xmm6 = old tail, needed for the store */
	pslldq	$4,%xmm5		/* xmm5 = old tail shifted up one dword */
	pshufd	$0b11111111,%xmm0,%xmm3	/* broadcast dword 3 of the new xmm0 */
	pxor	%xmm3,%xmm2
	pxor	%xmm5,%xmm2		/* xmm2 = new tail */

	movaps	%xmm0,%xmm1
	shufps	$0b01000100,%xmm0,%xmm6	/* (old tail d0, d1, new xmm0 d0, d1) */
	movaps	%xmm6,(%edx)
	shufps	$0b01001110,%xmm2,%xmm1	/* (new xmm0 d2, d3, new tail d0, d1) */
	movaps	%xmm1,0x10(%edx)
	addl	$0x20,%edx
	retl
	.cfi_endproc
END(_key_expansion_192a)

/*
 * _key_expansion_192b
 *
 * Same state update as _key_expansion_192a, but only the new %xmm0 is
 * written out.
 *
 * In:    %xmm0, %xmm2, %xmm1, %xmm4, %edx as for _key_expansion_192a
 * Out:   %xmm0, %xmm2 = updated key state
 *        16 bytes stored at (%edx); %edx advanced by 0x10
 * Clobb: %xmm1, %xmm3, %xmm4, %xmm5, flags
 */
ENTRY(_key_expansion_192b)
	.cfi_startproc
	pshufd	$0b01010101,%xmm1,%xmm1	/* broadcast dword 1 of xmm1 */
	shufps	$0b00010000,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	shufps	$0b10001100,%xmm0,%xmm4
	pxor	%xmm4,%xmm0		/* xmm0 = running XOR of its dwords */
	pxor	%xmm1,%xmm0		/* xmm0 = new low 128 bits */

	movaps	%xmm2,%xmm5
	pslldq	$4,%xmm5		/* xmm5 = old tail shifted up one dword */
	pshufd	$0b11111111,%xmm0,%xmm3	/* broadcast dword 3 of the new xmm0 */
	pxor	%xmm3,%xmm2
	pxor	%xmm5,%xmm2		/* xmm2 = new tail */

	movaps	%xmm0,(%edx)
	addl	$0x10,%edx
	retl
	.cfi_endproc
END(_key_expansion_192b)

/*
 * _key_expansion_256b
 *
 * Counterpart of _key_expansion_256a that updates %xmm2 instead of %xmm0.
 *
 * In:    %xmm2 = key half to update
 *        %xmm1 = AESKEYGENASSIST result for this round
 *        %xmm4 = scratch; dword 0 must be zero on entry (unchanged on exit)
 *        %edx  = destination slot (movaps store: must be 16-byte aligned)
 * Out:   %xmm2 = new round key, also stored at (%edx)
 *        %edx  = advanced by 0x10
 * Clobb: %xmm1, %xmm4, flags
 */
ENTRY(_key_expansion_256b)
	.cfi_startproc
	pshufd	$0b10101010,%xmm1,%xmm1	/* broadcast dword 2 of xmm1 */
	shufps	$0b00010000,%xmm2,%xmm4
	pxor	%xmm4,%xmm2
	shufps	$0b10001100,%xmm2,%xmm4
	pxor	%xmm4,%xmm2		/* xmm2 = running XOR of its dwords */
	pxor	%xmm1,%xmm2		/* fold in the broadcast assist word */
	movaps	%xmm2,(%edx)
	addl	$0x10,%edx
	retl
	.cfi_endproc
END(_key_expansion_256b)

/*
 * aesni_set_enckey -- expand a user key into the encryption round keys.
 *
 * Stack arguments, read through %ebp once the frame is set up:
 *    8(%ebp)  pointer to the user key (read with movups/movq, so no
 *             alignment is required)
 *   12(%ebp)  pointer to the output schedule (written with movaps, so it
 *             must be 16-byte aligned)
 *   16(%ebp)  number of rounds, compared unsigned against 12:
 *               below 12 -> 128-bit path, writes 11 * 16 bytes
 *               equal 12 -> 192-bit path, writes 13 * 16 bytes
 *               above 12 -> 256-bit path, writes 15 * 16 bytes
 * NOTE(review): presumably declared in C as
 *   void aesni_set_enckey(const uint8_t *userkey, uint8_t *schedule,
 *       int rounds);
 * confirm against the header used by the callers.
 *
 * Clobb: %ecx, %edx, %xmm0-%xmm6, flags
 *
 * CFI: the routine has three separate epilogues.  Each one that is followed
 * by more code of this routine is wrapped in .cfi_remember_state /
 * .cfi_restore_state so that the following path is described with the frame
 * still live, and the CFA adjustment is placed after the `leave` that
 * actually moves %esp.
 */
ENTRY(aesni_set_enckey)
	.cfi_startproc
	pushl	%ebp
	.cfi_adjust_cfa_offset 4
	movl	%esp,%ebp
	movl	8(%ebp),%ecx		/* ecx = user key */
	movl	12(%ebp),%edx		/* edx = output cursor */
	movups	(%ecx),%xmm0		/* user key (first 16 bytes) */
	movaps	%xmm0,(%edx)		/* round key 0 is the raw key */
	addl	$0x10,%edx
	pxor	%xmm4,%xmm4		/* helpers require xmm4 dword 0 == 0 */
	cmpl	$12,16(%ebp)		/* rounds */
	jb	.Lenc_key128
	je	.Lenc_key192

	/* 256-bit key: second half of the user key is round key 1. */
	movups	0x10(%ecx),%xmm2
	movaps	%xmm2,(%edx)
	addl	$0x10,%edx
	/* aeskeygenassist $0x1,%xmm2,%xmm1 -- round 1 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x01
	call	_key_expansion_256a
	/* aeskeygenassist $0x1,%xmm0,%xmm1 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x01
	call	_key_expansion_256b
	/* aeskeygenassist $0x2,%xmm2,%xmm1 -- round 2 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x02
	call	_key_expansion_256a
	/* aeskeygenassist $0x2,%xmm0,%xmm1 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x02
	call	_key_expansion_256b
	/* aeskeygenassist $0x4,%xmm2,%xmm1 -- round 3 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x04
	call	_key_expansion_256a
	/* aeskeygenassist $0x4,%xmm0,%xmm1 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x04
	call	_key_expansion_256b
	/* aeskeygenassist $0x8,%xmm2,%xmm1 -- round 4 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x08
	call	_key_expansion_256a
	/* aeskeygenassist $0x8,%xmm0,%xmm1 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x08
	call	_key_expansion_256b
	/* aeskeygenassist $0x10,%xmm2,%xmm1 -- round 5 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x10
	call	_key_expansion_256a
	/* aeskeygenassist $0x10,%xmm0,%xmm1 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x10
	call	_key_expansion_256b
	/* aeskeygenassist $0x20,%xmm2,%xmm1 -- round 6 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x20
	call	_key_expansion_256a
	/* aeskeygenassist $0x20,%xmm0,%xmm1 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x20
	call	_key_expansion_256b
	/* aeskeygenassist $0x40,%xmm2,%xmm1 -- round 7 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x40
	call	_key_expansion_256a
	.cfi_remember_state
	leave
	.cfi_adjust_cfa_offset -4
	retl
	.cfi_restore_state

.Lenc_key192:
	movq	0x10(%ecx),%xmm2	/* remaining 8 bytes of the user key */
	/* aeskeygenassist $0x1,%xmm2,%xmm1 -- round 1 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x01
	call	_key_expansion_192a
	/* aeskeygenassist $0x2,%xmm2,%xmm1 -- round 2 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x02
	call	_key_expansion_192b
	/* aeskeygenassist $0x4,%xmm2,%xmm1 -- round 3 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x04
	call	_key_expansion_192a
	/* aeskeygenassist $0x8,%xmm2,%xmm1 -- round 4 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x08
	call	_key_expansion_192b
	/* aeskeygenassist $0x10,%xmm2,%xmm1 -- round 5 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x10
	call	_key_expansion_192a
	/* aeskeygenassist $0x20,%xmm2,%xmm1 -- round 6 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x20
	call	_key_expansion_192b
	/* aeskeygenassist $0x40,%xmm2,%xmm1 -- round 7 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x40
	call	_key_expansion_192a
	/* aeskeygenassist $0x80,%xmm2,%xmm1 -- round 8 */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x80
	call	_key_expansion_192b
	.cfi_remember_state
	leave
	.cfi_adjust_cfa_offset -4
	retl
	.cfi_restore_state

.Lenc_key128:
	/* aeskeygenassist $0x1,%xmm0,%xmm1 -- round 1 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x01
	call	_key_expansion_128
	/* aeskeygenassist $0x2,%xmm0,%xmm1 -- round 2 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x02
	call	_key_expansion_128
	/* aeskeygenassist $0x4,%xmm0,%xmm1 -- round 3 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x04
	call	_key_expansion_128
	/* aeskeygenassist $0x8,%xmm0,%xmm1 -- round 4 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x08
	call	_key_expansion_128
	/* aeskeygenassist $0x10,%xmm0,%xmm1 -- round 5 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x10
	call	_key_expansion_128
	/* aeskeygenassist $0x20,%xmm0,%xmm1 -- round 6 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x20
	call	_key_expansion_128
	/* aeskeygenassist $0x40,%xmm0,%xmm1 -- round 7 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x40
	call	_key_expansion_128
	/* aeskeygenassist $0x80,%xmm0,%xmm1 -- round 8 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x80
	call	_key_expansion_128
	/* aeskeygenassist $0x1b,%xmm0,%xmm1 -- round 9 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x1b
	call	_key_expansion_128
	/* aeskeygenassist $0x36,%xmm0,%xmm1 -- round 10 */
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x36
	call	_key_expansion_128
	leave
	.cfi_adjust_cfa_offset -4
	retl
	.cfi_endproc
END(aesni_set_enckey)

/*
 * aesni_set_deckey -- derive the decryption schedule from an encryption
 * schedule.
 *
 * Stack arguments, read through %ebp once the frame is set up:
 *    8(%ebp)  encryption schedule (input)
 *   12(%ebp)  decryption schedule (output)
 *   16(%ebp)  number of rounds
 * Both schedules are accessed with movdqa / a memory-operand AESIMC and so
 * must be 16-byte aligned.
 *
 * The output is the input in reverse order: entry `rounds` and entry 0 are
 * copied unchanged to the first and last output slots, and every entry in
 * between is passed through AESIMC.
 *
 * The middle loop decrements its counter before testing it, so `rounds`
 * must be at least 2; a smaller value would make the counter wrap instead
 * of reaching zero.
 *
 * Clobb: %eax, %ecx, %edx, %xmm0, %xmm1, flags
 */
ENTRY(aesni_set_deckey)
	.cfi_startproc
	pushl	%ebp
	.cfi_adjust_cfa_offset 4
	movl	%esp,%ebp
	movl	16(%ebp),%eax		/* rounds */
	movl	%eax,%ecx
	shll	$4,%ecx			/* rounds * 16 bytes per round key */
	addl	8(%ebp),%ecx		/* ecx = last entry of encrypt_schedule */
	movl	12(%ebp),%edx		/* edx = decrypt_schedule */
	movdqa	(%ecx),%xmm0
	movdqa	%xmm0,(%edx)		/* last encrypt key -> first decrypt key */
	decl	%eax			/* eax = number of middle entries */
.Ldeckey_middle:
	addl	$0x10,%edx
	subl	$0x10,%ecx
	/* aesimc (%ecx),%xmm1 */
	.byte	0x66,0x0f,0x38,0xdb,0x09
	movdqa	%xmm1,(%edx)
	decl	%eax
	jne	.Ldeckey_middle

	addl	$0x10,%edx
	subl	$0x10,%ecx
	movdqa	(%ecx),%xmm0
	movdqa	%xmm0,(%edx)		/* first encrypt key -> last decrypt key */
	leave
	.cfi_adjust_cfa_offset -4
	retl
	.cfi_endproc
END(aesni_set_deckey)
