1*5f270659SKonstantin Belousov/*- 2*5f270659SKonstantin Belousov* The white paper of AES-NI instructions can be downloaded from: 3*5f270659SKonstantin Belousov * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf 4*5f270659SKonstantin Belousov * 5*5f270659SKonstantin Belousov * Copyright (C) 2008-2010, Intel Corporation 6*5f270659SKonstantin Belousov * Author: Huang Ying <ying.huang@intel.com> 7*5f270659SKonstantin Belousov * Vinodh Gopal <vinodh.gopal@intel.com> 8*5f270659SKonstantin Belousov * Kahraman Akdemir 9*5f270659SKonstantin Belousov * 10*5f270659SKonstantin Belousov * Redistribution and use in source and binary forms, with or without 11*5f270659SKonstantin Belousov * modification, are permitted provided that the following 12*5f270659SKonstantin Belousov * conditions are met: 13*5f270659SKonstantin Belousov * 14*5f270659SKonstantin Belousov * - Redistributions of source code must retain the above copyright 15*5f270659SKonstantin Belousov * notice, this list of conditions and the following disclaimer. 16*5f270659SKonstantin Belousov * 17*5f270659SKonstantin Belousov * - Redistributions in binary form must reproduce the above copyright 18*5f270659SKonstantin Belousov * notice, this list of conditions and the following disclaimer in the 19*5f270659SKonstantin Belousov * documentation and/or other materials provided with the 20*5f270659SKonstantin Belousov * distribution. 21*5f270659SKonstantin Belousov * 22*5f270659SKonstantin Belousov * - Neither the name of Intel Corporation nor the names of its 23*5f270659SKonstantin Belousov * contributors may be used to endorse or promote products 24*5f270659SKonstantin Belousov * derived from this software without specific prior written 25*5f270659SKonstantin Belousov * permission. 
26*5f270659SKonstantin Belousov * 27*5f270659SKonstantin Belousov * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28*5f270659SKonstantin Belousov * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29*5f270659SKonstantin Belousov * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30*5f270659SKonstantin Belousov * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 31*5f270659SKonstantin Belousov * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 32*5f270659SKonstantin Belousov * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 33*5f270659SKonstantin Belousov * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 34*5f270659SKonstantin Belousov * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 35*5f270659SKonstantin Belousov * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36*5f270659SKonstantin Belousov * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37*5f270659SKonstantin Belousov * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include <machine/asmacros.h>

	.text

/*
 * Internal key-expansion helpers for aesni_set_enckey (i386, AT&T syntax).
 *
 * These are NOT C-callable: they use a private register convention that is
 * set up by aesni_set_enckey.
 *
 *   %edx   - pointer to the next free slot of the key schedule; each
 *            helper advances it by the number of bytes it stored.
 *            Stores use movaps, so the schedule must be 16-byte aligned.
 *   %xmm1  - result of the aeskeygenassist issued by the caller right
 *            before the call (clobbered).
 *   %xmm4  - scratch; its low dword must be zero on entry.  The shufps
 *            sequence below keeps that dword zero, so the caller only has
 *            to clear %xmm4 once.
 */

/*
 * _key_expansion_128 / _key_expansion_256a
 *
 * In:    %xmm0 = previous 128-bit round key, %xmm1 = keygenassist result.
 * Out:   %xmm0 = next round key, also stored at (%edx); %edx += 0x10.
 * Clobb: %xmm1, %xmm4.
 */
ENTRY(_key_expansion_128)
_key_expansion_256a:
	.cfi_startproc
	pshufd	$0b11111111,%xmm1,%xmm1	/* broadcast dword 3 of the assist */
	/*
	 * The two shufps/pxor pairs turn %xmm0 = (w0,w1,w2,w3) into the
	 * running XOR (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3).
	 */
	shufps	$0b00010000,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	shufps	$0b10001100,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm0		/* fold in the assist word */
	movaps	%xmm0,(%edx)
	addl	$0x10,%edx
	retl				/* 32-bit return; this is i386 code */
	.cfi_endproc
END(_key_expansion_128)

/*
 * _key_expansion_192a
 *
 * In:    %xmm0 = previous key words 0-3, %xmm2 = previous key words 4-5
 *        in its low 64 bits, %xmm1 = keygenassist result.
 * Out:   %xmm0/%xmm2 updated to the next six key words; 32 bytes stored
 *        at (%edx) and 0x10(%edx); %edx += 0x20.
 * Clobb: %xmm1, %xmm3, %xmm4, %xmm5, %xmm6.
 */
ENTRY(_key_expansion_192a)
	.cfi_startproc
	pshufd	$0b01010101,%xmm1,%xmm1	/* broadcast dword 1 of the assist */
	shufps	$0b00010000,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	shufps	$0b10001100,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm0
	movaps	%xmm2,%xmm5
	movaps	%xmm2,%xmm6		/* keep the old words 4-5 for the store */
	pslldq	$4,%xmm5
	pshufd	$0b11111111,%xmm0,%xmm3	/* broadcast new word 3 */
	pxor	%xmm3,%xmm2
	pxor	%xmm5,%xmm2
	movaps	%xmm0,%xmm1
	shufps	$0b01000100,%xmm0,%xmm6	/* old %xmm2 low half : new %xmm0 low half */
	movaps	%xmm6,(%edx)
	shufps	$0b01001110,%xmm2,%xmm1	/* new %xmm0 high half : new %xmm2 low half */
	movaps	%xmm1,0x10(%edx)
	addl	$0x20,%edx
	retl				/* 32-bit return; this is i386 code */
	.cfi_endproc
END(_key_expansion_192a)

/*
 * _key_expansion_192b
 *
 * Same register inputs as _key_expansion_192a.
 * Out:   %xmm0/%xmm2 updated; %xmm0 stored at (%edx); %edx += 0x10.
 * Clobb: %xmm1, %xmm3, %xmm4, %xmm5.
 */
ENTRY(_key_expansion_192b)
	.cfi_startproc
	pshufd	$0b01010101,%xmm1,%xmm1	/* broadcast dword 1 of the assist */
	shufps	$0b00010000,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	shufps	$0b10001100,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm0
	movaps	%xmm2,%xmm5
	pslldq	$4,%xmm5
	pshufd	$0b11111111,%xmm0,%xmm3	/* broadcast new word 3 */
	pxor	%xmm3,%xmm2
	pxor	%xmm5,%xmm2
	movaps	%xmm0,(%edx)
	addl	$0x10,%edx
	retl
	.cfi_endproc
END(_key_expansion_192b)

/*
 * _key_expansion_256b
 *
 * In:    %xmm2 = key half to update, %xmm1 = keygenassist result.
 * Out:   %xmm2 = next round key, also stored at (%edx); %edx += 0x10.
 * Clobb: %xmm1, %xmm4.
 */
ENTRY(_key_expansion_256b)
	.cfi_startproc
	pshufd	$0b10101010,%xmm1,%xmm1	/* broadcast dword 2 of the assist */
	shufps	$0b00010000,%xmm2,%xmm4
	pxor	%xmm4,%xmm2
	shufps	$0b10001100,%xmm2,%xmm4
	pxor	%xmm4,%xmm2
	pxor	%xmm1,%xmm2
	movaps	%xmm2,(%edx)
	addl	$0x10,%edx
	retl
	.cfi_endproc
END(_key_expansion_256b)

/*
 * aesni_set_enckey - expand a user key into the AES encryption key schedule.
 *
 * ABI: i386 cdecl, arguments on the stack:
 *    8(%ebp)  pointer to the user key (read with unaligned loads)
 *   12(%ebp)  pointer to the output key schedule; written with movaps,
 *             so it must be 16-byte aligned
 *   16(%ebp)  number of rounds: < 12 selects the 128-bit path, == 12 the
 *             192-bit path, anything larger the 256-bit path
 *             (presumably 10 / 12 / 14 - confirm against the callers)
 *
 * Clobbers %ecx, %edx, %xmm0-%xmm6 and flags.
 *
 * The aeskeygenassist instructions are emitted as raw .byte sequences;
 * the intended mnemonic is kept in the comment above each one.  The last
 * byte of every sequence is the round constant (Rcon), which doubles each
 * round: 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36.
 */
ENTRY(aesni_set_enckey)
	.cfi_startproc
	pushl	%ebp
	.cfi_adjust_cfa_offset 4
	movl	%esp,%ebp
	movl	8(%ebp),%ecx		/* user key */
	movl	12(%ebp),%edx		/* key schedule output */
	movups	(%ecx),%xmm0		/* user key (first 16 bytes) */
	movaps	%xmm0,(%edx)
	addl	$0x10,%edx		/* next schedule slot */
	pxor	%xmm4,%xmm4		/* xmm4 is assumed 0 in _key_expansion_x */
	cmpl	$12,16(%ebp)		/* rounds */
	jb	.Lenc_key128
	je	.Lenc_key192

	/* 256-bit key: the second 16 bytes form round key 1. */
	movups	0x10(%ecx),%xmm2	/* other user key */
	movaps	%xmm2,(%edx)
	addl	$0x10,%edx
//	aeskeygenassist $0x1,%xmm2,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x01
	call	_key_expansion_256a
//	aeskeygenassist $0x1,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x01
	call	_key_expansion_256b
//	aeskeygenassist $0x2,%xmm2,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x02
	call	_key_expansion_256a
//	aeskeygenassist $0x2,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x02
	call	_key_expansion_256b
//	aeskeygenassist $0x4,%xmm2,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x04
	call	_key_expansion_256a
//	aeskeygenassist $0x4,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x04
	call	_key_expansion_256b
//	aeskeygenassist $0x8,%xmm2,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x08
	call	_key_expansion_256a
//	aeskeygenassist $0x8,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x08
	call	_key_expansion_256b
//	aeskeygenassist $0x10,%xmm2,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x10
	call	_key_expansion_256a
//	aeskeygenassist $0x10,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x10
	call	_key_expansion_256b
//	aeskeygenassist $0x20,%xmm2,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x20
	call	_key_expansion_256a
//	aeskeygenassist $0x20,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x20
	call	_key_expansion_256b
//	aeskeygenassist $0x40,%xmm2,%xmm1	# round 7
	/* Rcon for round 7 is 0x40; the immediate byte must match. */
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x40
	call	_key_expansion_256a
	leave
	.cfi_adjust_cfa_offset -4
	retl

.Lenc_key192:
	/* 192-bit key: the remaining 8 bytes go in the low half of xmm2. */
	movq	0x10(%ecx),%xmm2	/* other user key */
//	aeskeygenassist $0x1,%xmm2,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x01
	call	_key_expansion_192a
//	aeskeygenassist $0x2,%xmm2,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x02
	call	_key_expansion_192b
//	aeskeygenassist $0x4,%xmm2,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x04
	call	_key_expansion_192a
//	aeskeygenassist $0x8,%xmm2,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x08
	call	_key_expansion_192b
//	aeskeygenassist $0x10,%xmm2,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x10
	call	_key_expansion_192a
//	aeskeygenassist $0x20,%xmm2,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x20
	call	_key_expansion_192b
//	aeskeygenassist $0x40,%xmm2,%xmm1	# round 7
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x40
	call	_key_expansion_192a
//	aeskeygenassist $0x80,%xmm2,%xmm1	# round 8
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x80
	call	_key_expansion_192b
	leave
	.cfi_adjust_cfa_offset -4
	retl

.Lenc_key128:
	/* 128-bit key: ten expansion steps, one round key each. */
//	aeskeygenassist $0x1,%xmm0,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x01
	call	_key_expansion_128
//	aeskeygenassist $0x2,%xmm0,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x02
	call	_key_expansion_128
//	aeskeygenassist $0x4,%xmm0,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x04
	call	_key_expansion_128
//	aeskeygenassist $0x8,%xmm0,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x08
	call	_key_expansion_128
//	aeskeygenassist $0x10,%xmm0,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x10
	call	_key_expansion_128
//	aeskeygenassist $0x20,%xmm0,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x20
	call	_key_expansion_128
//	aeskeygenassist $0x40,%xmm0,%xmm1	# round 7
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x40
	call	_key_expansion_128
//	aeskeygenassist $0x80,%xmm0,%xmm1	# round 8
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x80
	call	_key_expansion_128
//	aeskeygenassist $0x1b,%xmm0,%xmm1	# round 9
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x1b
	call	_key_expansion_128
//	aeskeygenassist $0x36,%xmm0,%xmm1	# round 10
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x36
	call	_key_expansion_128
	leave
	.cfi_adjust_cfa_offset -4
	retl
	.cfi_endproc
END(aesni_set_enckey)

/*
 * aesni_set_deckey - derive the decryption key schedule from an already
 * expanded encryption key schedule.
 *
 * ABI: i386 cdecl, arguments on the stack:
 *    8(%ebp)  encrypt_schedule (source, read with aligned loads)
 *   12(%ebp)  decrypt_schedule (destination, written with aligned stores)
 *   16(%ebp)  rounds
 *
 * Output layout (rounds + 1 entries of 16 bytes):
 *   entry 0            = encryption entry [rounds], copied unchanged
 *   entries 1..rounds-1 = aesimc of encryption entries rounds-1 .. 1
 *   entry rounds        = encryption entry [0], copied unchanged
 *
 * The middle loop tests its counter after the body, so rounds must be at
 * least 2.  Clobbers %eax, %ecx, %edx, %xmm0, %xmm1 and flags.
 */
ENTRY(aesni_set_deckey)
	.cfi_startproc
	pushl	%ebp
	.cfi_adjust_cfa_offset 4
	movl	%esp,%ebp
	movl	12(%ebp),%edx		/* decrypt_schedule */
	movl	16(%ebp),%eax		/* rounds */
	movl	%eax,%ecx
	shll	$4,%ecx			/* rounds * 16 bytes per entry */
	addl	8(%ebp),%ecx		/* encrypt_schedule last quad */
	decl	%eax			/* rounds - 1 entries need aesimc */
	movdqa	(%ecx),%xmm0
	movdqa	%xmm0,(%edx)
.Ldec_imc_loop:
	subl	$0x10,%ecx		/* walk the source backwards */
	addl	$0x10,%edx		/* walk the destination forwards */
//	aesimc	(%ecx),%xmm1
	.byte	0x66,0x0f,0x38,0xdb,0x09
	movdqa	%xmm1,(%edx)
	decl	%eax
	jne	.Ldec_imc_loop

	/* The first encryption entry is copied as-is to the last slot. */
	subl	$0x10,%ecx
	addl	$0x10,%edx
	movdqa	(%ecx),%xmm0
	movdqa	%xmm0,(%edx)
	leave
	.cfi_adjust_cfa_offset -4
	retl
	.cfi_endproc
END(aesni_set_deckey)

	.ident	"$FreeBSD$"