/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *	Sean Gulley <sean.m.gulley@intel.com>
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *	* Redistributions of source code must retain the above copyright
 *	  notice, this list of conditions and the following disclaimer.
 *	* Redistributions in binary form must reproduce the above copyright
 *	  notice, this list of conditions and the following disclaimer in
 *	  the documentation and/or other materials provided with the
 *	  distribution.
 *	* Neither the name of Intel Corporation nor the names of its
 *	  contributors may be used to endorse or promote products derived
 *	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * Copyright (c) 2018, Joyent, Inc.
 */

/*
 * illumos uses this file under the terms of the BSD license.
 *
 * The following are a series of changes that we have made to this code:
 *
 * o Changed the include to be sys/asm_linkage.h.
 * o Use the sys/asm_linkage.h prototypes for assembly functions.
 * o Renamed the function from sha256_ni_transform to SHA256TransformBlocks to
 *   match the illumos name for the function.
 * o The illumos SHA256_CTX does not have the digest as the first member of its
 *   context struct. As such, an offset has to be added to the digest argument
 *   to make sure that we get to the actual digest.
 * o Update the function prototype block comment to reflect that we are
 *   passing the context and not the direct digest.
 */

#include <sys/asm_linkage.h>

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last),
 * preprocessed with cpp.
 *
 * Argument registers. DIGEST_PTR holds the context pointer on entry and is
 * advanced to the digest words before they are loaded.
 */
#define	DIGEST_PTR	%rdi	/* 1st arg */
#define	DATA_PTR	%rsi	/* 2nd arg */
#define	NUM_BLKS	%rdx	/* 3rd arg */

/* Base address of the K256 round-constant table. */
#define	SHA256CONSTANTS	%rax

/*
 * MSG is %xmm0 because sha256rnds2 reads its message+constant words from
 * %xmm0 implicitly; it cannot be remapped to another register.
 */
#define	MSG		%xmm0
#define	STATE0		%xmm1
#define	STATE1		%xmm2
#define	MSGTMP0		%xmm3
#define	MSGTMP1		%xmm4
#define	MSGTMP2		%xmm5
#define	MSGTMP3		%xmm6
#define	MSGTMP4		%xmm7

/* Byte-swap mask applied to every 16 bytes of input with pshufb. */
#define	SHUF_MASK	%xmm8

/* Copies of the state taken at the top of each block iteration. */
#define	ABEF_SAVE	%xmm9
#define	CDGH_SAVE	%xmm10

/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * The function takes a pointer to the hash context, a pointer to the
 * input data, and a number of 64 byte blocks to process. Once all blocks have
 * been processed, the digest stored in the context is updated with the
 * resulting hash value.
 * The function only processes complete blocks, there is no functionality to
 * store partial blocks. All message padding and hash value initialization must
 * be done outside the update function.
 *
 * The indented lines in the loop are instructions related to rounds processing.
 * The non-indented lines are instructions related to the message schedule.
 *
 * void SHA256TransformBlocks(SHA256_CTX *ctx, const void *data,
 *     uint32_t numBlocks);
 * ctx: pointer to the context; the eight 32-bit digest words are read from
 *     and written back to the 32 bytes starting 8 bytes past this pointer
 * data: pointer to input data
 * numBlocks: Number of blocks to process; if zero, the function returns
 *     without touching the context
 *
 * Registers modified: %rax, %rdx, %rsi, %rdi, %xmm0-%xmm10 and the flags.
 * No stack is used and no other function is called.
 *
 * NOTE(review): the prototype above declares numBlocks as uint32_t, but the
 * code shifts and adds the full 64-bit %rdx. Confirm that the C declaration
 * used by callers passes a 64-bit (or zero-extended) count.
 */

.text
.align 32
ENTRY_NP(SHA256TransformBlocks)

	shl		$6, NUM_BLKS		/* convert to bytes */
	jz		.Ldone_hash		/* nothing to do for 0 blocks */
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 *
	 * Offset DIGEST_PTR to account for the algorithm in the context.
	 * DIGEST_PTR stays advanced, so the store at the end of the function
	 * uses the same adjusted address.
	 */
	addq		$8, DIGEST_PTR
	movdqu		0*16(DIGEST_PTR), STATE0
	movdqu		1*16(DIGEST_PTR), STATE1

	pshufd		$0xB1, STATE0, STATE0		/* CDAB */
	pshufd		$0x1B, STATE1, STATE1		/* EFGH */
	movdqa		STATE0, MSGTMP4
	palignr		$8, STATE1, STATE0		/* ABEF */
	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea		K256(%rip), SHA256CONSTANTS

	/*
	 * One iteration per 64-byte block. DATA_PTR advances by 64 each time
	 * and the loop ends when it equals NUM_BLKS (the end-of-data pointer).
	 */
.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/*
	 * In every group below, paddd adds four round constants to four
	 * message words; the first sha256rnds2 consumes the low two sums and
	 * pshufd $0x0E moves the high two sums down for the second one.
	 */

	/* Rounds 0-3 */
	movdqu		0*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP0
		paddd		0*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 4-7 */
	movdqu		1*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP1
		paddd		1*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 8-11 */
	movdqu		2*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP2
		paddd		2*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 12-15 */
	movdqu		3*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP3
		paddd		3*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 16-19 */
	movdqa		MSGTMP0, MSG
		paddd		4*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 20-23 */
	movdqa		MSGTMP1, MSG
		paddd		5*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 24-27 */
	movdqa		MSGTMP2, MSG
		paddd		6*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 28-31 */
	movdqa		MSGTMP3, MSG
		paddd		7*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 32-35 */
	movdqa		MSGTMP0, MSG
		paddd		8*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 36-39 */
	movdqa		MSGTMP1, MSG
		paddd		9*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 40-43 */
	movdqa		MSGTMP2, MSG
		paddd		10*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 44-47 */
	movdqa		MSGTMP3, MSG
		paddd		11*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 48-51 */
	movdqa		MSGTMP0, MSG
		paddd		12*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 52-55 */
	movdqa		MSGTMP1, MSG
		paddd		13*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 56-59 */
	movdqa		MSGTMP2, MSG
		paddd		14*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 60-63 */
	movdqa		MSGTMP3, MSG
		paddd		15*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	pshufd		$0x1B, STATE0, STATE0		/* FEBA */
	pshufd		$0xB1, STATE1, STATE1		/* DCHG */
	movdqa		STATE0, MSGTMP4
	pblendw		$0xF0, STATE1, STATE0		/* DCBA */
	palignr		$8, MSGTMP4, STATE1		/* HGFE */

	movdqu		STATE0, 0*16(DIGEST_PTR)
	movdqu		STATE1, 1*16(DIGEST_PTR)

.Ldone_hash:

	ret
SET_SIZE(SHA256TransformBlocks)

/*
 * The 64 SHA-256 round constants, four per row. Row i is read by the
 * "paddd i*16(SHA256CONSTANTS), MSG" of the matching four-round group; that
 * memory operand form needs the 16-byte alignment the .align below provides.
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

/*
 * pshufb control mask that reverses the byte order inside each 32-bit word
 * (destination bytes 0..3 take source bytes 3..0, and so on), converting the
 * big-endian message words to the byte order the SHA instructions operate on.
 * Loaded with movdqa, hence the 16-byte alignment.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa	0x0c0d0e0f08090a0b0405060700010203