/*
 * Cryptographic API.
 *
 * Glue code for the SHA512 Secure Hash Algorithm assembler
 * implementation using supplemental SSE3 / AVX / AVX2 instructions.
 *
 * This file is based on sha512_generic.c
 *
 * Copyright (C) 2013 Intel Corporation
 * Author: Tim Chen <tim.c.chen@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>

#include <linux/string.h>

asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest,
				       u64 rounds);
#ifdef CONFIG_AS_AVX
asmlinkage void sha512_transform_avx(const char *data, u64 *digest,
				     u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
asmlinkage void sha512_transform_rorx(const char *data, u64 *digest,
				      u64 rounds);
#endif

static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64);


static int sha512_ssse3_init(struct shash_desc *desc)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);

	sctx->state[0] = SHA512_H0;
	sctx->state[1] = SHA512_H1;
	sctx->state[2] = SHA512_H2;
	sctx->state[3] = SHA512_H3;
	sctx->state[4] = SHA512_H4;
	sctx->state[5] = SHA512_H5;
	sctx->state[6] = SHA512_H6;
	sctx->state[7] = SHA512_H7;
	sctx->count[0] = sctx->count[1] = 0;

	return 0;
}

static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
				 unsigned int len, unsigned int partial)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);
	unsigned int done = 0;

	sctx->count[0] += len;
	if (sctx->count[0] < len)
		sctx->count[1]++;

	if (partial) {
		done = SHA512_BLOCK_SIZE - partial;
		memcpy(sctx->buf + partial, data, done);
		sha512_transform_asm(sctx->buf, sctx->state, 1);
	}

	if (len - done >= SHA512_BLOCK_SIZE) {
		const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;

		sha512_transform_asm(data + done, sctx->state, (u64) rounds);

		done += rounds * SHA512_BLOCK_SIZE;
	}

	memcpy(sctx->buf, data + done, len - done);

	return 0;
}

static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
	int res;

	/* Handle the fast case right here */
	if (partial + len < SHA512_BLOCK_SIZE) {
		sctx->count[0] += len;
		if (sctx->count[0] < len)
			sctx->count[1]++;
		memcpy(sctx->buf + partial, data, len);

		return 0;
	}
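
	/*
	 * Slow path: at least one full block will be processed. Use the
	 * SIMD-accelerated transform when the FPU is usable in this
	 * context; otherwise fall back to the generic C implementation.
	 */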
	if (!irq_fpu_usable()) {
		res = crypto_sha512_update(desc, data, len);
	} else {
		kernel_fpu_begin();
		res = __sha512_ssse3_update(desc, data, len, partial);
		kernel_fpu_end();
	}

	return res;
}


/* Add padding and return the message digest. */
static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be64 *dst = (__be64 *)out;
	__be64 bits[2];
	static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };

	/* save number of bits */
	bits[1] = cpu_to_be64(sctx->count[0] << 3);
	bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);

	/* Pad out to 112 mod 128 and append length */
	index = sctx->count[0] & 0x7f;
	padlen = (index < 112) ? (112 - index) : ((128+112) - index);

	if (!irq_fpu_usable()) {
		crypto_sha512_update(desc, padding, padlen);
		crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
	} else {
		kernel_fpu_begin();
		/* We need to fill a whole block for __sha512_ssse3_update() */
		if (padlen <= 112) {
			sctx->count[0] += padlen;
			if (sctx->count[0] < padlen)
				sctx->count[1]++;
			memcpy(sctx->buf + index, padding, padlen);
		} else {
			__sha512_ssse3_update(desc, padding, padlen, index);
		}
		__sha512_ssse3_update(desc, (const u8 *)&bits,
				      sizeof(bits), 112);
		kernel_fpu_end();
	}

	/* Store state in digest */
	for (i = 0; i < 8; i++)
		dst[i] = cpu_to_be64(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}

static int sha512_ssse3_export(struct shash_desc *desc, void *out)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);

	memcpy(out, sctx, sizeof(*sctx));

	return 0;
}

static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);

	memcpy(sctx, in, sizeof(*sctx));

	return 0;
}

static int sha384_ssse3_init(struct shash_desc *desc)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);

	sctx->state[0] = SHA384_H0;
	sctx->state[1] = SHA384_H1;
	sctx->state[2] = SHA384_H2;
	sctx->state[3] = SHA384_H3;
	sctx->state[4] = SHA384_H4;
	sctx->state[5] = SHA384_H5;
	sctx->state[6] = SHA384_H6;
	sctx->state[7] = SHA384_H7;

	sctx->count[0] = sctx->count[1] = 0;

	return 0;
}

static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
{
	u8 D[SHA512_DIGEST_SIZE];

	sha512_ssse3_final(desc, D);

	memcpy(hash, D, SHA384_DIGEST_SIZE);
	memset(D, 0, SHA512_DIGEST_SIZE);

	return 0;
}

static struct shash_alg algs[] = { {
	.digestsize	=	SHA512_DIGEST_SIZE,
	.init		=	sha512_ssse3_init,
	.update		=	sha512_ssse3_update,
	.final		=	sha512_ssse3_final,
	.export		=	sha512_ssse3_export,
	.import		=	sha512_ssse3_import,
	.descsize	=	sizeof(struct sha512_state),
	.statesize	=	sizeof(struct sha512_state),
	.base		=	{
		.cra_name	=	"sha512",
		.cra_driver_name =	"sha512-ssse3",
		.cra_priority	=	150,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA512_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
}, {
	.digestsize	=	SHA384_DIGEST_SIZE,
	.init		=	sha384_ssse3_init,
	.update		=	sha512_ssse3_update,
	.final		=	sha384_ssse3_final,
	.export		=	sha512_ssse3_export,
	.import		=	sha512_ssse3_import,
	.descsize	=	sizeof(struct sha512_state),
	.statesize	=	sizeof(struct sha512_state),
	.base		=	{
		.cra_name	=	"sha384",
		.cra_driver_name =	"sha384-ssse3",
		.cra_priority	=	150,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA384_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
} };

#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave)
		return false;

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");

		return false;
	}

	return true;
}
#endif

static int __init sha512_ssse3_mod_init(void)
{
	/* test for SSSE3 first */
	if (cpu_has_ssse3)
		sha512_transform_asm = sha512_transform_ssse3;

#ifdef CONFIG_AS_AVX
	/* allow AVX to override SSSE3, it's a little faster */
	if (avx_usable()) {
#ifdef CONFIG_AS_AVX2
		if (boot_cpu_has(X86_FEATURE_AVX2))
			sha512_transform_asm = sha512_transform_rorx;
		else
#endif
			sha512_transform_asm = sha512_transform_avx;
	}
#endif

	if (sha512_transform_asm) {
#ifdef CONFIG_AS_AVX
		if (sha512_transform_asm == sha512_transform_avx)
			pr_info("Using AVX optimized SHA-512 implementation\n");
#ifdef CONFIG_AS_AVX2
		else if (sha512_transform_asm == sha512_transform_rorx)
			pr_info("Using AVX2 optimized SHA-512 implementation\n");
#endif
		else
#endif
			pr_info("Using SSSE3 optimized SHA-512 implementation\n");
		return crypto_register_shashes(algs, ARRAY_SIZE(algs));
	}
	pr_info("Neither AVX nor SSSE3 is available/usable.\n");

	return -ENODEV;
}

static void __exit sha512_ssse3_mod_fini(void)
{
	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}

module_init(sha512_ssse3_mod_init);
module_exit(sha512_ssse3_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");

MODULE_ALIAS("sha512");
MODULE_ALIAS("sha384");