// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains glue code.
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 */

#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <crypto/cryptd.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
#include <linux/unaligned.h>

#define GHASH_BLOCK_SIZE	16
#define GHASH_DIGEST_SIZE	16

void clmul_ghash_mul(char *dst, const le128 *shash);

void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
			const le128 *shash);

struct ghash_async_ctx {
	struct cryptd_ahash *cryptd_tfm;
};

struct ghash_ctx {
	le128 shash;
};

struct ghash_desc_ctx {
	u8 buffer[GHASH_BLOCK_SIZE];
	u32 bytes;
};

static int ghash_init(struct shash_desc *desc)
{
	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);

	memset(dctx, 0, sizeof(*dctx));

	return 0;
}

static int ghash_setkey(struct crypto_shash *tfm,
			const u8 *key, unsigned int keylen)
{
	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
	u64 a, b;

	if (keylen != GHASH_BLOCK_SIZE)
		return -EINVAL;

	/*
	 * GHASH maps bits to polynomial coefficients backwards, which makes
	 * it hard to implement.  But it can be shown that the GHASH
	 * multiplication
	 *
	 *	D * K (mod x^128 + x^7 + x^2 + x + 1)
	 *
	 * (where D is a data block and K is the key) is equivalent to:
	 *
	 *	bitreflect(D) * bitreflect(K) * x^(-127)
	 *		(mod x^128 + x^127 + x^126 + x^121 + 1)
	 *
	 * So, the code below precomputes:
	 *
	 *	bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1)
	 *
	 * ... but in Montgomery form (so that Montgomery multiplication can
	 * be used), i.e. with an extra x^128 factor, which means actually:
	 *
	 *	bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1)
	 *
	 * The within-a-byte part of bitreflect() cancels out GHASH's built-in
	 * reflection, and thus bitreflect() is actually a byteswap.
	 */
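	/*
	 * In terms of the two 64-bit halves a (high) and b (low) of the
	 * byteswapped key, multiplying by x amounts to rotating a:b left by
	 * one bit (the bit that wraps around provides the "+ 1" term of the
	 * reduction), then XORing 0xc2 into the top byte whenever reduction
	 * by the remaining x^127 + x^126 + x^121 terms is needed.
	 */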
	a = get_unaligned_be64(key);
	b = get_unaligned_be64(key + 8);
	ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63));
	ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63));
	if (a >> 63)
		ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56);
	return 0;
}

static int ghash_update(struct shash_desc *desc,
			const u8 *src, unsigned int srclen)
{
	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
	u8 *dst = dctx->buffer;

	kernel_fpu_begin();
	if (dctx->bytes) {
		int n = min(srclen, dctx->bytes);
		u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);

		dctx->bytes -= n;
		srclen -= n;

		while (n--)
			*pos++ ^= *src++;

		if (!dctx->bytes)
			clmul_ghash_mul(dst, &ctx->shash);
	}

	clmul_ghash_update(dst, src, srclen, &ctx->shash);
	kernel_fpu_end();

	if (srclen & 0xf) {
		src += srclen - (srclen & 0xf);
		srclen &= 0xf;
		dctx->bytes = GHASH_BLOCK_SIZE - srclen;
		while (srclen--)
			*dst++ ^= *src++;
	}

	return 0;
}

static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
{
	u8 *dst = dctx->buffer;

	if (dctx->bytes) {
		u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);

		while (dctx->bytes--)
			*tmp++ ^= 0;

		kernel_fpu_begin();
		clmul_ghash_mul(dst, &ctx->shash);
		kernel_fpu_end();
	}

	dctx->bytes = 0;
}

static int ghash_final(struct shash_desc *desc, u8 *dst)
{
	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
	u8 *buf = dctx->buffer;

	ghash_flush(ctx, dctx);
	memcpy(dst, buf, GHASH_BLOCK_SIZE);

	return 0;
}

static struct shash_alg ghash_alg = {
	.digestsize	= GHASH_DIGEST_SIZE,
	.init		= ghash_init,
	.update		= ghash_update,
	.final		= ghash_final,
	.setkey		= ghash_setkey,
	.descsize	= sizeof(struct ghash_desc_ctx),
	.base		= {
		.cra_name		= "__ghash",
		.cra_driver_name	= "__ghash-pclmulqdqni",
		.cra_priority		= 0,
		.cra_flags		= CRYPTO_ALG_INTERNAL,
		.cra_blocksize		= GHASH_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct ghash_ctx),
		.cra_module		= THIS_MODULE,
	},
};

static int ghash_async_init(struct ahash_request *req)
{
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
	struct ahash_request *cryptd_req = ahash_request_ctx(req);
	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
	struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);

	desc->tfm = child;
	return crypto_shash_init(desc);
}

static void ghash_init_cryptd_req(struct ahash_request *req)
{
	struct ahash_request *cryptd_req = ahash_request_ctx(req);
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;

	ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
	ahash_request_set_callback(cryptd_req, req->base.flags,
				   req->base.complete, req->base.data);
	ahash_request_set_crypt(cryptd_req, req->src, req->result,
				req->nbytes);
}

static int ghash_async_update(struct ahash_request *req)
{
	struct ahash_request *cryptd_req = ahash_request_ctx(req);
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;

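	/*
	 * Defer to the cryptd asynchronous path when the FPU cannot be used
	 * in this context, or when we are in atomic context and cryptd
	 * already has requests queued; otherwise run the PCLMULQDQ shash
	 * synchronously.
	 */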
	if (!crypto_simd_usable() ||
	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
		ghash_init_cryptd_req(req);
		return crypto_ahash_update(cryptd_req);
	} else {
		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);

		return shash_ahash_update(req, desc);
	}
}

static int ghash_async_final(struct ahash_request *req)
{
	struct ahash_request *cryptd_req = ahash_request_ctx(req);
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;

	if (!crypto_simd_usable() ||
	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
		ghash_init_cryptd_req(req);
		return crypto_ahash_final(cryptd_req);
	} else {
		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);

		return crypto_shash_final(desc, req->result);
	}
}

static int ghash_async_import(struct ahash_request *req, const void *in)
{
	struct ahash_request *cryptd_req = ahash_request_ctx(req);
	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);

	ghash_async_init(req);
	memcpy(dctx, in, sizeof(*dctx));
	return 0;
}

static int ghash_async_export(struct ahash_request *req, void *out)
{
	struct ahash_request *cryptd_req = ahash_request_ctx(req);
	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);

	memcpy(out, dctx, sizeof(*dctx));
	return 0;
}

static int ghash_async_digest(struct ahash_request *req)
{
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
	struct ahash_request *cryptd_req = ahash_request_ctx(req);
	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;

	if (!crypto_simd_usable() ||
	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
		ghash_init_cryptd_req(req);
		return crypto_ahash_digest(cryptd_req);
	} else {
		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
		struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);

		desc->tfm = child;
		return shash_ahash_digest(req, desc);
	}
}

static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
			      unsigned int keylen)
{
	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
	struct crypto_ahash *child = &ctx->cryptd_tfm->base;

	crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
	crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
			       & CRYPTO_TFM_REQ_MASK);
	return crypto_ahash_setkey(child, key, keylen);
}

static int ghash_async_init_tfm(struct crypto_tfm *tfm)
{
	struct cryptd_ahash *cryptd_tfm;
	struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);

	cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni",
					CRYPTO_ALG_INTERNAL,
					CRYPTO_ALG_INTERNAL);
	if (IS_ERR(cryptd_tfm))
		return PTR_ERR(cryptd_tfm);
	ctx->cryptd_tfm = cryptd_tfm;
	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
				 sizeof(struct ahash_request) +
				 crypto_ahash_reqsize(&cryptd_tfm->base));

	return 0;
}

static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
{
	struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);

	cryptd_free_ahash(ctx->cryptd_tfm);
}

static struct ahash_alg ghash_async_alg = {
	.init		= ghash_async_init,
	.update		= ghash_async_update,
	.final		= ghash_async_final,
	.setkey		= ghash_async_setkey,
	.digest		= ghash_async_digest,
	.export		= ghash_async_export,
	.import		= ghash_async_import,
	.halg = {
		.digestsize	= GHASH_DIGEST_SIZE,
		.statesize	= sizeof(struct ghash_desc_ctx),
		.base = {
			.cra_name		= "ghash",
			.cra_driver_name	= "ghash-clmulni",
			.cra_priority		= 400,
			.cra_ctxsize		= sizeof(struct ghash_async_ctx),
			.cra_flags		= CRYPTO_ALG_ASYNC,
			.cra_blocksize		= GHASH_BLOCK_SIZE,
			.cra_module		= THIS_MODULE,
			.cra_init		= ghash_async_init_tfm,
			.cra_exit		= ghash_async_exit_tfm,
		},
	},
};

static const struct x86_cpu_id pcmul_cpu_id[] = {
	X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */
	{}
};
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);

static int __init ghash_pclmulqdqni_mod_init(void)
{
	int err;

	if (!x86_match_cpu(pcmul_cpu_id))
		return -ENODEV;

	err = crypto_register_shash(&ghash_alg);
	if (err)
		goto err_out;
	err = crypto_register_ahash(&ghash_async_alg);
	if (err)
		goto err_shash;

	return 0;

err_shash:
	crypto_unregister_shash(&ghash_alg);
err_out:
	return err;
}

static void __exit ghash_pclmulqdqni_mod_exit(void)
{
	crypto_unregister_ahash(&ghash_async_alg);
	crypto_unregister_shash(&ghash_alg);
}

module_init(ghash_pclmulqdqni_mod_init);
module_exit(ghash_pclmulqdqni_mod_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI");
MODULE_ALIAS_CRYPTO("ghash");