1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI 4 * instructions. This file contains glue code. 5 * 6 * Copyright (c) 2009 Intel Corp. 7 * Author: Huang Ying <ying.huang@intel.com> 8 */ 9 10 #include <linux/err.h> 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/kernel.h> 14 #include <linux/crypto.h> 15 #include <crypto/algapi.h> 16 #include <crypto/cryptd.h> 17 #include <crypto/gf128mul.h> 18 #include <crypto/internal/hash.h> 19 #include <crypto/internal/simd.h> 20 #include <asm/cpu_device_id.h> 21 #include <asm/simd.h> 22 #include <linux/unaligned.h> 23 24 #define GHASH_BLOCK_SIZE 16 25 #define GHASH_DIGEST_SIZE 16 26 27 void clmul_ghash_mul(char *dst, const le128 *shash); 28 29 void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, 30 const le128 *shash); 31 32 struct ghash_async_ctx { 33 struct cryptd_ahash *cryptd_tfm; 34 }; 35 36 struct ghash_ctx { 37 le128 shash; 38 }; 39 40 struct ghash_desc_ctx { 41 u8 buffer[GHASH_BLOCK_SIZE]; 42 u32 bytes; 43 }; 44 45 static int ghash_init(struct shash_desc *desc) 46 { 47 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); 48 49 memset(dctx, 0, sizeof(*dctx)); 50 51 return 0; 52 } 53 54 static int ghash_setkey(struct crypto_shash *tfm, 55 const u8 *key, unsigned int keylen) 56 { 57 struct ghash_ctx *ctx = crypto_shash_ctx(tfm); 58 u64 a, b; 59 60 if (keylen != GHASH_BLOCK_SIZE) 61 return -EINVAL; 62 63 /* 64 * GHASH maps bits to polynomial coefficients backwards, which makes it 65 * hard to implement. But it can be shown that the GHASH multiplication 66 * 67 * D * K (mod x^128 + x^7 + x^2 + x + 1) 68 * 69 * (where D is a data block and K is the key) is equivalent to: 70 * 71 * bitreflect(D) * bitreflect(K) * x^(-127) 72 * (mod x^128 + x^127 + x^126 + x^121 + 1) 73 * 74 * So, the code below precomputes: 75 * 76 * bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1) 77 * 78 * ... but in Montgomery form (so that Montgomery multiplication can be 79 * used), i.e. with an extra x^128 factor, which means actually: 80 * 81 * bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1) 82 * 83 * The within-a-byte part of bitreflect() cancels out GHASH's built-in 84 * reflection, and thus bitreflect() is actually a byteswap. 85 */ 86 a = get_unaligned_be64(key); 87 b = get_unaligned_be64(key + 8); 88 ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63)); 89 ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63)); 90 if (a >> 63) 91 ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56); 92 return 0; 93 } 94 95 static int ghash_update(struct shash_desc *desc, 96 const u8 *src, unsigned int srclen) 97 { 98 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); 99 struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); 100 u8 *dst = dctx->buffer; 101 102 kernel_fpu_begin(); 103 if (dctx->bytes) { 104 int n = min(srclen, dctx->bytes); 105 u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); 106 107 dctx->bytes -= n; 108 srclen -= n; 109 110 while (n--) 111 *pos++ ^= *src++; 112 113 if (!dctx->bytes) 114 clmul_ghash_mul(dst, &ctx->shash); 115 } 116 117 clmul_ghash_update(dst, src, srclen, &ctx->shash); 118 kernel_fpu_end(); 119 120 if (srclen & 0xf) { 121 src += srclen - (srclen & 0xf); 122 srclen &= 0xf; 123 dctx->bytes = GHASH_BLOCK_SIZE - srclen; 124 while (srclen--) 125 *dst++ ^= *src++; 126 } 127 128 return 0; 129 } 130 131 static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) 132 { 133 u8 *dst = dctx->buffer; 134 135 if (dctx->bytes) { 136 u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); 137 138 while (dctx->bytes--) 139 *tmp++ ^= 0; 140 141 kernel_fpu_begin(); 142 clmul_ghash_mul(dst, &ctx->shash); 143 kernel_fpu_end(); 144 } 145 146 dctx->bytes = 0; 147 } 148 149 static int ghash_final(struct shash_desc *desc, u8 *dst) 150 { 151 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); 152 struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); 153 u8 *buf = dctx->buffer; 154 155 ghash_flush(ctx, dctx); 156 memcpy(dst, buf, GHASH_BLOCK_SIZE); 157 158 return 0; 159 } 160 161 static struct shash_alg ghash_alg = { 162 .digestsize = GHASH_DIGEST_SIZE, 163 .init = ghash_init, 164 .update = ghash_update, 165 .final = ghash_final, 166 .setkey = ghash_setkey, 167 .descsize = sizeof(struct ghash_desc_ctx), 168 .base = { 169 .cra_name = "__ghash", 170 .cra_driver_name = "__ghash-pclmulqdqni", 171 .cra_priority = 0, 172 .cra_flags = CRYPTO_ALG_INTERNAL, 173 .cra_blocksize = GHASH_BLOCK_SIZE, 174 .cra_ctxsize = sizeof(struct ghash_ctx), 175 .cra_module = THIS_MODULE, 176 }, 177 }; 178 179 static int ghash_async_init(struct ahash_request *req) 180 { 181 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 182 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); 183 struct ahash_request *cryptd_req = ahash_request_ctx(req); 184 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; 185 struct shash_desc *desc = cryptd_shash_desc(cryptd_req); 186 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); 187 188 desc->tfm = child; 189 return crypto_shash_init(desc); 190 } 191 192 static int ghash_async_update(struct ahash_request *req) 193 { 194 struct ahash_request *cryptd_req = ahash_request_ctx(req); 195 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 196 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); 197 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; 198 199 if (!crypto_simd_usable() || 200 (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { 201 memcpy(cryptd_req, req, sizeof(*req)); 202 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); 203 return crypto_ahash_update(cryptd_req); 204 } else { 205 struct shash_desc *desc = cryptd_shash_desc(cryptd_req); 206 return shash_ahash_update(req, desc); 207 } 208 } 209 210 static int ghash_async_final(struct ahash_request *req) 211 { 212 struct ahash_request *cryptd_req = ahash_request_ctx(req); 213 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 214 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); 215 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; 216 217 if (!crypto_simd_usable() || 218 (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { 219 memcpy(cryptd_req, req, sizeof(*req)); 220 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); 221 return crypto_ahash_final(cryptd_req); 222 } else { 223 struct shash_desc *desc = cryptd_shash_desc(cryptd_req); 224 return crypto_shash_final(desc, req->result); 225 } 226 } 227 228 static int ghash_async_import(struct ahash_request *req, const void *in) 229 { 230 struct ahash_request *cryptd_req = ahash_request_ctx(req); 231 struct shash_desc *desc = cryptd_shash_desc(cryptd_req); 232 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); 233 234 ghash_async_init(req); 235 memcpy(dctx, in, sizeof(*dctx)); 236 return 0; 237 238 } 239 240 static int ghash_async_export(struct ahash_request *req, void *out) 241 { 242 struct ahash_request *cryptd_req = ahash_request_ctx(req); 243 struct shash_desc *desc = cryptd_shash_desc(cryptd_req); 244 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); 245 246 memcpy(out, dctx, sizeof(*dctx)); 247 return 0; 248 249 } 250 251 static int ghash_async_digest(struct ahash_request *req) 252 { 253 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 254 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); 255 struct ahash_request *cryptd_req = ahash_request_ctx(req); 256 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; 257 258 if (!crypto_simd_usable() || 259 (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { 260 memcpy(cryptd_req, req, sizeof(*req)); 261 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); 262 return crypto_ahash_digest(cryptd_req); 263 } else { 264 struct shash_desc *desc = cryptd_shash_desc(cryptd_req); 265 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); 266 267 desc->tfm = child; 268 return shash_ahash_digest(req, desc); 269 } 270 } 271 272 static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, 273 unsigned int keylen) 274 { 275 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); 276 struct crypto_ahash *child = &ctx->cryptd_tfm->base; 277 278 crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); 279 crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) 280 & CRYPTO_TFM_REQ_MASK); 281 return crypto_ahash_setkey(child, key, keylen); 282 } 283 284 static int ghash_async_init_tfm(struct crypto_tfm *tfm) 285 { 286 struct cryptd_ahash *cryptd_tfm; 287 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); 288 289 cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 290 CRYPTO_ALG_INTERNAL, 291 CRYPTO_ALG_INTERNAL); 292 if (IS_ERR(cryptd_tfm)) 293 return PTR_ERR(cryptd_tfm); 294 ctx->cryptd_tfm = cryptd_tfm; 295 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), 296 sizeof(struct ahash_request) + 297 crypto_ahash_reqsize(&cryptd_tfm->base)); 298 299 return 0; 300 } 301 302 static void ghash_async_exit_tfm(struct crypto_tfm *tfm) 303 { 304 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); 305 306 cryptd_free_ahash(ctx->cryptd_tfm); 307 } 308 309 static struct ahash_alg ghash_async_alg = { 310 .init = ghash_async_init, 311 .update = ghash_async_update, 312 .final = ghash_async_final, 313 .setkey = ghash_async_setkey, 314 .digest = ghash_async_digest, 315 .export = ghash_async_export, 316 .import = ghash_async_import, 317 .halg = { 318 .digestsize = GHASH_DIGEST_SIZE, 319 .statesize = sizeof(struct ghash_desc_ctx), 320 .base = { 321 .cra_name = "ghash", 322 .cra_driver_name = "ghash-clmulni", 323 .cra_priority = 400, 324 .cra_ctxsize = sizeof(struct ghash_async_ctx), 325 .cra_flags = CRYPTO_ALG_ASYNC, 326 .cra_blocksize = GHASH_BLOCK_SIZE, 327 .cra_module = THIS_MODULE, 328 .cra_init = ghash_async_init_tfm, 329 .cra_exit = ghash_async_exit_tfm, 330 }, 331 }, 332 }; 333 334 static const struct x86_cpu_id pcmul_cpu_id[] = { 335 X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */ 336 {} 337 }; 338 MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id); 339 340 static int __init ghash_pclmulqdqni_mod_init(void) 341 { 342 int err; 343 344 if (!x86_match_cpu(pcmul_cpu_id)) 345 return -ENODEV; 346 347 err = crypto_register_shash(&ghash_alg); 348 if (err) 349 goto err_out; 350 err = crypto_register_ahash(&ghash_async_alg); 351 if (err) 352 goto err_shash; 353 354 return 0; 355 356 err_shash: 357 crypto_unregister_shash(&ghash_alg); 358 err_out: 359 return err; 360 } 361 362 static void __exit ghash_pclmulqdqni_mod_exit(void) 363 { 364 crypto_unregister_ahash(&ghash_async_alg); 365 crypto_unregister_shash(&ghash_alg); 366 } 367 368 module_init(ghash_pclmulqdqni_mod_init); 369 module_exit(ghash_pclmulqdqni_mod_exit); 370 371 MODULE_LICENSE("GPL"); 372 MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI"); 373 MODULE_ALIAS_CRYPTO("ghash"); 374