xref: /linux/arch/x86/crypto/sm4_aesni_avx_glue.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
 * SM4 Cipher Algorithm, AES-NI/AVX optimized,
 * as specified in
 * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
6  *
7  * Copyright (c) 2021, Alibaba Group.
8  * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
9  */
10 
11 #include <linux/module.h>
12 #include <linux/crypto.h>
13 #include <linux/kernel.h>
14 #include <asm/simd.h>
15 #include <crypto/internal/simd.h>
16 #include <crypto/internal/skcipher.h>
17 #include <crypto/sm4.h>
18 #include "sm4-avx.h"
19 
20 #define SM4_CRYPT8_BLOCK_SIZE	(SM4_BLOCK_SIZE * 8)
21 
22 asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
23 				const u8 *src, int nblocks);
24 asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
25 				const u8 *src, int nblocks);
26 asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
27 				const u8 *src, u8 *iv);
28 asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
29 				const u8 *src, u8 *iv);
30 
31 static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
32 			unsigned int key_len)
33 {
34 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
35 
36 	return sm4_expandkey(ctx, key, key_len);
37 }
38 
39 static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
40 {
41 	struct skcipher_walk walk;
42 	unsigned int nbytes;
43 	int err;
44 
45 	err = skcipher_walk_virt(&walk, req, false);
46 
47 	while ((nbytes = walk.nbytes) > 0) {
48 		const u8 *src = walk.src.virt.addr;
49 		u8 *dst = walk.dst.virt.addr;
50 
51 		kernel_fpu_begin();
52 		while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) {
53 			sm4_aesni_avx_crypt8(rkey, dst, src, 8);
54 			dst += SM4_CRYPT8_BLOCK_SIZE;
55 			src += SM4_CRYPT8_BLOCK_SIZE;
56 			nbytes -= SM4_CRYPT8_BLOCK_SIZE;
57 		}
58 		while (nbytes >= SM4_BLOCK_SIZE) {
59 			unsigned int nblocks = min(nbytes >> 4, 4u);
60 			sm4_aesni_avx_crypt4(rkey, dst, src, nblocks);
61 			dst += nblocks * SM4_BLOCK_SIZE;
62 			src += nblocks * SM4_BLOCK_SIZE;
63 			nbytes -= nblocks * SM4_BLOCK_SIZE;
64 		}
65 		kernel_fpu_end();
66 
67 		err = skcipher_walk_done(&walk, nbytes);
68 	}
69 
70 	return err;
71 }
72 
73 int sm4_avx_ecb_encrypt(struct skcipher_request *req)
74 {
75 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
76 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
77 
78 	return ecb_do_crypt(req, ctx->rkey_enc);
79 }
80 EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt);
81 
82 int sm4_avx_ecb_decrypt(struct skcipher_request *req)
83 {
84 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
85 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
86 
87 	return ecb_do_crypt(req, ctx->rkey_dec);
88 }
89 EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt);
90 
91 int sm4_cbc_encrypt(struct skcipher_request *req)
92 {
93 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
94 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
95 	struct skcipher_walk walk;
96 	unsigned int nbytes;
97 	int err;
98 
99 	err = skcipher_walk_virt(&walk, req, false);
100 
101 	while ((nbytes = walk.nbytes) > 0) {
102 		const u8 *iv = walk.iv;
103 		const u8 *src = walk.src.virt.addr;
104 		u8 *dst = walk.dst.virt.addr;
105 
106 		while (nbytes >= SM4_BLOCK_SIZE) {
107 			crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
108 			sm4_crypt_block(ctx->rkey_enc, dst, dst);
109 			iv = dst;
110 			src += SM4_BLOCK_SIZE;
111 			dst += SM4_BLOCK_SIZE;
112 			nbytes -= SM4_BLOCK_SIZE;
113 		}
114 		if (iv != walk.iv)
115 			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
116 
117 		err = skcipher_walk_done(&walk, nbytes);
118 	}
119 
120 	return err;
121 }
122 EXPORT_SYMBOL_GPL(sm4_cbc_encrypt);
123 
124 int sm4_avx_cbc_decrypt(struct skcipher_request *req,
125 			unsigned int bsize, sm4_crypt_func func)
126 {
127 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
128 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
129 	struct skcipher_walk walk;
130 	unsigned int nbytes;
131 	int err;
132 
133 	err = skcipher_walk_virt(&walk, req, false);
134 
135 	while ((nbytes = walk.nbytes) > 0) {
136 		const u8 *src = walk.src.virt.addr;
137 		u8 *dst = walk.dst.virt.addr;
138 
139 		kernel_fpu_begin();
140 
141 		while (nbytes >= bsize) {
142 			func(ctx->rkey_dec, dst, src, walk.iv);
143 			dst += bsize;
144 			src += bsize;
145 			nbytes -= bsize;
146 		}
147 
148 		while (nbytes >= SM4_BLOCK_SIZE) {
149 			u8 keystream[SM4_BLOCK_SIZE * 8];
150 			u8 iv[SM4_BLOCK_SIZE];
151 			unsigned int nblocks = min(nbytes >> 4, 8u);
152 			int i;
153 
154 			sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream,
155 						src, nblocks);
156 
157 			src += ((int)nblocks - 2) * SM4_BLOCK_SIZE;
158 			dst += (nblocks - 1) * SM4_BLOCK_SIZE;
159 			memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
160 
161 			for (i = nblocks - 1; i > 0; i--) {
162 				crypto_xor_cpy(dst, src,
163 					&keystream[i * SM4_BLOCK_SIZE],
164 					SM4_BLOCK_SIZE);
165 				src -= SM4_BLOCK_SIZE;
166 				dst -= SM4_BLOCK_SIZE;
167 			}
168 			crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE);
169 			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
170 			dst += nblocks * SM4_BLOCK_SIZE;
171 			src += (nblocks + 1) * SM4_BLOCK_SIZE;
172 			nbytes -= nblocks * SM4_BLOCK_SIZE;
173 		}
174 
175 		kernel_fpu_end();
176 		err = skcipher_walk_done(&walk, nbytes);
177 	}
178 
179 	return err;
180 }
181 EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt);
182 
183 static int cbc_decrypt(struct skcipher_request *req)
184 {
185 	return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
186 				sm4_aesni_avx_cbc_dec_blk8);
187 }
188 
189 int sm4_avx_ctr_crypt(struct skcipher_request *req,
190 			unsigned int bsize, sm4_crypt_func func)
191 {
192 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
193 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
194 	struct skcipher_walk walk;
195 	unsigned int nbytes;
196 	int err;
197 
198 	err = skcipher_walk_virt(&walk, req, false);
199 
200 	while ((nbytes = walk.nbytes) > 0) {
201 		const u8 *src = walk.src.virt.addr;
202 		u8 *dst = walk.dst.virt.addr;
203 
204 		kernel_fpu_begin();
205 
206 		while (nbytes >= bsize) {
207 			func(ctx->rkey_enc, dst, src, walk.iv);
208 			dst += bsize;
209 			src += bsize;
210 			nbytes -= bsize;
211 		}
212 
213 		while (nbytes >= SM4_BLOCK_SIZE) {
214 			u8 keystream[SM4_BLOCK_SIZE * 8];
215 			unsigned int nblocks = min(nbytes >> 4, 8u);
216 			int i;
217 
218 			for (i = 0; i < nblocks; i++) {
219 				memcpy(&keystream[i * SM4_BLOCK_SIZE],
220 					walk.iv, SM4_BLOCK_SIZE);
221 				crypto_inc(walk.iv, SM4_BLOCK_SIZE);
222 			}
223 			sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
224 					keystream, nblocks);
225 
226 			crypto_xor_cpy(dst, src, keystream,
227 					nblocks * SM4_BLOCK_SIZE);
228 			dst += nblocks * SM4_BLOCK_SIZE;
229 			src += nblocks * SM4_BLOCK_SIZE;
230 			nbytes -= nblocks * SM4_BLOCK_SIZE;
231 		}
232 
233 		kernel_fpu_end();
234 
235 		/* tail */
236 		if (walk.nbytes == walk.total && nbytes > 0) {
237 			u8 keystream[SM4_BLOCK_SIZE];
238 
239 			memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
240 			crypto_inc(walk.iv, SM4_BLOCK_SIZE);
241 
242 			sm4_crypt_block(ctx->rkey_enc, keystream, keystream);
243 
244 			crypto_xor_cpy(dst, src, keystream, nbytes);
245 			dst += nbytes;
246 			src += nbytes;
247 			nbytes = 0;
248 		}
249 
250 		err = skcipher_walk_done(&walk, nbytes);
251 	}
252 
253 	return err;
254 }
255 EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt);
256 
257 static int ctr_crypt(struct skcipher_request *req)
258 {
259 	return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE,
260 				sm4_aesni_avx_ctr_enc_blk8);
261 }
262 
263 static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
264 	{
265 		.base = {
266 			.cra_name		= "__ecb(sm4)",
267 			.cra_driver_name	= "__ecb-sm4-aesni-avx",
268 			.cra_priority		= 400,
269 			.cra_flags		= CRYPTO_ALG_INTERNAL,
270 			.cra_blocksize		= SM4_BLOCK_SIZE,
271 			.cra_ctxsize		= sizeof(struct sm4_ctx),
272 			.cra_module		= THIS_MODULE,
273 		},
274 		.min_keysize	= SM4_KEY_SIZE,
275 		.max_keysize	= SM4_KEY_SIZE,
276 		.walksize	= 8 * SM4_BLOCK_SIZE,
277 		.setkey		= sm4_skcipher_setkey,
278 		.encrypt	= sm4_avx_ecb_encrypt,
279 		.decrypt	= sm4_avx_ecb_decrypt,
280 	}, {
281 		.base = {
282 			.cra_name		= "__cbc(sm4)",
283 			.cra_driver_name	= "__cbc-sm4-aesni-avx",
284 			.cra_priority		= 400,
285 			.cra_flags		= CRYPTO_ALG_INTERNAL,
286 			.cra_blocksize		= SM4_BLOCK_SIZE,
287 			.cra_ctxsize		= sizeof(struct sm4_ctx),
288 			.cra_module		= THIS_MODULE,
289 		},
290 		.min_keysize	= SM4_KEY_SIZE,
291 		.max_keysize	= SM4_KEY_SIZE,
292 		.ivsize		= SM4_BLOCK_SIZE,
293 		.walksize	= 8 * SM4_BLOCK_SIZE,
294 		.setkey		= sm4_skcipher_setkey,
295 		.encrypt	= sm4_cbc_encrypt,
296 		.decrypt	= cbc_decrypt,
297 	}, {
298 		.base = {
299 			.cra_name		= "__ctr(sm4)",
300 			.cra_driver_name	= "__ctr-sm4-aesni-avx",
301 			.cra_priority		= 400,
302 			.cra_flags		= CRYPTO_ALG_INTERNAL,
303 			.cra_blocksize		= 1,
304 			.cra_ctxsize		= sizeof(struct sm4_ctx),
305 			.cra_module		= THIS_MODULE,
306 		},
307 		.min_keysize	= SM4_KEY_SIZE,
308 		.max_keysize	= SM4_KEY_SIZE,
309 		.ivsize		= SM4_BLOCK_SIZE,
310 		.chunksize	= SM4_BLOCK_SIZE,
311 		.walksize	= 8 * SM4_BLOCK_SIZE,
312 		.setkey		= sm4_skcipher_setkey,
313 		.encrypt	= ctr_crypt,
314 		.decrypt	= ctr_crypt,
315 	}
316 };
317 
318 static struct simd_skcipher_alg *
319 simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
320 
321 static int __init sm4_init(void)
322 {
323 	const char *feature_name;
324 
325 	if (!boot_cpu_has(X86_FEATURE_AVX) ||
326 	    !boot_cpu_has(X86_FEATURE_AES) ||
327 	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
328 		pr_info("AVX or AES-NI instructions are not detected.\n");
329 		return -ENODEV;
330 	}
331 
332 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
333 				&feature_name)) {
334 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
335 		return -ENODEV;
336 	}
337 
338 	return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
339 					ARRAY_SIZE(sm4_aesni_avx_skciphers),
340 					simd_sm4_aesni_avx_skciphers);
341 }
342 
343 static void __exit sm4_exit(void)
344 {
345 	simd_unregister_skciphers(sm4_aesni_avx_skciphers,
346 					ARRAY_SIZE(sm4_aesni_avx_skciphers),
347 					simd_sm4_aesni_avx_skciphers);
348 }
349 
350 module_init(sm4_init);
351 module_exit(sm4_exit);
352 
353 MODULE_LICENSE("GPL v2");
354 MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
355 MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized");
356 MODULE_ALIAS_CRYPTO("sm4");
357 MODULE_ALIAS_CRYPTO("sm4-aesni-avx");
358