/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AES-NI + SSE4.1 implementation of AEGIS-128
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 * Copyright 2024 Google LLC
 */

#include <linux/linkage.h>

/*
 * Register assignments shared by every routine in this file.
 *
 * STATE0-STATE4 hold the five 128-bit AEGIS-128 state blocks.  KEY and MSG
 * name the same register: aegis128_aesni_init is the only user of KEY and
 * it never touches MSG, while every other routine uses MSG only.  T0 is
 * overwritten by each aegis128_update; T1 is a free temporary.
 */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
#define KEY	%xmm5
#define MSG	%xmm5
#define T0	%xmm6
#define T1	%xmm7

/*
 * Initialization constants: the first 32 Fibonacci numbers (0, 1, 1, 2, 3,
 * 5, ...) reduced mod 256, split into two 16-byte blocks that are loaded
 * separately by aegis128_aesni_init.
 */
.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.align 16
.Laegis128_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/*
 * 16 bytes of 0xff followed by 16 bytes of 0x00.  An unaligned 16-byte load
 * from (.Lzeropad_mask + 16 - n), 1 <= n <= 15, yields a mask whose first n
 * bytes are 0xff and whose remaining bytes are zero.
 */
.section .rodata.cst32.zeropad_mask, "aM", @progbits, 32
.align 32
.Lzeropad_mask:
	.octa 0xffffffffffffffffffffffffffffffff
	.octa 0

.text

/*
 * aegis128_update
 *
 * One state update round, without the message XOR (the caller does that).
 * Every register receives AESRound(itself) XOR its cyclic successor:
 *   STATE4 <- AESRound(STATE4) ^ STATE0
 *   STATE0 <- AESRound(STATE0) ^ STATE1
 *   STATE1 <- AESRound(STATE1) ^ STATE2
 *   STATE2 <- AESRound(STATE2) ^ STATE3
 *   STATE3 <- AESRound(STATE3) ^ STATE4 (value before this round)
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state (shifted positions): the logical index of the
 *                block held in each register moves by one per round, so the
 *                layout repeats after every five rounds.
 * changed:
 *   T0
 */
.macro aegis128_update
	movdqa STATE4, T0	/* keep old STATE4; it is overwritten next */
	aesenc STATE0, STATE4
	aesenc STATE1, STATE0
	aesenc STATE2, STATE1
	aesenc STATE3, STATE2
	aesenc T0,     STATE3
.endm

/*
 * Load 1 <= LEN (%ecx) <= 15 bytes from the pointer SRC into the xmm register
 * MSG and zeroize any remaining bytes.  Clobbers %rax, %rcx, %r8, and the
 * flags.
 *
 * Each case combines two possibly overlapping loads (first bytes and last
 * bytes), so no byte outside [SRC, SRC + LEN) is ever read.
 */
.macro load_partial
	sub $8, %ecx			/* LEN - 8 */
	jle .Lle8\@

	/* Load 9 <= LEN <= 15 bytes: */
	movq (SRC), MSG			/* Load first 8 bytes */
	mov (SRC, %rcx), %rax		/* Load last 8 bytes */
	neg %ecx
	shl $3, %ecx			/* -8 * (LEN - 8); shr uses it mod 64 */
	shr %cl, %rax			/* Discard overlapping bytes */
	pinsrq $1, %rax, MSG
	jmp .Ldone\@

.Lle8\@:
	add $4, %ecx			/* LEN - 4 */
	jl .Llt4\@

	/* Load 4 <= LEN <= 8 bytes: */
	mov (SRC), %eax			/* Load first 4 bytes */
	mov (SRC, %rcx), %r8d		/* Load last 4 bytes */
	jmp .Lcombine\@

.Llt4\@:
	/* Load 1 <= LEN <= 3 bytes: */
	add $2, %ecx			/* LEN - 2 */
	movzbl (SRC), %eax		/* Load first byte */
	jl .Lmovq\@			/* LEN == 1; flags still from the add */
	movzwl (SRC, %rcx), %r8d	/* Load last 2 bytes */
.Lcombine\@:
	shl $3, %ecx			/* Bit offset of the last-bytes part */
	shl %cl, %r8
	or %r8, %rax			/* Combine the two parts */
.Lmovq\@:
	movq %rax, MSG			/* Also zeroes the high 8 bytes of MSG */
.Ldone\@:
.endm

/*
 * Store 1 <= LEN (%ecx) <= 15 bytes from the xmm register \msg to the pointer
 * DST.  Clobbers %rax, %rcx, %r8, and the flags.  \msg itself is unchanged.
 *
 * For LEN >= 4 the tail is written first with a full-width store that ends
 * exactly at DST + LEN; the head store that follows overwrites the part of it
 * that landed below the tail.  No byte outside [DST, DST + LEN) is written.
 */
.macro store_partial msg
	sub $8, %ecx			/* LEN - 8 */
	jl .Llt8\@

	/* Store 8 <= LEN <= 15 bytes: */
	pextrq $1, \msg, %rax
	mov %ecx, %r8d
	shl $3, %ecx
	ror %cl, %rax			/* Move the wanted bytes to the top */
	mov %rax, (DST, %r8)		/* Store last LEN - 8 bytes */
	movq \msg, (DST)		/* Store first 8 bytes */
	jmp .Ldone\@

.Llt8\@:
	add $4, %ecx			/* LEN - 4 */
	jl .Llt4\@

	/* Store 4 <= LEN <= 7 bytes: */
	pextrd $1, \msg, %eax
	mov %ecx, %r8d
	shl $3, %ecx
	ror %cl, %eax			/* Move the wanted bytes to the top */
	mov %eax, (DST, %r8)		/* Store last LEN - 4 bytes */
	movd \msg, (DST)		/* Store first 4 bytes */
	jmp .Ldone\@

.Llt4\@:
	/* Store 1 <= LEN <= 3 bytes: */
	pextrb $0, \msg, 0(DST)
	cmp $-2, %ecx			/* LEN - 4 == -2, i.e. LEN == 2? */
	jl .Ldone\@			/* LEN == 1 */
	pextrb $1, \msg, 1(DST)		/* Leaves the flags from cmp intact */
	je .Ldone\@			/* LEN == 2 */
	pextrb $2, \msg, 2(DST)
.Ldone\@:
.endm

/*
 * void aegis128_aesni_init(struct aegis_state *state,
 *			    const struct aegis_block *key,
 *			    const u8 iv[AEGIS128_NONCE_SIZE]);
 *
 * Build the initial state from the key and IV and write its five 16-byte
 * blocks to *state.  The starting blocks are
 *   S0 = key ^ iv,  S1 = const_1,  S2 = const_0,
 *   S3 = key ^ const_0,  S4 = key ^ const_1,
 * followed by 10 update rounds that absorb key and key ^ iv alternately.
 *
 * The key is read with an aligned load, so *key must be 16-byte aligned.
 * The IV and the state are accessed with unaligned loads/stores.
 */
SYM_FUNC_START(aegis128_aesni_init)
	.set STATEP, %rdi
	.set KEYP, %rsi
	.set IVP, %rdx

	/* load IV: */
	movdqu (IVP), T1

	/* load key: */
	movdqa (KEYP), KEY
	pxor KEY, T1			/* T1 = key ^ iv from here on */
	movdqa T1, STATE0
	movdqa KEY, STATE3
	movdqa KEY, STATE4

	/* load the constants: */
	movdqa .Laegis128_const_0(%rip), STATE2
	movdqa .Laegis128_const_1(%rip), STATE1
	pxor STATE2, STATE3
	pxor STATE1, STATE4

	/*
	 * update 10 times with KEY / KEY xor IV.  The XOR target walks
	 * STATE4, STATE3, ..., STATE0 because the state rotates through the
	 * registers by one position per update.
	 */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* store the state (10 updates: registers are back in order): */
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_init)

/*
 * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data,
 *			  unsigned int len);
 *
 * Absorb len bytes of associated data into the state, one 16-byte block per
 * update round.  Returns immediately, leaving *state untouched, if len is 0.
 *
 * len must be a multiple of 16: the loop only terminates when the remaining
 * length reaches exactly zero.
 */
SYM_FUNC_START(aegis128_aesni_ad)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set LEN, %edx

	test LEN, LEN
	jz .Lad_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/*
	 * The loop is unrolled five times because the state rotates through
	 * the registers by one position per update.  .Lad_out_N is the exit
	 * taken when the number of blocks processed is N modulo 5.
	 */
.align 8
.Lad_loop:
	movdqu 0x00(SRC), MSG
	aegis128_update
	pxor MSG, STATE4
	sub $0x10, LEN
	jz .Lad_out_1

	movdqu 0x10(SRC), MSG
	aegis128_update
	pxor MSG, STATE3
	sub $0x10, LEN
	jz .Lad_out_2

	movdqu 0x20(SRC), MSG
	aegis128_update
	pxor MSG, STATE2
	sub $0x10, LEN
	jz .Lad_out_3

	movdqu 0x30(SRC), MSG
	aegis128_update
	pxor MSG, STATE1
	sub $0x10, LEN
	jz .Lad_out_4

	movdqu 0x40(SRC), MSG
	aegis128_update
	pxor MSG, STATE0
	sub $0x10, LEN
	jz .Lad_out_0

	add $0x50, SRC
	jmp .Lad_loop

	/* store the state, undoing the register rotation: */
.Lad_out_0:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	RET

.Lad_out_1:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	RET

.Lad_out_2:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	RET

.Lad_out_3:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	RET

.Lad_out_4:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
.Lad_out:
	RET
SYM_FUNC_END(aegis128_aesni_ad)

/*
 * Encrypt the 16-byte block at offset \i * 16 from SRC into the same offset
 * from DST, then absorb the plaintext into the state.
 *
 * \s0-\s4 name the registers currently holding state blocks 0-4 (\s0 is
 * accepted for symmetry but not referenced).  The ciphertext is
 *   plaintext ^ \s1 ^ \s4 ^ (\s2 & \s3).
 * After the update the plaintext is XORed into \s4, the register that then
 * holds block 0.  Clobbers MSG, T0, T1; subtracts 16 from LEN and jumps to
 * .Lenc_out_\i when LEN reaches zero.
 */
.macro encrypt_block s0 s1 s2 s3 s4 i
	movdqu (\i * 0x10)(SRC), MSG
	movdqa MSG, T0
	pxor \s1, T0
	pxor \s4, T0
	movdqa \s2, T1
	pand \s3, T1
	pxor T1, T0
	movdqu T0, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	jz .Lenc_out_\i
.endm

/*
 * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *dst,
 *			   unsigned int len);
 *
 * Encrypt len bytes from src to dst in 16-byte blocks and write the updated
 * state back to *state.
 *
 * len must be nonzero and a multiple of 16: there is no zero-length check,
 * and the loop only terminates when the remaining length reaches exactly
 * zero.
 */
SYM_FUNC_START(aegis128_aesni_enc)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/*
	 * Unrolled five times to follow the rotation of the state through
	 * the registers.  Each encrypt_block exits to .Lenc_out_<i> (i being
	 * its last argument) if it consumed the final block.
	 */
.align 8
.Lenc_loop:
	encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_loop

	/* store the state, undoing the register rotation: */
.Lenc_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	RET

.Lenc_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	RET

.Lenc_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	RET

.Lenc_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	RET

.Lenc_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_enc)

/*
 * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src,
 *				u8 *dst, unsigned int len);
 *
 * Encrypt a final partial block.  len must satisfy 1 <= len <= 15, as
 * required by load_partial and store_partial.  Exactly len bytes are read
 * from src and written to dst; the state absorbs the plaintext padded with
 * zeros to 16 bytes and is written back to *state.
 *
 * Uses %r9 to keep len across load_partial, which destroys %ecx.
 */
SYM_FUNC_START(aegis128_aesni_enc_tail)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx	/* {load,store}_partial rely on this being %ecx */

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* encrypt message: */
	mov LEN, %r9d
	load_partial			/* MSG = zero-padded plaintext */

	movdqa MSG, T0
	pxor STATE1, T0
	pxor STATE4, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0

	mov %r9d, LEN
	store_partial T0		/* only the first len bytes are written */

	aegis128_update
	pxor MSG, STATE4

	/* store the state (one update: rotated by one position): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_enc_tail)

/*
 * Decrypt the 16-byte block at offset \i * 16 from SRC into the same offset
 * from DST, then absorb the recovered plaintext into the state.
 *
 * \s0-\s4 name the registers currently holding state blocks 0-4 (\s0 is
 * accepted for symmetry but not referenced).  The plaintext is
 *   ciphertext ^ \s1 ^ \s4 ^ (\s2 & \s3),
 * computed in place in MSG.  After the update it is XORed into \s4, the
 * register that then holds block 0.  Clobbers MSG, T0, T1; subtracts 16 from
 * LEN and jumps to .Ldec_out_\i when LEN reaches zero.
 */
.macro decrypt_block s0 s1 s2 s3 s4 i
	movdqu (\i * 0x10)(SRC), MSG
	pxor \s1, MSG
	pxor \s4, MSG
	movdqa \s2, T1
	pand \s3, T1
	pxor T1, MSG
	movdqu MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	jz .Ldec_out_\i
.endm

/*
 * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *dst,
 *			   unsigned int len);
 *
 * Decrypt len bytes from src to dst in 16-byte blocks and write the updated
 * state back to *state.
 *
 * len must be nonzero and a multiple of 16: there is no zero-length check,
 * and the loop only terminates when the remaining length reaches exactly
 * zero.
 */
SYM_FUNC_START(aegis128_aesni_dec)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/*
	 * Unrolled five times to follow the rotation of the state through
	 * the registers.  Each decrypt_block exits to .Ldec_out_<i> (i being
	 * its last argument) if it consumed the final block.
	 */
.align 8
.Ldec_loop:
	decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_loop

	/* store the state, undoing the register rotation: */
.Ldec_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	RET

.Ldec_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	RET

.Ldec_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	RET

.Ldec_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	RET

.Ldec_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_dec)

/*
 * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src,
 *				u8 *dst, unsigned int len);
 *
 * Decrypt a final partial block.  len must satisfy 1 <= len <= 15, as
 * required by load_partial and store_partial.  Exactly len bytes are read
 * from src and written to dst; the state absorbs the recovered plaintext
 * padded with zeros to 16 bytes and is written back to *state.
 *
 * Uses %r9 to keep len across load_partial and store_partial, which destroy
 * %ecx.
 */
SYM_FUNC_START(aegis128_aesni_dec_tail)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx	/* {load,store}_partial rely on this being %ecx */

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* decrypt message: */
	mov LEN, %r9d
	load_partial			/* MSG = zero-padded ciphertext */

	pxor STATE1, MSG
	pxor STATE4, MSG
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, MSG

	mov %r9d, LEN
	store_partial MSG		/* only the first len bytes are written */

	/*
	 * mask with byte count: bytes of MSG at index >= len now hold
	 * keystream rather than zeros, so clear them before MSG is absorbed.
	 * The 16-byte load starts len bytes before the zero half of
	 * .Lzeropad_mask, giving len bytes of 0xff followed by zeros.
	 */
	lea .Lzeropad_mask+16(%rip), %rax
	sub %r9, %rax
	movdqu (%rax), T0
	pand T0, MSG

	aegis128_update
	pxor MSG, STATE4

	/* store the state (one update: rotated by one position): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_dec_tail)

/*
 * void aegis128_aesni_final(struct aegis_state *state,
 *			     struct aegis_block *tag_xor,
 *			     unsigned int assoclen, unsigned int cryptlen);
 *
 * Finalize: run seven update rounds with a block derived from the two
 * lengths, then XOR all five state blocks into *tag_xor.  *state is only
 * read; the updated state is not written back.
 */
SYM_FUNC_START(aegis128_aesni_final)
	.set STATEP, %rdi
	.set TAG_XOR, %rsi
	.set ASSOCLEN, %edx
	.set CRYPTLEN, %ecx

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/*
	 * prepare length block: assoclen in the low 64-bit half, cryptlen in
	 * the high half, each zero-extended from 32 bits.
	 */
	movd ASSOCLEN, MSG
	pinsrd $2, CRYPTLEN, MSG
	psllq $3, MSG /* multiply by 8 (to get bit count) */

	pxor STATE3, MSG

	/* update state: */
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3
	aegis128_update; pxor MSG, STATE2
	aegis128_update; pxor MSG, STATE1
	aegis128_update; pxor MSG, STATE0
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3

	/* xor tag (all five blocks, so register rotation is irrelevant): */
	movdqu (TAG_XOR), MSG

	pxor STATE0, MSG
	pxor STATE1, MSG
	pxor STATE2, MSG
	pxor STATE3, MSG
	pxor STATE4, MSG

	movdqu MSG, (TAG_XOR)
	RET
SYM_FUNC_END(aegis128_aesni_final)
