xref: /linux/lib/crypto/arm64/aes-modes.S (revision 370c3883195566ee3e7d79e0146c3d735a406573)
14b908403SEric Biggers/* SPDX-License-Identifier: GPL-2.0-only */
24b908403SEric Biggers/*
34b908403SEric Biggers * Chaining mode wrappers for AES
44b908403SEric Biggers *
54b908403SEric Biggers * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
64b908403SEric Biggers */
74b908403SEric Biggers
84b908403SEric Biggers/* included by aes-ce.S and aes-neon.S */
94b908403SEric Biggers
104b908403SEric Biggers	.text
114b908403SEric Biggers	.align		4
124b908403SEric Biggers
/*
 * MAX_STRIDE is the number of AES blocks processed per bulk-loop iteration
 * (4 or 5).  The including file may predefine it; otherwise default to 4.
 */
134b908403SEric Biggers#ifndef MAX_STRIDE
144b908403SEric Biggers#define MAX_STRIDE	4
154b908403SEric Biggers#endif
164b908403SEric Biggers
/*
 * ST4(...)/ST5(...) expand their arguments only when MAX_STRIDE is 4 or 5
 * respectively, so shared code below can interleave stride-specific
 * instructions without duplicating whole loops.
 */
174b908403SEric Biggers#if MAX_STRIDE == 4
184b908403SEric Biggers#define ST4(x...) x
194b908403SEric Biggers#define ST5(x...)
204b908403SEric Biggers#else
214b908403SEric Biggers#define ST4(x...)
224b908403SEric Biggers#define ST5(x...) x
234b908403SEric Biggers#endif
244b908403SEric Biggers
/*
 * Encrypt four AES blocks held in v0-v3 in parallel.
 * w3 = number of rounds, x2 = round key pointer; x8/w7 are clobbered as
 * scratch by the encrypt_block4x macro.  Local helper, called via bl.
 */
254b908403SEric BiggersSYM_FUNC_START_LOCAL(aes_encrypt_block4x)
264b908403SEric Biggers	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
274b908403SEric Biggers	ret
284b908403SEric BiggersSYM_FUNC_END(aes_encrypt_block4x)
294b908403SEric Biggers
/*
 * Decrypt four AES blocks held in v0-v3 in parallel.
 * w3 = number of rounds, x2 = round key pointer; x8/w7 are clobbered as
 * scratch by the decrypt_block4x macro.  Local helper, called via bl.
 */
304b908403SEric BiggersSYM_FUNC_START_LOCAL(aes_decrypt_block4x)
314b908403SEric Biggers	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
324b908403SEric Biggers	ret
334b908403SEric BiggersSYM_FUNC_END(aes_decrypt_block4x)
344b908403SEric Biggers
/*
 * Five-block variants of the helpers above, operating on v0-v4.
 * Only emitted when the including file selects MAX_STRIDE == 5.
 */
354b908403SEric Biggers#if MAX_STRIDE == 5
364b908403SEric BiggersSYM_FUNC_START_LOCAL(aes_encrypt_block5x)
374b908403SEric Biggers	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
384b908403SEric Biggers	ret
394b908403SEric BiggersSYM_FUNC_END(aes_encrypt_block5x)
404b908403SEric Biggers
414b908403SEric BiggersSYM_FUNC_START_LOCAL(aes_decrypt_block5x)
424b908403SEric Biggers	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
434b908403SEric Biggers	ret
444b908403SEric BiggersSYM_FUNC_END(aes_decrypt_block5x)
454b908403SEric Biggers#endif
464b908403SEric Biggers
474b908403SEric Biggers	/*
484b908403SEric Biggers	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
494b908403SEric Biggers	 *		   int blocks)
504b908403SEric Biggers	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
514b908403SEric Biggers	 *		   int blocks)
524b908403SEric Biggers	 */
534b908403SEric Biggers
/*
 * ECB encryption: x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 = number of 16-byte blocks.  Processes MAX_STRIDE blocks per
 * iteration while enough remain, then one block at a time.
 */
544b908403SEric BiggersAES_FUNC_START(aes_ecb_encrypt)
554b908403SEric Biggers	frame_push	0
564b908403SEric Biggers
574b908403SEric Biggers	enc_prepare	w3, x2, x5
584b908403SEric Biggers
594b908403SEric Biggers.LecbencloopNx:
	/* fewer than MAX_STRIDE blocks left? -> fall back to 1x loop */
604b908403SEric Biggers	subs		w4, w4, #MAX_STRIDE
614b908403SEric Biggers	bmi		.Lecbenc1x
624b908403SEric Biggers	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
634b908403SEric BiggersST4(	bl		aes_encrypt_block4x		)
644b908403SEric BiggersST5(	ld1		{v4.16b}, [x1], #16		)
654b908403SEric BiggersST5(	bl		aes_encrypt_block5x		)
664b908403SEric Biggers	st1		{v0.16b-v3.16b}, [x0], #64
674b908403SEric BiggersST5(	st1		{v4.16b}, [x0], #16		)
684b908403SEric Biggers	b		.LecbencloopNx
694b908403SEric Biggers.Lecbenc1x:
	/* undo the over-subtraction; w4 now holds the remaining block count */
704b908403SEric Biggers	adds		w4, w4, #MAX_STRIDE
714b908403SEric Biggers	beq		.Lecbencout
724b908403SEric Biggers.Lecbencloop:
734b908403SEric Biggers	ld1		{v0.16b}, [x1], #16		/* get next pt block */
744b908403SEric Biggers	encrypt_block	v0, w3, x2, x5, w6
754b908403SEric Biggers	st1		{v0.16b}, [x0], #16
764b908403SEric Biggers	subs		w4, w4, #1
774b908403SEric Biggers	bne		.Lecbencloop
784b908403SEric Biggers.Lecbencout:
794b908403SEric Biggers	frame_pop
804b908403SEric Biggers	ret
814b908403SEric BiggersAES_FUNC_END(aes_ecb_encrypt)
824b908403SEric Biggers
834b908403SEric Biggers
/*
 * ECB decryption: same argument layout and loop structure as
 * aes_ecb_encrypt above, using the decryption round keys/helpers.
 */
844b908403SEric BiggersAES_FUNC_START(aes_ecb_decrypt)
854b908403SEric Biggers	frame_push	0
864b908403SEric Biggers
874b908403SEric Biggers	dec_prepare	w3, x2, x5
884b908403SEric Biggers
894b908403SEric Biggers.LecbdecloopNx:
	/* fewer than MAX_STRIDE blocks left? -> fall back to 1x loop */
904b908403SEric Biggers	subs		w4, w4, #MAX_STRIDE
914b908403SEric Biggers	bmi		.Lecbdec1x
924b908403SEric Biggers	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
934b908403SEric BiggersST4(	bl		aes_decrypt_block4x		)
944b908403SEric BiggersST5(	ld1		{v4.16b}, [x1], #16		)
954b908403SEric BiggersST5(	bl		aes_decrypt_block5x		)
964b908403SEric Biggers	st1		{v0.16b-v3.16b}, [x0], #64
974b908403SEric BiggersST5(	st1		{v4.16b}, [x0], #16		)
984b908403SEric Biggers	b		.LecbdecloopNx
994b908403SEric Biggers.Lecbdec1x:
	/* undo the over-subtraction; w4 now holds the remaining block count */
1004b908403SEric Biggers	adds		w4, w4, #MAX_STRIDE
1014b908403SEric Biggers	beq		.Lecbdecout
1024b908403SEric Biggers.Lecbdecloop:
1034b908403SEric Biggers	ld1		{v0.16b}, [x1], #16		/* get next ct block */
1044b908403SEric Biggers	decrypt_block	v0, w3, x2, x5, w6
1054b908403SEric Biggers	st1		{v0.16b}, [x0], #16
1064b908403SEric Biggers	subs		w4, w4, #1
1074b908403SEric Biggers	bne		.Lecbdecloop
1084b908403SEric Biggers.Lecbdecout:
1094b908403SEric Biggers	frame_pop
1104b908403SEric Biggers	ret
1114b908403SEric BiggersAES_FUNC_END(aes_ecb_decrypt)
1124b908403SEric Biggers
1134b908403SEric Biggers
1144b908403SEric Biggers	/*
1154b908403SEric Biggers	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
1164b908403SEric Biggers	 *		   int blocks, u8 iv[])
1174b908403SEric Biggers	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
1184b908403SEric Biggers	 *		   int blocks, u8 iv[])
1194b908403SEric Biggers	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
1204b908403SEric Biggers	 *			 int rounds, int blocks, u8 iv[],
1214b908403SEric Biggers	 *			 u32 const rk2[]);
1224b908403SEric Biggers	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
1234b908403SEric Biggers	 *			 int rounds, int blocks, u8 iv[],
1244b908403SEric Biggers	 *			 u32 const rk2[]);
1254b908403SEric Biggers	 */
1264b908403SEric Biggers
/*
 * CBC encryption.  Two entry points share the loop below:
 *  - aes_essiv_cbc_encrypt: first encrypts the IV with rk2 (x6, always
 *    treated as an AES-256 key, hence 14 rounds) to derive the ESSIV
 *    tweak, then switches back to rk1 and falls through.
 *  - aes_cbc_encrypt: uses the caller's IV (x5) directly.
 * CBC encryption is inherently serial (each block feeds the next), so
 * unlike ECB the 4x path cannot use the parallel block helpers; it only
 * amortizes the load/store traffic over 4 blocks.
 */
1274b908403SEric BiggersAES_FUNC_START(aes_essiv_cbc_encrypt)
1284b908403SEric Biggers	ld1		{v4.16b}, [x5]			/* get iv */
1294b908403SEric Biggers
1304b908403SEric Biggers	mov		w8, #14				/* AES-256: 14 rounds */
1314b908403SEric Biggers	enc_prepare	w8, x6, x7
1324b908403SEric Biggers	encrypt_block	v4, w8, x6, x7, w9
1334b908403SEric Biggers	enc_switch_key	w3, x2, x6
1344b908403SEric Biggers	b		.Lcbcencloop4x
1354b908403SEric Biggers
1364b908403SEric BiggersAES_FUNC_START(aes_cbc_encrypt)
1374b908403SEric Biggers	ld1		{v4.16b}, [x5]			/* get iv */
1384b908403SEric Biggers	enc_prepare	w3, x2, x6
1394b908403SEric Biggers
1404b908403SEric Biggers.Lcbcencloop4x:
1414b908403SEric Biggers	subs		w4, w4, #4
1424b908403SEric Biggers	bmi		.Lcbcenc1x
1434b908403SEric Biggers	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
1444b908403SEric Biggers	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
1454b908403SEric Biggers	encrypt_block	v0, w3, x2, x6, w7
1464b908403SEric Biggers	eor		v1.16b, v1.16b, v0.16b
1474b908403SEric Biggers	encrypt_block	v1, w3, x2, x6, w7
1484b908403SEric Biggers	eor		v2.16b, v2.16b, v1.16b
1494b908403SEric Biggers	encrypt_block	v2, w3, x2, x6, w7
1504b908403SEric Biggers	eor		v3.16b, v3.16b, v2.16b
1514b908403SEric Biggers	encrypt_block	v3, w3, x2, x6, w7
1524b908403SEric Biggers	st1		{v0.16b-v3.16b}, [x0], #64
	/* last ciphertext block becomes the chaining value */
1534b908403SEric Biggers	mov		v4.16b, v3.16b
1544b908403SEric Biggers	b		.Lcbcencloop4x
1554b908403SEric Biggers.Lcbcenc1x:
1564b908403SEric Biggers	adds		w4, w4, #4
1574b908403SEric Biggers	beq		.Lcbcencout
1584b908403SEric Biggers.Lcbcencloop:
1594b908403SEric Biggers	ld1		{v0.16b}, [x1], #16		/* get next pt block */
1604b908403SEric Biggers	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
1614b908403SEric Biggers	encrypt_block	v4, w3, x2, x6, w7
1624b908403SEric Biggers	st1		{v4.16b}, [x0], #16
1634b908403SEric Biggers	subs		w4, w4, #1
1644b908403SEric Biggers	bne		.Lcbcencloop
1654b908403SEric Biggers.Lcbcencout:
1664b908403SEric Biggers	st1		{v4.16b}, [x5]			/* return iv */
1674b908403SEric Biggers	ret
1684b908403SEric BiggersAES_FUNC_END(aes_cbc_encrypt)
1694b908403SEric BiggersAES_FUNC_END(aes_essiv_cbc_encrypt)
1704b908403SEric Biggers
/*
 * CBC decryption.  Two entry points share the loop below (mirroring the
 * encryption side): the ESSIV variant first encrypts the IV with rk2
 * (x6, 14 rounds = AES-256) to derive the tweak, then joins the common
 * path.  Unlike CBC encryption, decryption parallelizes: MAX_STRIDE
 * ciphertext blocks are decrypted at once, then XORed with the previous
 * ciphertext blocks (kept in spare registers / reloaded from the input).
 * The register alias `cbciv` holds the current chaining value.
 */
1714b908403SEric BiggersAES_FUNC_START(aes_essiv_cbc_decrypt)
1724b908403SEric Biggers	ld1		{cbciv.16b}, [x5]		/* get iv */
1734b908403SEric Biggers
1744b908403SEric Biggers	mov		w8, #14				/* AES-256: 14 rounds */
1754b908403SEric Biggers	enc_prepare	w8, x6, x7
1764b908403SEric Biggers	encrypt_block	cbciv, w8, x6, x7, w9
1774b908403SEric Biggers	b		.Lessivcbcdecstart
1784b908403SEric Biggers
1794b908403SEric BiggersAES_FUNC_START(aes_cbc_decrypt)
1804b908403SEric Biggers	ld1		{cbciv.16b}, [x5]		/* get iv */
1814b908403SEric Biggers.Lessivcbcdecstart:
1824b908403SEric Biggers	frame_push	0
1834b908403SEric Biggers	dec_prepare	w3, x2, x6
1844b908403SEric Biggers
1854b908403SEric Biggers.LcbcdecloopNx:
1864b908403SEric Biggers	subs		w4, w4, #MAX_STRIDE
1874b908403SEric Biggers	bmi		.Lcbcdec1x
1884b908403SEric Biggers	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
1894b908403SEric Biggers#if MAX_STRIDE == 5
1904b908403SEric Biggers	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	/* save ct blocks 0-2 before decryption overwrites v0-v2 in place */
1914b908403SEric Biggers	mov		v5.16b, v0.16b
1924b908403SEric Biggers	mov		v6.16b, v1.16b
1934b908403SEric Biggers	mov		v7.16b, v2.16b
1944b908403SEric Biggers	bl		aes_decrypt_block5x
	/* reload ct blocks 3-4 from the input; block 4 is the next iv */
1954b908403SEric Biggers	sub		x1, x1, #32
1964b908403SEric Biggers	eor		v0.16b, v0.16b, cbciv.16b
1974b908403SEric Biggers	eor		v1.16b, v1.16b, v5.16b
1984b908403SEric Biggers	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
1994b908403SEric Biggers	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
2004b908403SEric Biggers	eor		v2.16b, v2.16b, v6.16b
2014b908403SEric Biggers	eor		v3.16b, v3.16b, v7.16b
2024b908403SEric Biggers	eor		v4.16b, v4.16b, v5.16b
2034b908403SEric Biggers#else
	/* save ct blocks 0-2 before decryption overwrites v0-v2 in place */
2044b908403SEric Biggers	mov		v4.16b, v0.16b
2054b908403SEric Biggers	mov		v5.16b, v1.16b
2064b908403SEric Biggers	mov		v6.16b, v2.16b
2074b908403SEric Biggers	bl		aes_decrypt_block4x
	/* reload ct block 3 from the input; it becomes the next iv */
2084b908403SEric Biggers	sub		x1, x1, #16
2094b908403SEric Biggers	eor		v0.16b, v0.16b, cbciv.16b
2104b908403SEric Biggers	eor		v1.16b, v1.16b, v4.16b
2114b908403SEric Biggers	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
2124b908403SEric Biggers	eor		v2.16b, v2.16b, v5.16b
2134b908403SEric Biggers	eor		v3.16b, v3.16b, v6.16b
2144b908403SEric Biggers#endif
2154b908403SEric Biggers	st1		{v0.16b-v3.16b}, [x0], #64
2164b908403SEric BiggersST5(	st1		{v4.16b}, [x0], #16		)
2174b908403SEric Biggers	b		.LcbcdecloopNx
2184b908403SEric Biggers.Lcbcdec1x:
2194b908403SEric Biggers	adds		w4, w4, #MAX_STRIDE
2204b908403SEric Biggers	beq		.Lcbcdecout
2214b908403SEric Biggers.Lcbcdecloop:
2224b908403SEric Biggers	ld1		{v1.16b}, [x1], #16		/* get next ct block */
2234b908403SEric Biggers	mov		v0.16b, v1.16b			/* ...and copy to v0 */
2244b908403SEric Biggers	decrypt_block	v0, w3, x2, x6, w7
2254b908403SEric Biggers	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
2264b908403SEric Biggers	mov		cbciv.16b, v1.16b		/* ct is next iv */
2274b908403SEric Biggers	st1		{v0.16b}, [x0], #16
2284b908403SEric Biggers	subs		w4, w4, #1
2294b908403SEric Biggers	bne		.Lcbcdecloop
2304b908403SEric Biggers.Lcbcdecout:
2314b908403SEric Biggers	st1		{cbciv.16b}, [x5]		/* return iv */
2324b908403SEric Biggers	frame_pop
2334b908403SEric Biggers	ret
2344b908403SEric BiggersAES_FUNC_END(aes_cbc_decrypt)
2354b908403SEric BiggersAES_FUNC_END(aes_essiv_cbc_decrypt)
2364b908403SEric Biggers
2374b908403SEric Biggers
2384b908403SEric Biggers	/*
2394b908403SEric Biggers	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
2404b908403SEric Biggers	 *		       int rounds, int bytes, u8 const iv[])
2414b908403SEric Biggers	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
2424b908403SEric Biggers	 *		       int rounds, int bytes, u8 const iv[])
2434b908403SEric Biggers	 */
2444b908403SEric Biggers
/*
 * CBC ciphertext stealing (CTS), encrypt side: handles the final
 * 16 < bytes <= 32 of a CBC-CTS message as one full block plus a
 * partial block.  .Lcts_permute_table is indexed by the partial length
 * (x4 = bytes - 16) to build tbl masks that shift/truncate the partial
 * block; the loads and stores of the two blocks overlap so exactly
 * `bytes` bytes are consumed and produced.
 */
2454b908403SEric BiggersAES_FUNC_START(aes_cbc_cts_encrypt)
2464b908403SEric Biggers	adr_l		x8, .Lcts_permute_table
2474b908403SEric Biggers	sub		x4, x4, #16
2484b908403SEric Biggers	add		x9, x8, #32
2494b908403SEric Biggers	add		x8, x8, x4
2504b908403SEric Biggers	sub		x9, x9, x4
2514b908403SEric Biggers	ld1		{v3.16b}, [x8]
2524b908403SEric Biggers	ld1		{v4.16b}, [x9]
2534b908403SEric Biggers
2544b908403SEric Biggers	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
2554b908403SEric Biggers	ld1		{v1.16b}, [x1]
2564b908403SEric Biggers
2574b908403SEric Biggers	ld1		{v5.16b}, [x5]			/* get iv */
2584b908403SEric Biggers	enc_prepare	w3, x2, x6
2594b908403SEric Biggers
2604b908403SEric Biggers	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
2614b908403SEric Biggers	tbl		v1.16b, {v1.16b}, v4.16b
2624b908403SEric Biggers	encrypt_block	v0, w3, x2, x6, w7
2634b908403SEric Biggers
2644b908403SEric Biggers	eor		v1.16b, v1.16b, v0.16b
2654b908403SEric Biggers	tbl		v0.16b, {v0.16b}, v3.16b
2664b908403SEric Biggers	encrypt_block	v1, w3, x2, x6, w7
2674b908403SEric Biggers
	/* CTS swaps the order: full final block first, truncated block last */
2684b908403SEric Biggers	add		x4, x0, x4
2694b908403SEric Biggers	st1		{v0.16b}, [x4]			/* overlapping stores */
2704b908403SEric Biggers	st1		{v1.16b}, [x0]
2714b908403SEric Biggers	ret
2724b908403SEric BiggersAES_FUNC_END(aes_cbc_cts_encrypt)
2734b908403SEric Biggers
/*
 * CBC ciphertext stealing (CTS), decrypt side: inverse of
 * aes_cbc_cts_encrypt.  Same permute-table setup; the first block is
 * decrypted, merged with the stolen tail via tbl/tbx, and the combined
 * block decrypted again before the final IV XOR.
 */
2744b908403SEric BiggersAES_FUNC_START(aes_cbc_cts_decrypt)
2754b908403SEric Biggers	adr_l		x8, .Lcts_permute_table
2764b908403SEric Biggers	sub		x4, x4, #16
2774b908403SEric Biggers	add		x9, x8, #32
2784b908403SEric Biggers	add		x8, x8, x4
2794b908403SEric Biggers	sub		x9, x9, x4
2804b908403SEric Biggers	ld1		{v3.16b}, [x8]
2814b908403SEric Biggers	ld1		{v4.16b}, [x9]
2824b908403SEric Biggers
2834b908403SEric Biggers	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
2844b908403SEric Biggers	ld1		{v1.16b}, [x1]
2854b908403SEric Biggers
2864b908403SEric Biggers	ld1		{v5.16b}, [x5]			/* get iv */
2874b908403SEric Biggers	dec_prepare	w3, x2, x6
2884b908403SEric Biggers
2894b908403SEric Biggers	decrypt_block	v0, w3, x2, x6, w7
2904b908403SEric Biggers	tbl		v2.16b, {v0.16b}, v3.16b
2914b908403SEric Biggers	eor		v2.16b, v2.16b, v1.16b
2924b908403SEric Biggers
	/* graft the stolen ciphertext bytes onto the decrypted block */
2934b908403SEric Biggers	tbx		v0.16b, {v1.16b}, v4.16b
2944b908403SEric Biggers	decrypt_block	v0, w3, x2, x6, w7
2954b908403SEric Biggers	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
2964b908403SEric Biggers
2974b908403SEric Biggers	add		x4, x0, x4
2984b908403SEric Biggers	st1		{v2.16b}, [x4]			/* overlapping stores */
2994b908403SEric Biggers	st1		{v0.16b}, [x0]
3004b908403SEric Biggers	ret
3014b908403SEric BiggersAES_FUNC_END(aes_cbc_cts_decrypt)
3024b908403SEric Biggers
	/*
	 * Sliding permute table used by the CTS and CTR tail code above.
	 * Indexing into it at different offsets yields tbl/tbx masks that
	 * shift a vector by 1..15 bytes; 0xff entries select zero (tbl)
	 * or leave the destination untouched (tbx).
	 */
3034b908403SEric Biggers	.section	".rodata", "a"
3044b908403SEric Biggers	.align		6
3054b908403SEric Biggers.Lcts_permute_table:
3064b908403SEric Biggers	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
3074b908403SEric Biggers	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
3084b908403SEric Biggers	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
3094b908403SEric Biggers	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
3104b908403SEric Biggers	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
3114b908403SEric Biggers	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
3124b908403SEric Biggers	.previous
3134b908403SEric Biggers
3144b908403SEric Biggers	/*
3154b908403SEric Biggers	 * This macro generates the code for CTR and XCTR mode.
3164b908403SEric Biggers	 */
3174b908403SEric Biggers.macro ctr_encrypt xctr
	/*
	 * \xctr == 0 generates CTR mode: the 128-bit counter in vctr is
	 * big-endian, so its low 64 bits are byte-reversed (rev) before
	 * arithmetic.  \xctr == 1 generates XCTR mode: a little-endian
	 * 64-bit block counter (derived from the byte counter in w6) is
	 * XORed into the low half of the IV for each block.
	 * Register layout matches the C prototypes further down this file.
	 */
3184b908403SEric Biggers	// Arguments
3194b908403SEric Biggers	OUT		.req x0
3204b908403SEric Biggers	IN		.req x1
3214b908403SEric Biggers	KEY		.req x2
3224b908403SEric Biggers	ROUNDS_W	.req w3
3234b908403SEric Biggers	BYTES_W		.req w4
3244b908403SEric Biggers	IV		.req x5
3254b908403SEric Biggers	BYTE_CTR_W 	.req w6		// XCTR only
3264b908403SEric Biggers	// Intermediate values
3274b908403SEric Biggers	CTR_W		.req w11	// XCTR only
3284b908403SEric Biggers	CTR		.req x11	// XCTR only
3294b908403SEric Biggers	IV_PART		.req x12
3304b908403SEric Biggers	BLOCKS		.req x13
3314b908403SEric Biggers	BLOCKS_W	.req w13
3324b908403SEric Biggers
3334b908403SEric Biggers	frame_push	0
3344b908403SEric Biggers
3354b908403SEric Biggers	enc_prepare	ROUNDS_W, KEY, IV_PART
3364b908403SEric Biggers	ld1		{vctr.16b}, [IV]
3374b908403SEric Biggers
3384b908403SEric Biggers	/*
3394b908403SEric Biggers	 * Keep 64 bits of the IV in a register.  For CTR mode this lets us
3404b908403SEric Biggers	 * easily increment the IV.  For XCTR mode this lets us efficiently XOR
3414b908403SEric Biggers	 * the 64-bit counter with the IV.
3424b908403SEric Biggers	 */
3434b908403SEric Biggers	.if \xctr
3444b908403SEric Biggers		umov		IV_PART, vctr.d[0]
		/* block counter = byte counter / 16 */
3454b908403SEric Biggers		lsr		CTR_W, BYTE_CTR_W, #4
3464b908403SEric Biggers	.else
3474b908403SEric Biggers		umov		IV_PART, vctr.d[1]
3484b908403SEric Biggers		rev		IV_PART, IV_PART
3494b908403SEric Biggers	.endif
3504b908403SEric Biggers
3514b908403SEric Biggers.LctrloopNx\xctr:
	/* BLOCKS_W = min(ceil(BYTES_W / 16), MAX_STRIDE) */
3524b908403SEric Biggers	add		BLOCKS_W, BYTES_W, #15
3534b908403SEric Biggers	sub		BYTES_W, BYTES_W, #MAX_STRIDE << 4
3544b908403SEric Biggers	lsr		BLOCKS_W, BLOCKS_W, #4
3554b908403SEric Biggers	mov		w8, #MAX_STRIDE
3564b908403SEric Biggers	cmp		BLOCKS_W, w8
3574b908403SEric Biggers	csel		BLOCKS_W, BLOCKS_W, w8, lt
3584b908403SEric Biggers
3594b908403SEric Biggers	/*
3604b908403SEric Biggers	 * Set up the counter values in v0-v{MAX_STRIDE-1}.
3614b908403SEric Biggers	 *
3624b908403SEric Biggers	 * If we are encrypting less than MAX_STRIDE blocks, the tail block
3634b908403SEric Biggers	 * handling code expects the last keystream block to be in
3644b908403SEric Biggers	 * v{MAX_STRIDE-1}.  For example: if encrypting two blocks with
3654b908403SEric Biggers	 * MAX_STRIDE=5, then v3 and v4 should have the next two counter blocks.
3664b908403SEric Biggers	 */
3674b908403SEric Biggers	.if \xctr
3684b908403SEric Biggers		add		CTR, CTR, BLOCKS
3694b908403SEric Biggers	.else
		/* carry set here means the 64-bit counter wrapped */
3704b908403SEric Biggers		adds		IV_PART, IV_PART, BLOCKS
3714b908403SEric Biggers	.endif
3724b908403SEric Biggers	mov		v0.16b, vctr.16b
3734b908403SEric Biggers	mov		v1.16b, vctr.16b
3744b908403SEric Biggers	mov		v2.16b, vctr.16b
3754b908403SEric Biggers	mov		v3.16b, vctr.16b
3764b908403SEric BiggersST5(	mov		v4.16b, vctr.16b		)
3774b908403SEric Biggers	.if \xctr
		/* per-block counter values, counting back from CTR */
3784b908403SEric Biggers		sub		x6, CTR, #MAX_STRIDE - 1
3794b908403SEric Biggers		sub		x7, CTR, #MAX_STRIDE - 2
3804b908403SEric Biggers		sub		x8, CTR, #MAX_STRIDE - 3
3814b908403SEric Biggers		sub		x9, CTR, #MAX_STRIDE - 4
3824b908403SEric BiggersST5(		sub		x10, CTR, #MAX_STRIDE - 5	)
3834b908403SEric Biggers		eor		x6, x6, IV_PART
3844b908403SEric Biggers		eor		x7, x7, IV_PART
3854b908403SEric Biggers		eor		x8, x8, IV_PART
3864b908403SEric Biggers		eor		x9, x9, IV_PART
3874b908403SEric BiggersST5(		eor		x10, x10, IV_PART		)
3884b908403SEric Biggers		mov		v0.d[0], x6
3894b908403SEric Biggers		mov		v1.d[0], x7
3904b908403SEric Biggers		mov		v2.d[0], x8
3914b908403SEric Biggers		mov		v3.d[0], x9
3924b908403SEric BiggersST5(		mov		v4.d[0], x10			)
3934b908403SEric Biggers	.else
3944b908403SEric Biggers		bcs		0f
3954b908403SEric Biggers		.subsection	1
3964b908403SEric Biggers		/*
3974b908403SEric Biggers		 * This subsection handles carries.
3984b908403SEric Biggers		 *
3994b908403SEric Biggers		 * Conditional branching here is allowed with respect to time
4004b908403SEric Biggers		 * invariance since the branches are dependent on the IV instead
4014b908403SEric Biggers		 * of the plaintext or key.  This code is rarely executed in
4024b908403SEric Biggers		 * practice anyway.
4034b908403SEric Biggers		 */
4044b908403SEric Biggers
4054b908403SEric Biggers		/* Apply carry to outgoing counter. */
4064b908403SEric Biggers0:		umov		x8, vctr.d[0]
4074b908403SEric Biggers		rev		x8, x8
4084b908403SEric Biggers		add		x8, x8, #1
4094b908403SEric Biggers		rev		x8, x8
4104b908403SEric Biggers		ins		vctr.d[0], x8
4114b908403SEric Biggers
4124b908403SEric Biggers		/*
4134b908403SEric Biggers		 * Apply carry to counter blocks if needed.
4144b908403SEric Biggers		 *
4154b908403SEric Biggers		 * Since the carry flag was set, we know 0 <= IV_PART <
4164b908403SEric Biggers		 * MAX_STRIDE.  Using the value of IV_PART we can determine how
4174b908403SEric Biggers		 * many counter blocks need to be updated.
4184b908403SEric Biggers		 */
4194b908403SEric Biggers		cbz		IV_PART, 2f
		/*
		 * Computed jump into the mov ladder below: each entry is 8
		 * bytes (bti c + mov), so landing IV_PART entries before
		 * label 1 patches the high halves of the last IV_PART
		 * counter blocks.  The bti landing pads keep this indirect
		 * branch valid under BTI.
		 */
4204b908403SEric Biggers		adr		x16, 1f
4214b908403SEric Biggers		sub		x16, x16, IV_PART, lsl #3
4224b908403SEric Biggers		br		x16
4234b908403SEric Biggers		bti		c
4244b908403SEric Biggers		mov		v0.d[0], vctr.d[0]
4254b908403SEric Biggers		bti		c
4264b908403SEric Biggers		mov		v1.d[0], vctr.d[0]
4274b908403SEric Biggers		bti		c
4284b908403SEric Biggers		mov		v2.d[0], vctr.d[0]
4294b908403SEric Biggers		bti		c
4304b908403SEric Biggers		mov		v3.d[0], vctr.d[0]
4314b908403SEric BiggersST5(		bti		c				)
4324b908403SEric BiggersST5(		mov		v4.d[0], vctr.d[0]		)
4334b908403SEric Biggers1:		b		2f
4344b908403SEric Biggers		.previous
4354b908403SEric Biggers
	/* write the big-endian per-block counters into the low halves */
4364b908403SEric Biggers2:		rev		x7, IV_PART
4374b908403SEric Biggers		ins		vctr.d[1], x7
4384b908403SEric Biggers		sub		x7, IV_PART, #MAX_STRIDE - 1
4394b908403SEric Biggers		sub		x8, IV_PART, #MAX_STRIDE - 2
4404b908403SEric Biggers		sub		x9, IV_PART, #MAX_STRIDE - 3
4414b908403SEric Biggers		rev		x7, x7
4424b908403SEric Biggers		rev		x8, x8
4434b908403SEric Biggers		mov		v1.d[1], x7
4444b908403SEric Biggers		rev		x9, x9
4454b908403SEric BiggersST5(		sub		x10, IV_PART, #MAX_STRIDE - 4	)
4464b908403SEric Biggers		mov		v2.d[1], x8
4474b908403SEric BiggersST5(		rev		x10, x10			)
4484b908403SEric Biggers		mov		v3.d[1], x9
4494b908403SEric BiggersST5(		mov		v4.d[1], x10			)
4504b908403SEric Biggers	.endif
4514b908403SEric Biggers
4524b908403SEric Biggers	/*
4534b908403SEric Biggers	 * If there are at least MAX_STRIDE blocks left, XOR the data with
4544b908403SEric Biggers	 * keystream and store.  Otherwise jump to tail handling.
4554b908403SEric Biggers	 */
4564b908403SEric Biggers	tbnz		BYTES_W, #31, .Lctrtail\xctr
4574b908403SEric Biggers	ld1		{v5.16b-v7.16b}, [IN], #48
4584b908403SEric BiggersST4(	bl		aes_encrypt_block4x		)
4594b908403SEric BiggersST5(	bl		aes_encrypt_block5x		)
4604b908403SEric Biggers	eor		v0.16b, v5.16b, v0.16b
4614b908403SEric BiggersST4(	ld1		{v5.16b}, [IN], #16		)
4624b908403SEric Biggers	eor		v1.16b, v6.16b, v1.16b
4634b908403SEric BiggersST5(	ld1		{v5.16b-v6.16b}, [IN], #32	)
4644b908403SEric Biggers	eor		v2.16b, v7.16b, v2.16b
4654b908403SEric Biggers	eor		v3.16b, v5.16b, v3.16b
4664b908403SEric BiggersST5(	eor		v4.16b, v6.16b, v4.16b		)
4674b908403SEric Biggers	st1		{v0.16b-v3.16b}, [OUT], #64
4684b908403SEric BiggersST5(	st1		{v4.16b}, [OUT], #16		)
4694b908403SEric Biggers	cbz		BYTES_W, .Lctrout\xctr
4704b908403SEric Biggers	b		.LctrloopNx\xctr
4714b908403SEric Biggers
4724b908403SEric Biggers.Lctrout\xctr:
4734b908403SEric Biggers	.if !\xctr
4744b908403SEric Biggers		st1		{vctr.16b}, [IV] /* return next CTR value */
4754b908403SEric Biggers	.endif
4764b908403SEric Biggers	frame_pop
4774b908403SEric Biggers	ret
4784b908403SEric Biggers
4794b908403SEric Biggers.Lctrtail\xctr:
4804b908403SEric Biggers	/*
4814b908403SEric Biggers	 * Handle up to MAX_STRIDE * 16 - 1 bytes of plaintext
4824b908403SEric Biggers	 *
4834b908403SEric Biggers	 * This code expects the last keystream block to be in v{MAX_STRIDE-1}.
4844b908403SEric Biggers	 * For example: if encrypting two blocks with MAX_STRIDE=5, then v3 and
4854b908403SEric Biggers	 * v4 should have the next two counter blocks.
4864b908403SEric Biggers	 *
4874b908403SEric Biggers	 * This allows us to store the ciphertext by writing to overlapping
4884b908403SEric Biggers	 * regions of memory.  Any invalid ciphertext blocks get overwritten by
4894b908403SEric Biggers	 * correctly computed blocks.  This approach greatly simplifies the
4904b908403SEric Biggers	 * logic for storing the ciphertext.
4914b908403SEric Biggers	 */
	/* x13 = length of the final (possibly partial) block */
4924b908403SEric Biggers	mov		x16, #16
4934b908403SEric Biggers	ands		w7, BYTES_W, #0xf
4944b908403SEric Biggers	csel		x13, x7, x16, ne
4954b908403SEric Biggers
	/* x14/x15/x16 = 16 if the corresponding block is valid, else 0,
	 * so the ld1/st1 post-increments below skip invalid blocks */
4964b908403SEric BiggersST5(	cmp		BYTES_W, #64 - (MAX_STRIDE << 4))
4974b908403SEric BiggersST5(	csel		x14, x16, xzr, gt		)
4984b908403SEric Biggers	cmp		BYTES_W, #48 - (MAX_STRIDE << 4)
4994b908403SEric Biggers	csel		x15, x16, xzr, gt
5004b908403SEric Biggers	cmp		BYTES_W, #32 - (MAX_STRIDE << 4)
5014b908403SEric Biggers	csel		x16, x16, xzr, gt
5024b908403SEric Biggers	cmp		BYTES_W, #16 - (MAX_STRIDE << 4)
5034b908403SEric Biggers
5044b908403SEric Biggers	adr_l		x9, .Lcts_permute_table
5054b908403SEric Biggers	add		x9, x9, x13
5064b908403SEric Biggers	ble		.Lctrtail1x\xctr
5074b908403SEric Biggers
5084b908403SEric BiggersST5(	ld1		{v5.16b}, [IN], x14		)
5094b908403SEric Biggers	ld1		{v6.16b}, [IN], x15
5104b908403SEric Biggers	ld1		{v7.16b}, [IN], x16
5114b908403SEric Biggers
5124b908403SEric BiggersST4(	bl		aes_encrypt_block4x		)
5134b908403SEric BiggersST5(	bl		aes_encrypt_block5x		)
5144b908403SEric Biggers
5154b908403SEric Biggers	ld1		{v8.16b}, [IN], x13
5164b908403SEric Biggers	ld1		{v9.16b}, [IN]
5174b908403SEric Biggers	ld1		{v10.16b}, [x9]
5184b908403SEric Biggers
5194b908403SEric BiggersST4(	eor		v6.16b, v6.16b, v0.16b		)
5204b908403SEric BiggersST4(	eor		v7.16b, v7.16b, v1.16b		)
5214b908403SEric BiggersST4(	tbl		v3.16b, {v3.16b}, v10.16b	)
5224b908403SEric BiggersST4(	eor		v8.16b, v8.16b, v2.16b		)
5234b908403SEric BiggersST4(	eor		v9.16b, v9.16b, v3.16b		)
5244b908403SEric Biggers
5254b908403SEric BiggersST5(	eor		v5.16b, v5.16b, v0.16b		)
5264b908403SEric BiggersST5(	eor		v6.16b, v6.16b, v1.16b		)
5274b908403SEric BiggersST5(	tbl		v4.16b, {v4.16b}, v10.16b	)
5284b908403SEric BiggersST5(	eor		v7.16b, v7.16b, v2.16b		)
5294b908403SEric BiggersST5(	eor		v8.16b, v8.16b, v3.16b		)
5304b908403SEric BiggersST5(	eor		v9.16b, v9.16b, v4.16b		)
5314b908403SEric Biggers
5324b908403SEric BiggersST5(	st1		{v5.16b}, [OUT], x14		)
5334b908403SEric Biggers	st1		{v6.16b}, [OUT], x15
5344b908403SEric Biggers	st1		{v7.16b}, [OUT], x16
5354b908403SEric Biggers	add		x13, x13, OUT
5364b908403SEric Biggers	st1		{v9.16b}, [x13]		// overlapping stores
5374b908403SEric Biggers	st1		{v8.16b}, [OUT]
5384b908403SEric Biggers	b		.Lctrout\xctr
5394b908403SEric Biggers
5404b908403SEric Biggers.Lctrtail1x\xctr:
5414b908403SEric Biggers	/*
5424b908403SEric Biggers	 * Handle <= 16 bytes of plaintext
5434b908403SEric Biggers	 *
5444b908403SEric Biggers	 * This code always reads and writes 16 bytes.  To avoid out of bounds
5454b908403SEric Biggers	 * accesses, XCTR and CTR modes must use a temporary buffer when
5464b908403SEric Biggers	 * encrypting/decrypting less than 16 bytes.
5474b908403SEric Biggers	 *
5484b908403SEric Biggers	 * This code is unusual in that it loads the input and stores the output
5494b908403SEric Biggers	 * relative to the end of the buffers rather than relative to the start.
5504b908403SEric Biggers	 * This causes unusual behaviour when encrypting/decrypting less than 16
5514b908403SEric Biggers	 * bytes; the end of the data is expected to be at the end of the
5524b908403SEric Biggers	 * temporary buffer rather than the start of the data being at the start
5534b908403SEric Biggers	 * of the temporary buffer.
5544b908403SEric Biggers	 */
5554b908403SEric Biggers	sub		x8, x7, #16
5564b908403SEric Biggers	csel		x7, x7, x8, eq
5574b908403SEric Biggers	add		IN, IN, x7
5584b908403SEric Biggers	add		OUT, OUT, x7
5594b908403SEric Biggers	ld1		{v5.16b}, [IN]
5604b908403SEric Biggers	ld1		{v6.16b}, [OUT]
5614b908403SEric BiggersST5(	mov		v3.16b, v4.16b			)
5624b908403SEric Biggers	encrypt_block	v3, ROUNDS_W, KEY, x8, w7
5634b908403SEric Biggers	ld1		{v10.16b-v11.16b}, [x9]
5644b908403SEric Biggers	tbl		v3.16b, {v3.16b}, v10.16b
	/* v11 becomes a byte mask selecting only the valid output bytes */
5654b908403SEric Biggers	sshr		v11.16b, v11.16b, #7
5664b908403SEric Biggers	eor		v5.16b, v5.16b, v3.16b
5674b908403SEric Biggers	bif		v5.16b, v6.16b, v11.16b
5684b908403SEric Biggers	st1		{v5.16b}, [OUT]
5694b908403SEric Biggers	b		.Lctrout\xctr
5704b908403SEric Biggers
5714b908403SEric Biggers	// Arguments
5724b908403SEric Biggers	.unreq OUT
5734b908403SEric Biggers	.unreq IN
5744b908403SEric Biggers	.unreq KEY
5754b908403SEric Biggers	.unreq ROUNDS_W
5764b908403SEric Biggers	.unreq BYTES_W
5774b908403SEric Biggers	.unreq IV
5784b908403SEric Biggers	.unreq BYTE_CTR_W	// XCTR only
5794b908403SEric Biggers	// Intermediate values
5804b908403SEric Biggers	.unreq CTR_W		// XCTR only
5814b908403SEric Biggers	.unreq CTR		// XCTR only
5824b908403SEric Biggers	.unreq IV_PART
5834b908403SEric Biggers	.unreq BLOCKS
5844b908403SEric Biggers	.unreq BLOCKS_W
5854b908403SEric Biggers.endm
5864b908403SEric Biggers
5874b908403SEric Biggers	/*
5884b908403SEric Biggers	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
5894b908403SEric Biggers	 *		   int bytes, u8 ctr[])
5904b908403SEric Biggers	 *
5914b908403SEric Biggers	 * The input and output buffers must always be at least 16 bytes even if
5924b908403SEric Biggers	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
5934b908403SEric Biggers	 * accesses will occur.  The data to be encrypted/decrypted is expected
5944b908403SEric Biggers	 * to be at the end of this 16-byte temporary buffer rather than the
5954b908403SEric Biggers	 * start.
5964b908403SEric Biggers	 */
5974b908403SEric Biggers
/* CTR mode entry point: instantiate ctr_encrypt with \xctr = 0. */
5984b908403SEric BiggersAES_FUNC_START(aes_ctr_encrypt)
5994b908403SEric Biggers	ctr_encrypt 0
6004b908403SEric BiggersAES_FUNC_END(aes_ctr_encrypt)
6014b908403SEric Biggers
6024b908403SEric Biggers	/*
6034b908403SEric Biggers	 * aes_xctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
6044b908403SEric Biggers	 *		   int bytes, u8 const iv[], int byte_ctr)
6054b908403SEric Biggers	 *
6064b908403SEric Biggers	 * The input and output buffers must always be at least 16 bytes even if
6074b908403SEric Biggers	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
6084b908403SEric Biggers	 * accesses will occur.  The data to be encrypted/decrypted is expected
6094b908403SEric Biggers	 * to be at the end of this 16-byte temporary buffer rather than the
6104b908403SEric Biggers	 * start.
6114b908403SEric Biggers	 */
6124b908403SEric Biggers
/* XCTR mode entry point: instantiate ctr_encrypt with \xctr = 1. */
6134b908403SEric BiggersAES_FUNC_START(aes_xctr_encrypt)
6144b908403SEric Biggers	ctr_encrypt 1
6154b908403SEric BiggersAES_FUNC_END(aes_xctr_encrypt)
6164b908403SEric Biggers
6174b908403SEric Biggers
6184b908403SEric Biggers	/*
6194b908403SEric Biggers	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
6204b908403SEric Biggers	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
6214b908403SEric Biggers	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
6224b908403SEric Biggers	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
6234b908403SEric Biggers	 */
6244b908403SEric Biggers
	/*
	 * next_tweak: compute \out = \in * x in GF(2^128), i.e. advance the
	 * XTS tweak by one block.  The doubling is the add (\in + \in); the
	 * sshr/and/ext/eor sequence conditionally folds in the reduction
	 * constant held in xtsmask when the top bit was set, without
	 * branching (keeps the sequence time-invariant).
	 */
6254b908403SEric Biggers	.macro		next_tweak, out, in, tmp
6264b908403SEric Biggers	sshr		\tmp\().2d,  \in\().2d,   #63
6274b908403SEric Biggers	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
6284b908403SEric Biggers	add		\out\().2d,  \in\().2d,   \in\().2d
6294b908403SEric Biggers	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
6304b908403SEric Biggers	eor		\out\().16b, \out\().16b, \tmp\().16b
6314b908403SEric Biggers	.endm
6324b908403SEric Biggers
	/*
	 * xts_load_mask: build the reduction mask in xtsmask as the 32-bit
	 * lane pattern { 0x87, 0x1, 0x87, 0x1 } (0x87 is the low byte of the
	 * XTS/GF(2^128) reduction polynomial).  \tmp is clobbered.
	 */
6334b908403SEric Biggers	.macro		xts_load_mask, tmp
6344b908403SEric Biggers	movi		xtsmask.2s, #0x1
6354b908403SEric Biggers	movi		\tmp\().2s, #0x87
6364b908403SEric Biggers	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
6374b908403SEric Biggers	.endm
6384b908403SEric Biggers
6394b908403SEric BiggersAES_FUNC_START(aes_xts_encrypt)
6404b908403SEric Biggers	frame_push	0
6414b908403SEric Biggers
6424b908403SEric Biggers	ld1		{v4.16b}, [x6]
6434b908403SEric Biggers	xts_load_mask	v8
6444b908403SEric Biggers	cbz		w7, .Lxtsencnotfirst
6454b908403SEric Biggers
6464b908403SEric Biggers	enc_prepare	w3, x5, x8
6474b908403SEric Biggers	xts_cts_skip_tw	w7, .LxtsencNx
6484b908403SEric Biggers	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
6494b908403SEric Biggers	enc_switch_key	w3, x2, x8
6504b908403SEric Biggers	b		.LxtsencNx
6514b908403SEric Biggers
6524b908403SEric Biggers.Lxtsencnotfirst:
6534b908403SEric Biggers	enc_prepare	w3, x2, x8
6544b908403SEric Biggers.LxtsencloopNx:
6554b908403SEric Biggers	next_tweak	v4, v4, v8
6564b908403SEric Biggers.LxtsencNx:
6574b908403SEric Biggers	subs		w4, w4, #64
6584b908403SEric Biggers	bmi		.Lxtsenc1x
6594b908403SEric Biggers	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
6604b908403SEric Biggers	next_tweak	v5, v4, v8
6614b908403SEric Biggers	eor		v0.16b, v0.16b, v4.16b
6624b908403SEric Biggers	next_tweak	v6, v5, v8
6634b908403SEric Biggers	eor		v1.16b, v1.16b, v5.16b
6644b908403SEric Biggers	eor		v2.16b, v2.16b, v6.16b
6654b908403SEric Biggers	next_tweak	v7, v6, v8
6664b908403SEric Biggers	eor		v3.16b, v3.16b, v7.16b
6674b908403SEric Biggers	bl		aes_encrypt_block4x
6684b908403SEric Biggers	eor		v3.16b, v3.16b, v7.16b
6694b908403SEric Biggers	eor		v0.16b, v0.16b, v4.16b
6704b908403SEric Biggers	eor		v1.16b, v1.16b, v5.16b
6714b908403SEric Biggers	eor		v2.16b, v2.16b, v6.16b
6724b908403SEric Biggers	st1		{v0.16b-v3.16b}, [x0], #64
6734b908403SEric Biggers	mov		v4.16b, v7.16b
6744b908403SEric Biggers	cbz		w4, .Lxtsencret
6754b908403SEric Biggers	xts_reload_mask	v8
6764b908403SEric Biggers	b		.LxtsencloopNx
6774b908403SEric Biggers.Lxtsenc1x:
6784b908403SEric Biggers	adds		w4, w4, #64
6794b908403SEric Biggers	beq		.Lxtsencout
6804b908403SEric Biggers	subs		w4, w4, #16
6814b908403SEric Biggers	bmi		.LxtsencctsNx
6824b908403SEric Biggers.Lxtsencloop:
6834b908403SEric Biggers	ld1		{v0.16b}, [x1], #16
6844b908403SEric Biggers.Lxtsencctsout:
6854b908403SEric Biggers	eor		v0.16b, v0.16b, v4.16b
6864b908403SEric Biggers	encrypt_block	v0, w3, x2, x8, w7
6874b908403SEric Biggers	eor		v0.16b, v0.16b, v4.16b
6884b908403SEric Biggers	cbz		w4, .Lxtsencout
6894b908403SEric Biggers	subs		w4, w4, #16
6904b908403SEric Biggers	next_tweak	v4, v4, v8
6914b908403SEric Biggers	bmi		.Lxtsenccts
6924b908403SEric Biggers	st1		{v0.16b}, [x0], #16
6934b908403SEric Biggers	b		.Lxtsencloop
6944b908403SEric Biggers.Lxtsencout:
6954b908403SEric Biggers	st1		{v0.16b}, [x0]
6964b908403SEric Biggers.Lxtsencret:
6974b908403SEric Biggers	st1		{v4.16b}, [x6]
6984b908403SEric Biggers	frame_pop
6994b908403SEric Biggers	ret
7004b908403SEric Biggers
7014b908403SEric Biggers.LxtsencctsNx:
7024b908403SEric Biggers	mov		v0.16b, v3.16b
7034b908403SEric Biggers	sub		x0, x0, #16
7044b908403SEric Biggers.Lxtsenccts:
7054b908403SEric Biggers	adr_l		x8, .Lcts_permute_table
7064b908403SEric Biggers
7074b908403SEric Biggers	add		x1, x1, w4, sxtw	/* rewind input pointer */
7084b908403SEric Biggers	add		w4, w4, #16		/* # bytes in final block */
7094b908403SEric Biggers	add		x9, x8, #32
7104b908403SEric Biggers	add		x8, x8, x4
7114b908403SEric Biggers	sub		x9, x9, x4
7124b908403SEric Biggers	add		x4, x0, x4		/* output address of final block */
7134b908403SEric Biggers
7144b908403SEric Biggers	ld1		{v1.16b}, [x1]		/* load final block */
7154b908403SEric Biggers	ld1		{v2.16b}, [x8]
7164b908403SEric Biggers	ld1		{v3.16b}, [x9]
7174b908403SEric Biggers
7184b908403SEric Biggers	tbl		v2.16b, {v0.16b}, v2.16b
7194b908403SEric Biggers	tbx		v0.16b, {v1.16b}, v3.16b
7204b908403SEric Biggers	st1		{v2.16b}, [x4]			/* overlapping stores */
7214b908403SEric Biggers	mov		w4, wzr
7224b908403SEric Biggers	b		.Lxtsencctsout
7234b908403SEric BiggersAES_FUNC_END(aes_xts_encrypt)
7244b908403SEric Biggers
7254b908403SEric BiggersAES_FUNC_START(aes_xts_decrypt)
7264b908403SEric Biggers	frame_push	0
7274b908403SEric Biggers
7284b908403SEric Biggers	/* subtract 16 bytes if we are doing CTS */
7294b908403SEric Biggers	sub		w8, w4, #0x10
7304b908403SEric Biggers	tst		w4, #0xf
7314b908403SEric Biggers	csel		w4, w4, w8, eq
7324b908403SEric Biggers
7334b908403SEric Biggers	ld1		{v4.16b}, [x6]
7344b908403SEric Biggers	xts_load_mask	v8
7354b908403SEric Biggers	xts_cts_skip_tw	w7, .Lxtsdecskiptw
7364b908403SEric Biggers	cbz		w7, .Lxtsdecnotfirst
7374b908403SEric Biggers
7384b908403SEric Biggers	enc_prepare	w3, x5, x8
7394b908403SEric Biggers	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
7404b908403SEric Biggers.Lxtsdecskiptw:
7414b908403SEric Biggers	dec_prepare	w3, x2, x8
7424b908403SEric Biggers	b		.LxtsdecNx
7434b908403SEric Biggers
7444b908403SEric Biggers.Lxtsdecnotfirst:
7454b908403SEric Biggers	dec_prepare	w3, x2, x8
7464b908403SEric Biggers.LxtsdecloopNx:
7474b908403SEric Biggers	next_tweak	v4, v4, v8
7484b908403SEric Biggers.LxtsdecNx:
7494b908403SEric Biggers	subs		w4, w4, #64
7504b908403SEric Biggers	bmi		.Lxtsdec1x
7514b908403SEric Biggers	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
7524b908403SEric Biggers	next_tweak	v5, v4, v8
7534b908403SEric Biggers	eor		v0.16b, v0.16b, v4.16b
7544b908403SEric Biggers	next_tweak	v6, v5, v8
7554b908403SEric Biggers	eor		v1.16b, v1.16b, v5.16b
7564b908403SEric Biggers	eor		v2.16b, v2.16b, v6.16b
7574b908403SEric Biggers	next_tweak	v7, v6, v8
7584b908403SEric Biggers	eor		v3.16b, v3.16b, v7.16b
7594b908403SEric Biggers	bl		aes_decrypt_block4x
7604b908403SEric Biggers	eor		v3.16b, v3.16b, v7.16b
7614b908403SEric Biggers	eor		v0.16b, v0.16b, v4.16b
7624b908403SEric Biggers	eor		v1.16b, v1.16b, v5.16b
7634b908403SEric Biggers	eor		v2.16b, v2.16b, v6.16b
7644b908403SEric Biggers	st1		{v0.16b-v3.16b}, [x0], #64
7654b908403SEric Biggers	mov		v4.16b, v7.16b
7664b908403SEric Biggers	cbz		w4, .Lxtsdecout
7674b908403SEric Biggers	xts_reload_mask	v8
7684b908403SEric Biggers	b		.LxtsdecloopNx
7694b908403SEric Biggers.Lxtsdec1x:
7704b908403SEric Biggers	adds		w4, w4, #64
7714b908403SEric Biggers	beq		.Lxtsdecout
7724b908403SEric Biggers	subs		w4, w4, #16
7734b908403SEric Biggers.Lxtsdecloop:
7744b908403SEric Biggers	ld1		{v0.16b}, [x1], #16
7754b908403SEric Biggers	bmi		.Lxtsdeccts
7764b908403SEric Biggers.Lxtsdecctsout:
7774b908403SEric Biggers	eor		v0.16b, v0.16b, v4.16b
7784b908403SEric Biggers	decrypt_block	v0, w3, x2, x8, w7
7794b908403SEric Biggers	eor		v0.16b, v0.16b, v4.16b
7804b908403SEric Biggers	st1		{v0.16b}, [x0], #16
7814b908403SEric Biggers	cbz		w4, .Lxtsdecout
7824b908403SEric Biggers	subs		w4, w4, #16
7834b908403SEric Biggers	next_tweak	v4, v4, v8
7844b908403SEric Biggers	b		.Lxtsdecloop
7854b908403SEric Biggers.Lxtsdecout:
7864b908403SEric Biggers	st1		{v4.16b}, [x6]
7874b908403SEric Biggers	frame_pop
7884b908403SEric Biggers	ret
7894b908403SEric Biggers
7904b908403SEric Biggers.Lxtsdeccts:
7914b908403SEric Biggers	adr_l		x8, .Lcts_permute_table
7924b908403SEric Biggers
7934b908403SEric Biggers	add		x1, x1, w4, sxtw	/* rewind input pointer */
7944b908403SEric Biggers	add		w4, w4, #16		/* # bytes in final block */
7954b908403SEric Biggers	add		x9, x8, #32
7964b908403SEric Biggers	add		x8, x8, x4
7974b908403SEric Biggers	sub		x9, x9, x4
7984b908403SEric Biggers	add		x4, x0, x4		/* output address of final block */
7994b908403SEric Biggers
8004b908403SEric Biggers	next_tweak	v5, v4, v8
8014b908403SEric Biggers
8024b908403SEric Biggers	ld1		{v1.16b}, [x1]		/* load final block */
8034b908403SEric Biggers	ld1		{v2.16b}, [x8]
8044b908403SEric Biggers	ld1		{v3.16b}, [x9]
8054b908403SEric Biggers
8064b908403SEric Biggers	eor		v0.16b, v0.16b, v5.16b
8074b908403SEric Biggers	decrypt_block	v0, w3, x2, x8, w7
8084b908403SEric Biggers	eor		v0.16b, v0.16b, v5.16b
8094b908403SEric Biggers
8104b908403SEric Biggers	tbl		v2.16b, {v0.16b}, v2.16b
8114b908403SEric Biggers	tbx		v0.16b, {v1.16b}, v3.16b
8124b908403SEric Biggers
8134b908403SEric Biggers	st1		{v2.16b}, [x4]			/* overlapping stores */
8144b908403SEric Biggers	mov		w4, wzr
8154b908403SEric Biggers	b		.Lxtsdecctsout
8164b908403SEric BiggersAES_FUNC_END(aes_xts_decrypt)
8174b908403SEric Biggers
81858286738SEric Biggers#if IS_ENABLED(CONFIG_CRYPTO_LIB_AES_CBC_MACS)
8194b908403SEric Biggers	/*
820*11d6bc70SEric Biggers	 * void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
82158286738SEric Biggers	 *		       size_t blocks, u8 dg[], int enc_before,
82258286738SEric Biggers	 *		       int enc_after);
8234b908403SEric Biggers	 */
8244b908403SEric BiggersAES_FUNC_START(aes_mac_update)
8254b908403SEric Biggers	ld1		{v0.16b}, [x4]			/* get dg */
8264b908403SEric Biggers	enc_prepare	w2, x1, x7
8274b908403SEric Biggers	cbz		w5, .Lmacloop4x
8284b908403SEric Biggers
8294b908403SEric Biggers	encrypt_block	v0, w2, x1, x7, w8
8304b908403SEric Biggers
8314b908403SEric Biggers.Lmacloop4x:
83258286738SEric Biggers	subs		x3, x3, #4
8334b908403SEric Biggers	bmi		.Lmac1x
8344b908403SEric Biggers	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
8354b908403SEric Biggers	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
8364b908403SEric Biggers	encrypt_block	v0, w2, x1, x7, w8
8374b908403SEric Biggers	eor		v0.16b, v0.16b, v2.16b
8384b908403SEric Biggers	encrypt_block	v0, w2, x1, x7, w8
8394b908403SEric Biggers	eor		v0.16b, v0.16b, v3.16b
8404b908403SEric Biggers	encrypt_block	v0, w2, x1, x7, w8
8414b908403SEric Biggers	eor		v0.16b, v0.16b, v4.16b
84258286738SEric Biggers	cmp		x3, xzr
8434b908403SEric Biggers	csinv		w5, w6, wzr, eq
8444b908403SEric Biggers	cbz		w5, .Lmacout
8454b908403SEric Biggers	encrypt_block	v0, w2, x1, x7, w8
8464b908403SEric Biggers	st1		{v0.16b}, [x4]			/* return dg */
8474b908403SEric Biggers	b		.Lmacloop4x
8484b908403SEric Biggers.Lmac1x:
84958286738SEric Biggers	add		x3, x3, #4
8504b908403SEric Biggers.Lmacloop:
85158286738SEric Biggers	cbz		x3, .Lmacout
8524b908403SEric Biggers	ld1		{v1.16b}, [x0], #16		/* get next pt block */
8534b908403SEric Biggers	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
8544b908403SEric Biggers
85558286738SEric Biggers	subs		x3, x3, #1
8564b908403SEric Biggers	csinv		w5, w6, wzr, eq
8574b908403SEric Biggers	cbz		w5, .Lmacout
8584b908403SEric Biggers
8594b908403SEric Biggers.Lmacenc:
8604b908403SEric Biggers	encrypt_block	v0, w2, x1, x7, w8
8614b908403SEric Biggers	b		.Lmacloop
8624b908403SEric Biggers
8634b908403SEric Biggers.Lmacout:
8644b908403SEric Biggers	st1		{v0.16b}, [x4]			/* return dg */
8654b908403SEric Biggers	ret
8664b908403SEric BiggersAES_FUNC_END(aes_mac_update)
86758286738SEric Biggers#endif /* CONFIG_CRYPTO_LIB_AES_CBC_MACS */
868