/* xref: /linux/arch/xtensa/lib/umulsidi3.S (revision 0ea5c948cb64bab5bc7a5516774eb8536f05aa0d) */
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/* XCHAL_NO_MUL is 1 only when the core has none of the MUL16, MUL32 or
   MAC16 options; the 16x16 partial products are then computed by the
   software helper .Lmul_mulsi3.  It is always defined (as 0 or 1) so
   that it can be used directly in #if / #elif expressions.  */
#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 || XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 0
#else
#define XCHAL_NO_MUL 1
#endif

/* __umulsidi3: multiply the two unsigned 32-bit values in a2 and a3 and
   return the 64-bit product in the a2/a3 register pair (wl = low word,
   wh = high word; which of a2/a3 holds which depends on endianness,
   see the wh/wl defines below).

   One of three strategies is selected at build time:
   - XCHAL_HAVE_MUL32_HIGH: a single MULL + MULUH pair.
   - MUL16, MUL32 or MAC16: four 16x16 partial products computed inline.
   - no multiplier (XCHAL_NO_MUL): the same four partial products, each
     computed by the shift-and-add helper .Lmul_mulsi3 further down.

   The partial-product paths write a6, a9, a11 and SAR as scratch, plus
   a4/a5 (MUL16/MUL32), the MAC16 accumulator, or the helper argument
   registers, depending on the configuration.  */
ENTRY(__umulsidi3)

#ifdef __XTENSA_CALL0_ABI__
	/* a12-a15 are saved in the upper half of a 32-byte frame and
	   reloaded before returning.  Offset 0 is used for a0 when the
	   software multiply helper has to be called.  */
	abi_entry(32)
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	abi_entry(32)
#else
	abi_entry_default
#endif

/* Map the high (wh) and low (wl) result words onto the a2/a3 pair.  */
#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */

	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */

#if XCHAL_HAVE_MUL32_HIGH
	/* The low word is staged in a6: wl is one of the input registers
	   and both inputs are still needed by muluh.  */
	mull	a6, a2, a3
	muluh	wh, a2, a3
	mov	wl, a6

#else /* ! MUL32_HIGH */

#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */
	s32i	a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


/* do_mul(dst, xreg, xhalf, yreg, yhalf) puts the 32-bit product of the
   16-bit half xhalf (l or h) of xreg and the half yhalf of yreg into
   dst.  How the halves are selected depends on the multiply hardware:
   MUL16/MUL32 paste the half letter onto the register name (a2 ## h is
   a2h, i.e. a4), MAC16 pastes it onto the opcode, and the software
   fallback extracts the half into the helper argument register.  */
#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

/* CALL0: arguments go in a13/a14 and the result comes back in a12
   (custom convention of .Lmul_mulsi3).  Windowed: CALL12 rotates the
   register window by 12, so a14/a15 here are a2/a3 in the helper and
   the result the helper leaves in its a2 is read back from a14.  */
#ifdef __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* With the operands split into 16-bit halves, a2 = a2h:a2l and
	   a3 = a3h:a3l, the 64-bit product is
	       (pp3 << 32) + ((pp1 + pp2) << 16) + pp0
	   where pp0 = a2l * a3l, pp1 = a2l * a3h, pp2 = a2h * a3l and
	   pp3 = a2h * a3h.  */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f	/* sum >= addend: the add did not wrap */
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  SAR is still 16 from the ssai
	   above (nothing in do_mul changes it), so the sll below shifts
	   a6 left by 16.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f	/* no carry out of the low word */
	addi	a9, a9, 1
1:
	/* Compute the high word into wh.  This is the last use of the
	   inputs, so overwriting a2/a3 through wh and wl is safe.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
	add	wh, wh, a9
	mov	wl, a6

#endif /* !MUL32_HIGH */

#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL
	/* Restore the original return address.  */
	l32i	a0, sp, 0
#endif
#ifdef __XTENSA_CALL0_ABI__
	/* Reload the registers saved at entry and release the frame.  */
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	abi_ret(32)
#else
	abi_ret_default
#endif

#if XCHAL_NO_MUL

	/* do_addxN dst, as, at, tmp:  dst = (as << log2(N)) + at.
	   tmp is scratch for cores without the ADDX instructions; it may
	   be the same register as dst but must not be the same as at.  */
	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying the 16-bit halves of
	   the 32-bit operands.  When using CALL0, this function uses a
	   custom ABI: the inputs are passed in a13 and a14 (both are
	   consumed), the result is returned in a12, and a8 and a15 are
	   clobbered.  With the windowed ABI it is reached through CALL12
	   and takes its inputs in a2 and a3, returning the result in a2.  */
	.align	4
.Lmul_mulsi3:
	abi_entry_default

	/* dst = src1 * src2 by shift-and-add, four bits of src1 per pass:
	   bit k (k = 0..3) of src1 conditionally adds src2 << k to dst.
	   After each pass src1 is shifted right and src2 left by 4; the
	   loop ends once src1 is zero.  src1, src2, tmp1 and tmp2 are
	   all overwritten.  */
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm

#ifdef __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	abi_ret_default
#endif /* XCHAL_NO_MUL */

ENDPROC(__umulsidi3)
EXPORT_SYMBOL(__umulsidi3)