/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * XCHAL_NO_MUL is 1 when the core has none of the MUL16, MUL32 or MAC16
 * options, i.e. when every partial product has to be computed in software
 * by the local helper .Lmul_mulsi3 defined at the end of this file.
 */
#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 || XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 0
#else
#define XCHAL_NO_MUL 1
#endif

/*
 * __umulsidi3: unsigned multiply of the two 32-bit words passed in a2 and
 * a3.  The 64-bit result is returned in the a2/a3 pair: wh receives the
 * high word and wl the low word (which register is which depends on
 * __XTENSA_EB__, see the defines below).
 *
 * One of three strategies is selected at build time:
 *   - XCHAL_HAVE_MUL32_HIGH: a single mull/muluh pair;
 *   - MUL16, MUL32 or MAC16: four 16x16 partial products (pp 0 .. pp 3)
 *     combined with explicit carry propagation;
 *   - no multiply hardware: the same four partial products, each one
 *     obtained by calling .Lmul_mulsi3.
 */
ENTRY(__umulsidi3)

#ifdef __XTENSA_CALL0_ABI__
	abi_entry(32)
	/* Save a12-a15 in the frame; they are reloaded before returning. */
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	abi_entry(32)
#else
	abi_entry_default
#endif

	/* Result registers: wh is the high word, wl is the low word. */
#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */

	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */

#if XCHAL_HAVE_MUL32_HIGH
	/* The low word goes through a6 because wl is one of the inputs
	   and both inputs are still needed by muluh.  */
	mull	a6, a2, a3
	muluh	wh, a2, a3
	mov	wl, a6

#else /* ! MUL32_HIGH */

#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */
	s32i	a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


/*
 * do_mul(dst, xreg, xhalf, yreg, yhalf) leaves in dst the product of the
 * selected 16-bit half (l or h) of xreg and the selected half of yreg.
 * Each hardware configuration provides its own definition.
 */
#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#ifdef __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	/* The sum is below an addend only if the addition wrapped. */
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	/* Same wraparound test: a carry out of the low word bumps a9. */
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
	add	wh, wh, a9
	mov	wl, a6

#endif /* !MUL32_HIGH */

#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL
	/* Restore the original return address.  */
	l32i	a0, sp, 0
#endif
#ifdef __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	abi_ret(32)
#else
	abi_ret_default
#endif

#if XCHAL_NO_MUL

	/* do_addxN dst, as, at, tmp: dst = (as << log2(N)) + at.  Uses the
	   ADDX instructions when the core has them; otherwise falls back
	   to a shift through tmp followed by an add.  */
	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying the 16-bit halves of
	   the two operands of __umulsidi3.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14 (both are
	   destroyed by the loop below), the result is returned in a12, and
	   a8 and a15 are clobbered.  Otherwise it is reached through CALL12
	   and works on a2 and a3, leaving the result in a2.  */
	.align	4
.Lmul_mulsi3:
	abi_entry_default

	/* Shift-and-add multiply, four bits of src1 per iteration: for
	   each set bit k (0..3) of src1, (src2 << k) is added to dst.
	   Then src1 is shifted right and src2 left by four, and the loop
	   repeats until src1 is zero.  src1 and src2 are modified.  */
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm

#ifdef __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	abi_ret_default
#endif /* XCHAL_NO_MUL */

ENDPROC(__umulsidi3)
EXPORT_SYMBOL(__umulsidi3)