1e71b7053SJung-uk Kim /* 283eaf7aeSJung-uk Kim * Copyright 2013-2022 The OpenSSL Project Authors. All Rights Reserved. 3e71b7053SJung-uk Kim * Copyright (c) 2012, Intel Corporation. All Rights Reserved. 4e71b7053SJung-uk Kim * 5*b077aed3SPierre Pronchery * Licensed under the Apache License 2.0 (the "License"). You may not use 6e71b7053SJung-uk Kim * this file except in compliance with the License. You can obtain a copy 7e71b7053SJung-uk Kim * in the file LICENSE in the source distribution or at 8e71b7053SJung-uk Kim * https://www.openssl.org/source/license.html 9e71b7053SJung-uk Kim * 10e71b7053SJung-uk Kim * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) 11e71b7053SJung-uk Kim * (1) Intel Corporation, Israel Development Center, Haifa, Israel 12e71b7053SJung-uk Kim * (2) University of Haifa, Israel 13e71b7053SJung-uk Kim */ 147bded2dbSJung-uk Kim 15e71b7053SJung-uk Kim #include <openssl/opensslconf.h> 167bded2dbSJung-uk Kim #include "rsaz_exp.h" 177bded2dbSJung-uk Kim 18e71b7053SJung-uk Kim #ifndef RSAZ_ENABLED 19e71b7053SJung-uk Kim NON_EMPTY_TRANSLATION_UNIT 20e71b7053SJung-uk Kim #else 217bded2dbSJung-uk Kim 227bded2dbSJung-uk Kim /* 237bded2dbSJung-uk Kim * See crypto/bn/asm/rsaz-avx2.pl for further details. 247bded2dbSJung-uk Kim */ 257bded2dbSJung-uk Kim void rsaz_1024_norm2red_avx2(void *red, const void *norm); 267bded2dbSJung-uk Kim void rsaz_1024_mul_avx2(void *ret, const void *a, const void *b, 277bded2dbSJung-uk Kim const void *n, BN_ULONG k); 287bded2dbSJung-uk Kim void rsaz_1024_sqr_avx2(void *ret, const void *a, const void *n, BN_ULONG k, 297bded2dbSJung-uk Kim int cnt); 307bded2dbSJung-uk Kim void rsaz_1024_scatter5_avx2(void *tbl, const void *val, int i); 317bded2dbSJung-uk Kim void rsaz_1024_gather5_avx2(void *val, const void *tbl, int i); 327bded2dbSJung-uk Kim void rsaz_1024_red2norm_avx2(void *norm, const void *red); 337bded2dbSJung-uk Kim 347bded2dbSJung-uk Kim #if defined(__GNUC__) 357bded2dbSJung-uk Kim # define ALIGN64 __attribute__((aligned(64))) 367bded2dbSJung-uk Kim #elif defined(_MSC_VER) 377bded2dbSJung-uk Kim # define ALIGN64 __declspec(align(64)) 387bded2dbSJung-uk Kim #elif defined(__SUNPRO_C) 397bded2dbSJung-uk Kim # define ALIGN64 407bded2dbSJung-uk Kim # pragma align 64(one,two80) 417bded2dbSJung-uk Kim #else 427bded2dbSJung-uk Kim /* not fatal, might hurt performance a little */ 437bded2dbSJung-uk Kim # define ALIGN64 447bded2dbSJung-uk Kim #endif 457bded2dbSJung-uk Kim 467bded2dbSJung-uk Kim ALIGN64 static const BN_ULONG one[40] = { 477bded2dbSJung-uk Kim 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 487bded2dbSJung-uk Kim 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 497bded2dbSJung-uk Kim }; 507bded2dbSJung-uk Kim 517bded2dbSJung-uk Kim ALIGN64 static const BN_ULONG two80[40] = { 527bded2dbSJung-uk Kim 0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 537bded2dbSJung-uk Kim 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 547bded2dbSJung-uk Kim }; 557bded2dbSJung-uk Kim 567bded2dbSJung-uk Kim void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16], 577bded2dbSJung-uk Kim const BN_ULONG base_norm[16], 587bded2dbSJung-uk Kim const BN_ULONG exponent[16], 597bded2dbSJung-uk Kim const BN_ULONG m_norm[16], const BN_ULONG RR[16], 607bded2dbSJung-uk Kim BN_ULONG k0) 617bded2dbSJung-uk Kim { 627bded2dbSJung-uk Kim unsigned char storage[320 * 3 + 32 * 9 * 16 + 64]; /* 5.5KB */ 637bded2dbSJung-uk Kim unsigned char *p_str = storage + (64 - ((size_t)storage % 64)); 647bded2dbSJung-uk Kim unsigned char *a_inv, *m, *result; 657bded2dbSJung-uk Kim unsigned char *table_s = p_str + 320 * 3; 667bded2dbSJung-uk Kim unsigned char *R2 = table_s; /* borrow */ 677bded2dbSJung-uk Kim int index; 687bded2dbSJung-uk Kim int wvalue; 6983eaf7aeSJung-uk Kim BN_ULONG tmp[16]; 707bded2dbSJung-uk Kim 717bded2dbSJung-uk Kim if ((((size_t)p_str & 4095) + 320) >> 12) { 727bded2dbSJung-uk Kim result = p_str; 737bded2dbSJung-uk Kim a_inv = p_str + 320; 747bded2dbSJung-uk Kim m = p_str + 320 * 2; /* should not cross page */ 757bded2dbSJung-uk Kim } else { 767bded2dbSJung-uk Kim m = p_str; /* should not cross page */ 777bded2dbSJung-uk Kim result = p_str + 320; 787bded2dbSJung-uk Kim a_inv = p_str + 320 * 2; 797bded2dbSJung-uk Kim } 807bded2dbSJung-uk Kim 817bded2dbSJung-uk Kim rsaz_1024_norm2red_avx2(m, m_norm); 827bded2dbSJung-uk Kim rsaz_1024_norm2red_avx2(a_inv, base_norm); 837bded2dbSJung-uk Kim rsaz_1024_norm2red_avx2(R2, RR); 847bded2dbSJung-uk Kim 857bded2dbSJung-uk Kim rsaz_1024_mul_avx2(R2, R2, R2, m, k0); 867bded2dbSJung-uk Kim rsaz_1024_mul_avx2(R2, R2, two80, m, k0); 877bded2dbSJung-uk Kim 887bded2dbSJung-uk Kim /* table[0] = 1 */ 897bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, R2, one, m, k0); 907bded2dbSJung-uk Kim /* table[1] = a_inv^1 */ 917bded2dbSJung-uk Kim rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0); 927bded2dbSJung-uk Kim 937bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 0); 947bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, a_inv, 1); 957bded2dbSJung-uk Kim 967bded2dbSJung-uk Kim /* table[2] = a_inv^2 */ 977bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1); 987bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 2); 997bded2dbSJung-uk Kim #if 0 1007bded2dbSJung-uk Kim /* this is almost 2x smaller and less than 1% slower */ 1017bded2dbSJung-uk Kim for (index = 3; index < 32; index++) { 1027bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1037bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, index); 1047bded2dbSJung-uk Kim } 1057bded2dbSJung-uk Kim #else 1067bded2dbSJung-uk Kim /* table[4] = a_inv^4 */ 1077bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1087bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 4); 1097bded2dbSJung-uk Kim /* table[8] = a_inv^8 */ 1107bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1117bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 8); 1127bded2dbSJung-uk Kim /* table[16] = a_inv^16 */ 1137bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1147bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 16); 1157bded2dbSJung-uk Kim /* table[17] = a_inv^17 */ 1167bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1177bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 17); 1187bded2dbSJung-uk Kim 1197bded2dbSJung-uk Kim /* table[3] */ 1207bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 2); 1217bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1227bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 3); 1237bded2dbSJung-uk Kim /* table[6] */ 1247bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1257bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 6); 1267bded2dbSJung-uk Kim /* table[12] */ 1277bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1287bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 12); 1297bded2dbSJung-uk Kim /* table[24] */ 1307bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1317bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 24); 1327bded2dbSJung-uk Kim /* table[25] */ 1337bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1347bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 25); 1357bded2dbSJung-uk Kim 1367bded2dbSJung-uk Kim /* table[5] */ 1377bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 4); 1387bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1397bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 5); 1407bded2dbSJung-uk Kim /* table[10] */ 1417bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1427bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 10); 1437bded2dbSJung-uk Kim /* table[20] */ 1447bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1457bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 20); 1467bded2dbSJung-uk Kim /* table[21] */ 1477bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1487bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 21); 1497bded2dbSJung-uk Kim 1507bded2dbSJung-uk Kim /* table[7] */ 1517bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 6); 1527bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1537bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 7); 1547bded2dbSJung-uk Kim /* table[14] */ 1557bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1567bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 14); 1577bded2dbSJung-uk Kim /* table[28] */ 1587bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1597bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 28); 1607bded2dbSJung-uk Kim /* table[29] */ 1617bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1627bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 29); 1637bded2dbSJung-uk Kim 1647bded2dbSJung-uk Kim /* table[9] */ 1657bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 8); 1667bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1677bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 9); 1687bded2dbSJung-uk Kim /* table[18] */ 1697bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1707bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 18); 1717bded2dbSJung-uk Kim /* table[19] */ 1727bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1737bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 19); 1747bded2dbSJung-uk Kim 1757bded2dbSJung-uk Kim /* table[11] */ 1767bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 10); 1777bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1787bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 11); 1797bded2dbSJung-uk Kim /* table[22] */ 1807bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1817bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 22); 1827bded2dbSJung-uk Kim /* table[23] */ 1837bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1847bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 23); 1857bded2dbSJung-uk Kim 1867bded2dbSJung-uk Kim /* table[13] */ 1877bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 12); 1887bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1897bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 13); 1907bded2dbSJung-uk Kim /* table[26] */ 1917bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 1927bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 26); 1937bded2dbSJung-uk Kim /* table[27] */ 1947bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 1957bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 27); 1967bded2dbSJung-uk Kim 1977bded2dbSJung-uk Kim /* table[15] */ 1987bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 14); 1997bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 2007bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 15); 2017bded2dbSJung-uk Kim /* table[30] */ 2027bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 2037bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 30); 2047bded2dbSJung-uk Kim /* table[31] */ 2057bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 2067bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 31); 2077bded2dbSJung-uk Kim #endif 2087bded2dbSJung-uk Kim 2097bded2dbSJung-uk Kim /* load first window */ 2107bded2dbSJung-uk Kim p_str = (unsigned char *)exponent; 2117bded2dbSJung-uk Kim wvalue = p_str[127] >> 3; 2127bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, wvalue); 2137bded2dbSJung-uk Kim 2147bded2dbSJung-uk Kim index = 1014; 2157bded2dbSJung-uk Kim 2167bded2dbSJung-uk Kim while (index > -1) { /* loop for the remaining 127 windows */ 2177bded2dbSJung-uk Kim 2187bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 5); 2197bded2dbSJung-uk Kim 220e71b7053SJung-uk Kim wvalue = (p_str[(index / 8) + 1] << 8) | p_str[index / 8]; 2217bded2dbSJung-uk Kim wvalue = (wvalue >> (index % 8)) & 31; 2227bded2dbSJung-uk Kim index -= 5; 2237bded2dbSJung-uk Kim 2247bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */ 2257bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 2267bded2dbSJung-uk Kim } 2277bded2dbSJung-uk Kim 2287bded2dbSJung-uk Kim /* square four times */ 2297bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 4); 2307bded2dbSJung-uk Kim 2317bded2dbSJung-uk Kim wvalue = p_str[0] & 15; 2327bded2dbSJung-uk Kim 2337bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */ 2347bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 2357bded2dbSJung-uk Kim 2367bded2dbSJung-uk Kim /* from Montgomery */ 2377bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, one, m, k0); 2387bded2dbSJung-uk Kim 2397bded2dbSJung-uk Kim rsaz_1024_red2norm_avx2(result_norm, result); 2407bded2dbSJung-uk Kim 24183eaf7aeSJung-uk Kim bn_reduce_once_in_place(result_norm, /*carry=*/0, m_norm, tmp, 16); 24283eaf7aeSJung-uk Kim 2437bded2dbSJung-uk Kim OPENSSL_cleanse(storage, sizeof(storage)); 24483eaf7aeSJung-uk Kim OPENSSL_cleanse(tmp, sizeof(tmp)); 2457bded2dbSJung-uk Kim } 2467bded2dbSJung-uk Kim 2477bded2dbSJung-uk Kim /* 2487bded2dbSJung-uk Kim * See crypto/bn/rsaz-x86_64.pl for further details. 2497bded2dbSJung-uk Kim */ 2507bded2dbSJung-uk Kim void rsaz_512_mul(void *ret, const void *a, const void *b, const void *n, 2517bded2dbSJung-uk Kim BN_ULONG k); 2527bded2dbSJung-uk Kim void rsaz_512_mul_scatter4(void *ret, const void *a, const void *n, 2537bded2dbSJung-uk Kim BN_ULONG k, const void *tbl, unsigned int power); 2547bded2dbSJung-uk Kim void rsaz_512_mul_gather4(void *ret, const void *a, const void *tbl, 2557bded2dbSJung-uk Kim const void *n, BN_ULONG k, unsigned int power); 2567bded2dbSJung-uk Kim void rsaz_512_mul_by_one(void *ret, const void *a, const void *n, BN_ULONG k); 2577bded2dbSJung-uk Kim void rsaz_512_sqr(void *ret, const void *a, const void *n, BN_ULONG k, 2587bded2dbSJung-uk Kim int cnt); 2597bded2dbSJung-uk Kim void rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power); 2607bded2dbSJung-uk Kim void rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power); 2617bded2dbSJung-uk Kim 2627bded2dbSJung-uk Kim void RSAZ_512_mod_exp(BN_ULONG result[8], 2637bded2dbSJung-uk Kim const BN_ULONG base[8], const BN_ULONG exponent[8], 2647bded2dbSJung-uk Kim const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8]) 2657bded2dbSJung-uk Kim { 2667bded2dbSJung-uk Kim unsigned char storage[16 * 8 * 8 + 64 * 2 + 64]; /* 1.2KB */ 2677bded2dbSJung-uk Kim unsigned char *table = storage + (64 - ((size_t)storage % 64)); 2687bded2dbSJung-uk Kim BN_ULONG *a_inv = (BN_ULONG *)(table + 16 * 8 * 8); 2697bded2dbSJung-uk Kim BN_ULONG *temp = (BN_ULONG *)(table + 16 * 8 * 8 + 8 * 8); 2707bded2dbSJung-uk Kim unsigned char *p_str = (unsigned char *)exponent; 2717bded2dbSJung-uk Kim int index; 2727bded2dbSJung-uk Kim unsigned int wvalue; 27383eaf7aeSJung-uk Kim BN_ULONG tmp[8]; 2747bded2dbSJung-uk Kim 2757bded2dbSJung-uk Kim /* table[0] = 1_inv */ 2767bded2dbSJung-uk Kim temp[0] = 0 - m[0]; 2777bded2dbSJung-uk Kim temp[1] = ~m[1]; 2787bded2dbSJung-uk Kim temp[2] = ~m[2]; 2797bded2dbSJung-uk Kim temp[3] = ~m[3]; 2807bded2dbSJung-uk Kim temp[4] = ~m[4]; 2817bded2dbSJung-uk Kim temp[5] = ~m[5]; 2827bded2dbSJung-uk Kim temp[6] = ~m[6]; 2837bded2dbSJung-uk Kim temp[7] = ~m[7]; 2847bded2dbSJung-uk Kim rsaz_512_scatter4(table, temp, 0); 2857bded2dbSJung-uk Kim 2867bded2dbSJung-uk Kim /* table [1] = a_inv^1 */ 2877bded2dbSJung-uk Kim rsaz_512_mul(a_inv, base, RR, m, k0); 2887bded2dbSJung-uk Kim rsaz_512_scatter4(table, a_inv, 1); 2897bded2dbSJung-uk Kim 2907bded2dbSJung-uk Kim /* table [2] = a_inv^2 */ 2917bded2dbSJung-uk Kim rsaz_512_sqr(temp, a_inv, m, k0, 1); 2927bded2dbSJung-uk Kim rsaz_512_scatter4(table, temp, 2); 2937bded2dbSJung-uk Kim 2947bded2dbSJung-uk Kim for (index = 3; index < 16; index++) 2957bded2dbSJung-uk Kim rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index); 2967bded2dbSJung-uk Kim 2977bded2dbSJung-uk Kim /* load first window */ 2987bded2dbSJung-uk Kim wvalue = p_str[63]; 2997bded2dbSJung-uk Kim 3007bded2dbSJung-uk Kim rsaz_512_gather4(temp, table, wvalue >> 4); 3017bded2dbSJung-uk Kim rsaz_512_sqr(temp, temp, m, k0, 4); 3027bded2dbSJung-uk Kim rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0xf); 3037bded2dbSJung-uk Kim 3047bded2dbSJung-uk Kim for (index = 62; index >= 0; index--) { 3057bded2dbSJung-uk Kim wvalue = p_str[index]; 3067bded2dbSJung-uk Kim 3077bded2dbSJung-uk Kim rsaz_512_sqr(temp, temp, m, k0, 4); 3087bded2dbSJung-uk Kim rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue >> 4); 3097bded2dbSJung-uk Kim 3107bded2dbSJung-uk Kim rsaz_512_sqr(temp, temp, m, k0, 4); 3117bded2dbSJung-uk Kim rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0x0f); 3127bded2dbSJung-uk Kim } 3137bded2dbSJung-uk Kim 3147bded2dbSJung-uk Kim /* from Montgomery */ 3157bded2dbSJung-uk Kim rsaz_512_mul_by_one(result, temp, m, k0); 3167bded2dbSJung-uk Kim 31783eaf7aeSJung-uk Kim bn_reduce_once_in_place(result, /*carry=*/0, m, tmp, 8); 31883eaf7aeSJung-uk Kim 3197bded2dbSJung-uk Kim OPENSSL_cleanse(storage, sizeof(storage)); 32083eaf7aeSJung-uk Kim OPENSSL_cleanse(tmp, sizeof(tmp)); 3217bded2dbSJung-uk Kim } 3227bded2dbSJung-uk Kim 3237bded2dbSJung-uk Kim #endif 324