1*7bded2dbSJung-uk Kim /***************************************************************************** 2*7bded2dbSJung-uk Kim * * 3*7bded2dbSJung-uk Kim * Copyright (c) 2012, Intel Corporation * 4*7bded2dbSJung-uk Kim * * 5*7bded2dbSJung-uk Kim * All rights reserved. * 6*7bded2dbSJung-uk Kim * * 7*7bded2dbSJung-uk Kim * Redistribution and use in source and binary forms, with or without * 8*7bded2dbSJung-uk Kim * modification, are permitted provided that the following conditions are * 9*7bded2dbSJung-uk Kim * met: * 10*7bded2dbSJung-uk Kim * * 11*7bded2dbSJung-uk Kim * * Redistributions of source code must retain the above copyright * 12*7bded2dbSJung-uk Kim * notice, this list of conditions and the following disclaimer. * 13*7bded2dbSJung-uk Kim * * 14*7bded2dbSJung-uk Kim * * Redistributions in binary form must reproduce the above copyright * 15*7bded2dbSJung-uk Kim * notice, this list of conditions and the following disclaimer in the * 16*7bded2dbSJung-uk Kim * documentation and/or other materials provided with the * 17*7bded2dbSJung-uk Kim * distribution. * 18*7bded2dbSJung-uk Kim * * 19*7bded2dbSJung-uk Kim * * Neither the name of the Intel Corporation nor the names of its * 20*7bded2dbSJung-uk Kim * contributors may be used to endorse or promote products derived from * 21*7bded2dbSJung-uk Kim * this software without specific prior written permission. * 22*7bded2dbSJung-uk Kim * * 23*7bded2dbSJung-uk Kim * * 24*7bded2dbSJung-uk Kim * THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY * 25*7bded2dbSJung-uk Kim * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * 26*7bded2dbSJung-uk Kim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * 27*7bded2dbSJung-uk Kim * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR * 28*7bded2dbSJung-uk Kim * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * 29*7bded2dbSJung-uk Kim * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * 30*7bded2dbSJung-uk Kim * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * 31*7bded2dbSJung-uk Kim * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * 32*7bded2dbSJung-uk Kim * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * 33*7bded2dbSJung-uk Kim * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 34*7bded2dbSJung-uk Kim * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * 35*7bded2dbSJung-uk Kim * * 36*7bded2dbSJung-uk Kim ****************************************************************************** 37*7bded2dbSJung-uk Kim * Developers and authors: * 38*7bded2dbSJung-uk Kim * Shay Gueron (1, 2), and Vlad Krasnov (1) * 39*7bded2dbSJung-uk Kim * (1) Intel Corporation, Israel Development Center, Haifa, Israel * 40*7bded2dbSJung-uk Kim * (2) University of Haifa, Israel * 41*7bded2dbSJung-uk Kim *****************************************************************************/ 42*7bded2dbSJung-uk Kim 43*7bded2dbSJung-uk Kim #include "rsaz_exp.h" 44*7bded2dbSJung-uk Kim 45*7bded2dbSJung-uk Kim #ifdef RSAZ_ENABLED 46*7bded2dbSJung-uk Kim 47*7bded2dbSJung-uk Kim /* 48*7bded2dbSJung-uk Kim * See crypto/bn/asm/rsaz-avx2.pl for further details. 49*7bded2dbSJung-uk Kim */ 50*7bded2dbSJung-uk Kim void rsaz_1024_norm2red_avx2(void *red, const void *norm); 51*7bded2dbSJung-uk Kim void rsaz_1024_mul_avx2(void *ret, const void *a, const void *b, 52*7bded2dbSJung-uk Kim const void *n, BN_ULONG k); 53*7bded2dbSJung-uk Kim void rsaz_1024_sqr_avx2(void *ret, const void *a, const void *n, BN_ULONG k, 54*7bded2dbSJung-uk Kim int cnt); 55*7bded2dbSJung-uk Kim void rsaz_1024_scatter5_avx2(void *tbl, const void *val, int i); 56*7bded2dbSJung-uk Kim void rsaz_1024_gather5_avx2(void *val, const void *tbl, int i); 57*7bded2dbSJung-uk Kim void rsaz_1024_red2norm_avx2(void *norm, const void *red); 58*7bded2dbSJung-uk Kim 59*7bded2dbSJung-uk Kim #if defined(__GNUC__) 60*7bded2dbSJung-uk Kim # define ALIGN64 __attribute__((aligned(64))) 61*7bded2dbSJung-uk Kim #elif defined(_MSC_VER) 62*7bded2dbSJung-uk Kim # define ALIGN64 __declspec(align(64)) 63*7bded2dbSJung-uk Kim #elif defined(__SUNPRO_C) 64*7bded2dbSJung-uk Kim # define ALIGN64 65*7bded2dbSJung-uk Kim # pragma align 64(one,two80) 66*7bded2dbSJung-uk Kim #else 67*7bded2dbSJung-uk Kim /* not fatal, might hurt performance a little */ 68*7bded2dbSJung-uk Kim # define ALIGN64 69*7bded2dbSJung-uk Kim #endif 70*7bded2dbSJung-uk Kim 71*7bded2dbSJung-uk Kim ALIGN64 static const BN_ULONG one[40] = { 72*7bded2dbSJung-uk Kim 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73*7bded2dbSJung-uk Kim 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 74*7bded2dbSJung-uk Kim }; 75*7bded2dbSJung-uk Kim 76*7bded2dbSJung-uk Kim ALIGN64 static const BN_ULONG two80[40] = { 77*7bded2dbSJung-uk Kim 0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78*7bded2dbSJung-uk Kim 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 79*7bded2dbSJung-uk Kim }; 80*7bded2dbSJung-uk Kim 81*7bded2dbSJung-uk Kim void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16], 82*7bded2dbSJung-uk Kim const BN_ULONG base_norm[16], 83*7bded2dbSJung-uk Kim const BN_ULONG exponent[16], 84*7bded2dbSJung-uk Kim const BN_ULONG m_norm[16], const BN_ULONG RR[16], 85*7bded2dbSJung-uk Kim BN_ULONG k0) 86*7bded2dbSJung-uk Kim { 87*7bded2dbSJung-uk Kim unsigned char storage[320 * 3 + 32 * 9 * 16 + 64]; /* 5.5KB */ 88*7bded2dbSJung-uk Kim unsigned char *p_str = storage + (64 - ((size_t)storage % 64)); 89*7bded2dbSJung-uk Kim unsigned char *a_inv, *m, *result; 90*7bded2dbSJung-uk Kim unsigned char *table_s = p_str + 320 * 3; 91*7bded2dbSJung-uk Kim unsigned char *R2 = table_s; /* borrow */ 92*7bded2dbSJung-uk Kim int index; 93*7bded2dbSJung-uk Kim int wvalue; 94*7bded2dbSJung-uk Kim 95*7bded2dbSJung-uk Kim if ((((size_t)p_str & 4095) + 320) >> 12) { 96*7bded2dbSJung-uk Kim result = p_str; 97*7bded2dbSJung-uk Kim a_inv = p_str + 320; 98*7bded2dbSJung-uk Kim m = p_str + 320 * 2; /* should not cross page */ 99*7bded2dbSJung-uk Kim } else { 100*7bded2dbSJung-uk Kim m = p_str; /* should not cross page */ 101*7bded2dbSJung-uk Kim result = p_str + 320; 102*7bded2dbSJung-uk Kim a_inv = p_str + 320 * 2; 103*7bded2dbSJung-uk Kim } 104*7bded2dbSJung-uk Kim 105*7bded2dbSJung-uk Kim rsaz_1024_norm2red_avx2(m, m_norm); 106*7bded2dbSJung-uk Kim rsaz_1024_norm2red_avx2(a_inv, base_norm); 107*7bded2dbSJung-uk Kim rsaz_1024_norm2red_avx2(R2, RR); 108*7bded2dbSJung-uk Kim 109*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(R2, R2, R2, m, k0); 110*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(R2, R2, two80, m, k0); 111*7bded2dbSJung-uk Kim 112*7bded2dbSJung-uk Kim /* table[0] = 1 */ 113*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, R2, one, m, k0); 114*7bded2dbSJung-uk Kim /* table[1] = a_inv^1 */ 115*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0); 116*7bded2dbSJung-uk Kim 117*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 0); 118*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, a_inv, 1); 119*7bded2dbSJung-uk Kim 120*7bded2dbSJung-uk Kim /* table[2] = a_inv^2 */ 121*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1); 122*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 2); 123*7bded2dbSJung-uk Kim #if 0 124*7bded2dbSJung-uk Kim /* this is almost 2x smaller and less than 1% slower */ 125*7bded2dbSJung-uk Kim for (index = 3; index < 32; index++) { 126*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 127*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, index); 128*7bded2dbSJung-uk Kim } 129*7bded2dbSJung-uk Kim #else 130*7bded2dbSJung-uk Kim /* table[4] = a_inv^4 */ 131*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 132*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 4); 133*7bded2dbSJung-uk Kim /* table[8] = a_inv^8 */ 134*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 135*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 8); 136*7bded2dbSJung-uk Kim /* table[16] = a_inv^16 */ 137*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 138*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 16); 139*7bded2dbSJung-uk Kim /* table[17] = a_inv^17 */ 140*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 141*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 17); 142*7bded2dbSJung-uk Kim 143*7bded2dbSJung-uk Kim /* table[3] */ 144*7bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 2); 145*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 146*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 3); 147*7bded2dbSJung-uk Kim /* table[6] */ 148*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 149*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 6); 150*7bded2dbSJung-uk Kim /* table[12] */ 151*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 152*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 12); 153*7bded2dbSJung-uk Kim /* table[24] */ 154*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 155*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 24); 156*7bded2dbSJung-uk Kim /* table[25] */ 157*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 158*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 25); 159*7bded2dbSJung-uk Kim 160*7bded2dbSJung-uk Kim /* table[5] */ 161*7bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 4); 162*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 163*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 5); 164*7bded2dbSJung-uk Kim /* table[10] */ 165*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 166*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 10); 167*7bded2dbSJung-uk Kim /* table[20] */ 168*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 169*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 20); 170*7bded2dbSJung-uk Kim /* table[21] */ 171*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 172*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 21); 173*7bded2dbSJung-uk Kim 174*7bded2dbSJung-uk Kim /* table[7] */ 175*7bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 6); 176*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 177*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 7); 178*7bded2dbSJung-uk Kim /* table[14] */ 179*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 180*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 14); 181*7bded2dbSJung-uk Kim /* table[28] */ 182*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 183*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 28); 184*7bded2dbSJung-uk Kim /* table[29] */ 185*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 186*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 29); 187*7bded2dbSJung-uk Kim 188*7bded2dbSJung-uk Kim /* table[9] */ 189*7bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 8); 190*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 191*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 9); 192*7bded2dbSJung-uk Kim /* table[18] */ 193*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 194*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 18); 195*7bded2dbSJung-uk Kim /* table[19] */ 196*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 197*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 19); 198*7bded2dbSJung-uk Kim 199*7bded2dbSJung-uk Kim /* table[11] */ 200*7bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 10); 201*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 202*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 11); 203*7bded2dbSJung-uk Kim /* table[22] */ 204*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 205*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 22); 206*7bded2dbSJung-uk Kim /* table[23] */ 207*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 208*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 23); 209*7bded2dbSJung-uk Kim 210*7bded2dbSJung-uk Kim /* table[13] */ 211*7bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 12); 212*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 213*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 13); 214*7bded2dbSJung-uk Kim /* table[26] */ 215*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 216*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 26); 217*7bded2dbSJung-uk Kim /* table[27] */ 218*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 219*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 27); 220*7bded2dbSJung-uk Kim 221*7bded2dbSJung-uk Kim /* table[15] */ 222*7bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, 14); 223*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 224*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 15); 225*7bded2dbSJung-uk Kim /* table[30] */ 226*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 1); 227*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 30); 228*7bded2dbSJung-uk Kim /* table[31] */ 229*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 230*7bded2dbSJung-uk Kim rsaz_1024_scatter5_avx2(table_s, result, 31); 231*7bded2dbSJung-uk Kim #endif 232*7bded2dbSJung-uk Kim 233*7bded2dbSJung-uk Kim /* load first window */ 234*7bded2dbSJung-uk Kim p_str = (unsigned char *)exponent; 235*7bded2dbSJung-uk Kim wvalue = p_str[127] >> 3; 236*7bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(result, table_s, wvalue); 237*7bded2dbSJung-uk Kim 238*7bded2dbSJung-uk Kim index = 1014; 239*7bded2dbSJung-uk Kim 240*7bded2dbSJung-uk Kim while (index > -1) { /* loop for the remaining 127 windows */ 241*7bded2dbSJung-uk Kim 242*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 5); 243*7bded2dbSJung-uk Kim 244*7bded2dbSJung-uk Kim wvalue = *((unsigned short *)&p_str[index / 8]); 245*7bded2dbSJung-uk Kim wvalue = (wvalue >> (index % 8)) & 31; 246*7bded2dbSJung-uk Kim index -= 5; 247*7bded2dbSJung-uk Kim 248*7bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */ 249*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 250*7bded2dbSJung-uk Kim } 251*7bded2dbSJung-uk Kim 252*7bded2dbSJung-uk Kim /* square four times */ 253*7bded2dbSJung-uk Kim rsaz_1024_sqr_avx2(result, result, m, k0, 4); 254*7bded2dbSJung-uk Kim 255*7bded2dbSJung-uk Kim wvalue = p_str[0] & 15; 256*7bded2dbSJung-uk Kim 257*7bded2dbSJung-uk Kim rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */ 258*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 259*7bded2dbSJung-uk Kim 260*7bded2dbSJung-uk Kim /* from Montgomery */ 261*7bded2dbSJung-uk Kim rsaz_1024_mul_avx2(result, result, one, m, k0); 262*7bded2dbSJung-uk Kim 263*7bded2dbSJung-uk Kim rsaz_1024_red2norm_avx2(result_norm, result); 264*7bded2dbSJung-uk Kim 265*7bded2dbSJung-uk Kim OPENSSL_cleanse(storage, sizeof(storage)); 266*7bded2dbSJung-uk Kim } 267*7bded2dbSJung-uk Kim 268*7bded2dbSJung-uk Kim /* 269*7bded2dbSJung-uk Kim * See crypto/bn/rsaz-x86_64.pl for further details. 270*7bded2dbSJung-uk Kim */ 271*7bded2dbSJung-uk Kim void rsaz_512_mul(void *ret, const void *a, const void *b, const void *n, 272*7bded2dbSJung-uk Kim BN_ULONG k); 273*7bded2dbSJung-uk Kim void rsaz_512_mul_scatter4(void *ret, const void *a, const void *n, 274*7bded2dbSJung-uk Kim BN_ULONG k, const void *tbl, unsigned int power); 275*7bded2dbSJung-uk Kim void rsaz_512_mul_gather4(void *ret, const void *a, const void *tbl, 276*7bded2dbSJung-uk Kim const void *n, BN_ULONG k, unsigned int power); 277*7bded2dbSJung-uk Kim void rsaz_512_mul_by_one(void *ret, const void *a, const void *n, BN_ULONG k); 278*7bded2dbSJung-uk Kim void rsaz_512_sqr(void *ret, const void *a, const void *n, BN_ULONG k, 279*7bded2dbSJung-uk Kim int cnt); 280*7bded2dbSJung-uk Kim void rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power); 281*7bded2dbSJung-uk Kim void rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power); 282*7bded2dbSJung-uk Kim 283*7bded2dbSJung-uk Kim void RSAZ_512_mod_exp(BN_ULONG result[8], 284*7bded2dbSJung-uk Kim const BN_ULONG base[8], const BN_ULONG exponent[8], 285*7bded2dbSJung-uk Kim const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8]) 286*7bded2dbSJung-uk Kim { 287*7bded2dbSJung-uk Kim unsigned char storage[16 * 8 * 8 + 64 * 2 + 64]; /* 1.2KB */ 288*7bded2dbSJung-uk Kim unsigned char *table = storage + (64 - ((size_t)storage % 64)); 289*7bded2dbSJung-uk Kim BN_ULONG *a_inv = (BN_ULONG *)(table + 16 * 8 * 8); 290*7bded2dbSJung-uk Kim BN_ULONG *temp = (BN_ULONG *)(table + 16 * 8 * 8 + 8 * 8); 291*7bded2dbSJung-uk Kim unsigned char *p_str = (unsigned char *)exponent; 292*7bded2dbSJung-uk Kim int index; 293*7bded2dbSJung-uk Kim unsigned int wvalue; 294*7bded2dbSJung-uk Kim 295*7bded2dbSJung-uk Kim /* table[0] = 1_inv */ 296*7bded2dbSJung-uk Kim temp[0] = 0 - m[0]; 297*7bded2dbSJung-uk Kim temp[1] = ~m[1]; 298*7bded2dbSJung-uk Kim temp[2] = ~m[2]; 299*7bded2dbSJung-uk Kim temp[3] = ~m[3]; 300*7bded2dbSJung-uk Kim temp[4] = ~m[4]; 301*7bded2dbSJung-uk Kim temp[5] = ~m[5]; 302*7bded2dbSJung-uk Kim temp[6] = ~m[6]; 303*7bded2dbSJung-uk Kim temp[7] = ~m[7]; 304*7bded2dbSJung-uk Kim rsaz_512_scatter4(table, temp, 0); 305*7bded2dbSJung-uk Kim 306*7bded2dbSJung-uk Kim /* table [1] = a_inv^1 */ 307*7bded2dbSJung-uk Kim rsaz_512_mul(a_inv, base, RR, m, k0); 308*7bded2dbSJung-uk Kim rsaz_512_scatter4(table, a_inv, 1); 309*7bded2dbSJung-uk Kim 310*7bded2dbSJung-uk Kim /* table [2] = a_inv^2 */ 311*7bded2dbSJung-uk Kim rsaz_512_sqr(temp, a_inv, m, k0, 1); 312*7bded2dbSJung-uk Kim rsaz_512_scatter4(table, temp, 2); 313*7bded2dbSJung-uk Kim 314*7bded2dbSJung-uk Kim for (index = 3; index < 16; index++) 315*7bded2dbSJung-uk Kim rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index); 316*7bded2dbSJung-uk Kim 317*7bded2dbSJung-uk Kim /* load first window */ 318*7bded2dbSJung-uk Kim wvalue = p_str[63]; 319*7bded2dbSJung-uk Kim 320*7bded2dbSJung-uk Kim rsaz_512_gather4(temp, table, wvalue >> 4); 321*7bded2dbSJung-uk Kim rsaz_512_sqr(temp, temp, m, k0, 4); 322*7bded2dbSJung-uk Kim rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0xf); 323*7bded2dbSJung-uk Kim 324*7bded2dbSJung-uk Kim for (index = 62; index >= 0; index--) { 325*7bded2dbSJung-uk Kim wvalue = p_str[index]; 326*7bded2dbSJung-uk Kim 327*7bded2dbSJung-uk Kim rsaz_512_sqr(temp, temp, m, k0, 4); 328*7bded2dbSJung-uk Kim rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue >> 4); 329*7bded2dbSJung-uk Kim 330*7bded2dbSJung-uk Kim rsaz_512_sqr(temp, temp, m, k0, 4); 331*7bded2dbSJung-uk Kim rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0x0f); 332*7bded2dbSJung-uk Kim } 333*7bded2dbSJung-uk Kim 334*7bded2dbSJung-uk Kim /* from Montgomery */ 335*7bded2dbSJung-uk Kim rsaz_512_mul_by_one(result, temp, m, k0); 336*7bded2dbSJung-uk Kim 337*7bded2dbSJung-uk Kim OPENSSL_cleanse(storage, sizeof(storage)); 338*7bded2dbSJung-uk Kim } 339*7bded2dbSJung-uk Kim 340*7bded2dbSJung-uk Kim #else 341*7bded2dbSJung-uk Kim 342*7bded2dbSJung-uk Kim # if defined(PEDANTIC) || defined(__DECC) || defined(__clang__) 343*7bded2dbSJung-uk Kim static void *dummy = &dummy; 344*7bded2dbSJung-uk Kim # endif 345*7bded2dbSJung-uk Kim 346*7bded2dbSJung-uk Kim #endif 347