1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*7c478bd9Sstevel@tonic-gate 29*7c478bd9Sstevel@tonic-gate /* 30*7c478bd9Sstevel@tonic-gate * If compiled without -DRF_INLINE_MACROS then needs -lm at link time 31*7c478bd9Sstevel@tonic-gate * If compiled with -DRF_INLINE_MACROS then needs conv.il at compile time 32*7c478bd9Sstevel@tonic-gate * (i.e. cc <compileer_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c ) 33*7c478bd9Sstevel@tonic-gate */ 34*7c478bd9Sstevel@tonic-gate 35*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 36*7c478bd9Sstevel@tonic-gate #include <math.h> 37*7c478bd9Sstevel@tonic-gate 38*7c478bd9Sstevel@tonic-gate static const double TwoTo16 = 65536.0; 39*7c478bd9Sstevel@tonic-gate static const double TwoToMinus16 = 1.0/65536.0; 40*7c478bd9Sstevel@tonic-gate static const double Zero = 0.0; 41*7c478bd9Sstevel@tonic-gate static const double TwoTo32 = 65536.0 * 65536.0; 42*7c478bd9Sstevel@tonic-gate static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0); 43*7c478bd9Sstevel@tonic-gate 44*7c478bd9Sstevel@tonic-gate #ifdef RF_INLINE_MACROS 45*7c478bd9Sstevel@tonic-gate 46*7c478bd9Sstevel@tonic-gate double upper32(double); 47*7c478bd9Sstevel@tonic-gate double lower32(double, double); 48*7c478bd9Sstevel@tonic-gate double mod(double, double, double); 49*7c478bd9Sstevel@tonic-gate 50*7c478bd9Sstevel@tonic-gate #else 51*7c478bd9Sstevel@tonic-gate 52*7c478bd9Sstevel@tonic-gate static double 53*7c478bd9Sstevel@tonic-gate upper32(double x) 54*7c478bd9Sstevel@tonic-gate { 55*7c478bd9Sstevel@tonic-gate return (floor(x * TwoToMinus32)); 56*7c478bd9Sstevel@tonic-gate } 57*7c478bd9Sstevel@tonic-gate 58*7c478bd9Sstevel@tonic-gate 59*7c478bd9Sstevel@tonic-gate static double 60*7c478bd9Sstevel@tonic-gate lower32(double x, double y) 61*7c478bd9Sstevel@tonic-gate { 62*7c478bd9Sstevel@tonic-gate return (x - TwoTo32 * floor(x * TwoToMinus32)); 63*7c478bd9Sstevel@tonic-gate } 64*7c478bd9Sstevel@tonic-gate 65*7c478bd9Sstevel@tonic-gate static double 66*7c478bd9Sstevel@tonic-gate mod(double x, double oneoverm, double m) 67*7c478bd9Sstevel@tonic-gate { 68*7c478bd9Sstevel@tonic-gate return (x - m * floor(x * oneoverm)); 69*7c478bd9Sstevel@tonic-gate } 70*7c478bd9Sstevel@tonic-gate 71*7c478bd9Sstevel@tonic-gate #endif 72*7c478bd9Sstevel@tonic-gate 73*7c478bd9Sstevel@tonic-gate 74*7c478bd9Sstevel@tonic-gate static void 75*7c478bd9Sstevel@tonic-gate cleanup(double *dt, int from, int tlen) 76*7c478bd9Sstevel@tonic-gate { 77*7c478bd9Sstevel@tonic-gate int i; 78*7c478bd9Sstevel@tonic-gate double tmp, tmp1, x, x1; 79*7c478bd9Sstevel@tonic-gate 80*7c478bd9Sstevel@tonic-gate tmp = tmp1 = Zero; 81*7c478bd9Sstevel@tonic-gate 82*7c478bd9Sstevel@tonic-gate for (i = 2 * from; i < 2 * tlen; i += 2) { 83*7c478bd9Sstevel@tonic-gate x = dt[i]; 84*7c478bd9Sstevel@tonic-gate x1 = dt[i + 1]; 85*7c478bd9Sstevel@tonic-gate dt[i] = lower32(x, Zero) + tmp; 86*7c478bd9Sstevel@tonic-gate dt[i + 1] = lower32(x1, Zero) + tmp1; 87*7c478bd9Sstevel@tonic-gate tmp = upper32(x); 88*7c478bd9Sstevel@tonic-gate tmp1 = upper32(x1); 89*7c478bd9Sstevel@tonic-gate } 90*7c478bd9Sstevel@tonic-gate } 91*7c478bd9Sstevel@tonic-gate 92*7c478bd9Sstevel@tonic-gate 93*7c478bd9Sstevel@tonic-gate void 94*7c478bd9Sstevel@tonic-gate conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen) 95*7c478bd9Sstevel@tonic-gate { 96*7c478bd9Sstevel@tonic-gate int i; 97*7c478bd9Sstevel@tonic-gate int64_t t, t1, /* using int64_t and not uint64_t */ 98*7c478bd9Sstevel@tonic-gate a, b, c, d; /* because more efficient code is */ 99*7c478bd9Sstevel@tonic-gate /* generated this way, and there */ 100*7c478bd9Sstevel@tonic-gate /* is no overflow */ 101*7c478bd9Sstevel@tonic-gate t1 = 0; 102*7c478bd9Sstevel@tonic-gate a = (int64_t)d16[0]; 103*7c478bd9Sstevel@tonic-gate b = (int64_t)d16[1]; 104*7c478bd9Sstevel@tonic-gate for (i = 0; i < ilen - 1; i++) { 105*7c478bd9Sstevel@tonic-gate c = (int64_t)d16[2 * i + 2]; 106*7c478bd9Sstevel@tonic-gate t1 += a & 0xffffffff; 107*7c478bd9Sstevel@tonic-gate t = (a >> 32); 108*7c478bd9Sstevel@tonic-gate d = (int64_t)d16[2 * i + 3]; 109*7c478bd9Sstevel@tonic-gate t1 += (b & 0xffff) << 16; 110*7c478bd9Sstevel@tonic-gate t += (b >> 16) + (t1 >> 32); 111*7c478bd9Sstevel@tonic-gate i32[i] = t1 & 0xffffffff; 112*7c478bd9Sstevel@tonic-gate t1 = t; 113*7c478bd9Sstevel@tonic-gate a = c; 114*7c478bd9Sstevel@tonic-gate b = d; 115*7c478bd9Sstevel@tonic-gate } 116*7c478bd9Sstevel@tonic-gate t1 += a & 0xffffffff; 117*7c478bd9Sstevel@tonic-gate t = (a >> 32); 118*7c478bd9Sstevel@tonic-gate t1 += (b & 0xffff) << 16; 119*7c478bd9Sstevel@tonic-gate i32[i] = t1 & 0xffffffff; 120*7c478bd9Sstevel@tonic-gate } 121*7c478bd9Sstevel@tonic-gate 122*7c478bd9Sstevel@tonic-gate void 123*7c478bd9Sstevel@tonic-gate conv_i32_to_d32(double *d32, uint32_t *i32, int len) 124*7c478bd9Sstevel@tonic-gate { 125*7c478bd9Sstevel@tonic-gate int i; 126*7c478bd9Sstevel@tonic-gate 127*7c478bd9Sstevel@tonic-gate #pragma pipeloop(0) 128*7c478bd9Sstevel@tonic-gate for (i = 0; i < len; i++) 129*7c478bd9Sstevel@tonic-gate d32[i] = (double)(i32[i]); 130*7c478bd9Sstevel@tonic-gate } 131*7c478bd9Sstevel@tonic-gate 132*7c478bd9Sstevel@tonic-gate 133*7c478bd9Sstevel@tonic-gate void 134*7c478bd9Sstevel@tonic-gate conv_i32_to_d16(double *d16, uint32_t *i32, int len) 135*7c478bd9Sstevel@tonic-gate { 136*7c478bd9Sstevel@tonic-gate int i; 137*7c478bd9Sstevel@tonic-gate uint32_t a; 138*7c478bd9Sstevel@tonic-gate 139*7c478bd9Sstevel@tonic-gate #pragma pipeloop(0) 140*7c478bd9Sstevel@tonic-gate for (i = 0; i < len; i++) { 141*7c478bd9Sstevel@tonic-gate a = i32[i]; 142*7c478bd9Sstevel@tonic-gate d16[2 * i] = (double)(a & 0xffff); 143*7c478bd9Sstevel@tonic-gate d16[2 * i + 1] = (double)(a >> 16); 144*7c478bd9Sstevel@tonic-gate } 145*7c478bd9Sstevel@tonic-gate } 146*7c478bd9Sstevel@tonic-gate 147*7c478bd9Sstevel@tonic-gate #ifdef RF_INLINE_MACROS 148*7c478bd9Sstevel@tonic-gate 149*7c478bd9Sstevel@tonic-gate void 150*7c478bd9Sstevel@tonic-gate i16_to_d16_and_d32x4(const double *, /* 1/(2^16) */ 151*7c478bd9Sstevel@tonic-gate const double *, /* 2^16 */ 152*7c478bd9Sstevel@tonic-gate const double *, /* 0 */ 153*7c478bd9Sstevel@tonic-gate double *, /* result16 */ 154*7c478bd9Sstevel@tonic-gate double *, /* result32 */ 155*7c478bd9Sstevel@tonic-gate float *); /* source - should be unsigned int* */ 156*7c478bd9Sstevel@tonic-gate /* converted to float* */ 157*7c478bd9Sstevel@tonic-gate 158*7c478bd9Sstevel@tonic-gate #else 159*7c478bd9Sstevel@tonic-gate 160*7c478bd9Sstevel@tonic-gate 161*7c478bd9Sstevel@tonic-gate static void 162*7c478bd9Sstevel@tonic-gate i16_to_d16_and_d32x4(const double *dummy1, /* 1/(2^16) */ 163*7c478bd9Sstevel@tonic-gate const double *dummy2, /* 2^16 */ 164*7c478bd9Sstevel@tonic-gate const double *dummy3, /* 0 */ 165*7c478bd9Sstevel@tonic-gate double *result16, 166*7c478bd9Sstevel@tonic-gate double *result32, 167*7c478bd9Sstevel@tonic-gate float *src) /* source - should be unsigned int* */ 168*7c478bd9Sstevel@tonic-gate /* converted to float* */ 169*7c478bd9Sstevel@tonic-gate { 170*7c478bd9Sstevel@tonic-gate uint32_t *i32; 171*7c478bd9Sstevel@tonic-gate uint32_t a, b, c, d; 172*7c478bd9Sstevel@tonic-gate 173*7c478bd9Sstevel@tonic-gate i32 = (uint32_t *)src; 174*7c478bd9Sstevel@tonic-gate a = i32[0]; 175*7c478bd9Sstevel@tonic-gate b = i32[1]; 176*7c478bd9Sstevel@tonic-gate c = i32[2]; 177*7c478bd9Sstevel@tonic-gate d = i32[3]; 178*7c478bd9Sstevel@tonic-gate result16[0] = (double)(a & 0xffff); 179*7c478bd9Sstevel@tonic-gate result16[1] = (double)(a >> 16); 180*7c478bd9Sstevel@tonic-gate result32[0] = (double)a; 181*7c478bd9Sstevel@tonic-gate result16[2] = (double)(b & 0xffff); 182*7c478bd9Sstevel@tonic-gate result16[3] = (double)(b >> 16); 183*7c478bd9Sstevel@tonic-gate result32[1] = (double)b; 184*7c478bd9Sstevel@tonic-gate result16[4] = (double)(c & 0xffff); 185*7c478bd9Sstevel@tonic-gate result16[5] = (double)(c >> 16); 186*7c478bd9Sstevel@tonic-gate result32[2] = (double)c; 187*7c478bd9Sstevel@tonic-gate result16[6] = (double)(d & 0xffff); 188*7c478bd9Sstevel@tonic-gate result16[7] = (double)(d >> 16); 189*7c478bd9Sstevel@tonic-gate result32[3] = (double)d; 190*7c478bd9Sstevel@tonic-gate } 191*7c478bd9Sstevel@tonic-gate 192*7c478bd9Sstevel@tonic-gate #endif 193*7c478bd9Sstevel@tonic-gate 194*7c478bd9Sstevel@tonic-gate 195*7c478bd9Sstevel@tonic-gate void 196*7c478bd9Sstevel@tonic-gate conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len) 197*7c478bd9Sstevel@tonic-gate { 198*7c478bd9Sstevel@tonic-gate int i; 199*7c478bd9Sstevel@tonic-gate uint32_t a; 200*7c478bd9Sstevel@tonic-gate 201*7c478bd9Sstevel@tonic-gate #pragma pipeloop(0) 202*7c478bd9Sstevel@tonic-gate for (i = 0; i < len - 3; i += 4) { 203*7c478bd9Sstevel@tonic-gate i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero, 204*7c478bd9Sstevel@tonic-gate &(d16[2*i]), &(d32[i]), 205*7c478bd9Sstevel@tonic-gate (float *)(&(i32[i]))); 206*7c478bd9Sstevel@tonic-gate } 207*7c478bd9Sstevel@tonic-gate for (; i < len; i++) { 208*7c478bd9Sstevel@tonic-gate a = i32[i]; 209*7c478bd9Sstevel@tonic-gate d32[i] = (double)(i32[i]); 210*7c478bd9Sstevel@tonic-gate d16[2 * i] = (double)(a & 0xffff); 211*7c478bd9Sstevel@tonic-gate d16[2 * i + 1] = (double)(a >> 16); 212*7c478bd9Sstevel@tonic-gate } 213*7c478bd9Sstevel@tonic-gate } 214*7c478bd9Sstevel@tonic-gate 215*7c478bd9Sstevel@tonic-gate 216*7c478bd9Sstevel@tonic-gate static void 217*7c478bd9Sstevel@tonic-gate adjust_montf_result(uint32_t *i32, uint32_t *nint, int len) 218*7c478bd9Sstevel@tonic-gate { 219*7c478bd9Sstevel@tonic-gate int64_t acc; 220*7c478bd9Sstevel@tonic-gate int i; 221*7c478bd9Sstevel@tonic-gate 222*7c478bd9Sstevel@tonic-gate if (i32[len] > 0) 223*7c478bd9Sstevel@tonic-gate i = -1; 224*7c478bd9Sstevel@tonic-gate else { 225*7c478bd9Sstevel@tonic-gate for (i = len - 1; i >= 0; i--) { 226*7c478bd9Sstevel@tonic-gate if (i32[i] != nint[i]) break; 227*7c478bd9Sstevel@tonic-gate } 228*7c478bd9Sstevel@tonic-gate } 229*7c478bd9Sstevel@tonic-gate if ((i < 0) || (i32[i] > nint[i])) { 230*7c478bd9Sstevel@tonic-gate acc = 0; 231*7c478bd9Sstevel@tonic-gate for (i = 0; i < len; i++) { 232*7c478bd9Sstevel@tonic-gate acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]); 233*7c478bd9Sstevel@tonic-gate i32[i] = acc & 0xffffffff; 234*7c478bd9Sstevel@tonic-gate acc = acc >> 32; 235*7c478bd9Sstevel@tonic-gate } 236*7c478bd9Sstevel@tonic-gate } 237*7c478bd9Sstevel@tonic-gate } 238*7c478bd9Sstevel@tonic-gate 239*7c478bd9Sstevel@tonic-gate 240*7c478bd9Sstevel@tonic-gate /* 241*7c478bd9Sstevel@tonic-gate * the lengths of the input arrays should be at least the following: 242*7c478bd9Sstevel@tonic-gate * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] 243*7c478bd9Sstevel@tonic-gate * all of them should be different from one another 244*7c478bd9Sstevel@tonic-gate */ 245*7c478bd9Sstevel@tonic-gate void mont_mulf_noconv(uint32_t *result, 246*7c478bd9Sstevel@tonic-gate double *dm1, double *dm2, double *dt, 247*7c478bd9Sstevel@tonic-gate double *dn, uint32_t *nint, 248*7c478bd9Sstevel@tonic-gate int nlen, double dn0) 249*7c478bd9Sstevel@tonic-gate { 250*7c478bd9Sstevel@tonic-gate int i, j, jj; 251*7c478bd9Sstevel@tonic-gate double digit, m2j, a, b; 252*7c478bd9Sstevel@tonic-gate double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; 253*7c478bd9Sstevel@tonic-gate 254*7c478bd9Sstevel@tonic-gate pdm1 = &(dm1[0]); 255*7c478bd9Sstevel@tonic-gate pdm2 = &(dm2[0]); 256*7c478bd9Sstevel@tonic-gate pdn = &(dn[0]); 257*7c478bd9Sstevel@tonic-gate pdm2[2 * nlen] = Zero; 258*7c478bd9Sstevel@tonic-gate 259*7c478bd9Sstevel@tonic-gate if (nlen != 16) { 260*7c478bd9Sstevel@tonic-gate for (i = 0; i < 4 * nlen + 2; i++) 261*7c478bd9Sstevel@tonic-gate dt[i] = Zero; 262*7c478bd9Sstevel@tonic-gate a = dt[0] = pdm1[0] * pdm2[0]; 263*7c478bd9Sstevel@tonic-gate digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); 264*7c478bd9Sstevel@tonic-gate 265*7c478bd9Sstevel@tonic-gate pdtj = &(dt[0]); 266*7c478bd9Sstevel@tonic-gate for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) { 267*7c478bd9Sstevel@tonic-gate m2j = pdm2[j]; 268*7c478bd9Sstevel@tonic-gate a = pdtj[0] + pdn[0] * digit; 269*7c478bd9Sstevel@tonic-gate b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16; 270*7c478bd9Sstevel@tonic-gate pdtj[1] = b; 271*7c478bd9Sstevel@tonic-gate 272*7c478bd9Sstevel@tonic-gate #pragma pipeloop(0) 273*7c478bd9Sstevel@tonic-gate for (i = 1; i < nlen; i++) { 274*7c478bd9Sstevel@tonic-gate pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit; 275*7c478bd9Sstevel@tonic-gate } 276*7c478bd9Sstevel@tonic-gate if (jj == 30) { 277*7c478bd9Sstevel@tonic-gate cleanup(dt, j / 2 + 1, 2 * nlen + 1); 278*7c478bd9Sstevel@tonic-gate jj = 0; 279*7c478bd9Sstevel@tonic-gate } 280*7c478bd9Sstevel@tonic-gate 281*7c478bd9Sstevel@tonic-gate digit = mod(lower32(b, Zero) * dn0, 282*7c478bd9Sstevel@tonic-gate TwoToMinus16, TwoTo16); 283*7c478bd9Sstevel@tonic-gate } 284*7c478bd9Sstevel@tonic-gate } else { 285*7c478bd9Sstevel@tonic-gate a = dt[0] = pdm1[0] * pdm2[0]; 286*7c478bd9Sstevel@tonic-gate 287*7c478bd9Sstevel@tonic-gate dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] = 288*7c478bd9Sstevel@tonic-gate dt[59] = dt[58] = dt[57] = dt[56] = dt[55] = 289*7c478bd9Sstevel@tonic-gate dt[54] = dt[53] = dt[52] = dt[51] = dt[50] = 290*7c478bd9Sstevel@tonic-gate dt[49] = dt[48] = dt[47] = dt[46] = dt[45] = 291*7c478bd9Sstevel@tonic-gate dt[44] = dt[43] = dt[42] = dt[41] = dt[40] = 292*7c478bd9Sstevel@tonic-gate dt[39] = dt[38] = dt[37] = dt[36] = dt[35] = 293*7c478bd9Sstevel@tonic-gate dt[34] = dt[33] = dt[32] = dt[31] = dt[30] = 294*7c478bd9Sstevel@tonic-gate dt[29] = dt[28] = dt[27] = dt[26] = dt[25] = 295*7c478bd9Sstevel@tonic-gate dt[24] = dt[23] = dt[22] = dt[21] = dt[20] = 296*7c478bd9Sstevel@tonic-gate dt[19] = dt[18] = dt[17] = dt[16] = dt[15] = 297*7c478bd9Sstevel@tonic-gate dt[14] = dt[13] = dt[12] = dt[11] = dt[10] = 298*7c478bd9Sstevel@tonic-gate dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] = 299*7c478bd9Sstevel@tonic-gate dt[3] = dt[2] = dt[1] = Zero; 300*7c478bd9Sstevel@tonic-gate 301*7c478bd9Sstevel@tonic-gate pdn_0 = pdn[0]; 302*7c478bd9Sstevel@tonic-gate pdm1_0 = pdm1[0]; 303*7c478bd9Sstevel@tonic-gate 304*7c478bd9Sstevel@tonic-gate digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); 305*7c478bd9Sstevel@tonic-gate pdtj = &(dt[0]); 306*7c478bd9Sstevel@tonic-gate 307*7c478bd9Sstevel@tonic-gate for (j = 0; j < 32; j++, pdtj++) { 308*7c478bd9Sstevel@tonic-gate 309*7c478bd9Sstevel@tonic-gate m2j = pdm2[j]; 310*7c478bd9Sstevel@tonic-gate a = pdtj[0] + pdn_0 * digit; 311*7c478bd9Sstevel@tonic-gate b = pdtj[1] + pdm1_0 * pdm2[j + 1] + a * TwoToMinus16; 312*7c478bd9Sstevel@tonic-gate pdtj[1] = b; 313*7c478bd9Sstevel@tonic-gate 314*7c478bd9Sstevel@tonic-gate pdtj[2] += pdm1[1] *m2j + pdn[1] * digit; 315*7c478bd9Sstevel@tonic-gate pdtj[4] += pdm1[2] *m2j + pdn[2] * digit; 316*7c478bd9Sstevel@tonic-gate pdtj[6] += pdm1[3] *m2j + pdn[3] * digit; 317*7c478bd9Sstevel@tonic-gate pdtj[8] += pdm1[4] *m2j + pdn[4] * digit; 318*7c478bd9Sstevel@tonic-gate pdtj[10] += pdm1[5] *m2j + pdn[5] * digit; 319*7c478bd9Sstevel@tonic-gate pdtj[12] += pdm1[6] *m2j + pdn[6] * digit; 320*7c478bd9Sstevel@tonic-gate pdtj[14] += pdm1[7] *m2j + pdn[7] * digit; 321*7c478bd9Sstevel@tonic-gate pdtj[16] += pdm1[8] *m2j + pdn[8] * digit; 322*7c478bd9Sstevel@tonic-gate pdtj[18] += pdm1[9] *m2j + pdn[9] * digit; 323*7c478bd9Sstevel@tonic-gate pdtj[20] += pdm1[10] *m2j + pdn[10] * digit; 324*7c478bd9Sstevel@tonic-gate pdtj[22] += pdm1[11] *m2j + pdn[11] * digit; 325*7c478bd9Sstevel@tonic-gate pdtj[24] += pdm1[12] *m2j + pdn[12] * digit; 326*7c478bd9Sstevel@tonic-gate pdtj[26] += pdm1[13] *m2j + pdn[13] * digit; 327*7c478bd9Sstevel@tonic-gate pdtj[28] += pdm1[14] *m2j + pdn[14] * digit; 328*7c478bd9Sstevel@tonic-gate pdtj[30] += pdm1[15] *m2j + pdn[15] * digit; 329*7c478bd9Sstevel@tonic-gate /* no need for cleenup, cannot overflow */ 330*7c478bd9Sstevel@tonic-gate digit = mod(lower32(b, Zero) * dn0, 331*7c478bd9Sstevel@tonic-gate TwoToMinus16, TwoTo16); 332*7c478bd9Sstevel@tonic-gate } 333*7c478bd9Sstevel@tonic-gate } 334*7c478bd9Sstevel@tonic-gate 335*7c478bd9Sstevel@tonic-gate conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1); 336*7c478bd9Sstevel@tonic-gate adjust_montf_result(result, nint, nlen); 337*7c478bd9Sstevel@tonic-gate } 338