xref: /titanic_44/usr/src/common/bignum/mont_mulf.c (revision 8475e04352e630e4bd0f59a283286ee2475a14ce)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
257c478bd9Sstevel@tonic-gate 
/*
 * If compiled without -DRF_INLINE_MACROS, -lm is needed at link time.
 * If compiled with -DRF_INLINE_MACROS, conv.il is needed at compile time
 * (i.e. cc <compiler_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c ).
 */
317c478bd9Sstevel@tonic-gate 
327c478bd9Sstevel@tonic-gate #include <sys/types.h>
337c478bd9Sstevel@tonic-gate #include <math.h>
347c478bd9Sstevel@tonic-gate 
/*
 * Floating-point constants shared by the routines in this file.
 * Each is zero or a power of two, so every value is exact in a double.
 */
static const double Zero = 0.0;
static const double TwoTo16 = 65536.0;
static const double TwoTo32 = 65536.0 * 65536.0;
static const double TwoToMinus16 = 1.0 / 65536.0;
static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
407c478bd9Sstevel@tonic-gate 
417c478bd9Sstevel@tonic-gate #ifdef RF_INLINE_MACROS
427c478bd9Sstevel@tonic-gate 
/*
 * When RF_INLINE_MACROS is defined these three helpers are only declared
 * here; the build note at the top of this file says conv.il must then be
 * given to the compiler.
 */
double upper32(double x);
double lower32(double x, double y);
double mod(double x, double oneoverm, double m);
467c478bd9Sstevel@tonic-gate 
477c478bd9Sstevel@tonic-gate #else
487c478bd9Sstevel@tonic-gate 
497c478bd9Sstevel@tonic-gate static double
upper32(double x)507c478bd9Sstevel@tonic-gate upper32(double x)
517c478bd9Sstevel@tonic-gate {
527c478bd9Sstevel@tonic-gate 	return (floor(x * TwoToMinus32));
537c478bd9Sstevel@tonic-gate }
547c478bd9Sstevel@tonic-gate 
557c478bd9Sstevel@tonic-gate 
567c478bd9Sstevel@tonic-gate static double
lower32(double x,double y)577c478bd9Sstevel@tonic-gate lower32(double x, double y)
587c478bd9Sstevel@tonic-gate {
597c478bd9Sstevel@tonic-gate 	return (x - TwoTo32 * floor(x * TwoToMinus32));
607c478bd9Sstevel@tonic-gate }
617c478bd9Sstevel@tonic-gate 
627c478bd9Sstevel@tonic-gate static double
mod(double x,double oneoverm,double m)637c478bd9Sstevel@tonic-gate mod(double x, double oneoverm, double m)
647c478bd9Sstevel@tonic-gate {
657c478bd9Sstevel@tonic-gate 	return (x - m * floor(x * oneoverm));
667c478bd9Sstevel@tonic-gate }
677c478bd9Sstevel@tonic-gate 
687c478bd9Sstevel@tonic-gate #endif
697c478bd9Sstevel@tonic-gate 
707c478bd9Sstevel@tonic-gate 
717c478bd9Sstevel@tonic-gate static void
cleanup(double * dt,int from,int tlen)727c478bd9Sstevel@tonic-gate cleanup(double *dt, int from, int tlen)
737c478bd9Sstevel@tonic-gate {
747c478bd9Sstevel@tonic-gate 	int i;
757c478bd9Sstevel@tonic-gate 	double tmp, tmp1, x, x1;
767c478bd9Sstevel@tonic-gate 
777c478bd9Sstevel@tonic-gate 	tmp = tmp1 = Zero;
787c478bd9Sstevel@tonic-gate 
797c478bd9Sstevel@tonic-gate 	for (i = 2 * from; i < 2 * tlen; i += 2) {
807c478bd9Sstevel@tonic-gate 		x = dt[i];
817c478bd9Sstevel@tonic-gate 		x1 = dt[i + 1];
827c478bd9Sstevel@tonic-gate 		dt[i] = lower32(x, Zero) + tmp;
837c478bd9Sstevel@tonic-gate 		dt[i + 1] = lower32(x1, Zero) + tmp1;
847c478bd9Sstevel@tonic-gate 		tmp = upper32(x);
857c478bd9Sstevel@tonic-gate 		tmp1 = upper32(x1);
867c478bd9Sstevel@tonic-gate 	}
877c478bd9Sstevel@tonic-gate }
887c478bd9Sstevel@tonic-gate 
897c478bd9Sstevel@tonic-gate 
/*
 * Combine pairs of doubles into 32-bit words.
 *
 * For word k, d16[2 * k] is added at weight 1 and d16[2 * k + 1] at
 * weight 2^16.  The entries may be larger than 32 and 16 bits
 * respectively; whatever does not fit in word k is carried into the
 * following words.  The carry out of the last word is dropped.
 *
 *	i32	output, ilen words
 *	d16	input, 2 * ilen doubles holding whole-number values
 *	tmp	not used
 *	ilen	number of output words
 *
 * d16[0], d16[1] are always read and one word is always written, so the
 * routine expects ilen to be at least 1.
 */
void
conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
{
	int i;
	int64_t acc, carry,	/* Using int64_t and not uint64_t */
	    lo, hi,		/* because more efficient code is */
	    next_lo, next_hi;	/* generated this way, and there  */
				/* is no overflow.  */

	(void) tmp;

	acc = 0;
	lo = (int64_t)d16[0];
	hi = (int64_t)d16[1];
	for (i = 0; i < ilen - 1; i++) {
		/* Load the next pair early, ahead of the arithmetic. */
		next_lo = (int64_t)d16[2 * i + 2];
		acc += lo & 0xffffffff;
		carry = (lo >> 32);
		next_hi = (int64_t)d16[2 * i + 3];
		acc += (hi & 0xffff) << 16;
		carry += (hi >> 16) + (acc >> 32);
		i32[i] = (uint32_t)(acc & 0xffffffff);
		acc = carry;
		lo = next_lo;
		hi = next_hi;
	}
	/* Last word: nothing follows it, so no carry is computed. */
	acc += lo & 0xffffffff;
	acc += (hi & 0xffff) << 16;
	i32[i] = (uint32_t)(acc & 0xffffffff);
}
1187c478bd9Sstevel@tonic-gate 
/*
 * Convert len 32-bit words to doubles, one double per word:
 * d32[k] receives the value of i32[k].
 */
void
conv_i32_to_d32(double *d32, uint32_t *i32, int len)
{
	int k;

#pragma pipeloop(0)
	for (k = 0; k < len; k++) {
		d32[k] = (double)i32[k];
	}
}
1287c478bd9Sstevel@tonic-gate 
1297c478bd9Sstevel@tonic-gate 
/*
 * Split len 32-bit words into 16-bit halves stored as doubles:
 * d16[2 * k] receives the low half of i32[k] and d16[2 * k + 1] the
 * high half.  d16 must have room for 2 * len doubles.
 */
void
conv_i32_to_d16(double *d16, uint32_t *i32, int len)
{
	int k;
	uint32_t word;
	double *pair;

#pragma pipeloop(0)
	for (k = 0; k < len; k++) {
		word = i32[k];
		pair = &d16[2 * k];
		pair[0] = (double)(word & 0xffff);
		pair[1] = (double)(word >> 16);
	}
}
1437c478bd9Sstevel@tonic-gate 
1447c478bd9Sstevel@tonic-gate #ifdef RF_INLINE_MACROS
1457c478bd9Sstevel@tonic-gate 
/*
 * Declaration used when RF_INLINE_MACROS is defined; the function body
 * is then not provided by this file.
 */
void
i16_to_d16_and_d32x4(const double *dummy1,	/* 1/(2^16) */
    const double *dummy2,			/* 2^16 */
    const double *dummy3,			/* 0 */
    double *result16,				/* result16 */
    double *result32,				/* result32 */
    float *src);		/* source - should be unsigned int* */
				/* converted to float* */
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate #else
1567c478bd9Sstevel@tonic-gate 
1577c478bd9Sstevel@tonic-gate 
/*
 * Convert four 32-bit words to doubles in two layouts at once.
 *
 * src is really an array of four uint32_t passed through a float
 * pointer.  For each word k (0..3):
 *	result16[2 * k]     = low 16 bits of the word
 *	result16[2 * k + 1] = high 16 bits of the word
 *	result32[k]         = the whole word
 *
 * dummy1, dummy2 and dummy3 (callers pass 1/(2^16), 2^16 and 0) are not
 * read by this C version.
 */
static void
i16_to_d16_and_d32x4(const double *dummy1,	/* 1/(2^16) */
    const double *dummy2,			/* 2^16 */
    const double *dummy3,			/* 0 */
    double *result16,
    double *result32,
    float *src)			/* source - should be unsigned int* */
				/* converted to float* */
{
	uint32_t *words = (uint32_t *)src;
	uint32_t w[4];
	int k;

	/* Read all four inputs before writing any output. */
	for (k = 0; k < 4; k++)
		w[k] = words[k];

	for (k = 0; k < 4; k++) {
		result16[2 * k] = (double)(w[k] & 0xffff);
		result16[2 * k + 1] = (double)(w[k] >> 16);
		result32[k] = (double)w[k];
	}
}
1887c478bd9Sstevel@tonic-gate 
1897c478bd9Sstevel@tonic-gate #endif
1907c478bd9Sstevel@tonic-gate 
1917c478bd9Sstevel@tonic-gate 
1927c478bd9Sstevel@tonic-gate void
conv_i32_to_d32_and_d16(double * d32,double * d16,uint32_t * i32,int len)1937c478bd9Sstevel@tonic-gate conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len)
1947c478bd9Sstevel@tonic-gate {
1957c478bd9Sstevel@tonic-gate 	int i;
1967c478bd9Sstevel@tonic-gate 	uint32_t a;
1977c478bd9Sstevel@tonic-gate 
1987c478bd9Sstevel@tonic-gate #pragma pipeloop(0)
1997c478bd9Sstevel@tonic-gate 	for (i = 0; i < len - 3; i += 4) {
2007c478bd9Sstevel@tonic-gate 		i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
201*8475e043SDan OpenSolaris Anderson 		    &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
2027c478bd9Sstevel@tonic-gate 	}
2037c478bd9Sstevel@tonic-gate 	for (; i < len; i++) {
2047c478bd9Sstevel@tonic-gate 		a = i32[i];
2057c478bd9Sstevel@tonic-gate 		d32[i] = (double)(i32[i]);
2067c478bd9Sstevel@tonic-gate 		d16[2 * i] = (double)(a & 0xffff);
2077c478bd9Sstevel@tonic-gate 		d16[2 * i + 1] = (double)(a >> 16);
2087c478bd9Sstevel@tonic-gate 	}
2097c478bd9Sstevel@tonic-gate }
2107c478bd9Sstevel@tonic-gate 
2117c478bd9Sstevel@tonic-gate 
/*
 * Conditionally subtract nint from i32, in place.
 *
 *	i32	len + 1 words, least significant first; i32[len] is an
 *		extra top word that is read but never modified
 *	nint	len words, least significant first
 *	len	number of words in nint
 *
 * The subtraction is performed when the top word i32[len] is non-zero,
 * or when the low len words of i32 are greater than or equal to nint.
 * Only the low len words of i32 are updated; a borrow out of the top
 * of them is dropped.
 *
 * The borrow is carried in unsigned arithmetic so that the result does
 * not depend on how the compiler shifts negative signed values.
 */
static void
adjust_montf_result(uint32_t *i32, uint32_t *nint, int len)
{
	uint64_t diff, borrow;
	int i;

	if (i32[len] > 0) {
		i = -1;
	} else {
		/* Find the most significant word where the two differ. */
		for (i = len - 1; i >= 0; i--) {
			if (i32[i] != nint[i])
				break;
		}
	}

	/* i < 0: top word set, or the two numbers are equal. */
	if ((i < 0) || (i32[i] > nint[i])) {
		borrow = 0;
		for (i = 0; i < len; i++) {
			diff = (uint64_t)i32[i] - (uint64_t)nint[i] - borrow;
			i32[i] = (uint32_t)diff;
			/* Upper half is all ones exactly when we wrapped. */
			borrow = (diff >> 32) & 1;
		}
	}
}
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate 
2367c478bd9Sstevel@tonic-gate /*
2377c478bd9Sstevel@tonic-gate  * the lengths of the input arrays should be at least the following:
2387c478bd9Sstevel@tonic-gate  * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
2397c478bd9Sstevel@tonic-gate  * all of them should be different from one another
2407c478bd9Sstevel@tonic-gate  */
mont_mulf_noconv(uint32_t * result,double * dm1,double * dm2,double * dt,double * dn,uint32_t * nint,int nlen,double dn0)2417c478bd9Sstevel@tonic-gate void mont_mulf_noconv(uint32_t *result,
2427c478bd9Sstevel@tonic-gate 			double *dm1, double *dm2, double *dt,
2437c478bd9Sstevel@tonic-gate 			double *dn, uint32_t *nint,
2447c478bd9Sstevel@tonic-gate 			int nlen, double dn0)
2457c478bd9Sstevel@tonic-gate {
2467c478bd9Sstevel@tonic-gate 	int i, j, jj;
2477c478bd9Sstevel@tonic-gate 	double digit, m2j, a, b;
2487c478bd9Sstevel@tonic-gate 	double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
2497c478bd9Sstevel@tonic-gate 
2507c478bd9Sstevel@tonic-gate 	pdm1 = &(dm1[0]);
2517c478bd9Sstevel@tonic-gate 	pdm2 = &(dm2[0]);
2527c478bd9Sstevel@tonic-gate 	pdn = &(dn[0]);
2537c478bd9Sstevel@tonic-gate 	pdm2[2 * nlen] = Zero;
2547c478bd9Sstevel@tonic-gate 
2557c478bd9Sstevel@tonic-gate 	if (nlen != 16) {
2567c478bd9Sstevel@tonic-gate 		for (i = 0; i < 4 * nlen + 2; i++)
2577c478bd9Sstevel@tonic-gate 			dt[i] = Zero;
2587c478bd9Sstevel@tonic-gate 		a = dt[0] = pdm1[0] * pdm2[0];
2597c478bd9Sstevel@tonic-gate 		digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
2607c478bd9Sstevel@tonic-gate 
2617c478bd9Sstevel@tonic-gate 		pdtj = &(dt[0]);
2627c478bd9Sstevel@tonic-gate 		for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
2637c478bd9Sstevel@tonic-gate 			m2j = pdm2[j];
2647c478bd9Sstevel@tonic-gate 			a = pdtj[0] + pdn[0] * digit;
2657c478bd9Sstevel@tonic-gate 			b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
2667c478bd9Sstevel@tonic-gate 			pdtj[1] = b;
2677c478bd9Sstevel@tonic-gate 
2687c478bd9Sstevel@tonic-gate #pragma pipeloop(0)
2697c478bd9Sstevel@tonic-gate 			for (i = 1; i < nlen; i++) {
2707c478bd9Sstevel@tonic-gate 				pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
2717c478bd9Sstevel@tonic-gate 			}
2727c478bd9Sstevel@tonic-gate 			if (jj == 30) {
2737c478bd9Sstevel@tonic-gate 				cleanup(dt, j / 2 + 1, 2 * nlen + 1);
2747c478bd9Sstevel@tonic-gate 				jj = 0;
2757c478bd9Sstevel@tonic-gate 			}
2767c478bd9Sstevel@tonic-gate 
2777c478bd9Sstevel@tonic-gate 			digit = mod(lower32(b, Zero) * dn0,
2787c478bd9Sstevel@tonic-gate 			    TwoToMinus16, TwoTo16);
2797c478bd9Sstevel@tonic-gate 		}
2807c478bd9Sstevel@tonic-gate 	} else {
2817c478bd9Sstevel@tonic-gate 		a = dt[0] = pdm1[0] * pdm2[0];
2827c478bd9Sstevel@tonic-gate 
2837c478bd9Sstevel@tonic-gate 		dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
2847c478bd9Sstevel@tonic-gate 		    dt[59] = dt[58] = dt[57] = dt[56] = dt[55] =
2857c478bd9Sstevel@tonic-gate 		    dt[54] = dt[53] = dt[52] = dt[51] = dt[50] =
2867c478bd9Sstevel@tonic-gate 		    dt[49] = dt[48] = dt[47] = dt[46] = dt[45] =
2877c478bd9Sstevel@tonic-gate 		    dt[44] = dt[43] = dt[42] = dt[41] = dt[40] =
2887c478bd9Sstevel@tonic-gate 		    dt[39] = dt[38] = dt[37] = dt[36] = dt[35] =
2897c478bd9Sstevel@tonic-gate 		    dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
2907c478bd9Sstevel@tonic-gate 		    dt[29] = dt[28] = dt[27] = dt[26] = dt[25] =
2917c478bd9Sstevel@tonic-gate 		    dt[24] = dt[23] = dt[22] = dt[21] = dt[20] =
2927c478bd9Sstevel@tonic-gate 		    dt[19] = dt[18] = dt[17] = dt[16] = dt[15] =
2937c478bd9Sstevel@tonic-gate 		    dt[14] = dt[13] = dt[12] = dt[11] = dt[10] =
2947c478bd9Sstevel@tonic-gate 		    dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] =
2957c478bd9Sstevel@tonic-gate 		    dt[3] = dt[2] = dt[1] = Zero;
2967c478bd9Sstevel@tonic-gate 
2977c478bd9Sstevel@tonic-gate 		pdn_0 = pdn[0];
2987c478bd9Sstevel@tonic-gate 		pdm1_0 = pdm1[0];
2997c478bd9Sstevel@tonic-gate 
3007c478bd9Sstevel@tonic-gate 		digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
3017c478bd9Sstevel@tonic-gate 		pdtj = &(dt[0]);
3027c478bd9Sstevel@tonic-gate 
3037c478bd9Sstevel@tonic-gate 		for (j = 0; j < 32; j++, pdtj++) {
3047c478bd9Sstevel@tonic-gate 
3057c478bd9Sstevel@tonic-gate 			m2j = pdm2[j];
3067c478bd9Sstevel@tonic-gate 			a = pdtj[0] + pdn_0 * digit;
3077c478bd9Sstevel@tonic-gate 			b = pdtj[1] + pdm1_0 * pdm2[j + 1] + a * TwoToMinus16;
3087c478bd9Sstevel@tonic-gate 			pdtj[1] = b;
3097c478bd9Sstevel@tonic-gate 
3107c478bd9Sstevel@tonic-gate 			pdtj[2] += pdm1[1] *m2j + pdn[1] * digit;
3117c478bd9Sstevel@tonic-gate 			pdtj[4] += pdm1[2] *m2j + pdn[2] * digit;
3127c478bd9Sstevel@tonic-gate 			pdtj[6] += pdm1[3] *m2j + pdn[3] * digit;
3137c478bd9Sstevel@tonic-gate 			pdtj[8] += pdm1[4] *m2j + pdn[4] * digit;
3147c478bd9Sstevel@tonic-gate 			pdtj[10] += pdm1[5] *m2j + pdn[5] * digit;
3157c478bd9Sstevel@tonic-gate 			pdtj[12] += pdm1[6] *m2j + pdn[6] * digit;
3167c478bd9Sstevel@tonic-gate 			pdtj[14] += pdm1[7] *m2j + pdn[7] * digit;
3177c478bd9Sstevel@tonic-gate 			pdtj[16] += pdm1[8] *m2j + pdn[8] * digit;
3187c478bd9Sstevel@tonic-gate 			pdtj[18] += pdm1[9] *m2j + pdn[9] * digit;
3197c478bd9Sstevel@tonic-gate 			pdtj[20] += pdm1[10] *m2j + pdn[10] * digit;
3207c478bd9Sstevel@tonic-gate 			pdtj[22] += pdm1[11] *m2j + pdn[11] * digit;
3217c478bd9Sstevel@tonic-gate 			pdtj[24] += pdm1[12] *m2j + pdn[12] * digit;
3227c478bd9Sstevel@tonic-gate 			pdtj[26] += pdm1[13] *m2j + pdn[13] * digit;
3237c478bd9Sstevel@tonic-gate 			pdtj[28] += pdm1[14] *m2j + pdn[14] * digit;
3247c478bd9Sstevel@tonic-gate 			pdtj[30] += pdm1[15] *m2j + pdn[15] * digit;
325*8475e043SDan OpenSolaris Anderson 			/* no need for cleanup, cannot overflow */
3267c478bd9Sstevel@tonic-gate 			digit = mod(lower32(b, Zero) * dn0,
3277c478bd9Sstevel@tonic-gate 			    TwoToMinus16, TwoTo16);
3287c478bd9Sstevel@tonic-gate 		}
3297c478bd9Sstevel@tonic-gate 	}
3307c478bd9Sstevel@tonic-gate 
3317c478bd9Sstevel@tonic-gate 	conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1);
3327c478bd9Sstevel@tonic-gate 	adjust_montf_result(result, nint, nlen);
3337c478bd9Sstevel@tonic-gate }
334