xref: /freebsd/contrib/bearssl/src/int/i31_modpow2.c (revision 2aaf9152a852aba9eb2036b95f4948ee77988826)
1 /*
2  * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "inner.h"
26 
27 /* see inner.h */
28 uint32_t
br_i31_modpow_opt(uint32_t * x,const unsigned char * e,size_t elen,const uint32_t * m,uint32_t m0i,uint32_t * tmp,size_t twlen)29 br_i31_modpow_opt(uint32_t *x,
30 	const unsigned char *e, size_t elen,
31 	const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen)
32 {
33 	size_t mlen, mwlen;
34 	uint32_t *t1, *t2, *base;
35 	size_t u, v;
36 	uint32_t acc;
37 	int acc_len, win_len;
38 
39 	/*
40 	 * Get modulus size.
41 	 */
42 	mwlen = (m[0] + 63) >> 5;
43 	mlen = mwlen * sizeof m[0];
44 	mwlen += (mwlen & 1);
45 	t1 = tmp;
46 	t2 = tmp + mwlen;
47 
48 	/*
49 	 * Compute possible window size, with a maximum of 5 bits.
50 	 * When the window has size 1 bit, we use a specific code
51 	 * that requires only two temporaries. Otherwise, for a
52 	 * window of k bits, we need 2^k+1 temporaries.
53 	 */
54 	if (twlen < (mwlen << 1)) {
55 		return 0;
56 	}
57 	for (win_len = 5; win_len > 1; win_len --) {
58 		if ((((uint32_t)1 << win_len) + 1) * mwlen <= twlen) {
59 			break;
60 		}
61 	}
62 
63 	/*
64 	 * Everything is done in Montgomery representation.
65 	 */
66 	br_i31_to_monty(x, m);
67 
68 	/*
69 	 * Compute window contents. If the window has size one bit only,
70 	 * then t2 is set to x; otherwise, t2[0] is left untouched, and
71 	 * t2[k] is set to x^k (for k >= 1).
72 	 */
73 	if (win_len == 1) {
74 		memcpy(t2, x, mlen);
75 	} else {
76 		memcpy(t2 + mwlen, x, mlen);
77 		base = t2 + mwlen;
78 		for (u = 2; u < ((unsigned)1 << win_len); u ++) {
79 			br_i31_montymul(base + mwlen, base, x, m, m0i);
80 			base += mwlen;
81 		}
82 	}
83 
84 	/*
85 	 * We need to set x to 1, in Montgomery representation. This can
86 	 * be done efficiently by setting the high word to 1, then doing
87 	 * one word-sized shift.
88 	 */
89 	br_i31_zero(x, m[0]);
90 	x[(m[0] + 31) >> 5] = 1;
91 	br_i31_muladd_small(x, 0, m);
92 
93 	/*
94 	 * We process bits from most to least significant. At each
95 	 * loop iteration, we have acc_len bits in acc.
96 	 */
97 	acc = 0;
98 	acc_len = 0;
99 	while (acc_len > 0 || elen > 0) {
100 		int i, k;
101 		uint32_t bits;
102 
103 		/*
104 		 * Get the next bits.
105 		 */
106 		k = win_len;
107 		if (acc_len < win_len) {
108 			if (elen > 0) {
109 				acc = (acc << 8) | *e ++;
110 				elen --;
111 				acc_len += 8;
112 			} else {
113 				k = acc_len;
114 			}
115 		}
116 		bits = (acc >> (acc_len - k)) & (((uint32_t)1 << k) - 1);
117 		acc_len -= k;
118 
119 		/*
120 		 * We could get exactly k bits. Compute k squarings.
121 		 */
122 		for (i = 0; i < k; i ++) {
123 			br_i31_montymul(t1, x, x, m, m0i);
124 			memcpy(x, t1, mlen);
125 		}
126 
127 		/*
128 		 * Window lookup: we want to set t2 to the window
129 		 * lookup value, assuming the bits are non-zero. If
130 		 * the window length is 1 bit only, then t2 is
131 		 * already set; otherwise, we do a constant-time lookup.
132 		 */
133 		if (win_len > 1) {
134 			br_i31_zero(t2, m[0]);
135 			base = t2 + mwlen;
136 			for (u = 1; u < ((uint32_t)1 << k); u ++) {
137 				uint32_t mask;
138 
139 				mask = -EQ(u, bits);
140 				for (v = 1; v < mwlen; v ++) {
141 					t2[v] |= mask & base[v];
142 				}
143 				base += mwlen;
144 			}
145 		}
146 
147 		/*
148 		 * Multiply with the looked-up value. We keep the
149 		 * product only if the exponent bits are not all-zero.
150 		 */
151 		br_i31_montymul(t1, x, t2, m, m0i);
152 		CCOPY(NEQ(bits, 0), x, t1, mlen);
153 	}
154 
155 	/*
156 	 * Convert back from Montgomery representation, and exit.
157 	 */
158 	br_i31_from_monty(x, m, m0i);
159 	return 1;
160 }
161