xref: /freebsd/crypto/openssl/crypto/bn/bn_nist.c (revision f25b8c9fb4f58cf61adb47d7570abe7caa6d385d)
1 /*
2  * Copyright 2002-2024 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include "bn_local.h"
11 #include "internal/cryptlib.h"
12 
/*
 * Number of BN_ULONG words needed to hold an N-bit NIST prime, i.e.
 * ceil(N / BN_BITS2).
 *
 * Every replacement list is wrapped in parentheses so that the macro acts as
 * a single operand wherever it is used; without them an expression such as
 * "2 * BN_NIST_192_TOP" would be parsed as "(2 * (192 + ...)) / BN_BITS2".
 */
#define BN_NIST_192_TOP ((192 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_224_TOP ((224 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_256_TOP ((256 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_384_TOP ((384 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_521_TOP ((521 + BN_BITS2 - 1) / BN_BITS2)
18 
19 /* pre-computed tables are "carry-less" values of modulus*(i+1) */
20 #if BN_BITS2 == 64
21 static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
22     { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFFULL },
23     { 0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL },
24     { 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFFULL }
25 };
26 
27 static const BN_ULONG _nist_p_192_sqr[] = {
28     0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000001ULL,
29     0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL
30 };
31 
32 static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
33     { 0x0000000000000001ULL, 0xFFFFFFFF00000000ULL,
34         0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL },
35     { 0x0000000000000002ULL, 0xFFFFFFFE00000000ULL,
36         0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFFULL } /* this one is
37                                                         * "carry-full" */
38 };
39 
40 static const BN_ULONG _nist_p_224_sqr[] = {
41     0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
42     0xFFFFFFFFFFFFFFFFULL, 0x0000000200000000ULL,
43     0x0000000000000000ULL, 0xFFFFFFFFFFFFFFFEULL,
44     0xFFFFFFFFFFFFFFFFULL
45 };
46 
47 static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
48     { 0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL,
49         0x0000000000000000ULL, 0xFFFFFFFF00000001ULL },
50     { 0xFFFFFFFFFFFFFFFEULL, 0x00000001FFFFFFFFULL,
51         0x0000000000000000ULL, 0xFFFFFFFE00000002ULL },
52     { 0xFFFFFFFFFFFFFFFDULL, 0x00000002FFFFFFFFULL,
53         0x0000000000000000ULL, 0xFFFFFFFD00000003ULL },
54     { 0xFFFFFFFFFFFFFFFCULL, 0x00000003FFFFFFFFULL,
55         0x0000000000000000ULL, 0xFFFFFFFC00000004ULL },
56     { 0xFFFFFFFFFFFFFFFBULL, 0x00000004FFFFFFFFULL,
57         0x0000000000000000ULL, 0xFFFFFFFB00000005ULL },
58 };
59 
60 static const BN_ULONG _nist_p_256_sqr[] = {
61     0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
62     0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFEULL,
63     0x00000001FFFFFFFEULL, 0x00000001FFFFFFFEULL,
64     0xFFFFFFFE00000001ULL, 0xFFFFFFFE00000002ULL
65 };
66 
67 static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
68     { 0x00000000FFFFFFFFULL, 0xFFFFFFFF00000000ULL, 0xFFFFFFFFFFFFFFFEULL,
69         0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL },
70     { 0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
71         0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL },
72     { 0x00000002FFFFFFFDULL, 0xFFFFFFFD00000000ULL, 0xFFFFFFFFFFFFFFFCULL,
73         0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL },
74     { 0x00000003FFFFFFFCULL, 0xFFFFFFFC00000000ULL, 0xFFFFFFFFFFFFFFFBULL,
75         0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL },
76     { 0x00000004FFFFFFFBULL, 0xFFFFFFFB00000000ULL, 0xFFFFFFFFFFFFFFFAULL,
77         0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL },
78 };
79 
80 static const BN_ULONG _nist_p_384_sqr[] = {
81     0xFFFFFFFE00000001ULL, 0x0000000200000000ULL, 0xFFFFFFFE00000000ULL,
82     0x0000000200000000ULL, 0x0000000000000001ULL, 0x0000000000000000ULL,
83     0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
84     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
85 };
86 
87 static const BN_ULONG _nist_p_521[] = {
88     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
89     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
90     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
91     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
92     0x00000000000001FFULL
93 };
94 
95 static const BN_ULONG _nist_p_521_sqr[] = {
96     0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
97     0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
98     0x0000000000000000ULL, 0x0000000000000000ULL, 0xFFFFFFFFFFFFFC00ULL,
99     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
100     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
101     0xFFFFFFFFFFFFFFFFULL, 0x000000000003FFFFULL
102 };
103 #elif BN_BITS2 == 32
104 static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
105     { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
106     { 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
107     { 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }
108 };
109 
110 static const BN_ULONG _nist_p_192_sqr[] = {
111     0x00000001, 0x00000000, 0x00000002, 0x00000000, 0x00000001, 0x00000000,
112     0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
113 };
114 
115 static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
116     { 0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFF,
117         0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
118     { 0x00000002, 0x00000000, 0x00000000, 0xFFFFFFFE,
119         0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }
120 };
121 
122 static const BN_ULONG _nist_p_224_sqr[] = {
123     0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
124     0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000002,
125     0x00000000, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFF,
126     0xFFFFFFFF, 0xFFFFFFFF
127 };
128 
129 static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
130     { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
131         0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF },
132     { 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000001,
133         0x00000000, 0x00000000, 0x00000002, 0xFFFFFFFE },
134     { 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000002,
135         0x00000000, 0x00000000, 0x00000003, 0xFFFFFFFD },
136     { 0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000003,
137         0x00000000, 0x00000000, 0x00000004, 0xFFFFFFFC },
138     { 0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000004,
139         0x00000000, 0x00000000, 0x00000005, 0xFFFFFFFB },
140 };
141 
142 static const BN_ULONG _nist_p_256_sqr[] = {
143     0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
144     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000001,
145     0xFFFFFFFE, 0x00000001, 0xFFFFFFFE, 0x00000001,
146     0x00000001, 0xFFFFFFFE, 0x00000002, 0xFFFFFFFE
147 };
148 
149 static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
150     { 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
151         0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
152     { 0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
153         0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
154     { 0xFFFFFFFD, 0x00000002, 0x00000000, 0xFFFFFFFD, 0xFFFFFFFC, 0xFFFFFFFF,
155         0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
156     { 0xFFFFFFFC, 0x00000003, 0x00000000, 0xFFFFFFFC, 0xFFFFFFFB, 0xFFFFFFFF,
157         0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
158     { 0xFFFFFFFB, 0x00000004, 0x00000000, 0xFFFFFFFB, 0xFFFFFFFA, 0xFFFFFFFF,
159         0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
160 };
161 
162 static const BN_ULONG _nist_p_384_sqr[] = {
163     0x00000001, 0xFFFFFFFE, 0x00000000, 0x00000002, 0x00000000, 0xFFFFFFFE,
164     0x00000000, 0x00000002, 0x00000001, 0x00000000, 0x00000000, 0x00000000,
165     0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
166     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
167 };
168 
169 static const BN_ULONG _nist_p_521[] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
170     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
171     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
172     0xFFFFFFFF, 0x000001FF };
173 
174 static const BN_ULONG _nist_p_521_sqr[] = {
175     0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
176     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
177     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFC00, 0xFFFFFFFF,
178     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
179     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
180     0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFFF
181 };
182 #else
183 #error "unsupported BN_BITS2"
184 #endif
185 
186 static const BIGNUM ossl_bignum_nist_p_192 = {
187     (BN_ULONG *)_nist_p_192[0],
188     BN_NIST_192_TOP,
189     BN_NIST_192_TOP,
190     0,
191     BN_FLG_STATIC_DATA
192 };
193 
194 static const BIGNUM ossl_bignum_nist_p_224 = {
195     (BN_ULONG *)_nist_p_224[0],
196     BN_NIST_224_TOP,
197     BN_NIST_224_TOP,
198     0,
199     BN_FLG_STATIC_DATA
200 };
201 
202 static const BIGNUM ossl_bignum_nist_p_256 = {
203     (BN_ULONG *)_nist_p_256[0],
204     BN_NIST_256_TOP,
205     BN_NIST_256_TOP,
206     0,
207     BN_FLG_STATIC_DATA
208 };
209 
210 static const BIGNUM ossl_bignum_nist_p_384 = {
211     (BN_ULONG *)_nist_p_384[0],
212     BN_NIST_384_TOP,
213     BN_NIST_384_TOP,
214     0,
215     BN_FLG_STATIC_DATA
216 };
217 
218 static const BIGNUM ossl_bignum_nist_p_521 = {
219     (BN_ULONG *)_nist_p_521,
220     BN_NIST_521_TOP,
221     BN_NIST_521_TOP,
222     0,
223     BN_FLG_STATIC_DATA
224 };
225 
BN_get0_nist_prime_192(void)226 const BIGNUM *BN_get0_nist_prime_192(void)
227 {
228     return &ossl_bignum_nist_p_192;
229 }
230 
BN_get0_nist_prime_224(void)231 const BIGNUM *BN_get0_nist_prime_224(void)
232 {
233     return &ossl_bignum_nist_p_224;
234 }
235 
BN_get0_nist_prime_256(void)236 const BIGNUM *BN_get0_nist_prime_256(void)
237 {
238     return &ossl_bignum_nist_p_256;
239 }
240 
BN_get0_nist_prime_384(void)241 const BIGNUM *BN_get0_nist_prime_384(void)
242 {
243     return &ossl_bignum_nist_p_384;
244 }
245 
BN_get0_nist_prime_521(void)246 const BIGNUM *BN_get0_nist_prime_521(void)
247 {
248     return &ossl_bignum_nist_p_521;
249 }
250 
/*
 * nist_cp_bn_0(dst, src_in, top, max)
 *
 * Copies the first `top` words of src_in into dst, then zero-fills
 * dst[top] .. dst[max - 1].  A non-positive `top` copies nothing.
 *
 * To prevent more recent compilers (specifically clang-14) from treating this
 * code as a violation of the strict aliasing rules and omitting it, this
 * cannot be declared as a function.  Moreover, the dst parameter cannot be
 * cached in a local, since a local no longer references the union and again
 * falls foul of the strict aliasing criteria.  Refer to #18225 for the initial
 * diagnostics and llvm/llvm-project#55255 for the later discussions with the
 * LLVM developers.  The problem boils down to whether an array in the union
 * is converted to a pointer or used directly.
 *
 * This function was inlined regardless, so there is no space cost to be
 * paid for making it a macro.
 *
 * Macro caveats:
 *  - `top` and `max` are re-evaluated on every loop iteration and `dst` on
 *    every store, so pass side-effect-free expressions only.
 *  - every parameter is parenthesized on use, so arguments containing
 *    low-precedence operators (e.g. "a & b", "c ? x : y") expand correctly.
 *  - the expansion is a brace block that declares `ii` and `src`; arguments
 *    must not refer to caller variables with those names.
 */
#define nist_cp_bn_0(dst, src_in, top, max) \
    {                                       \
        int ii;                             \
        const BN_ULONG *src = (src_in);     \
                                            \
        for (ii = 0; ii < (top); ii++)      \
            (dst)[ii] = src[ii];            \
        for (; ii < (max); ii++)            \
            (dst)[ii] = 0;                  \
    }
274 
/*
 * Copy `top` words from src to dst, lowest index first.  Nothing is written
 * when top is zero or negative.
 */
static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top)
{
    int idx = 0;

    while (idx < top) {
        dst[idx] = src[idx];
        idx++;
    }
}
282 
/*
 * Word-copy helpers used by the nist_set_NNN() macros.
 *
 *   bn_cp_64(to, n, from, m)   to's 64-bit slot n = from's 64-bit slot m,
 *                              or 0 when m is negative
 *   bn_cp_32(to, n, from, m)   same for 32-bit slots
 *   bn_64_set_0 / bn_32_set_0  clear one slot
 *
 * IMPORTANT: these macros are statement macros.  Each expansion already ends
 * in ';' (or is a brace block) because nist_set_NNN() strings several
 * invocations together without separators.  Do not remove the trailing
 * semicolons and do not use the macros inside an expression.
 *
 * All parameters are parenthesized on use so that arguments such as
 * "(a3) - 3" or "x & 1" expand with the intended grouping.
 */
#if BN_BITS2 == 64
#define bn_cp_64(to, n, from, m) (to)[n] = ((m) >= 0) ? ((from)[m]) : 0;
#define bn_64_set_0(to, n) (to)[n] = (BN_ULONG)0;
/*
 * two following macros are implemented under assumption that they
 * are called in a sequence with *ascending* n, i.e. as they are...
 * (an even n overwrites the whole 64-bit word with the low half, the
 * following odd n ORs the high half into it)
 */
#define bn_cp_32_naked(to, n, from, m) (((n) & 1) ? ((to)[(n) / 2] |= ((m) & 1) ? ((from)[(m) / 2] & BN_MASK2h) : ((from)[(m) / 2] << 32)) \
                                                  : ((to)[(n) / 2] = ((m) & 1) ? ((from)[(m) / 2] >> 32) : ((from)[(m) / 2] & BN_MASK2l)))
#define bn_32_set_0(to, n) (((n) & 1) ? ((to)[(n) / 2] &= BN_MASK2l) : ((to)[(n) / 2] = 0));
#define bn_cp_32(to, n, from, m) ((m) >= 0) ? bn_cp_32_naked(to, n, from, m) : bn_32_set_0(to, n)
/* 64-bit signed accumulator type; only selected on little-endian builds */
#if defined(L_ENDIAN)
#if defined(__arch64__)
#define NIST_INT64 long
#else
#define NIST_INT64 long long
#endif
#endif
#else
#define bn_cp_64(to, n, from, m)                      \
    {                                                 \
        bn_cp_32(to, (n) * 2, from, (m) * 2);         \
        bn_cp_32(to, (n) * 2 + 1, from, (m) * 2 + 1); \
    }
#define bn_64_set_0(to, n)            \
    {                                 \
        bn_32_set_0(to, (n) * 2);     \
        bn_32_set_0(to, (n) * 2 + 1); \
    }
#define bn_cp_32(to, n, from, m) (to)[n] = ((m) >= 0) ? ((from)[m]) : 0;
#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0;
/* 64-bit signed accumulator type, when the compiler provides one */
#if defined(_WIN32) && !defined(__GNUC__)
#define NIST_INT64 __int64
#elif defined(BN_LLONG)
#define NIST_INT64 long long
#endif
#endif /* BN_BITS2 != 64 */
320 
321 #ifdef NIST_INT64
322 /* Helpers to load/store a 32-bit word (uint32_t) from/into a memory
323  * location and avoid potential aliasing issue.  */
load_u32(const void * ptr)324 static ossl_inline uint32_t load_u32(const void *ptr)
325 {
326     uint32_t tmp;
327 
328     memcpy(&tmp, ptr, sizeof(tmp));
329     return tmp;
330 }
331 
store_lo32(void * ptr,NIST_INT64 val)332 static ossl_inline void store_lo32(void *ptr, NIST_INT64 val)
333 {
334     /* A cast is needed for big-endian system: on a 32-bit BE system
335      * NIST_INT64 may be defined as well if the compiler supports 64-bit
336      * long long.  */
337     uint32_t tmp = (uint32_t)val;
338 
339     memcpy(ptr, &tmp, sizeof(tmp));
340 }
341 #endif /* NIST_INT64 */
342 
/*
 * Build a 192-bit value in `to` out of 64-bit words picked from `from`.
 * a1 selects the most significant word and a3 the least significant one.
 * A selector k refers to from[k - 3]; any selector below 3 (0 by
 * convention) produces a zero word.
 *
 * The bn_cp_64() invocations carry their own statement terminators, which is
 * why no ';' appears between them.
 */
#define nist_set_192(to, from, a1, a2, a3) \
    {                                      \
        bn_cp_64(to, 0, from, (a3) - 3)    \
        bn_cp_64(to, 1, from, (a2) - 3)    \
        bn_cp_64(to, 2, from, (a1) - 3)    \
    }
349 
/*
 * Fast reduction modulo the NIST P-192 prime: r = a mod p192.
 *
 * r      receives the result; may be the same object as a.
 * a      value to reduce.  Negative values and values at or above the
 *        _nist_p_192_sqr bound are handed to BN_nnmod() instead of the
 *        fast path.
 * field  ignored on input; it is unconditionally replaced by the built-in
 *        P-192 modulus.
 * ctx    only used by the BN_nnmod() fallback.
 *
 * Returns 1 on success, 0 if r could not be expanded; on the fallback and
 * copy paths the result of BN_nnmod() / BN_copy() decides.
 */
int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
    BN_CTX *ctx)
{
    int top = a->top, i;
    int carry;
    register BN_ULONG *r_d, *a_d = a->d;
    /* high half of a, addressable both as BN_ULONGs and as 32-bit limbs */
    union {
        BN_ULONG bn[BN_NIST_192_TOP];
        unsigned int ui[BN_NIST_192_TOP * sizeof(BN_ULONG) / sizeof(unsigned int)];
    } buf;
    BN_ULONG c_d[BN_NIST_192_TOP], *res;
    static const BIGNUM ossl_bignum_nist_p_192_sqr = {
        (BN_ULONG *)_nist_p_192_sqr,
        OSSL_NELEM(_nist_p_192_sqr),
        OSSL_NELEM(_nist_p_192_sqr),
        0, BN_FLG_STATIC_DATA
    };

    field = &ossl_bignum_nist_p_192; /* just to make sure */

    if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_192_sqr) >= 0)
        return BN_nnmod(r, a, field, ctx);

    /* a == p gives 0; a < p is already reduced */
    i = BN_ucmp(field, a);
    if (i == 0) {
        BN_zero(r);
        return 1;
    } else if (i > 0)
        return (r == a) ? 1 : (BN_copy(r, a) != NULL);

    /* r_d starts out as the low 192 bits of a */
    if (r != a) {
        if (!bn_wexpand(r, BN_NIST_192_TOP))
            return 0;
        r_d = r->d;
        nist_cp_bn(r_d, a_d, BN_NIST_192_TOP);
    } else
        r_d = a_d;

    /* buf holds the words of a above bit 192, zero-padded */
    nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP,
        BN_NIST_192_TOP);

#if defined(NIST_INT64)
    {
        /*
         * Fold the high half into r_d one 32-bit limb at a time; acc keeps
         * the running sum and its upper bits carry into the next limb.
         */
        NIST_INT64 acc; /* accumulator */
        unsigned int *rp = (unsigned int *)r_d;
        const unsigned int *bp = (const unsigned int *)buf.ui;

        acc = load_u32(&rp[0]);
        acc += bp[3 * 2 - 6];
        acc += bp[5 * 2 - 6];
        store_lo32(&rp[0], acc);
        acc >>= 32;

        acc += load_u32(&rp[1]);
        acc += bp[3 * 2 - 5];
        acc += bp[5 * 2 - 5];
        store_lo32(&rp[1], acc);
        acc >>= 32;

        acc += load_u32(&rp[2]);
        acc += bp[3 * 2 - 6];
        acc += bp[4 * 2 - 6];
        acc += bp[5 * 2 - 6];
        store_lo32(&rp[2], acc);
        acc >>= 32;

        acc += load_u32(&rp[3]);
        acc += bp[3 * 2 - 5];
        acc += bp[4 * 2 - 5];
        acc += bp[5 * 2 - 5];
        store_lo32(&rp[3], acc);
        acc >>= 32;

        acc += load_u32(&rp[4]);
        acc += bp[4 * 2 - 6];
        acc += bp[5 * 2 - 6];
        store_lo32(&rp[4], acc);
        acc >>= 32;

        acc += load_u32(&rp[5]);
        acc += bp[4 * 2 - 5];
        acc += bp[5 * 2 - 5];
        store_lo32(&rp[5], acc);

        carry = (int)(acc >> 32);
    }
#else
    {
        /* Same folding done with three whole-number additions. */
        BN_ULONG t_d[BN_NIST_192_TOP];

        nist_set_192(t_d, buf.bn, 0, 3, 3);
        carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
        nist_set_192(t_d, buf.bn, 4, 4, 0);
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
        nist_set_192(t_d, buf.bn, 5, 5, 5);
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
    }
#endif
    /* remove `carry` multiples of the modulus using the pre-computed table */
    if (carry > 0)
        carry = (int)bn_sub_words(r_d, r_d, _nist_p_192[carry - 1],
            BN_NIST_192_TOP);
    else
        carry = 1;

    /*
     * we need 'if (carry==0 || result>=modulus) result-=modulus;'
     * as comparison implies subtraction, we can write
     * 'tmp=result-modulus; if (!carry || !borrow) result=tmp;'
     * this is what happens below, but without an explicit if :-)
     */
    res = (bn_sub_words(c_d, r_d, _nist_p_192[0], BN_NIST_192_TOP) && carry)
        ? r_d
        : c_d;
    nist_cp_bn(r_d, res, BN_NIST_192_TOP);
    r->top = BN_NIST_192_TOP;
    bn_correct_top(r);

    return 1;
}
470 
471 typedef BN_ULONG (*bn_addsub_f)(BN_ULONG *, const BN_ULONG *,
472     const BN_ULONG *, int);
473 
/*
 * Build a 224-bit value in `to` out of 32-bit words picked from `from`.
 * a1 selects the most significant word and a7 the least significant one.
 * A selector k refers to 32-bit word k - 7 of `from`; any selector below 7
 * (0 by convention) produces a zero word.
 *
 * The copies must stay in ascending destination order (the 64-bit flavour
 * of bn_cp_32() relies on it), and each bn_cp_32() carries its own statement
 * terminator, hence no ';' between the invocations.
 */
#define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \
    {                                                      \
        bn_cp_32(to, 0, from, (a7) - 7)                    \
        bn_cp_32(to, 1, from, (a6) - 7)                    \
        bn_cp_32(to, 2, from, (a5) - 7)                    \
        bn_cp_32(to, 3, from, (a4) - 7)                    \
        bn_cp_32(to, 4, from, (a3) - 7)                    \
        bn_cp_32(to, 5, from, (a2) - 7)                    \
        bn_cp_32(to, 6, from, (a1) - 7)                    \
    }
484 
/*
 * Fast reduction modulo the NIST P-224 prime: r = a mod p224.
 *
 * r      receives the result; may be the same object as a.
 * a      value to reduce.  Negative values and values at or above the
 *        _nist_p_224_sqr bound are handed to BN_nnmod() instead of the
 *        fast path.
 * field  ignored on input; it is unconditionally replaced by the built-in
 *        P-224 modulus.
 * ctx    only used by the BN_nnmod() fallback.
 *
 * Returns 1 on success, 0 if r could not be expanded; on the fallback and
 * copy paths the result of BN_nnmod() / BN_copy() decides.
 */
int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
    BN_CTX *ctx)
{
    int top = a->top, i;
    int carry;
    BN_ULONG *r_d, *a_d = a->d;
    /* high part of a, addressable both as BN_ULONGs and as 32-bit limbs */
    union {
        BN_ULONG bn[BN_NIST_224_TOP];
        unsigned int ui[BN_NIST_224_TOP * sizeof(BN_ULONG) / sizeof(unsigned int)];
    } buf;
    BN_ULONG c_d[BN_NIST_224_TOP], *res;
    bn_addsub_f adjust;
    static const BIGNUM ossl_bignum_nist_p_224_sqr = {
        (BN_ULONG *)_nist_p_224_sqr,
        OSSL_NELEM(_nist_p_224_sqr),
        OSSL_NELEM(_nist_p_224_sqr),
        0, BN_FLG_STATIC_DATA
    };

    field = &ossl_bignum_nist_p_224; /* just to make sure */

    if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_224_sqr) >= 0)
        return BN_nnmod(r, a, field, ctx);

    /* a == p gives 0; a < p is already reduced */
    i = BN_ucmp(field, a);
    if (i == 0) {
        BN_zero(r);
        return 1;
    } else if (i > 0)
        return (r == a) ? 1 : (BN_copy(r, a) != NULL);

    /* r_d starts out as the low words of a */
    if (r != a) {
        if (!bn_wexpand(r, BN_NIST_224_TOP))
            return 0;
        r_d = r->d;
        nist_cp_bn(r_d, a_d, BN_NIST_224_TOP);
    } else
        r_d = a_d;

#if BN_BITS2 == 64
    /* copy upper 256 bits of 448 bit number ... */
    nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP - 1),
        top - (BN_NIST_224_TOP - 1), BN_NIST_224_TOP);
    /* ... and right shift by 32 to obtain upper 224 bits */
    nist_set_224(buf.bn, c_d, 14, 13, 12, 11, 10, 9, 8);
    /* truncate lower part to 224 bits too */
    r_d[BN_NIST_224_TOP - 1] &= BN_MASK2l;
#else
    nist_cp_bn_0(buf.bn, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP,
        BN_NIST_224_TOP);
#endif

#if defined(NIST_INT64) && BN_BITS2 != 64
    {
        /*
         * 32-bit builds only: fold the high half into r_d one limb at a
         * time; acc keeps the signed running sum and its upper bits carry
         * (or borrow) into the next limb.
         */
        NIST_INT64 acc; /* accumulator */
        unsigned int *rp = (unsigned int *)r_d;
        const unsigned int *bp = (const unsigned int *)buf.ui;

        acc = rp[0];
        acc -= bp[7 - 7];
        acc -= bp[11 - 7];
        rp[0] = (unsigned int)acc;
        acc >>= 32;

        acc += rp[1];
        acc -= bp[8 - 7];
        acc -= bp[12 - 7];
        rp[1] = (unsigned int)acc;
        acc >>= 32;

        acc += rp[2];
        acc -= bp[9 - 7];
        acc -= bp[13 - 7];
        rp[2] = (unsigned int)acc;
        acc >>= 32;

        acc += rp[3];
        acc += bp[7 - 7];
        acc += bp[11 - 7];
        acc -= bp[10 - 7];
        rp[3] = (unsigned int)acc;
        acc >>= 32;

        acc += rp[4];
        acc += bp[8 - 7];
        acc += bp[12 - 7];
        acc -= bp[11 - 7];
        rp[4] = (unsigned int)acc;
        acc >>= 32;

        acc += rp[5];
        acc += bp[9 - 7];
        acc += bp[13 - 7];
        acc -= bp[12 - 7];
        rp[5] = (unsigned int)acc;
        acc >>= 32;

        acc += rp[6];
        acc += bp[10 - 7];
        acc -= bp[13 - 7];
        rp[6] = (unsigned int)acc;

        /*
         * No store of the carry into rp[7] here: this branch is compiled
         * only when BN_BITS2 != 64, so r_d has exactly seven 32-bit words.
         */
        carry = (int)(acc >> 32);
    }
#else
    {
        /* Same folding done with whole-number additions and subtractions. */
        BN_ULONG t_d[BN_NIST_224_TOP];

        nist_set_224(t_d, buf.bn, 10, 9, 8, 7, 0, 0, 0);
        carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
        nist_set_224(t_d, buf.bn, 0, 13, 12, 11, 0, 0, 0);
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
        nist_set_224(t_d, buf.bn, 13, 12, 11, 10, 9, 8, 7);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
        nist_set_224(t_d, buf.bn, 0, 0, 0, 0, 13, 12, 11);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);

#if BN_BITS2 == 64
        /* with 64-bit words the overflow sits in the top word's high half */
        carry = (int)(r_d[BN_NIST_224_TOP - 1] >> 32);
#endif
    }
#endif
    adjust = bn_sub_words;
    if (carry > 0) {
        carry = (int)bn_sub_words(r_d, r_d, _nist_p_224[carry - 1],
            BN_NIST_224_TOP);
#if BN_BITS2 == 64
        carry = (int)(~(r_d[BN_NIST_224_TOP - 1] >> 32)) & 1;
#endif
    } else if (carry < 0) {
        /*
         * it's a bit more complicated logic in this case. if bn_add_words
         * yields no carry, then result has to be adjusted by unconditionally
         * *adding* the modulus. but if it does, then result has to be
         * compared to the modulus and conditionally adjusted by
         * *subtracting* the latter.
         */
        carry = (int)bn_add_words(r_d, r_d, _nist_p_224[-carry - 1],
            BN_NIST_224_TOP);
        adjust = carry ? bn_sub_words : bn_add_words;
    } else
        carry = 1;

    /* otherwise it's effectively same as in BN_nist_mod_192... */
    res = ((*adjust)(c_d, r_d, _nist_p_224[0], BN_NIST_224_TOP) && carry)
        ? r_d
        : c_d;
    nist_cp_bn(r_d, res, BN_NIST_224_TOP);
    r->top = BN_NIST_224_TOP;
    bn_correct_top(r);

    return 1;
}
641 
/*
 * Build a 256-bit value in `to` out of 32-bit words picked from `from`.
 * a1 selects the most significant word and a8 the least significant one.
 * A selector k refers to 32-bit word k - 8 of `from`; any selector below 8
 * (0 by convention) produces a zero word.
 *
 * The copies must stay in ascending destination order (the 64-bit flavour
 * of bn_cp_32() relies on it), and each bn_cp_32() carries its own statement
 * terminator, hence no ';' between the invocations.
 */
#define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
    {                                                          \
        bn_cp_32(to, 0, from, (a8) - 8)                        \
        bn_cp_32(to, 1, from, (a7) - 8)                        \
        bn_cp_32(to, 2, from, (a6) - 8)                        \
        bn_cp_32(to, 3, from, (a5) - 8)                        \
        bn_cp_32(to, 4, from, (a4) - 8)                        \
        bn_cp_32(to, 5, from, (a3) - 8)                        \
        bn_cp_32(to, 6, from, (a2) - 8)                        \
        bn_cp_32(to, 7, from, (a1) - 8)                        \
    }
653 
/*
 * Fast reduction modulo the NIST P-256 prime: r = a mod p256.
 *
 * r      receives the result; may be the same object as a.
 * a      value to reduce.  Negative values and values at or above the
 *        _nist_p_256_sqr bound are handed to BN_nnmod() instead of the
 *        fast path.
 * field  ignored on input; it is unconditionally replaced by the built-in
 *        P-256 modulus.
 * ctx    only used by the BN_nnmod() fallback.
 *
 * Returns 1 on success, 0 if r could not be expanded; on the fallback and
 * copy paths the result of BN_nnmod() / BN_copy() decides.
 */
int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
    BN_CTX *ctx)
{
    static const BIGNUM ossl_bignum_nist_p_256_sqr = {
        (BN_ULONG *)_nist_p_256_sqr,
        OSSL_NELEM(_nist_p_256_sqr),
        OSSL_NELEM(_nist_p_256_sqr),
        0, BN_FLG_STATIC_DATA
    };
    int top = a->top;
    int cmp;
    int carry = 0;
    BN_ULONG *a_d = a->d, *r_d;
    /* high half of a, addressable both as BN_ULONGs and as 32-bit limbs */
    union {
        BN_ULONG bn[BN_NIST_256_TOP];
        unsigned int ui[BN_NIST_256_TOP * sizeof(BN_ULONG) / sizeof(unsigned int)];
    } buf;
    BN_ULONG c_d[BN_NIST_256_TOP], *res;
    BN_ULONG adjust_out;
    bn_addsub_f adjust;

    /* the caller's modulus is never consulted */
    field = &ossl_bignum_nist_p_256;

    /* inputs outside [0, bound) take the generic route */
    if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_256_sqr) >= 0)
        return BN_nnmod(r, a, field, ctx);

    cmp = BN_ucmp(field, a);
    if (cmp == 0) {
        /* a is exactly the modulus */
        BN_zero(r);
        return 1;
    }
    if (cmp > 0) {
        /* a is already below the modulus */
        if (r == a)
            return 1;
        return (BN_copy(r, a) != NULL);
    }

    /* r_d starts out as the low 256 bits of a */
    if (r == a) {
        r_d = a_d;
    } else {
        if (!bn_wexpand(r, BN_NIST_256_TOP))
            return 0;
        r_d = r->d;
        nist_cp_bn(r_d, a_d, BN_NIST_256_TOP);
    }

    /* buf receives the words of a above bit 256, zero-padded */
    nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP,
        BN_NIST_256_TOP);

#if defined(NIST_INT64)
    {
        /*
         * Limb-wise folding: acc holds the signed running sum for the
         * current 32-bit limb, and what remains after the shift is carried
         * (or borrowed) into the next one.
         */
        NIST_INT64 acc;
        unsigned int *rp = (unsigned int *)r_d;
        const unsigned int *bp = (const unsigned int *)buf.ui;

        /* limb 0 */
        acc = load_u32(&rp[0]);
        acc += bp[8 - 8];
        acc += bp[9 - 8];
        acc -= bp[11 - 8];
        acc -= bp[12 - 8];
        acc -= bp[13 - 8];
        acc -= bp[14 - 8];
        store_lo32(&rp[0], acc);
        acc >>= 32;

        /* limb 1 */
        acc += load_u32(&rp[1]);
        acc += bp[9 - 8];
        acc += bp[10 - 8];
        acc -= bp[12 - 8];
        acc -= bp[13 - 8];
        acc -= bp[14 - 8];
        acc -= bp[15 - 8];
        store_lo32(&rp[1], acc);
        acc >>= 32;

        /* limb 2 */
        acc += load_u32(&rp[2]);
        acc += bp[10 - 8];
        acc += bp[11 - 8];
        acc -= bp[13 - 8];
        acc -= bp[14 - 8];
        acc -= bp[15 - 8];
        store_lo32(&rp[2], acc);
        acc >>= 32;

        /* limb 3 */
        acc += load_u32(&rp[3]);
        acc += bp[11 - 8];
        acc += bp[11 - 8];
        acc += bp[12 - 8];
        acc += bp[12 - 8];
        acc += bp[13 - 8];
        acc -= bp[15 - 8];
        acc -= bp[8 - 8];
        acc -= bp[9 - 8];
        store_lo32(&rp[3], acc);
        acc >>= 32;

        /* limb 4 */
        acc += load_u32(&rp[4]);
        acc += bp[12 - 8];
        acc += bp[12 - 8];
        acc += bp[13 - 8];
        acc += bp[13 - 8];
        acc += bp[14 - 8];
        acc -= bp[9 - 8];
        acc -= bp[10 - 8];
        store_lo32(&rp[4], acc);
        acc >>= 32;

        /* limb 5 */
        acc += load_u32(&rp[5]);
        acc += bp[13 - 8];
        acc += bp[13 - 8];
        acc += bp[14 - 8];
        acc += bp[14 - 8];
        acc += bp[15 - 8];
        acc -= bp[10 - 8];
        acc -= bp[11 - 8];
        store_lo32(&rp[5], acc);
        acc >>= 32;

        /* limb 6 */
        acc += load_u32(&rp[6]);
        acc += bp[14 - 8];
        acc += bp[14 - 8];
        acc += bp[15 - 8];
        acc += bp[15 - 8];
        acc += bp[14 - 8];
        acc += bp[13 - 8];
        acc -= bp[8 - 8];
        acc -= bp[9 - 8];
        store_lo32(&rp[6], acc);
        acc >>= 32;

        /* limb 7 */
        acc += load_u32(&rp[7]);
        acc += bp[15 - 8];
        acc += bp[15 - 8];
        acc += bp[15 - 8];
        acc += bp[8 - 8];
        acc -= bp[10 - 8];
        acc -= bp[11 - 8];
        acc -= bp[12 - 8];
        acc -= bp[13 - 8];
        store_lo32(&rp[7], acc);

        carry = (int)(acc >> 32);
    }
#else
    {
        BN_ULONG t_d[BN_NIST_256_TOP];
        BN_ULONG word, shifted_out;
        int k;

        /* S1 */
        nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0);
        /* S2 */
        nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0);
        carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);

        /* double S1 + S2: shift t_d left by one bit, word by word */
        shifted_out = 0;
        for (k = 0; k < BN_NIST_256_TOP; k++) {
            word = t_d[k];
            t_d[k] = ((word << 1) | shifted_out) & BN_MASK2;
            shifted_out = (word & BN_TBIT) ? 1 : 0;
        }
        carry <<= 1;
        carry |= shifted_out;

        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /* S3 */
        nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8);
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /* S4 */
        nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9);
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /* D1 */
        nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /* D2 */
        nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /* D3 */
        nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /* D4 */
        nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
    }
#endif

    /*
     * Bring the value back into range (same scheme as BN_nist_mod_224):
     * a positive carry is cancelled by subtracting that many moduli, a
     * negative one by adding them back; in the latter case the final
     * correction adds instead of subtracts when the addition did not
     * overflow.
     */
    adjust = bn_sub_words;
    if (carry > 0) {
        carry = (int)bn_sub_words(r_d, r_d, _nist_p_256[carry - 1],
            BN_NIST_256_TOP);
    } else if (carry < 0) {
        carry = (int)bn_add_words(r_d, r_d, _nist_p_256[-carry - 1],
            BN_NIST_256_TOP);
        adjust = carry ? bn_sub_words : bn_add_words;
    } else {
        carry = 1;
    }

    /* c_d = r_d -/+ modulus; keep r_d only if that overflowed and carry set */
    adjust_out = (*adjust)(c_d, r_d, _nist_p_256[0], BN_NIST_256_TOP);
    res = c_d;
    if (adjust_out && carry)
        res = r_d;

    nist_cp_bn(r_d, res, BN_NIST_256_TOP);
    r->top = BN_NIST_256_TOP;
    bn_correct_top(r);

    return 1;
}
870 
/*
 * Fill slots 0..11 of `to` by invoking bn_cp_32 once per slot, reading from
 * `from`.
 *
 * Arguments are listed highest slot first: a1 feeds slot 11 and a12 feeds
 * slot 0.  Each argument has 12 subtracted before it is handed to bn_cp_32
 * as the source index, so callers write indices in the 12..23 range.  An
 * argument of 0 therefore produces a negative source index; presumably
 * bn_cp_32 zero-fills the slot in that case (its definition lives elsewhere
 * in this file -- confirm there).
 *
 * The bn_cp_32 invocations are intentionally not followed by semicolons.
 * NOTE(review): this relies on bn_cp_32 expanding to a complete statement.
 */
#define nist_set_384(to, from, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12) \
    {                                                                             \
        bn_cp_32(to, 0, from, (a12) - 12)                                         \
        bn_cp_32(to, 1, from, (a11) - 12)                                         \
        bn_cp_32(to, 2, from, (a10) - 12)                                         \
        bn_cp_32(to, 3, from, (a9) - 12)                                          \
        bn_cp_32(to, 4, from, (a8) - 12)                                          \
        bn_cp_32(to, 5, from, (a7) - 12)                                          \
        bn_cp_32(to, 6, from, (a6) - 12)                                          \
        bn_cp_32(to, 7, from, (a5) - 12)                                          \
        bn_cp_32(to, 8, from, (a4) - 12)                                          \
        bn_cp_32(to, 9, from, (a3) - 12)                                          \
        bn_cp_32(to, 10, from, (a2) - 12)                                         \
        bn_cp_32(to, 11, from, (a1) - 12)                                         \
    }
886 
/*
 * Reduce `a` modulo the NIST P-384 prime and store the result in `r`.
 *
 * r     - destination; may be the same object as `a`.
 * a     - value to reduce.
 * field - ignored on entry: it is overwritten with the address of
 *         ossl_bignum_nist_p_384 before use.
 * ctx   - only used when the call falls back to BN_nnmod().
 *
 * The fast path is taken only when `a` is non-negative and smaller than the
 * square of the prime (_nist_p_384_sqr); every other input is handed to
 * BN_nnmod().
 *
 * Returns 1 on success and 0 when `r` cannot be expanded or copied into.
 * On the BN_nnmod() fallback the return value is BN_nnmod()'s.
 */
int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
    BN_CTX *ctx)
{
    int i, top = a->top;
    int carry = 0;
    register BN_ULONG *r_d, *a_d = a->d;
    /* The words of `a` above the low 384 bits, viewable at either width. */
    union {
        BN_ULONG bn[BN_NIST_384_TOP];
        unsigned int ui[BN_NIST_384_TOP * sizeof(BN_ULONG) / sizeof(unsigned int)];
    } buf;
    BN_ULONG c_d[BN_NIST_384_TOP], *res;
    bn_addsub_f adjust;
    static const BIGNUM ossl_bignum_nist_p_384_sqr = {
        (BN_ULONG *)_nist_p_384_sqr,
        OSSL_NELEM(_nist_p_384_sqr),
        OSSL_NELEM(_nist_p_384_sqr),
        0, BN_FLG_STATIC_DATA
    };

    field = &ossl_bignum_nist_p_384; /* just to make sure */

    if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_384_sqr) >= 0)
        return BN_nnmod(r, a, field, ctx);

    /* a == p reduces to zero; a < p is already reduced. */
    i = BN_ucmp(field, a);
    if (i == 0) {
        BN_zero(r);
        return 1;
    } else if (i > 0)
        return (r == a) ? 1 : (BN_copy(r, a) != NULL);

    /* Seed the result with the low BN_NIST_384_TOP words of `a`. */
    if (r != a) {
        if (!bn_wexpand(r, BN_NIST_384_TOP))
            return 0;
        r_d = r->d;
        nist_cp_bn(r_d, a_d, BN_NIST_384_TOP);
    } else
        r_d = a_d;

    /*
     * Copy the remaining (top - BN_NIST_384_TOP) words of `a` into buf;
     * presumably nist_cp_bn_0 zero-pads up to BN_NIST_384_TOP words (helper
     * is defined elsewhere in this file).
     */
    nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP,
        BN_NIST_384_TOP);

#if defined(NIST_INT64)
    /*
     * One output word of unsigned-int width at a time.  bp[k - 12] is word
     * k of `a` at that width (buf begins at word 12).  acc collects the
     * additions and subtractions for the current word, store_lo32 writes it
     * back, and `acc >>= 32` carries the remainder into the next word.
     * NOTE(review): acc is presumably a signed 64-bit type, since the
     * carry extracted at the end is allowed to be negative below.
     */
    {
        NIST_INT64 acc; /* accumulator */
        unsigned int *rp = (unsigned int *)r_d;
        const unsigned int *bp = (const unsigned int *)buf.ui;

        acc = load_u32(&rp[0]);
        acc += bp[12 - 12];
        acc += bp[21 - 12];
        acc += bp[20 - 12];
        acc -= bp[23 - 12];
        store_lo32(&rp[0], acc);
        acc >>= 32;

        acc += load_u32(&rp[1]);
        acc += bp[13 - 12];
        acc += bp[22 - 12];
        acc += bp[23 - 12];
        acc -= bp[12 - 12];
        acc -= bp[20 - 12];
        store_lo32(&rp[1], acc);
        acc >>= 32;

        acc += load_u32(&rp[2]);
        acc += bp[14 - 12];
        acc += bp[23 - 12];
        acc -= bp[13 - 12];
        acc -= bp[21 - 12];
        store_lo32(&rp[2], acc);
        acc >>= 32;

        acc += load_u32(&rp[3]);
        acc += bp[15 - 12];
        acc += bp[12 - 12];
        acc += bp[20 - 12];
        acc += bp[21 - 12];
        acc -= bp[14 - 12];
        acc -= bp[22 - 12];
        acc -= bp[23 - 12];
        store_lo32(&rp[3], acc);
        acc >>= 32;

        acc += load_u32(&rp[4]);
        acc += bp[21 - 12];
        acc += bp[21 - 12];
        acc += bp[16 - 12];
        acc += bp[13 - 12];
        acc += bp[12 - 12];
        acc += bp[20 - 12];
        acc += bp[22 - 12];
        acc -= bp[15 - 12];
        acc -= bp[23 - 12];
        acc -= bp[23 - 12];
        store_lo32(&rp[4], acc);
        acc >>= 32;

        acc += load_u32(&rp[5]);
        acc += bp[22 - 12];
        acc += bp[22 - 12];
        acc += bp[17 - 12];
        acc += bp[14 - 12];
        acc += bp[13 - 12];
        acc += bp[21 - 12];
        acc += bp[23 - 12];
        acc -= bp[16 - 12];
        store_lo32(&rp[5], acc);
        acc >>= 32;

        acc += load_u32(&rp[6]);
        acc += bp[23 - 12];
        acc += bp[23 - 12];
        acc += bp[18 - 12];
        acc += bp[15 - 12];
        acc += bp[14 - 12];
        acc += bp[22 - 12];
        acc -= bp[17 - 12];
        store_lo32(&rp[6], acc);
        acc >>= 32;

        acc += load_u32(&rp[7]);
        acc += bp[19 - 12];
        acc += bp[16 - 12];
        acc += bp[15 - 12];
        acc += bp[23 - 12];
        acc -= bp[18 - 12];
        store_lo32(&rp[7], acc);
        acc >>= 32;

        acc += load_u32(&rp[8]);
        acc += bp[20 - 12];
        acc += bp[17 - 12];
        acc += bp[16 - 12];
        acc -= bp[19 - 12];
        store_lo32(&rp[8], acc);
        acc >>= 32;

        acc += load_u32(&rp[9]);
        acc += bp[21 - 12];
        acc += bp[18 - 12];
        acc += bp[17 - 12];
        acc -= bp[20 - 12];
        store_lo32(&rp[9], acc);
        acc >>= 32;

        acc += load_u32(&rp[10]);
        acc += bp[22 - 12];
        acc += bp[19 - 12];
        acc += bp[18 - 12];
        acc -= bp[21 - 12];
        store_lo32(&rp[10], acc);
        acc >>= 32;

        acc += load_u32(&rp[11]);
        acc += bp[23 - 12];
        acc += bp[20 - 12];
        acc += bp[19 - 12];
        acc -= bp[22 - 12];
        store_lo32(&rp[11], acc);

        /* Whatever is left above the last word is the net carry/borrow. */
        carry = (int)(acc >> 32);
    }
#else
    /*
     * Word-array variant: build each term (S1..S6 added, D1..D3 subtracted)
     * in t_d and accumulate it into r_d, tracking the net carry.
     */
    {
        BN_ULONG t_d[BN_NIST_384_TOP];

        /*
         * S1
         */
        nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23 - 4, 22 - 4, 21 - 4);
        /* left shift */
        {
            register BN_ULONG *ap, t, c;
            ap = t_d;
            c = 0;
            for (i = 3; i != 0; --i) {
                t = *ap;
                *(ap++) = ((t << 1) | c) & BN_MASK2;
                c = (t & BN_TBIT) ? 1 : 0;
            }
            /* The bit shifted out of the third word lands in the fourth. */
            *ap = c;
        }
        /* This term is added starting 128 bits into the result. */
        carry = (int)bn_add_words(r_d + (128 / BN_BITS2), r_d + (128 / BN_BITS2),
            t_d, BN_NIST_256_TOP);
        /*
         * S2
         */
        carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP);
        /*
         * S3
         */
        nist_set_384(t_d, buf.bn, 20, 19, 18, 17, 16, 15, 14, 13, 12, 23, 22,
            21);
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
        /*
         * S4
         */
        nist_set_384(t_d, buf.bn, 19, 18, 17, 16, 15, 14, 13, 12, 20, 0, 23,
            0);
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
        /*
         * S5
         */
        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 23, 22, 21, 20, 0, 0, 0, 0);
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
        /*
         * S6
         */
        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 23, 22, 21, 0, 0, 20);
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
        /*
         * D1
         */
        nist_set_384(t_d, buf.bn, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12,
            23);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
        /*
         * D2
         */
        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 0, 23, 22, 21, 20, 0);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
        /*
         * D3
         */
        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 0, 0);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
    }
#endif
    /* see BN_nist_mod_224 for explanation */
    /*
     * _nist_p_384[k] is the "carry-less" value of (k + 1) * modulus (see the
     * table comment near the top of the file).  A positive carry is removed
     * by subtracting the matching multiple, a negative one by adding it; the
     * word returned by that operation replaces `carry`.
     */
    adjust = bn_sub_words;
    if (carry > 0)
        carry = (int)bn_sub_words(r_d, r_d, _nist_p_384[carry - 1],
            BN_NIST_384_TOP);
    else if (carry < 0) {
        carry = (int)bn_add_words(r_d, r_d, _nist_p_384[-carry - 1],
            BN_NIST_384_TOP);
        adjust = carry ? bn_sub_words : bn_add_words;
    } else
        carry = 1;

    /*
     * Apply one more +/- modulus into c_d and keep r_d only when that
     * operation reports a carry/borrow and `carry` is non-zero; otherwise
     * the adjusted value in c_d is the result.
     */
    res = ((*adjust)(c_d, r_d, _nist_p_384[0], BN_NIST_384_TOP) && carry)
        ? r_d
        : c_d;
    nist_cp_bn(r_d, res, BN_NIST_384_TOP);
    r->top = BN_NIST_384_TOP;
    bn_correct_top(r);

    return 1;
}
1137 
/*
 * Bit-level constants for splitting a value at bit 521.
 *
 * BN_NIST_521_RSHIFT   - 521 modulo the word size: how many of the 521 low
 *                        bits sit in the last (partial) word.
 * BN_NIST_521_LSHIFT   - the complementary shift within a word.
 * BN_NIST_521_TOP_MASK - BN_MASK2 shifted right by BN_NIST_521_LSHIFT, i.e.
 *                        the low BN_NIST_521_RSHIFT bits of a word when
 *                        BN_MASK2 is the all-ones word mask.
 */
#define BN_NIST_521_RSHIFT (521 % BN_BITS2)
#define BN_NIST_521_LSHIFT (BN_BITS2 - BN_NIST_521_RSHIFT)
#define BN_NIST_521_TOP_MASK ((BN_ULONG)BN_MASK2 >> BN_NIST_521_LSHIFT)
1141 
/*
 * Reduce `a` modulo the NIST P-521 prime and store the result in `r`.
 *
 * r     - destination; may be the same object as `a`.
 * a     - value to reduce.
 * field - ignored on entry: it is overwritten with the address of
 *         ossl_bignum_nist_p_521 before use.
 * ctx   - only used when the call falls back to BN_nnmod().
 *
 * The fast path is taken only when `a` is non-negative and smaller than the
 * square of the prime (_nist_p_521_sqr); every other input is handed to
 * BN_nnmod().  On the fast path the bits of `a` above bit 520 are shifted
 * down and added to the low 521 bits, followed by one conditional
 * subtraction of the prime.
 *
 * Returns 1 on success and 0 when `r` cannot be expanded or copied into.
 * On the BN_nnmod() fallback the return value is BN_nnmod()'s.
 */
int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
    BN_CTX *ctx)
{
    int top = a->top, i;
    BN_ULONG *r_d, *a_d = a->d, t_d[BN_NIST_521_TOP], val, tmp, *res;
    static const BIGNUM ossl_bignum_nist_p_521_sqr = {
        (BN_ULONG *)_nist_p_521_sqr,
        OSSL_NELEM(_nist_p_521_sqr),
        OSSL_NELEM(_nist_p_521_sqr),
        0, BN_FLG_STATIC_DATA
    };

    field = &ossl_bignum_nist_p_521; /* just to make sure */

    if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_521_sqr) >= 0)
        return BN_nnmod(r, a, field, ctx);

    /* a == p reduces to zero; a < p is already reduced. */
    i = BN_ucmp(field, a);
    if (i == 0) {
        BN_zero(r);
        return 1;
    } else if (i > 0)
        return (r == a) ? 1 : (BN_copy(r, a) != NULL);

    /* Seed the result with the low BN_NIST_521_TOP words of `a`. */
    if (r != a) {
        if (!bn_wexpand(r, BN_NIST_521_TOP))
            return 0;
        r_d = r->d;
        nist_cp_bn(r_d, a_d, BN_NIST_521_TOP);
    } else
        r_d = a_d;

    /* upper 521 bits, copy ... */
    /*
     * The copy starts at word BN_NIST_521_TOP - 1 because bit 521 lies
     * inside that word; the unwanted low bits are shifted out below.
     */
    nist_cp_bn_0(t_d, a_d + (BN_NIST_521_TOP - 1),
        top - (BN_NIST_521_TOP - 1), BN_NIST_521_TOP);
    /* ... and right shift */
    for (val = t_d[0], i = 0; i < BN_NIST_521_TOP - 1; i++) {
#if 0
        /*
         * MSC ARM compiler [version 2013, presumably even earlier,
         * much earlier] miscompiles this code, but not one in
         * #else section. See RT#3541.
         */
        tmp = val >> BN_NIST_521_RSHIFT;
        val = t_d[i + 1];
        t_d[i] = (tmp | val << BN_NIST_521_LSHIFT) & BN_MASK2;
#else
        /* Low part from the current word, high part from the next one. */
        t_d[i] = (val >> BN_NIST_521_RSHIFT | (tmp = t_d[i + 1]) << BN_NIST_521_LSHIFT) & BN_MASK2;
        val = tmp;
#endif
    }
    /* i == BN_NIST_521_TOP - 1 here: finish the top word of the shift. */
    t_d[i] = val >> BN_NIST_521_RSHIFT;
    /* lower 521 bits */
    r_d[i] &= BN_NIST_521_TOP_MASK;

    /* Carry-out is not examined: the return value of the add is dropped. */
    bn_add_words(r_d, r_d, t_d, BN_NIST_521_TOP);
    /*
     * Try sum - p into t_d.  A non-zero return (borrow) means the sum was
     * already below p, so r_d is kept; otherwise t_d holds the result.
     */
    res = bn_sub_words(t_d, r_d, _nist_p_521,
              BN_NIST_521_TOP)
        ? r_d
        : t_d;
    nist_cp_bn(r_d, res, BN_NIST_521_TOP);
    r->top = BN_NIST_521_TOP;
    bn_correct_top(r);

    return 1;
}
1208 
BN_nist_mod_func(const BIGNUM * p)1209 int (*BN_nist_mod_func(const BIGNUM *p))(BIGNUM *r, const BIGNUM *a,
1210     const BIGNUM *field, BN_CTX *ctx)
1211 {
1212     if (BN_ucmp(&ossl_bignum_nist_p_192, p) == 0)
1213         return BN_nist_mod_192;
1214     if (BN_ucmp(&ossl_bignum_nist_p_224, p) == 0)
1215         return BN_nist_mod_224;
1216     if (BN_ucmp(&ossl_bignum_nist_p_256, p) == 0)
1217         return BN_nist_mod_256;
1218     if (BN_ucmp(&ossl_bignum_nist_p_384, p) == 0)
1219         return BN_nist_mod_384;
1220     if (BN_ucmp(&ossl_bignum_nist_p_521, p) == 0)
1221         return BN_nist_mod_521;
1222     return 0;
1223 }
1224