Lines Matching +full:2 +full:- +full:a
5 * a copy of this software and associated documentation files (the
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * that right-shifting a signed negative integer copies the sign bit
30 * (arithmetic right-shift). This is "implementation-defined behaviour",
39 | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
45 * Convert an integer from unsigned big-endian encoding to a sequence of
46 * 13-bit words in little-endian order. The final "partial" word is
57 while (len -- > 0) { in be8_to_le13()
63 acc_len -= 13; in be8_to_le13()
70 * Convert an integer (13-bit words, little-endian) to unsigned
71 * big-endian encoding. The total encoding length is provided; all
82 while (len -- > 0) { in le13_to_be8()
89 acc_len -= 8; in le13_to_be8()
94 * Normalise an array of words to a strict 13 bits per word. Returned
116 * mul20() multiplies two 260-bit integers together. Each word must fit
120 * square20() computes the square of a 260-bit integer. Each word must
128 mul20(uint32_t *d, const uint32_t *a, const uint32_t *b) in mul20() argument
131 * Two-level Karatsuba: turns a 20x20 multiplication into in mul20()
132 * nine 5x5 multiplications. We use 13-bit words but do not in mul20()
135 * - First Karatsuba decomposition turns the 20x20 mul on in mul20()
136 * 13-bit words into three 10x10 muls, two on 13-bit words in mul20()
137 * and one on 14-bit words. in mul20()
139 * - Second Karatsuba decomposition further splits these into: in mul20()
141 * * four 5x5 muls on 13-bit words in mul20()
142 * * four 5x5 muls on 14-bit words in mul20()
143 * * one 5x5 mul on 15-bit words in mul20()
145 * Highest word value is 8191, 16382 or 32764, for 13-bit, 14-bit in mul20()
146 * or 15-bit words, respectively. in mul20()
157 (dw)[5 * (d_off) + 2] = (s1w)[5 * (s1_off) + 2] \ in mul20()
158 + (s2w)[5 * (s2_off) + 2]; \ in mul20()
168 (dw)[5 * (d_off) + 2] += (sw)[5 * (s_off) + 2]; \ in mul20()
174 (dw)[5 * (d_off) + 0] -= (s1w)[5 * (s1_off) + 0] \ in mul20()
176 (dw)[5 * (d_off) + 1] -= (s1w)[5 * (s1_off) + 1] \ in mul20()
178 (dw)[5 * (d_off) + 2] -= (s1w)[5 * (s1_off) + 2] \ in mul20()
179 + (s2w)[5 * (s2_off) + 2]; \ in mul20()
180 (dw)[5 * (d_off) + 3] -= (s1w)[5 * (s1_off) + 3] \ in mul20()
182 (dw)[5 * (d_off) + 4] -= (s1w)[5 * (s1_off) + 4] \ in mul20()
197 CPR1((dw)[(d_off) + 2], cprcc); \ in mul20()
207 memcpy(u, a, 20 * sizeof *a); in mul20()
208 ZADD(u, 4, a, 0, a, 1); in mul20()
209 ZADD(u, 5, a, 2, a, 3); in mul20()
210 ZADD(u, 6, a, 0, a, 2); in mul20()
211 ZADD(u, 7, a, 1, a, 3); in mul20()
216 ZADD(v, 5, b, 2, b, 3); in mul20()
217 ZADD(v, 6, b, 0, b, 2); in mul20()
223 * each, so we can add product results together "as is" in 32-bit in mul20()
230 w[(i << 1) + 2] = MUL15(u[i + 0], v[i + 2]) in mul20()
232 + MUL15(u[i + 2], v[i + 0]); in mul20()
234 + MUL15(u[i + 1], v[i + 2]) in mul20()
235 + MUL15(u[i + 2], v[i + 1]) in mul20()
239 + MUL15(u[i + 2], v[i + 2]) in mul20()
243 + MUL15(u[i + 2], v[i + 3]) in mul20()
244 + MUL15(u[i + 3], v[i + 2]) in mul20()
246 w[(i << 1) + 6] = MUL15(u[i + 2], v[i + 4]) in mul20()
248 + MUL15(u[i + 4], v[i + 2]); in mul20()
260 * than 524224 (because 4*(32764^2)+524224 < 8192*524225). in mul20()
263 * then do a carry propagation (this reduces words to 13 bits in mul20()
270 w[80 + 2] = MUL15(u[40 + 0], v[40 + 2]) in mul20()
272 + MUL15(u[40 + 2], v[40 + 0]); in mul20()
274 + MUL15(u[40 + 1], v[40 + 2]) in mul20()
275 + MUL15(u[40 + 2], v[40 + 1]) in mul20()
279 + MUL15(u[40 + 2], v[40 + 2]) in mul20()
283 + MUL15(u[40 + 2], v[40 + 3]) in mul20()
284 + MUL15(u[40 + 3], v[40 + 2]) in mul20()
286 w[80 + 6] = MUL15(u[40 + 2], v[40 + 4]) in mul20()
288 + MUL15(u[40 + 4], v[40 + 2]); in mul20()
298 * The products on 14-bit words in slots 6 and 7 yield values in mul20()
299 * up to 5*(16382^2) each, and we need to subtract two such in mul20()
301 * in a _signed_ 32-bit integer, i.e. 31 bits + a sign bit. in mul20()
302 * However, 10*(16382^2) does not fit. So we must perform a in mul20()
313 ZSUB2F(w, 8, w, 0, w, 2); in mul20()
316 ZADDT(w, 2, w, 9); in mul20()
318 /* 2..3*2..3 into 4..7 */ in mul20()
324 /* (0..1+2..3)*(0..1+2..3) into 12..15 */ in mul20()
330 /* first-level recomposition */ in mul20()
333 ZSUB2F(w, 14, w, 2, w, 6); in mul20()
335 ZADDT(w, 2, w, 12); in mul20()
354 square20(uint32_t *d, const uint32_t *a) in square20() argument
356 mul20(d, a, a); in square20()
362 mul20(uint32_t *d, const uint32_t *a, const uint32_t *b) in mul20() argument
366 t[ 0] = MUL15(a[ 0], b[ 0]); in mul20()
367 t[ 1] = MUL15(a[ 0], b[ 1]) in mul20()
368 + MUL15(a[ 1], b[ 0]); in mul20()
369 t[ 2] = MUL15(a[ 0], b[ 2]) in mul20()
370 + MUL15(a[ 1], b[ 1]) in mul20()
371 + MUL15(a[ 2], b[ 0]); in mul20()
372 t[ 3] = MUL15(a[ 0], b[ 3]) in mul20()
373 + MUL15(a[ 1], b[ 2]) in mul20()
374 + MUL15(a[ 2], b[ 1]) in mul20()
375 + MUL15(a[ 3], b[ 0]); in mul20()
376 t[ 4] = MUL15(a[ 0], b[ 4]) in mul20()
377 + MUL15(a[ 1], b[ 3]) in mul20()
378 + MUL15(a[ 2], b[ 2]) in mul20()
379 + MUL15(a[ 3], b[ 1]) in mul20()
380 + MUL15(a[ 4], b[ 0]); in mul20()
381 t[ 5] = MUL15(a[ 0], b[ 5]) in mul20()
382 + MUL15(a[ 1], b[ 4]) in mul20()
383 + MUL15(a[ 2], b[ 3]) in mul20()
384 + MUL15(a[ 3], b[ 2]) in mul20()
385 + MUL15(a[ 4], b[ 1]) in mul20()
386 + MUL15(a[ 5], b[ 0]); in mul20()
387 t[ 6] = MUL15(a[ 0], b[ 6]) in mul20()
388 + MUL15(a[ 1], b[ 5]) in mul20()
389 + MUL15(a[ 2], b[ 4]) in mul20()
390 + MUL15(a[ 3], b[ 3]) in mul20()
391 + MUL15(a[ 4], b[ 2]) in mul20()
392 + MUL15(a[ 5], b[ 1]) in mul20()
393 + MUL15(a[ 6], b[ 0]); in mul20()
394 t[ 7] = MUL15(a[ 0], b[ 7]) in mul20()
395 + MUL15(a[ 1], b[ 6]) in mul20()
396 + MUL15(a[ 2], b[ 5]) in mul20()
397 + MUL15(a[ 3], b[ 4]) in mul20()
398 + MUL15(a[ 4], b[ 3]) in mul20()
399 + MUL15(a[ 5], b[ 2]) in mul20()
400 + MUL15(a[ 6], b[ 1]) in mul20()
401 + MUL15(a[ 7], b[ 0]); in mul20()
402 t[ 8] = MUL15(a[ 0], b[ 8]) in mul20()
403 + MUL15(a[ 1], b[ 7]) in mul20()
404 + MUL15(a[ 2], b[ 6]) in mul20()
405 + MUL15(a[ 3], b[ 5]) in mul20()
406 + MUL15(a[ 4], b[ 4]) in mul20()
407 + MUL15(a[ 5], b[ 3]) in mul20()
408 + MUL15(a[ 6], b[ 2]) in mul20()
409 + MUL15(a[ 7], b[ 1]) in mul20()
410 + MUL15(a[ 8], b[ 0]); in mul20()
411 t[ 9] = MUL15(a[ 0], b[ 9]) in mul20()
412 + MUL15(a[ 1], b[ 8]) in mul20()
413 + MUL15(a[ 2], b[ 7]) in mul20()
414 + MUL15(a[ 3], b[ 6]) in mul20()
415 + MUL15(a[ 4], b[ 5]) in mul20()
416 + MUL15(a[ 5], b[ 4]) in mul20()
417 + MUL15(a[ 6], b[ 3]) in mul20()
418 + MUL15(a[ 7], b[ 2]) in mul20()
419 + MUL15(a[ 8], b[ 1]) in mul20()
420 + MUL15(a[ 9], b[ 0]); in mul20()
421 t[10] = MUL15(a[ 0], b[10]) in mul20()
422 + MUL15(a[ 1], b[ 9]) in mul20()
423 + MUL15(a[ 2], b[ 8]) in mul20()
424 + MUL15(a[ 3], b[ 7]) in mul20()
425 + MUL15(a[ 4], b[ 6]) in mul20()
426 + MUL15(a[ 5], b[ 5]) in mul20()
427 + MUL15(a[ 6], b[ 4]) in mul20()
428 + MUL15(a[ 7], b[ 3]) in mul20()
429 + MUL15(a[ 8], b[ 2]) in mul20()
430 + MUL15(a[ 9], b[ 1]) in mul20()
431 + MUL15(a[10], b[ 0]); in mul20()
432 t[11] = MUL15(a[ 0], b[11]) in mul20()
433 + MUL15(a[ 1], b[10]) in mul20()
434 + MUL15(a[ 2], b[ 9]) in mul20()
435 + MUL15(a[ 3], b[ 8]) in mul20()
436 + MUL15(a[ 4], b[ 7]) in mul20()
437 + MUL15(a[ 5], b[ 6]) in mul20()
438 + MUL15(a[ 6], b[ 5]) in mul20()
439 + MUL15(a[ 7], b[ 4]) in mul20()
440 + MUL15(a[ 8], b[ 3]) in mul20()
441 + MUL15(a[ 9], b[ 2]) in mul20()
442 + MUL15(a[10], b[ 1]) in mul20()
443 + MUL15(a[11], b[ 0]); in mul20()
444 t[12] = MUL15(a[ 0], b[12]) in mul20()
445 + MUL15(a[ 1], b[11]) in mul20()
446 + MUL15(a[ 2], b[10]) in mul20()
447 + MUL15(a[ 3], b[ 9]) in mul20()
448 + MUL15(a[ 4], b[ 8]) in mul20()
449 + MUL15(a[ 5], b[ 7]) in mul20()
450 + MUL15(a[ 6], b[ 6]) in mul20()
451 + MUL15(a[ 7], b[ 5]) in mul20()
452 + MUL15(a[ 8], b[ 4]) in mul20()
453 + MUL15(a[ 9], b[ 3]) in mul20()
454 + MUL15(a[10], b[ 2]) in mul20()
455 + MUL15(a[11], b[ 1]) in mul20()
456 + MUL15(a[12], b[ 0]); in mul20()
457 t[13] = MUL15(a[ 0], b[13]) in mul20()
458 + MUL15(a[ 1], b[12]) in mul20()
459 + MUL15(a[ 2], b[11]) in mul20()
460 + MUL15(a[ 3], b[10]) in mul20()
461 + MUL15(a[ 4], b[ 9]) in mul20()
462 + MUL15(a[ 5], b[ 8]) in mul20()
463 + MUL15(a[ 6], b[ 7]) in mul20()
464 + MUL15(a[ 7], b[ 6]) in mul20()
465 + MUL15(a[ 8], b[ 5]) in mul20()
466 + MUL15(a[ 9], b[ 4]) in mul20()
467 + MUL15(a[10], b[ 3]) in mul20()
468 + MUL15(a[11], b[ 2]) in mul20()
469 + MUL15(a[12], b[ 1]) in mul20()
470 + MUL15(a[13], b[ 0]); in mul20()
471 t[14] = MUL15(a[ 0], b[14]) in mul20()
472 + MUL15(a[ 1], b[13]) in mul20()
473 + MUL15(a[ 2], b[12]) in mul20()
474 + MUL15(a[ 3], b[11]) in mul20()
475 + MUL15(a[ 4], b[10]) in mul20()
476 + MUL15(a[ 5], b[ 9]) in mul20()
477 + MUL15(a[ 6], b[ 8]) in mul20()
478 + MUL15(a[ 7], b[ 7]) in mul20()
479 + MUL15(a[ 8], b[ 6]) in mul20()
480 + MUL15(a[ 9], b[ 5]) in mul20()
481 + MUL15(a[10], b[ 4]) in mul20()
482 + MUL15(a[11], b[ 3]) in mul20()
483 + MUL15(a[12], b[ 2]) in mul20()
484 + MUL15(a[13], b[ 1]) in mul20()
485 + MUL15(a[14], b[ 0]); in mul20()
486 t[15] = MUL15(a[ 0], b[15]) in mul20()
487 + MUL15(a[ 1], b[14]) in mul20()
488 + MUL15(a[ 2], b[13]) in mul20()
489 + MUL15(a[ 3], b[12]) in mul20()
490 + MUL15(a[ 4], b[11]) in mul20()
491 + MUL15(a[ 5], b[10]) in mul20()
492 + MUL15(a[ 6], b[ 9]) in mul20()
493 + MUL15(a[ 7], b[ 8]) in mul20()
494 + MUL15(a[ 8], b[ 7]) in mul20()
495 + MUL15(a[ 9], b[ 6]) in mul20()
496 + MUL15(a[10], b[ 5]) in mul20()
497 + MUL15(a[11], b[ 4]) in mul20()
498 + MUL15(a[12], b[ 3]) in mul20()
499 + MUL15(a[13], b[ 2]) in mul20()
500 + MUL15(a[14], b[ 1]) in mul20()
501 + MUL15(a[15], b[ 0]); in mul20()
502 t[16] = MUL15(a[ 0], b[16]) in mul20()
503 + MUL15(a[ 1], b[15]) in mul20()
504 + MUL15(a[ 2], b[14]) in mul20()
505 + MUL15(a[ 3], b[13]) in mul20()
506 + MUL15(a[ 4], b[12]) in mul20()
507 + MUL15(a[ 5], b[11]) in mul20()
508 + MUL15(a[ 6], b[10]) in mul20()
509 + MUL15(a[ 7], b[ 9]) in mul20()
510 + MUL15(a[ 8], b[ 8]) in mul20()
511 + MUL15(a[ 9], b[ 7]) in mul20()
512 + MUL15(a[10], b[ 6]) in mul20()
513 + MUL15(a[11], b[ 5]) in mul20()
514 + MUL15(a[12], b[ 4]) in mul20()
515 + MUL15(a[13], b[ 3]) in mul20()
516 + MUL15(a[14], b[ 2]) in mul20()
517 + MUL15(a[15], b[ 1]) in mul20()
518 + MUL15(a[16], b[ 0]); in mul20()
519 t[17] = MUL15(a[ 0], b[17]) in mul20()
520 + MUL15(a[ 1], b[16]) in mul20()
521 + MUL15(a[ 2], b[15]) in mul20()
522 + MUL15(a[ 3], b[14]) in mul20()
523 + MUL15(a[ 4], b[13]) in mul20()
524 + MUL15(a[ 5], b[12]) in mul20()
525 + MUL15(a[ 6], b[11]) in mul20()
526 + MUL15(a[ 7], b[10]) in mul20()
527 + MUL15(a[ 8], b[ 9]) in mul20()
528 + MUL15(a[ 9], b[ 8]) in mul20()
529 + MUL15(a[10], b[ 7]) in mul20()
530 + MUL15(a[11], b[ 6]) in mul20()
531 + MUL15(a[12], b[ 5]) in mul20()
532 + MUL15(a[13], b[ 4]) in mul20()
533 + MUL15(a[14], b[ 3]) in mul20()
534 + MUL15(a[15], b[ 2]) in mul20()
535 + MUL15(a[16], b[ 1]) in mul20()
536 + MUL15(a[17], b[ 0]); in mul20()
537 t[18] = MUL15(a[ 0], b[18]) in mul20()
538 + MUL15(a[ 1], b[17]) in mul20()
539 + MUL15(a[ 2], b[16]) in mul20()
540 + MUL15(a[ 3], b[15]) in mul20()
541 + MUL15(a[ 4], b[14]) in mul20()
542 + MUL15(a[ 5], b[13]) in mul20()
543 + MUL15(a[ 6], b[12]) in mul20()
544 + MUL15(a[ 7], b[11]) in mul20()
545 + MUL15(a[ 8], b[10]) in mul20()
546 + MUL15(a[ 9], b[ 9]) in mul20()
547 + MUL15(a[10], b[ 8]) in mul20()
548 + MUL15(a[11], b[ 7]) in mul20()
549 + MUL15(a[12], b[ 6]) in mul20()
550 + MUL15(a[13], b[ 5]) in mul20()
551 + MUL15(a[14], b[ 4]) in mul20()
552 + MUL15(a[15], b[ 3]) in mul20()
553 + MUL15(a[16], b[ 2]) in mul20()
554 + MUL15(a[17], b[ 1]) in mul20()
555 + MUL15(a[18], b[ 0]); in mul20()
556 t[19] = MUL15(a[ 0], b[19]) in mul20()
557 + MUL15(a[ 1], b[18]) in mul20()
558 + MUL15(a[ 2], b[17]) in mul20()
559 + MUL15(a[ 3], b[16]) in mul20()
560 + MUL15(a[ 4], b[15]) in mul20()
561 + MUL15(a[ 5], b[14]) in mul20()
562 + MUL15(a[ 6], b[13]) in mul20()
563 + MUL15(a[ 7], b[12]) in mul20()
564 + MUL15(a[ 8], b[11]) in mul20()
565 + MUL15(a[ 9], b[10]) in mul20()
566 + MUL15(a[10], b[ 9]) in mul20()
567 + MUL15(a[11], b[ 8]) in mul20()
568 + MUL15(a[12], b[ 7]) in mul20()
569 + MUL15(a[13], b[ 6]) in mul20()
570 + MUL15(a[14], b[ 5]) in mul20()
571 + MUL15(a[15], b[ 4]) in mul20()
572 + MUL15(a[16], b[ 3]) in mul20()
573 + MUL15(a[17], b[ 2]) in mul20()
574 + MUL15(a[18], b[ 1]) in mul20()
575 + MUL15(a[19], b[ 0]); in mul20()
576 t[20] = MUL15(a[ 1], b[19]) in mul20()
577 + MUL15(a[ 2], b[18]) in mul20()
578 + MUL15(a[ 3], b[17]) in mul20()
579 + MUL15(a[ 4], b[16]) in mul20()
580 + MUL15(a[ 5], b[15]) in mul20()
581 + MUL15(a[ 6], b[14]) in mul20()
582 + MUL15(a[ 7], b[13]) in mul20()
583 + MUL15(a[ 8], b[12]) in mul20()
584 + MUL15(a[ 9], b[11]) in mul20()
585 + MUL15(a[10], b[10]) in mul20()
586 + MUL15(a[11], b[ 9]) in mul20()
587 + MUL15(a[12], b[ 8]) in mul20()
588 + MUL15(a[13], b[ 7]) in mul20()
589 + MUL15(a[14], b[ 6]) in mul20()
590 + MUL15(a[15], b[ 5]) in mul20()
591 + MUL15(a[16], b[ 4]) in mul20()
592 + MUL15(a[17], b[ 3]) in mul20()
593 + MUL15(a[18], b[ 2]) in mul20()
594 + MUL15(a[19], b[ 1]); in mul20()
595 t[21] = MUL15(a[ 2], b[19]) in mul20()
596 + MUL15(a[ 3], b[18]) in mul20()
597 + MUL15(a[ 4], b[17]) in mul20()
598 + MUL15(a[ 5], b[16]) in mul20()
599 + MUL15(a[ 6], b[15]) in mul20()
600 + MUL15(a[ 7], b[14]) in mul20()
601 + MUL15(a[ 8], b[13]) in mul20()
602 + MUL15(a[ 9], b[12]) in mul20()
603 + MUL15(a[10], b[11]) in mul20()
604 + MUL15(a[11], b[10]) in mul20()
605 + MUL15(a[12], b[ 9]) in mul20()
606 + MUL15(a[13], b[ 8]) in mul20()
607 + MUL15(a[14], b[ 7]) in mul20()
608 + MUL15(a[15], b[ 6]) in mul20()
609 + MUL15(a[16], b[ 5]) in mul20()
610 + MUL15(a[17], b[ 4]) in mul20()
611 + MUL15(a[18], b[ 3]) in mul20()
612 + MUL15(a[19], b[ 2]); in mul20()
613 t[22] = MUL15(a[ 3], b[19]) in mul20()
614 + MUL15(a[ 4], b[18]) in mul20()
615 + MUL15(a[ 5], b[17]) in mul20()
616 + MUL15(a[ 6], b[16]) in mul20()
617 + MUL15(a[ 7], b[15]) in mul20()
618 + MUL15(a[ 8], b[14]) in mul20()
619 + MUL15(a[ 9], b[13]) in mul20()
620 + MUL15(a[10], b[12]) in mul20()
621 + MUL15(a[11], b[11]) in mul20()
622 + MUL15(a[12], b[10]) in mul20()
623 + MUL15(a[13], b[ 9]) in mul20()
624 + MUL15(a[14], b[ 8]) in mul20()
625 + MUL15(a[15], b[ 7]) in mul20()
626 + MUL15(a[16], b[ 6]) in mul20()
627 + MUL15(a[17], b[ 5]) in mul20()
628 + MUL15(a[18], b[ 4]) in mul20()
629 + MUL15(a[19], b[ 3]); in mul20()
630 t[23] = MUL15(a[ 4], b[19]) in mul20()
631 + MUL15(a[ 5], b[18]) in mul20()
632 + MUL15(a[ 6], b[17]) in mul20()
633 + MUL15(a[ 7], b[16]) in mul20()
634 + MUL15(a[ 8], b[15]) in mul20()
635 + MUL15(a[ 9], b[14]) in mul20()
636 + MUL15(a[10], b[13]) in mul20()
637 + MUL15(a[11], b[12]) in mul20()
638 + MUL15(a[12], b[11]) in mul20()
639 + MUL15(a[13], b[10]) in mul20()
640 + MUL15(a[14], b[ 9]) in mul20()
641 + MUL15(a[15], b[ 8]) in mul20()
642 + MUL15(a[16], b[ 7]) in mul20()
643 + MUL15(a[17], b[ 6]) in mul20()
644 + MUL15(a[18], b[ 5]) in mul20()
645 + MUL15(a[19], b[ 4]); in mul20()
646 t[24] = MUL15(a[ 5], b[19]) in mul20()
647 + MUL15(a[ 6], b[18]) in mul20()
648 + MUL15(a[ 7], b[17]) in mul20()
649 + MUL15(a[ 8], b[16]) in mul20()
650 + MUL15(a[ 9], b[15]) in mul20()
651 + MUL15(a[10], b[14]) in mul20()
652 + MUL15(a[11], b[13]) in mul20()
653 + MUL15(a[12], b[12]) in mul20()
654 + MUL15(a[13], b[11]) in mul20()
655 + MUL15(a[14], b[10]) in mul20()
656 + MUL15(a[15], b[ 9]) in mul20()
657 + MUL15(a[16], b[ 8]) in mul20()
658 + MUL15(a[17], b[ 7]) in mul20()
659 + MUL15(a[18], b[ 6]) in mul20()
660 + MUL15(a[19], b[ 5]); in mul20()
661 t[25] = MUL15(a[ 6], b[19]) in mul20()
662 + MUL15(a[ 7], b[18]) in mul20()
663 + MUL15(a[ 8], b[17]) in mul20()
664 + MUL15(a[ 9], b[16]) in mul20()
665 + MUL15(a[10], b[15]) in mul20()
666 + MUL15(a[11], b[14]) in mul20()
667 + MUL15(a[12], b[13]) in mul20()
668 + MUL15(a[13], b[12]) in mul20()
669 + MUL15(a[14], b[11]) in mul20()
670 + MUL15(a[15], b[10]) in mul20()
671 + MUL15(a[16], b[ 9]) in mul20()
672 + MUL15(a[17], b[ 8]) in mul20()
673 + MUL15(a[18], b[ 7]) in mul20()
674 + MUL15(a[19], b[ 6]); in mul20()
675 t[26] = MUL15(a[ 7], b[19]) in mul20()
676 + MUL15(a[ 8], b[18]) in mul20()
677 + MUL15(a[ 9], b[17]) in mul20()
678 + MUL15(a[10], b[16]) in mul20()
679 + MUL15(a[11], b[15]) in mul20()
680 + MUL15(a[12], b[14]) in mul20()
681 + MUL15(a[13], b[13]) in mul20()
682 + MUL15(a[14], b[12]) in mul20()
683 + MUL15(a[15], b[11]) in mul20()
684 + MUL15(a[16], b[10]) in mul20()
685 + MUL15(a[17], b[ 9]) in mul20()
686 + MUL15(a[18], b[ 8]) in mul20()
687 + MUL15(a[19], b[ 7]); in mul20()
688 t[27] = MUL15(a[ 8], b[19]) in mul20()
689 + MUL15(a[ 9], b[18]) in mul20()
690 + MUL15(a[10], b[17]) in mul20()
691 + MUL15(a[11], b[16]) in mul20()
692 + MUL15(a[12], b[15]) in mul20()
693 + MUL15(a[13], b[14]) in mul20()
694 + MUL15(a[14], b[13]) in mul20()
695 + MUL15(a[15], b[12]) in mul20()
696 + MUL15(a[16], b[11]) in mul20()
697 + MUL15(a[17], b[10]) in mul20()
698 + MUL15(a[18], b[ 9]) in mul20()
699 + MUL15(a[19], b[ 8]); in mul20()
700 t[28] = MUL15(a[ 9], b[19]) in mul20()
701 + MUL15(a[10], b[18]) in mul20()
702 + MUL15(a[11], b[17]) in mul20()
703 + MUL15(a[12], b[16]) in mul20()
704 + MUL15(a[13], b[15]) in mul20()
705 + MUL15(a[14], b[14]) in mul20()
706 + MUL15(a[15], b[13]) in mul20()
707 + MUL15(a[16], b[12]) in mul20()
708 + MUL15(a[17], b[11]) in mul20()
709 + MUL15(a[18], b[10]) in mul20()
710 + MUL15(a[19], b[ 9]); in mul20()
711 t[29] = MUL15(a[10], b[19]) in mul20()
712 + MUL15(a[11], b[18]) in mul20()
713 + MUL15(a[12], b[17]) in mul20()
714 + MUL15(a[13], b[16]) in mul20()
715 + MUL15(a[14], b[15]) in mul20()
716 + MUL15(a[15], b[14]) in mul20()
717 + MUL15(a[16], b[13]) in mul20()
718 + MUL15(a[17], b[12]) in mul20()
719 + MUL15(a[18], b[11]) in mul20()
720 + MUL15(a[19], b[10]); in mul20()
721 t[30] = MUL15(a[11], b[19]) in mul20()
722 + MUL15(a[12], b[18]) in mul20()
723 + MUL15(a[13], b[17]) in mul20()
724 + MUL15(a[14], b[16]) in mul20()
725 + MUL15(a[15], b[15]) in mul20()
726 + MUL15(a[16], b[14]) in mul20()
727 + MUL15(a[17], b[13]) in mul20()
728 + MUL15(a[18], b[12]) in mul20()
729 + MUL15(a[19], b[11]); in mul20()
730 t[31] = MUL15(a[12], b[19]) in mul20()
731 + MUL15(a[13], b[18]) in mul20()
732 + MUL15(a[14], b[17]) in mul20()
733 + MUL15(a[15], b[16]) in mul20()
734 + MUL15(a[16], b[15]) in mul20()
735 + MUL15(a[17], b[14]) in mul20()
736 + MUL15(a[18], b[13]) in mul20()
737 + MUL15(a[19], b[12]); in mul20()
738 t[32] = MUL15(a[13], b[19]) in mul20()
739 + MUL15(a[14], b[18]) in mul20()
740 + MUL15(a[15], b[17]) in mul20()
741 + MUL15(a[16], b[16]) in mul20()
742 + MUL15(a[17], b[15]) in mul20()
743 + MUL15(a[18], b[14]) in mul20()
744 + MUL15(a[19], b[13]); in mul20()
745 t[33] = MUL15(a[14], b[19]) in mul20()
746 + MUL15(a[15], b[18]) in mul20()
747 + MUL15(a[16], b[17]) in mul20()
748 + MUL15(a[17], b[16]) in mul20()
749 + MUL15(a[18], b[15]) in mul20()
750 + MUL15(a[19], b[14]); in mul20()
751 t[34] = MUL15(a[15], b[19]) in mul20()
752 + MUL15(a[16], b[18]) in mul20()
753 + MUL15(a[17], b[17]) in mul20()
754 + MUL15(a[18], b[16]) in mul20()
755 + MUL15(a[19], b[15]); in mul20()
756 t[35] = MUL15(a[16], b[19]) in mul20()
757 + MUL15(a[17], b[18]) in mul20()
758 + MUL15(a[18], b[17]) in mul20()
759 + MUL15(a[19], b[16]); in mul20()
760 t[36] = MUL15(a[17], b[19]) in mul20()
761 + MUL15(a[18], b[18]) in mul20()
762 + MUL15(a[19], b[17]); in mul20()
763 t[37] = MUL15(a[18], b[19]) in mul20()
764 + MUL15(a[19], b[18]); in mul20()
765 t[38] = MUL15(a[19], b[19]); in mul20()
770 square20(uint32_t *d, const uint32_t *a) in square20() argument
774 t[ 0] = MUL15(a[ 0], a[ 0]); in square20()
775 t[ 1] = ((MUL15(a[ 0], a[ 1])) << 1); in square20()
776 t[ 2] = MUL15(a[ 1], a[ 1]) in square20()
777 + ((MUL15(a[ 0], a[ 2])) << 1); in square20()
778 t[ 3] = ((MUL15(a[ 0], a[ 3]) in square20()
779 + MUL15(a[ 1], a[ 2])) << 1); in square20()
780 t[ 4] = MUL15(a[ 2], a[ 2]) in square20()
781 + ((MUL15(a[ 0], a[ 4]) in square20()
782 + MUL15(a[ 1], a[ 3])) << 1); in square20()
783 t[ 5] = ((MUL15(a[ 0], a[ 5]) in square20()
784 + MUL15(a[ 1], a[ 4]) in square20()
785 + MUL15(a[ 2], a[ 3])) << 1); in square20()
786 t[ 6] = MUL15(a[ 3], a[ 3]) in square20()
787 + ((MUL15(a[ 0], a[ 6]) in square20()
788 + MUL15(a[ 1], a[ 5]) in square20()
789 + MUL15(a[ 2], a[ 4])) << 1); in square20()
790 t[ 7] = ((MUL15(a[ 0], a[ 7]) in square20()
791 + MUL15(a[ 1], a[ 6]) in square20()
792 + MUL15(a[ 2], a[ 5]) in square20()
793 + MUL15(a[ 3], a[ 4])) << 1); in square20()
794 t[ 8] = MUL15(a[ 4], a[ 4]) in square20()
795 + ((MUL15(a[ 0], a[ 8]) in square20()
796 + MUL15(a[ 1], a[ 7]) in square20()
797 + MUL15(a[ 2], a[ 6]) in square20()
798 + MUL15(a[ 3], a[ 5])) << 1); in square20()
799 t[ 9] = ((MUL15(a[ 0], a[ 9]) in square20()
800 + MUL15(a[ 1], a[ 8]) in square20()
801 + MUL15(a[ 2], a[ 7]) in square20()
802 + MUL15(a[ 3], a[ 6]) in square20()
803 + MUL15(a[ 4], a[ 5])) << 1); in square20()
804 t[10] = MUL15(a[ 5], a[ 5]) in square20()
805 + ((MUL15(a[ 0], a[10]) in square20()
806 + MUL15(a[ 1], a[ 9]) in square20()
807 + MUL15(a[ 2], a[ 8]) in square20()
808 + MUL15(a[ 3], a[ 7]) in square20()
809 + MUL15(a[ 4], a[ 6])) << 1); in square20()
810 t[11] = ((MUL15(a[ 0], a[11]) in square20()
811 + MUL15(a[ 1], a[10]) in square20()
812 + MUL15(a[ 2], a[ 9]) in square20()
813 + MUL15(a[ 3], a[ 8]) in square20()
814 + MUL15(a[ 4], a[ 7]) in square20()
815 + MUL15(a[ 5], a[ 6])) << 1); in square20()
816 t[12] = MUL15(a[ 6], a[ 6]) in square20()
817 + ((MUL15(a[ 0], a[12]) in square20()
818 + MUL15(a[ 1], a[11]) in square20()
819 + MUL15(a[ 2], a[10]) in square20()
820 + MUL15(a[ 3], a[ 9]) in square20()
821 + MUL15(a[ 4], a[ 8]) in square20()
822 + MUL15(a[ 5], a[ 7])) << 1); in square20()
823 t[13] = ((MUL15(a[ 0], a[13]) in square20()
824 + MUL15(a[ 1], a[12]) in square20()
825 + MUL15(a[ 2], a[11]) in square20()
826 + MUL15(a[ 3], a[10]) in square20()
827 + MUL15(a[ 4], a[ 9]) in square20()
828 + MUL15(a[ 5], a[ 8]) in square20()
829 + MUL15(a[ 6], a[ 7])) << 1); in square20()
830 t[14] = MUL15(a[ 7], a[ 7]) in square20()
831 + ((MUL15(a[ 0], a[14]) in square20()
832 + MUL15(a[ 1], a[13]) in square20()
833 + MUL15(a[ 2], a[12]) in square20()
834 + MUL15(a[ 3], a[11]) in square20()
835 + MUL15(a[ 4], a[10]) in square20()
836 + MUL15(a[ 5], a[ 9]) in square20()
837 + MUL15(a[ 6], a[ 8])) << 1); in square20()
838 t[15] = ((MUL15(a[ 0], a[15]) in square20()
839 + MUL15(a[ 1], a[14]) in square20()
840 + MUL15(a[ 2], a[13]) in square20()
841 + MUL15(a[ 3], a[12]) in square20()
842 + MUL15(a[ 4], a[11]) in square20()
843 + MUL15(a[ 5], a[10]) in square20()
844 + MUL15(a[ 6], a[ 9]) in square20()
845 + MUL15(a[ 7], a[ 8])) << 1); in square20()
846 t[16] = MUL15(a[ 8], a[ 8]) in square20()
847 + ((MUL15(a[ 0], a[16]) in square20()
848 + MUL15(a[ 1], a[15]) in square20()
849 + MUL15(a[ 2], a[14]) in square20()
850 + MUL15(a[ 3], a[13]) in square20()
851 + MUL15(a[ 4], a[12]) in square20()
852 + MUL15(a[ 5], a[11]) in square20()
853 + MUL15(a[ 6], a[10]) in square20()
854 + MUL15(a[ 7], a[ 9])) << 1); in square20()
855 t[17] = ((MUL15(a[ 0], a[17]) in square20()
856 + MUL15(a[ 1], a[16]) in square20()
857 + MUL15(a[ 2], a[15]) in square20()
858 + MUL15(a[ 3], a[14]) in square20()
859 + MUL15(a[ 4], a[13]) in square20()
860 + MUL15(a[ 5], a[12]) in square20()
861 + MUL15(a[ 6], a[11]) in square20()
862 + MUL15(a[ 7], a[10]) in square20()
863 + MUL15(a[ 8], a[ 9])) << 1); in square20()
864 t[18] = MUL15(a[ 9], a[ 9]) in square20()
865 + ((MUL15(a[ 0], a[18]) in square20()
866 + MUL15(a[ 1], a[17]) in square20()
867 + MUL15(a[ 2], a[16]) in square20()
868 + MUL15(a[ 3], a[15]) in square20()
869 + MUL15(a[ 4], a[14]) in square20()
870 + MUL15(a[ 5], a[13]) in square20()
871 + MUL15(a[ 6], a[12]) in square20()
872 + MUL15(a[ 7], a[11]) in square20()
873 + MUL15(a[ 8], a[10])) << 1); in square20()
874 t[19] = ((MUL15(a[ 0], a[19]) in square20()
875 + MUL15(a[ 1], a[18]) in square20()
876 + MUL15(a[ 2], a[17]) in square20()
877 + MUL15(a[ 3], a[16]) in square20()
878 + MUL15(a[ 4], a[15]) in square20()
879 + MUL15(a[ 5], a[14]) in square20()
880 + MUL15(a[ 6], a[13]) in square20()
881 + MUL15(a[ 7], a[12]) in square20()
882 + MUL15(a[ 8], a[11]) in square20()
883 + MUL15(a[ 9], a[10])) << 1); in square20()
884 t[20] = MUL15(a[10], a[10]) in square20()
885 + ((MUL15(a[ 1], a[19]) in square20()
886 + MUL15(a[ 2], a[18]) in square20()
887 + MUL15(a[ 3], a[17]) in square20()
888 + MUL15(a[ 4], a[16]) in square20()
889 + MUL15(a[ 5], a[15]) in square20()
890 + MUL15(a[ 6], a[14]) in square20()
891 + MUL15(a[ 7], a[13]) in square20()
892 + MUL15(a[ 8], a[12]) in square20()
893 + MUL15(a[ 9], a[11])) << 1); in square20()
894 t[21] = ((MUL15(a[ 2], a[19]) in square20()
895 + MUL15(a[ 3], a[18]) in square20()
896 + MUL15(a[ 4], a[17]) in square20()
897 + MUL15(a[ 5], a[16]) in square20()
898 + MUL15(a[ 6], a[15]) in square20()
899 + MUL15(a[ 7], a[14]) in square20()
900 + MUL15(a[ 8], a[13]) in square20()
901 + MUL15(a[ 9], a[12]) in square20()
902 + MUL15(a[10], a[11])) << 1); in square20()
903 t[22] = MUL15(a[11], a[11]) in square20()
904 + ((MUL15(a[ 3], a[19]) in square20()
905 + MUL15(a[ 4], a[18]) in square20()
906 + MUL15(a[ 5], a[17]) in square20()
907 + MUL15(a[ 6], a[16]) in square20()
908 + MUL15(a[ 7], a[15]) in square20()
909 + MUL15(a[ 8], a[14]) in square20()
910 + MUL15(a[ 9], a[13]) in square20()
911 + MUL15(a[10], a[12])) << 1); in square20()
912 t[23] = ((MUL15(a[ 4], a[19]) in square20()
913 + MUL15(a[ 5], a[18]) in square20()
914 + MUL15(a[ 6], a[17]) in square20()
915 + MUL15(a[ 7], a[16]) in square20()
916 + MUL15(a[ 8], a[15]) in square20()
917 + MUL15(a[ 9], a[14]) in square20()
918 + MUL15(a[10], a[13]) in square20()
919 + MUL15(a[11], a[12])) << 1); in square20()
920 t[24] = MUL15(a[12], a[12]) in square20()
921 + ((MUL15(a[ 5], a[19]) in square20()
922 + MUL15(a[ 6], a[18]) in square20()
923 + MUL15(a[ 7], a[17]) in square20()
924 + MUL15(a[ 8], a[16]) in square20()
925 + MUL15(a[ 9], a[15]) in square20()
926 + MUL15(a[10], a[14]) in square20()
927 + MUL15(a[11], a[13])) << 1); in square20()
928 t[25] = ((MUL15(a[ 6], a[19]) in square20()
929 + MUL15(a[ 7], a[18]) in square20()
930 + MUL15(a[ 8], a[17]) in square20()
931 + MUL15(a[ 9], a[16]) in square20()
932 + MUL15(a[10], a[15]) in square20()
933 + MUL15(a[11], a[14]) in square20()
934 + MUL15(a[12], a[13])) << 1); in square20()
935 t[26] = MUL15(a[13], a[13]) in square20()
936 + ((MUL15(a[ 7], a[19]) in square20()
937 + MUL15(a[ 8], a[18]) in square20()
938 + MUL15(a[ 9], a[17]) in square20()
939 + MUL15(a[10], a[16]) in square20()
940 + MUL15(a[11], a[15]) in square20()
941 + MUL15(a[12], a[14])) << 1); in square20()
942 t[27] = ((MUL15(a[ 8], a[19]) in square20()
943 + MUL15(a[ 9], a[18]) in square20()
944 + MUL15(a[10], a[17]) in square20()
945 + MUL15(a[11], a[16]) in square20()
946 + MUL15(a[12], a[15]) in square20()
947 + MUL15(a[13], a[14])) << 1); in square20()
948 t[28] = MUL15(a[14], a[14]) in square20()
949 + ((MUL15(a[ 9], a[19]) in square20()
950 + MUL15(a[10], a[18]) in square20()
951 + MUL15(a[11], a[17]) in square20()
952 + MUL15(a[12], a[16]) in square20()
953 + MUL15(a[13], a[15])) << 1); in square20()
954 t[29] = ((MUL15(a[10], a[19]) in square20()
955 + MUL15(a[11], a[18]) in square20()
956 + MUL15(a[12], a[17]) in square20()
957 + MUL15(a[13], a[16]) in square20()
958 + MUL15(a[14], a[15])) << 1); in square20()
959 t[30] = MUL15(a[15], a[15]) in square20()
960 + ((MUL15(a[11], a[19]) in square20()
961 + MUL15(a[12], a[18]) in square20()
962 + MUL15(a[13], a[17]) in square20()
963 + MUL15(a[14], a[16])) << 1); in square20()
964 t[31] = ((MUL15(a[12], a[19]) in square20()
965 + MUL15(a[13], a[18]) in square20()
966 + MUL15(a[14], a[17]) in square20()
967 + MUL15(a[15], a[16])) << 1); in square20()
968 t[32] = MUL15(a[16], a[16]) in square20()
969 + ((MUL15(a[13], a[19]) in square20()
970 + MUL15(a[14], a[18]) in square20()
971 + MUL15(a[15], a[17])) << 1); in square20()
972 t[33] = ((MUL15(a[14], a[19]) in square20()
973 + MUL15(a[15], a[18]) in square20()
974 + MUL15(a[16], a[17])) << 1); in square20()
975 t[34] = MUL15(a[17], a[17]) in square20()
976 + ((MUL15(a[15], a[19]) in square20()
977 + MUL15(a[16], a[18])) << 1); in square20()
978 t[35] = ((MUL15(a[16], a[19]) in square20()
979 + MUL15(a[17], a[18])) << 1); in square20()
980 t[36] = MUL15(a[18], a[18]) in square20()
981 + ((MUL15(a[17], a[19])) << 1); in square20()
982 t[37] = ((MUL15(a[18], a[19])) << 1); in square20()
983 t[38] = MUL15(a[19], a[19]); in square20()
990 * Modulus for field F256 (field for point coordinates in curve P-256).
999 * The 'b' curve equation coefficient for P-256.
1008 * Perform a "short reduction" in field F256 (field for curve P-256).
1020 d[14] -= x << 10; in reduce_f256()
1021 d[7] -= x << 5; in reduce_f256()
1027 * Perform a "final reduction" in field F256 (field for curve P-256).
1045 w = t[i] - F256[i] - cc; in reduce_final_f256()
1055 * Perform a multiplication of two integers modulo
1056 * 2^256-2^224+2^192+2^96-1 (for NIST curve P-256). Operands are arrays
1057 * of 20 words, each containing 13 bits of data, in little-endian order.
1058 * On input, upper word may be up to 13 bits (hence value up to 2^260-1);
1062 mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) in mul_f256() argument
1071 mul20(t, a, b); in mul_f256()
1078 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 in mul_f256()
1080 * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p in mul_f256()
1082 * For a word x at bit offset n (n >= 256), we have: in mul_f256()
1083 * x*2^n = x*2^(n-32) - x*2^(n-64) in mul_f256()
1084 * - x*2^(n - 160) + x*2^(n-256) mod p in mul_f256()
1089 for (i = 39; i >= 20; i --) { in mul_f256()
1093 t[i - 2] += ARSH(x, 6); in mul_f256()
1094 t[i - 3] += (x << 7) & 0x1FFF; in mul_f256()
1095 t[i - 4] -= ARSH(x, 12); in mul_f256()
1096 t[i - 5] -= (x << 1) & 0x1FFF; in mul_f256()
1097 t[i - 12] -= ARSH(x, 4); in mul_f256()
1098 t[i - 13] -= (x << 9) & 0x1FFF; in mul_f256()
1099 t[i - 19] += ARSH(x, 9); in mul_f256()
1100 t[i - 20] += (x << 4) & 0x1FFF; in mul_f256()
1104 * Propagate carries. This is a signed propagation, and the in mul_f256()
1106 * but not too much: worst case is the chain involving t[i - 3], in mul_f256()
1107 * in which a value may be added to itself up to 7 times. Since in mul_f256()
1108 * starting values are 13-bit each, all words fit on 20 bits in mul_f256()
1116 * bits, and the values fit on 21 bits, values fit in 32-bit words, in mul_f256()
1122 t[14] -= cc << 10; in mul_f256()
1123 t[7] -= cc << 5; in mul_f256()
1128 * end up with a value which is negative, and we don't want that. in mul_f256()
1135 t[0] -= cc; in mul_f256()
1138 t[17] -= cc << 3; in mul_f256()
1145 * Square an integer modulo 2^256-2^224+2^192+2^96-1 (for NIST curve
1146 * P-256). Operand is an array of 20 words, each containing 13 bits of
1147 * data, in little-endian order. On input, upper word may be up to 13
1148 * bits (hence value up to 2^260-1); on output, value fits on 257 bits
1152 square_f256(uint32_t *d, const uint32_t *a) in square_f256() argument
1160 square20(t, a); in square_f256()
1167 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 in square_f256()
1169 * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p in square_f256()
1171 * For a word x at bit offset n (n >= 256), we have: in square_f256()
1172 * x*2^n = x*2^(n-32) - x*2^(n-64) in square_f256()
1173 * - x*2^(n - 160) + x*2^(n-256) mod p in square_f256()
1178 for (i = 39; i >= 20; i --) { in square_f256()
1182 t[i - 2] += ARSH(x, 6); in square_f256()
1183 t[i - 3] += (x << 7) & 0x1FFF; in square_f256()
1184 t[i - 4] -= ARSH(x, 12); in square_f256()
1185 t[i - 5] -= (x << 1) & 0x1FFF; in square_f256()
1186 t[i - 12] -= ARSH(x, 4); in square_f256()
1187 t[i - 13] -= (x << 9) & 0x1FFF; in square_f256()
1188 t[i - 19] += ARSH(x, 9); in square_f256()
1189 t[i - 20] += (x << 4) & 0x1FFF; in square_f256()
1193 * Propagate carries. This is a signed propagation, and the in square_f256()
1195 * but not too much: worst case is the chain involving t[i - 3], in square_f256()
1196 * in which a value may be added to itself up to 7 times. Since in square_f256()
1197 * starting values are 13-bit each, all words fit on 20 bits in square_f256()
1205 * bits, and the values fit on 21 bits, values fit in 32-bit words, in square_f256()
1211 t[14] -= cc << 10; in square_f256()
1212 t[7] -= cc << 5; in square_f256()
1217 * end up with a value which is negative, and we don't want that. in square_f256()
1224 t[0] -= cc; in square_f256()
1227 t[17] -= cc << 3; in square_f256()
1234 * Jacobian coordinates for a point in P-256: affine coordinates (X,Y)
1236 * X = x / z^2
1241 * Coordinates are represented in arrays of 32-bit integers, each holding
1252 * Convert a point to affine coordinates:
1253 * - If the point is the point at infinity, then all three coordinates
1255 * - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y'
1266 * Invert z with a modular exponentiation: the modulus is in p256_to_affine()
1267 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is in p256_to_affine()
1268 * p-2. Exponent bit pattern (from high to low) is: in p256_to_affine()
1269 * - 32 bits of value 1 in p256_to_affine()
1270 * - 31 bits of value 0 in p256_to_affine()
1271 * - 1 bit of value 1 in p256_to_affine()
1272 * - 96 bits of value 0 in p256_to_affine()
1273 * - 94 bits of value 1 in p256_to_affine()
1274 * - 1 bit of value 0 in p256_to_affine()
1275 * - 1 bit of value 1 in p256_to_affine()
1276 * Thus, we precompute z^(2^31-1) to speed things up. in p256_to_affine()
1284 * A simple square-and-multiply for z^(2^31-1). We could save about in p256_to_affine()
1286 * this would require a bit more code, and extra stack buffers. in p256_to_affine()
1288 memcpy(t1, P->z, sizeof P->z); in p256_to_affine()
1291 mul_f256(t1, t1, P->z); in p256_to_affine()
1295 * Square-and-multiply. Apart from the squarings, we have a few in p256_to_affine()
1299 memcpy(t2, P->z, sizeof P->z); in p256_to_affine()
1312 mul_f256(t2, t2, P->z); in p256_to_affine()
1318 * Now that we have 1/z, multiply x by 1/z^2 and y by 1/z^3. in p256_to_affine()
1321 mul_f256(P->x, t1, P->x); in p256_to_affine()
1323 mul_f256(P->y, t1, P->y); in p256_to_affine()
1324 reduce_final_f256(P->x); in p256_to_affine()
1325 reduce_final_f256(P->y); in p256_to_affine()
1331 mul_f256(P->z, P->z, t2); in p256_to_affine()
1332 reduce_final_f256(P->z); in p256_to_affine()
1336 * Double a point in P-256. This function works for all valid points,
1345 * s = 4*x*y^2 in p256_double()
1346 * m = 3*(x + z^2)*(x - z^2) in p256_double()
1347 * x' = m^2 - 2*s in p256_double()
1348 * y' = m*(s - x') - 8*y^4 in p256_double()
1349 * z' = 2*y*z in p256_double()
1351 * These formulas work for all points, including points of order 2 in p256_double()
1353 * - If y = 0 then z' = 0. But there is no such point in P-256 in p256_double()
1355 * - If z = 0 then z' = 0. in p256_double()
1361 * Compute z^2 in t1. in p256_double()
1363 square_f256(t1, Q->z); in p256_double()
1366 * Compute x-z^2 in t2 and x+z^2 in t1. in p256_double()
1369 t2[i] = (F256[i] << 1) + Q->x[i] - t1[i]; in p256_double()
1370 t1[i] += Q->x[i]; in p256_double()
1376 * Compute 3*(x+z^2)*(x-z^2) in t1. in p256_double()
1385 * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3). in p256_double()
1387 square_f256(t3, Q->y); in p256_double()
1392 mul_f256(t2, Q->x, t3); in p256_double()
1400 * Compute x' = m^2 - 2*s. in p256_double()
1402 square_f256(Q->x, t1); in p256_double()
1404 Q->x[i] += (F256[i] << 2) - (t2[i] << 1); in p256_double()
1406 norm13(Q->x, Q->x, 20); in p256_double()
1407 reduce_f256(Q->x); in p256_double()
1410 * Compute z' = 2*y*z. in p256_double()
1412 mul_f256(t4, Q->y, Q->z); in p256_double()
1414 Q->z[i] = t4[i] << 1; in p256_double()
1416 norm13(Q->z, Q->z, 20); in p256_double()
1417 reduce_f256(Q->z); in p256_double()
1420 * Compute y' = m*(s - x') - 8*y^4. Note that we already have in p256_double()
1421 * 2*y^2 in t3. in p256_double()
1424 t2[i] += (F256[i] << 1) - Q->x[i]; in p256_double()
1427 mul_f256(Q->y, t1, t2); in p256_double()
1430 Q->y[i] += (F256[i] << 2) - (t4[i] << 1); in p256_double()
1432 norm13(Q->y, Q->y, 20); in p256_double()
1433 reduce_f256(Q->y); in p256_double()
1441 * - If P1 == 0 but P2 != 0
1442 * - If P1 != 0 but P2 == 0
1443 * - If P1 == P2
1449 * - P1 and P2 have the same Y coordinate
1450 * - P1 == 0 and P2 == 0
1451 * - The Y coordinate of one of the points is 0 and the other point is
1454 * The third case cannot actually happen with valid points, since a point
1455 * with Y == 0 is a point of order 2, and there is no point of order 2 on
1456 * curve P-256.
1461 * - If the result is not the point at infinity, then it is correct.
1462 * - Otherwise, if the returned value is 1, then this is a case of
1464 * - Otherwise, P1 == P2, so a "double" operation should have been
1473 * u1 = x1 * z2^2 in p256_add()
1474 * u2 = x2 * z1^2 in p256_add()
1477 * h = u2 - u1 in p256_add()
1478 * r = s2 - s1 in p256_add()
1479 * x3 = r^2 - h^3 - 2 * u1 * h^2 in p256_add()
1480 * y3 = r * (u1 * h^2 - x3) - s1 * h^3 in p256_add()
1488 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). in p256_add()
1490 square_f256(t3, P2->z); in p256_add()
1491 mul_f256(t1, P1->x, t3); in p256_add()
1492 mul_f256(t4, P2->z, t3); in p256_add()
1493 mul_f256(t3, P1->y, t4); in p256_add()
1496 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). in p256_add()
1498 square_f256(t4, P1->z); in p256_add()
1499 mul_f256(t2, P2->x, t4); in p256_add()
1500 mul_f256(t5, P1->z, t4); in p256_add()
1501 mul_f256(t4, P2->y, t5); in p256_add()
1504 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). in p256_add()
1509 t2[i] += (F256[i] << 1) - t1[i]; in p256_add()
1510 t4[i] += (F256[i] << 1) - t3[i]; in p256_add()
1520 ret = (ret | -ret) >> 31; in p256_add()
1523 * Compute u1*h^2 (in t6) and h^3 (in t5); in p256_add()
1530 * Compute x3 = r^2 - h^3 - 2*u1*h^2. in p256_add()
1532 square_f256(P1->x, t4); in p256_add()
1534 P1->x[i] += (F256[i] << 3) - t5[i] - (t6[i] << 1); in p256_add()
1536 norm13(P1->x, P1->x, 20); in p256_add()
1537 reduce_f256(P1->x); in p256_add()
1540 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. in p256_add()
1543 t6[i] += (F256[i] << 1) - P1->x[i]; in p256_add()
1546 mul_f256(P1->y, t4, t6); in p256_add()
1549 P1->y[i] += (F256[i] << 1) - t1[i]; in p256_add()
1551 norm13(P1->y, P1->y, 20); in p256_add()
1552 reduce_f256(P1->y); in p256_add()
1557 mul_f256(t1, P1->z, P2->z); in p256_add()
1558 mul_f256(P1->z, t1, t2); in p256_add()
1564 * Add point P2 to point P1. This is a specialised function for the
1565 * case when P2 is a non-zero point in affine coordinate.
1569 * - If P1 == 0
1570 * - If P1 == P2
1576 * - P1 and P2 have the same Y coordinate
1577 * - The Y coordinate of P2 is 0 and P1 is the point at infinity.
1579 * The second case cannot actually happen with valid points, since a point
1580 * with Y == 0 is a point of order 2, and there is no point of order 2 on
1581 * curve P-256.
1586 * - If the result is not the point at infinity, then it is correct.
1587 * - Otherwise, if the returned value is 1, then this is a case of
1589 * - Otherwise, P1 == P2, so a "double" operation should have been
1599 * u2 = x2 * z1^2 in p256_add_mixed()
1602 * h = u2 - u1 in p256_add_mixed()
1603 * r = s2 - s1 in p256_add_mixed()
1604 * x3 = r^2 - h^3 - 2 * u1 * h^2 in p256_add_mixed()
1605 * y3 = r * (u1 * h^2 - x3) - s1 * h^3 in p256_add_mixed()
1615 memcpy(t1, P1->x, sizeof t1); in p256_add_mixed()
1616 memcpy(t3, P1->y, sizeof t3); in p256_add_mixed()
1619 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). in p256_add_mixed()
1621 square_f256(t4, P1->z); in p256_add_mixed()
1622 mul_f256(t2, P2->x, t4); in p256_add_mixed()
1623 mul_f256(t5, P1->z, t4); in p256_add_mixed()
1624 mul_f256(t4, P2->y, t5); in p256_add_mixed()
1627 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). in p256_add_mixed()
1632 t2[i] += (F256[i] << 1) - t1[i]; in p256_add_mixed()
1633 t4[i] += (F256[i] << 1) - t3[i]; in p256_add_mixed()
1643 ret = (ret | -ret) >> 31; in p256_add_mixed()
1646 * Compute u1*h^2 (in t6) and h^3 (in t5); in p256_add_mixed()
1653 * Compute x3 = r^2 - h^3 - 2*u1*h^2. in p256_add_mixed()
1655 square_f256(P1->x, t4); in p256_add_mixed()
1657 P1->x[i] += (F256[i] << 3) - t5[i] - (t6[i] << 1); in p256_add_mixed()
1659 norm13(P1->x, P1->x, 20); in p256_add_mixed()
1660 reduce_f256(P1->x); in p256_add_mixed()
1663 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. in p256_add_mixed()
1666 t6[i] += (F256[i] << 1) - P1->x[i]; in p256_add_mixed()
1669 mul_f256(P1->y, t4, t6); in p256_add_mixed()
1672 P1->y[i] += (F256[i] << 1) - t1[i]; in p256_add_mixed()
1674 norm13(P1->y, P1->y, 20); in p256_add_mixed()
1675 reduce_f256(P1->y); in p256_add_mixed()
1680 mul_f256(P1->z, P1->z, t2); in p256_add_mixed()
1686 * Decode a P-256 point. This function does not support the point at
1726 t1[i] += (F256[i] << 3) - MUL15(3, tx[i]) + P256_B[i] - t2[i]; in p256_decode()
1738 memcpy(P->x, tx, sizeof tx); in p256_decode()
1739 memcpy(P->y, ty, sizeof ty); in p256_decode()
1740 memset(P->z, 0, sizeof P->z); in p256_decode()
1741 P->z[0] = 1; in p256_decode()
1746 * Encode a point into a buffer. This function assumes that the point is
1756 le13_to_be8(buf + 1, 32, P->x); in p256_encode()
1757 le13_to_be8(buf + 33, 32, P->y); in p256_encode()
1761 * Multiply a curve point by an integer. The integer is assumed to be
1769 * qz is a flag that is initially 1, and remains equal to 1 in p256_mul()
1772 * We use a 2-bit window to handle multiplier bits by pairs. in p256_mul()
1787 * We start with Q = 0. We process multiplier bits 2 by 2. in p256_mul()
1791 while (xlen -- > 0) { in p256_mul()
1794 for (k = 6; k >= 0; k -= 2) { in p256_mul()
1804 CCOPY(EQ(bits, 2), &T, &P2, sizeof T); in p256_mul()
1819 * the point are encoded as 20 words of 13 bits each (little-endian
1820 * order); 13-bit words are then grouped 2-by-2 into 32-bit words
1821 * (little-endian order within each word).
1917 * Lookup one of the Gwin[] values, by index. This is constant-time.
1930 m = -EQ(idx, k + 1); in lookup_Gwin()
1936 T->x[(u << 1) + 0] = xy[u] & 0xFFFF; in lookup_Gwin()
1937 T->x[(u << 1) + 1] = xy[u] >> 16; in lookup_Gwin()
1938 T->y[(u << 1) + 0] = xy[u + 10] & 0xFFFF; in lookup_Gwin()
1939 T->y[(u << 1) + 1] = xy[u + 10] >> 16; in lookup_Gwin()
1941 memset(T->z, 0, sizeof T->z); in lookup_Gwin()
1942 T->z[0] = 1; in lookup_Gwin()
1946 * Multiply the generator by an integer. The integer is assumed non-zero
1953 * qz is a flag that is initially 1, and remains equal to 1 in p256_mulgen()
1956 * We use a 4-bit window to handle multiplier bits by groups in p256_mulgen()
1958 * points in affine coordinates; we use a constant-time lookup. in p256_mulgen()
1965 while (xlen -- > 0) { in p256_mulgen()
1970 for (k = 0; k < 2; k ++) { in p256_mulgen()
2066 api_muladd(unsigned char *A, const unsigned char *B, size_t len, in api_muladd() argument
2078 r = p256_decode(&P, A, len); in api_muladd()
2105 * z = 1, t = 0 return Q (a 'double' case) in api_muladd()
2110 p256_encode(A, &P); in api_muladd()