1 /* 2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "inner.h" 26 27 /* see inner.h */ 28 void 29 br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y, 30 const uint16_t *m, uint16_t m0i) 31 { 32 size_t len, len4, u, v; 33 uint32_t dh; 34 35 len = (m[0] + 15) >> 4; 36 len4 = len & ~(size_t)3; 37 br_i15_zero(d, m[0]); 38 dh = 0; 39 for (u = 0; u < len; u ++) { 40 uint32_t f, xu, r, zh; 41 42 xu = x[u + 1]; 43 f = MUL15((d[1] + MUL15(x[u + 1], y[1])) & 0x7FFF, m0i) 44 & 0x7FFF; 45 #if BR_ARMEL_CORTEXM_GCC 46 if (len4 != 0) { 47 uint16_t *limit; 48 49 limit = d + len4; 50 asm volatile ( 51 "\n\ 52 @ carry: r=r2 \n\ 53 @ multipliers: xu=r3 f=r4 \n\ 54 @ base registers: d+v=r5 y+v=r6 m+v=r7 \n\ 55 @ r8 contains 0x7FFF \n\ 56 @ r9 contains d+len4 \n\ 57 ldr r0, %[limit] \n\ 58 ldr r3, %[xu] \n\ 59 mov r9, r0 \n\ 60 ldr r4, %[f] \n\ 61 eor r2, r2 \n\ 62 ldr r5, %[d] \n\ 63 sub r1, r2, #1 \n\ 64 ldr r6, %[y] \n\ 65 lsr r1, r1, #17 \n\ 66 ldr r7, %[m] \n\ 67 mov r8, r1 \n\ 68 loop%=: \n\ 69 ldrh r0, [r6, #2] \n\ 70 ldrh r1, [r7, #2] \n\ 71 mul r0, r3 \n\ 72 mul r1, r4 \n\ 73 add r2, r0, r2 \n\ 74 ldrh r0, [r5, #2] \n\ 75 add r2, r1, r2 \n\ 76 mov r1, r8 \n\ 77 add r2, r0, r2 \n\ 78 and r1, r2 \n\ 79 lsr r2, r2, #15 \n\ 80 strh r1, [r5, #0] \n\ 81 \n\ 82 ldrh r0, [r6, #4] \n\ 83 ldrh r1, [r7, #4] \n\ 84 mul r0, r3 \n\ 85 mul r1, r4 \n\ 86 add r2, r0, r2 \n\ 87 ldrh r0, [r5, #4] \n\ 88 add r2, r1, r2 \n\ 89 mov r1, r8 \n\ 90 add r2, r0, r2 \n\ 91 and r1, r2 \n\ 92 lsr r2, r2, #15 \n\ 93 strh r1, [r5, #2] \n\ 94 \n\ 95 ldrh r0, [r6, #6] \n\ 96 ldrh r1, [r7, #6] \n\ 97 mul r0, r3 \n\ 98 mul r1, r4 \n\ 99 add r2, r0, r2 \n\ 100 ldrh r0, [r5, #6] \n\ 101 add r2, r1, r2 \n\ 102 mov r1, r8 \n\ 103 add r2, r0, r2 \n\ 104 and r1, r2 \n\ 105 lsr r2, r2, #15 \n\ 106 strh r1, [r5, #4] \n\ 107 \n\ 108 ldrh r0, [r6, #8] \n\ 109 ldrh r1, [r7, #8] \n\ 110 mul r0, r3 \n\ 111 mul r1, r4 \n\ 112 add r2, r0, r2 \n\ 113 ldrh r0, [r5, #8] \n\ 114 add r2, r1, r2 \n\ 115 mov r1, r8 \n\ 116 add r2, r0, r2 \n\ 117 and r1, r2 \n\ 118 lsr r2, r2, #15 \n\ 119 strh r1, [r5, #6] \n\ 120 \n\ 121 add r5, r5, #8 \n\ 122 add r6, r6, #8 \n\ 123 add r7, r7, #8 \n\ 124 cmp r5, r9 \n\ 125 bne loop%= \n\ 126 \n\ 127 str r2, %[carry] \n\ 128 " 129 : [carry] "=m" (r) 130 : [xu] "m" (xu), [f] "m" (f), [d] "m" (d), [y] "m" (y), 131 [m] "m" (m), [limit] "m" (limit) 132 : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); 133 } else { 134 r = 0; 135 } 136 v = len4; 137 #else 138 r = 0; 139 for (v = 0; v < len4; v += 4) { 140 uint32_t z; 141 142 z = d[v + 1] + MUL15(xu, y[v + 1]) 143 + MUL15(f, m[v + 1]) + r; 144 r = z >> 15; 145 d[v + 0] = z & 0x7FFF; 146 z = d[v + 2] + MUL15(xu, y[v + 2]) 147 + MUL15(f, m[v + 2]) + r; 148 r = z >> 15; 149 d[v + 1] = z & 0x7FFF; 150 z = d[v + 3] + MUL15(xu, y[v + 3]) 151 + MUL15(f, m[v + 3]) + r; 152 r = z >> 15; 153 d[v + 2] = z & 0x7FFF; 154 z = d[v + 4] + MUL15(xu, y[v + 4]) 155 + MUL15(f, m[v + 4]) + r; 156 r = z >> 15; 157 d[v + 3] = z & 0x7FFF; 158 } 159 #endif 160 for (; v < len; v ++) { 161 uint32_t z; 162 163 z = d[v + 1] + MUL15(xu, y[v + 1]) 164 + MUL15(f, m[v + 1]) + r; 165 r = z >> 15; 166 d[v + 0] = z & 0x7FFF; 167 } 168 169 zh = dh + r; 170 d[len] = zh & 0x7FFF; 171 dh = zh >> 15; 172 } 173 174 /* 175 * Restore the bit length (it was overwritten in the loop above). 176 */ 177 d[0] = m[0]; 178 179 /* 180 * d[] may be greater than m[], but it is still lower than twice 181 * the modulus. 182 */ 183 br_i15_sub(d, m, NEQ(dh, 0) | NOT(br_i15_sub(d, m, 0))); 184 } 185