1 2 /* $OpenBSD: sntrup761.c,v 1.8 2024/09/16 05:37:05 djm Exp $ */ 3 4 /* 5 * Public Domain, Authors: 6 * - Daniel J. Bernstein 7 * - Chitchanok Chuengsatiansup 8 * - Tanja Lange 9 * - Christine van Vredendaal 10 */ 11 12 #include "includes.h" 13 14 #ifdef USE_SNTRUP761X25519 15 16 #include <string.h> 17 #include "crypto_api.h" 18 19 #define crypto_declassify(x, y) do {} while (0) 20 21 #define int8 crypto_int8 22 #define uint8 crypto_uint8 23 #define int16 crypto_int16 24 #define uint16 crypto_uint16 25 #define int32 crypto_int32 26 #define uint32 crypto_uint32 27 #define int64 crypto_int64 28 #define uint64 crypto_uint64 29 extern volatile crypto_int16 crypto_int16_optblocker; 30 extern volatile crypto_int32 crypto_int32_optblocker; 31 extern volatile crypto_int64 crypto_int64_optblocker; 32 33 /* from supercop-20240808/cryptoint/crypto_int16.h */ 34 /* auto-generated: cd cryptoint; ./autogen */ 35 /* cryptoint 20240806 */ 36 37 #ifndef crypto_int16_h 38 #define crypto_int16_h 39 40 #define crypto_int16 int16_t 41 #define crypto_int16_unsigned uint16_t 42 43 44 45 __attribute__((unused)) 46 static inline 47 crypto_int16 crypto_int16_load(const unsigned char *crypto_int16_s) { 48 crypto_int16 crypto_int16_z = 0; 49 crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 0; 50 crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 8; 51 return crypto_int16_z; 52 } 53 54 __attribute__((unused)) 55 static inline 56 void crypto_int16_store(unsigned char *crypto_int16_s,crypto_int16 crypto_int16_x) { 57 *crypto_int16_s++ = crypto_int16_x >> 0; 58 *crypto_int16_s++ = crypto_int16_x >> 8; 59 } 60 61 __attribute__((unused)) 62 static inline 63 crypto_int16 crypto_int16_negative_mask(crypto_int16 crypto_int16_x) { 64 #if defined(__GNUC__) && defined(__x86_64__) 65 __asm__ ("sarw $15,%0" : "+r"(crypto_int16_x) : : "cc"); 66 return crypto_int16_x; 67 #elif defined(__GNUC__) && defined(__aarch64__) 68 crypto_int16 crypto_int16_y; 69 __asm__ ("sbfx %w0,%w1,15,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); 70 return crypto_int16_y; 71 #else 72 crypto_int16_x >>= 16-6; 73 crypto_int16_x ^= crypto_int16_optblocker; 74 crypto_int16_x >>= 5; 75 return crypto_int16_x; 76 #endif 77 } 78 79 __attribute__((unused)) 80 static inline 81 crypto_int16_unsigned crypto_int16_unsigned_topbit_01(crypto_int16_unsigned crypto_int16_x) { 82 #if defined(__GNUC__) && defined(__x86_64__) 83 __asm__ ("shrw $15,%0" : "+r"(crypto_int16_x) : : "cc"); 84 return crypto_int16_x; 85 #elif defined(__GNUC__) && defined(__aarch64__) 86 crypto_int16 crypto_int16_y; 87 __asm__ ("ubfx %w0,%w1,15,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); 88 return crypto_int16_y; 89 #else 90 crypto_int16_x >>= 16-6; 91 crypto_int16_x ^= crypto_int16_optblocker; 92 crypto_int16_x >>= 5; 93 return crypto_int16_x; 94 #endif 95 } 96 97 __attribute__((unused)) 98 static inline 99 crypto_int16 crypto_int16_negative_01(crypto_int16 crypto_int16_x) { 100 return crypto_int16_unsigned_topbit_01(crypto_int16_x); 101 } 102 103 __attribute__((unused)) 104 static inline 105 crypto_int16 crypto_int16_topbit_mask(crypto_int16 crypto_int16_x) { 106 return crypto_int16_negative_mask(crypto_int16_x); 107 } 108 109 __attribute__((unused)) 110 static inline 111 crypto_int16 crypto_int16_topbit_01(crypto_int16 crypto_int16_x) { 112 return crypto_int16_unsigned_topbit_01(crypto_int16_x); 113 } 114 115 __attribute__((unused)) 116 static inline 117 crypto_int16 crypto_int16_bottombit_mask(crypto_int16 crypto_int16_x) { 118 #if defined(__GNUC__) && defined(__x86_64__) 119 __asm__ ("andw $1,%0" : "+r"(crypto_int16_x) : : "cc"); 120 return -crypto_int16_x; 121 #elif defined(__GNUC__) && defined(__aarch64__) 122 crypto_int16 crypto_int16_y; 123 __asm__ ("sbfx %w0,%w1,0,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); 124 return crypto_int16_y; 125 #else 126 crypto_int16_x &= 1 ^ crypto_int16_optblocker; 127 return -crypto_int16_x; 128 #endif 129 } 130 131 __attribute__((unused)) 132 static inline 133 crypto_int16 crypto_int16_bottombit_01(crypto_int16 crypto_int16_x) { 134 #if defined(__GNUC__) && defined(__x86_64__) 135 __asm__ ("andw $1,%0" : "+r"(crypto_int16_x) : : "cc"); 136 return crypto_int16_x; 137 #elif defined(__GNUC__) && defined(__aarch64__) 138 crypto_int16 crypto_int16_y; 139 __asm__ ("ubfx %w0,%w1,0,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); 140 return crypto_int16_y; 141 #else 142 crypto_int16_x &= 1 ^ crypto_int16_optblocker; 143 return crypto_int16_x; 144 #endif 145 } 146 147 __attribute__((unused)) 148 static inline 149 crypto_int16 crypto_int16_bitinrangepublicpos_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 150 #if defined(__GNUC__) && defined(__x86_64__) 151 __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); 152 #elif defined(__GNUC__) && defined(__aarch64__) 153 __asm__ ("sxth %w0,%w0\n asr %w0,%w0,%w1" : "+&r"(crypto_int16_x) : "r"(crypto_int16_s) : ); 154 #else 155 crypto_int16_x >>= crypto_int16_s ^ crypto_int16_optblocker; 156 #endif 157 return crypto_int16_bottombit_mask(crypto_int16_x); 158 } 159 160 __attribute__((unused)) 161 static inline 162 crypto_int16 crypto_int16_bitinrangepublicpos_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 163 #if defined(__GNUC__) && defined(__x86_64__) 164 __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); 165 #elif defined(__GNUC__) && defined(__aarch64__) 166 __asm__ ("sxth %w0,%w0\n asr %w0,%w0,%w1" : "+&r"(crypto_int16_x) : "r"(crypto_int16_s) : ); 167 #else 168 crypto_int16_x >>= crypto_int16_s ^ crypto_int16_optblocker; 169 #endif 170 return crypto_int16_bottombit_01(crypto_int16_x); 171 } 172 173 __attribute__((unused)) 174 static inline 175 crypto_int16 crypto_int16_shlmod(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 176 #if defined(__GNUC__) && defined(__x86_64__) 177 crypto_int16_s &= 15; 178 __asm__ ("shlw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); 179 #elif defined(__GNUC__) && defined(__aarch64__) 180 __asm__ ("and %w0,%w0,15\n and %w1,%w1,65535\n lsl %w1,%w1,%w0" : "+&r"(crypto_int16_s), "+r"(crypto_int16_x) : : ); 181 #else 182 int crypto_int16_k, crypto_int16_l; 183 for (crypto_int16_l = 0,crypto_int16_k = 1;crypto_int16_k < 16;++crypto_int16_l,crypto_int16_k *= 2) 184 crypto_int16_x ^= (crypto_int16_x ^ (crypto_int16_x << crypto_int16_k)) & crypto_int16_bitinrangepublicpos_mask(crypto_int16_s,crypto_int16_l); 185 #endif 186 return crypto_int16_x; 187 } 188 189 __attribute__((unused)) 190 static inline 191 crypto_int16 crypto_int16_shrmod(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 192 #if defined(__GNUC__) && defined(__x86_64__) 193 crypto_int16_s &= 15; 194 __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); 195 #elif defined(__GNUC__) && defined(__aarch64__) 196 __asm__ ("and %w0,%w0,15\n sxth %w1,%w1\n asr %w1,%w1,%w0" : "+&r"(crypto_int16_s), "+r"(crypto_int16_x) : : ); 197 #else 198 int crypto_int16_k, crypto_int16_l; 199 for (crypto_int16_l = 0,crypto_int16_k = 1;crypto_int16_k < 16;++crypto_int16_l,crypto_int16_k *= 2) 200 crypto_int16_x ^= (crypto_int16_x ^ (crypto_int16_x >> crypto_int16_k)) & crypto_int16_bitinrangepublicpos_mask(crypto_int16_s,crypto_int16_l); 201 #endif 202 return crypto_int16_x; 203 } 204 205 __attribute__((unused)) 206 static inline 207 crypto_int16 crypto_int16_bitmod_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 208 crypto_int16_x = crypto_int16_shrmod(crypto_int16_x,crypto_int16_s); 209 return crypto_int16_bottombit_mask(crypto_int16_x); 210 } 211 212 __attribute__((unused)) 213 static inline 214 crypto_int16 crypto_int16_bitmod_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 215 crypto_int16_x = crypto_int16_shrmod(crypto_int16_x,crypto_int16_s); 216 return crypto_int16_bottombit_01(crypto_int16_x); 217 } 218 219 __attribute__((unused)) 220 static inline 221 crypto_int16 crypto_int16_nonzero_mask(crypto_int16 crypto_int16_x) { 222 #if defined(__GNUC__) && defined(__x86_64__) 223 crypto_int16 crypto_int16_q,crypto_int16_z; 224 __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 225 return crypto_int16_z; 226 #elif defined(__GNUC__) && defined(__aarch64__) 227 crypto_int16 crypto_int16_z; 228 __asm__ ("tst %w1,65535\n csetm %w0,ne" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 229 return crypto_int16_z; 230 #else 231 crypto_int16_x |= -crypto_int16_x; 232 return crypto_int16_negative_mask(crypto_int16_x); 233 #endif 234 } 235 236 __attribute__((unused)) 237 static inline 238 crypto_int16 crypto_int16_nonzero_01(crypto_int16 crypto_int16_x) { 239 #if defined(__GNUC__) && defined(__x86_64__) 240 crypto_int16 crypto_int16_q,crypto_int16_z; 241 __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 242 return crypto_int16_z; 243 #elif defined(__GNUC__) && defined(__aarch64__) 244 crypto_int16 crypto_int16_z; 245 __asm__ ("tst %w1,65535\n cset %w0,ne" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 246 return crypto_int16_z; 247 #else 248 crypto_int16_x |= -crypto_int16_x; 249 return crypto_int16_unsigned_topbit_01(crypto_int16_x); 250 #endif 251 } 252 253 __attribute__((unused)) 254 static inline 255 crypto_int16 crypto_int16_positive_mask(crypto_int16 crypto_int16_x) { 256 #if defined(__GNUC__) && defined(__x86_64__) 257 crypto_int16 crypto_int16_q,crypto_int16_z; 258 __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovgw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 259 return crypto_int16_z; 260 #elif defined(__GNUC__) && defined(__aarch64__) 261 crypto_int16 crypto_int16_z; 262 __asm__ ("sxth %w0,%w1\n cmp %w0,0\n csetm %w0,gt" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 263 return crypto_int16_z; 264 #else 265 crypto_int16 crypto_int16_z = -crypto_int16_x; 266 crypto_int16_z ^= crypto_int16_x & crypto_int16_z; 267 return crypto_int16_negative_mask(crypto_int16_z); 268 #endif 269 } 270 271 __attribute__((unused)) 272 static inline 273 crypto_int16 crypto_int16_positive_01(crypto_int16 crypto_int16_x) { 274 #if defined(__GNUC__) && defined(__x86_64__) 275 crypto_int16 crypto_int16_q,crypto_int16_z; 276 __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovgw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 277 return crypto_int16_z; 278 #elif defined(__GNUC__) && defined(__aarch64__) 279 crypto_int16 crypto_int16_z; 280 __asm__ ("sxth %w0,%w1\n cmp %w0,0\n cset %w0,gt" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 281 return crypto_int16_z; 282 #else 283 crypto_int16 crypto_int16_z = -crypto_int16_x; 284 crypto_int16_z ^= crypto_int16_x & crypto_int16_z; 285 return crypto_int16_unsigned_topbit_01(crypto_int16_z); 286 #endif 287 } 288 289 __attribute__((unused)) 290 static inline 291 crypto_int16 crypto_int16_zero_mask(crypto_int16 crypto_int16_x) { 292 #if defined(__GNUC__) && defined(__x86_64__) 293 crypto_int16 crypto_int16_q,crypto_int16_z; 294 __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 295 return crypto_int16_z; 296 #elif defined(__GNUC__) && defined(__aarch64__) 297 crypto_int16 crypto_int16_z; 298 __asm__ ("tst %w1,65535\n csetm %w0,eq" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 299 return crypto_int16_z; 300 #else 301 return ~crypto_int16_nonzero_mask(crypto_int16_x); 302 #endif 303 } 304 305 __attribute__((unused)) 306 static inline 307 crypto_int16 crypto_int16_zero_01(crypto_int16 crypto_int16_x) { 308 #if defined(__GNUC__) && defined(__x86_64__) 309 crypto_int16 crypto_int16_q,crypto_int16_z; 310 __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 311 return crypto_int16_z; 312 #elif defined(__GNUC__) && defined(__aarch64__) 313 crypto_int16 crypto_int16_z; 314 __asm__ ("tst %w1,65535\n cset %w0,eq" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 315 return crypto_int16_z; 316 #else 317 return 1-crypto_int16_nonzero_01(crypto_int16_x); 318 #endif 319 } 320 321 __attribute__((unused)) 322 static inline 323 crypto_int16 crypto_int16_unequal_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 324 #if defined(__GNUC__) && defined(__x86_64__) 325 crypto_int16 crypto_int16_q,crypto_int16_z; 326 __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 327 return crypto_int16_z; 328 #elif defined(__GNUC__) && defined(__aarch64__) 329 crypto_int16 crypto_int16_z; 330 __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n csetm %w0,ne" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 331 return crypto_int16_z; 332 #else 333 return crypto_int16_nonzero_mask(crypto_int16_x ^ crypto_int16_y); 334 #endif 335 } 336 337 __attribute__((unused)) 338 static inline 339 crypto_int16 crypto_int16_unequal_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 340 #if defined(__GNUC__) && defined(__x86_64__) 341 crypto_int16 crypto_int16_q,crypto_int16_z; 342 __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 343 return crypto_int16_z; 344 #elif defined(__GNUC__) && defined(__aarch64__) 345 crypto_int16 crypto_int16_z; 346 __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n cset %w0,ne" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 347 return crypto_int16_z; 348 #else 349 return crypto_int16_nonzero_01(crypto_int16_x ^ crypto_int16_y); 350 #endif 351 } 352 353 __attribute__((unused)) 354 static inline 355 crypto_int16 crypto_int16_equal_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 356 #if defined(__GNUC__) && defined(__x86_64__) 357 crypto_int16 crypto_int16_q,crypto_int16_z; 358 __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 359 return crypto_int16_z; 360 #elif defined(__GNUC__) && defined(__aarch64__) 361 crypto_int16 crypto_int16_z; 362 __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n csetm %w0,eq" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 363 return crypto_int16_z; 364 #else 365 return ~crypto_int16_unequal_mask(crypto_int16_x,crypto_int16_y); 366 #endif 367 } 368 369 __attribute__((unused)) 370 static inline 371 crypto_int16 crypto_int16_equal_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 372 #if defined(__GNUC__) && defined(__x86_64__) 373 crypto_int16 crypto_int16_q,crypto_int16_z; 374 __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 375 return crypto_int16_z; 376 #elif defined(__GNUC__) && defined(__aarch64__) 377 crypto_int16 crypto_int16_z; 378 __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n cset %w0,eq" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 379 return crypto_int16_z; 380 #else 381 return 1-crypto_int16_unequal_01(crypto_int16_x,crypto_int16_y); 382 #endif 383 } 384 385 __attribute__((unused)) 386 static inline 387 crypto_int16 crypto_int16_min(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 388 #if defined(__GNUC__) && defined(__x86_64__) 389 __asm__ ("cmpw %1,%0\n cmovgw %1,%0" : "+r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); 390 return crypto_int16_x; 391 #elif defined(__GNUC__) && defined(__aarch64__) 392 __asm__ ("sxth %w0,%w0\n cmp %w0,%w1,sxth\n csel %w0,%w0,%w1,lt" : "+&r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); 393 return crypto_int16_x; 394 #else 395 crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; 396 crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; 397 crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); 398 crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); 399 crypto_int16_z &= crypto_int16_r; 400 return crypto_int16_x ^ crypto_int16_z; 401 #endif 402 } 403 404 __attribute__((unused)) 405 static inline 406 crypto_int16 crypto_int16_max(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 407 #if defined(__GNUC__) && defined(__x86_64__) 408 __asm__ ("cmpw %1,%0\n cmovlw %1,%0" : "+r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); 409 return crypto_int16_x; 410 #elif defined(__GNUC__) && defined(__aarch64__) 411 __asm__ ("sxth %w0,%w0\n cmp %w0,%w1,sxth\n csel %w0,%w1,%w0,lt" : "+&r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); 412 return crypto_int16_x; 413 #else 414 crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; 415 crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; 416 crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); 417 crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); 418 crypto_int16_z &= crypto_int16_r; 419 return crypto_int16_y ^ crypto_int16_z; 420 #endif 421 } 422 423 __attribute__((unused)) 424 static inline 425 void crypto_int16_minmax(crypto_int16 *crypto_int16_p,crypto_int16 *crypto_int16_q) { 426 crypto_int16 crypto_int16_x = *crypto_int16_p; 427 crypto_int16 crypto_int16_y = *crypto_int16_q; 428 #if defined(__GNUC__) && defined(__x86_64__) 429 crypto_int16 crypto_int16_z; 430 __asm__ ("cmpw %2,%1\n movw %1,%0\n cmovgw %2,%1\n cmovgw %0,%2" : "=&r"(crypto_int16_z), "+&r"(crypto_int16_x), "+r"(crypto_int16_y) : : "cc"); 431 *crypto_int16_p = crypto_int16_x; 432 *crypto_int16_q = crypto_int16_y; 433 #elif defined(__GNUC__) && defined(__aarch64__) 434 crypto_int16 crypto_int16_r, crypto_int16_s; 435 __asm__ ("sxth %w0,%w0\n cmp %w0,%w3,sxth\n csel %w1,%w0,%w3,lt\n csel %w2,%w3,%w0,lt" : "+&r"(crypto_int16_x), "=&r"(crypto_int16_r), "=r"(crypto_int16_s) : "r"(crypto_int16_y) : "cc"); 436 *crypto_int16_p = crypto_int16_r; 437 *crypto_int16_q = crypto_int16_s; 438 #else 439 crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; 440 crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; 441 crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); 442 crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); 443 crypto_int16_z &= crypto_int16_r; 444 crypto_int16_x ^= crypto_int16_z; 445 crypto_int16_y ^= crypto_int16_z; 446 *crypto_int16_p = crypto_int16_x; 447 *crypto_int16_q = crypto_int16_y; 448 #endif 449 } 450 451 __attribute__((unused)) 452 static inline 453 crypto_int16 crypto_int16_smaller_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 454 #if defined(__GNUC__) && defined(__x86_64__) 455 crypto_int16 crypto_int16_q,crypto_int16_z; 456 __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovlw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 457 return crypto_int16_z; 458 #elif defined(__GNUC__) && defined(__aarch64__) 459 crypto_int16 crypto_int16_z; 460 __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n csetm %w0,lt" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 461 return crypto_int16_z; 462 #else 463 crypto_int16 crypto_int16_r = crypto_int16_x ^ crypto_int16_y; 464 crypto_int16 crypto_int16_z = crypto_int16_x - crypto_int16_y; 465 crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_x); 466 return crypto_int16_negative_mask(crypto_int16_z); 467 #endif 468 } 469 470 __attribute__((unused)) 471 static inline 472 crypto_int16 crypto_int16_smaller_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 473 #if defined(__GNUC__) && defined(__x86_64__) 474 crypto_int16 crypto_int16_q,crypto_int16_z; 475 __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovlw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 476 return crypto_int16_z; 477 #elif defined(__GNUC__) && defined(__aarch64__) 478 crypto_int16 crypto_int16_z; 479 __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n cset %w0,lt" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 480 return crypto_int16_z; 481 #else 482 crypto_int16 crypto_int16_r = crypto_int16_x ^ crypto_int16_y; 483 crypto_int16 crypto_int16_z = crypto_int16_x - crypto_int16_y; 484 crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_x); 485 return crypto_int16_unsigned_topbit_01(crypto_int16_z); 486 #endif 487 } 488 489 __attribute__((unused)) 490 static inline 491 crypto_int16 crypto_int16_leq_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 492 #if defined(__GNUC__) && defined(__x86_64__) 493 crypto_int16 crypto_int16_q,crypto_int16_z; 494 __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovlew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 495 return crypto_int16_z; 496 #elif defined(__GNUC__) && defined(__aarch64__) 497 crypto_int16 crypto_int16_z; 498 __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n csetm %w0,le" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 499 return crypto_int16_z; 500 #else 501 return ~crypto_int16_smaller_mask(crypto_int16_y,crypto_int16_x); 502 #endif 503 } 504 505 __attribute__((unused)) 506 static inline 507 crypto_int16 crypto_int16_leq_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 508 #if defined(__GNUC__) && defined(__x86_64__) 509 crypto_int16 crypto_int16_q,crypto_int16_z; 510 __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovlew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 511 return crypto_int16_z; 512 #elif defined(__GNUC__) && defined(__aarch64__) 513 crypto_int16 crypto_int16_z; 514 __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n cset %w0,le" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 515 return crypto_int16_z; 516 #else 517 return 1-crypto_int16_smaller_01(crypto_int16_y,crypto_int16_x); 518 #endif 519 } 520 521 __attribute__((unused)) 522 static inline 523 int crypto_int16_ones_num(crypto_int16 crypto_int16_x) { 524 crypto_int16_unsigned crypto_int16_y = crypto_int16_x; 525 const crypto_int16 C0 = 0x5555; 526 const crypto_int16 C1 = 0x3333; 527 const crypto_int16 C2 = 0x0f0f; 528 crypto_int16_y -= ((crypto_int16_y >> 1) & C0); 529 crypto_int16_y = (crypto_int16_y & C1) + ((crypto_int16_y >> 2) & C1); 530 crypto_int16_y = (crypto_int16_y + (crypto_int16_y >> 4)) & C2; 531 crypto_int16_y = (crypto_int16_y + (crypto_int16_y >> 8)) & 0xff; 532 return crypto_int16_y; 533 } 534 535 __attribute__((unused)) 536 static inline 537 int crypto_int16_bottomzeros_num(crypto_int16 crypto_int16_x) { 538 #if defined(__GNUC__) && defined(__x86_64__) 539 crypto_int16 fallback = 16; 540 __asm__ ("bsfw %0,%0\n cmovew %1,%0" : "+&r"(crypto_int16_x) : "r"(fallback) : "cc"); 541 return crypto_int16_x; 542 #elif defined(__GNUC__) && defined(__aarch64__) 543 int64_t crypto_int16_y; 544 __asm__ ("orr %w0,%w1,-65536\n rbit %w0,%w0\n clz %w0,%w0" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); 545 return crypto_int16_y; 546 #else 547 crypto_int16 crypto_int16_y = crypto_int16_x ^ (crypto_int16_x-1); 548 crypto_int16_y = ((crypto_int16) crypto_int16_y) >> 1; 549 crypto_int16_y &= ~(crypto_int16_x & (((crypto_int16) 1) << (16-1))); 550 return crypto_int16_ones_num(crypto_int16_y); 551 #endif 552 } 553 554 #endif 555 556 /* from supercop-20240808/cryptoint/crypto_int32.h */ 557 /* auto-generated: cd cryptoint; ./autogen */ 558 /* cryptoint 20240806 */ 559 560 #ifndef crypto_int32_h 561 #define crypto_int32_h 562 563 #define crypto_int32 int32_t 564 #define crypto_int32_unsigned uint32_t 565 566 567 568 __attribute__((unused)) 569 static inline 570 crypto_int32 crypto_int32_load(const unsigned char *crypto_int32_s) { 571 crypto_int32 crypto_int32_z = 0; 572 crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 0; 573 crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 8; 574 crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 16; 575 crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 24; 576 return crypto_int32_z; 577 } 578 579 __attribute__((unused)) 580 static inline 581 void crypto_int32_store(unsigned char *crypto_int32_s,crypto_int32 crypto_int32_x) { 582 *crypto_int32_s++ = crypto_int32_x >> 0; 583 *crypto_int32_s++ = crypto_int32_x >> 8; 584 *crypto_int32_s++ = crypto_int32_x >> 16; 585 *crypto_int32_s++ = crypto_int32_x >> 24; 586 } 587 588 __attribute__((unused)) 589 static inline 590 crypto_int32 crypto_int32_negative_mask(crypto_int32 crypto_int32_x) { 591 #if defined(__GNUC__) && defined(__x86_64__) 592 __asm__ ("sarl $31,%0" : "+r"(crypto_int32_x) : : "cc"); 593 return crypto_int32_x; 594 #elif defined(__GNUC__) && defined(__aarch64__) 595 crypto_int32 crypto_int32_y; 596 __asm__ ("asr %w0,%w1,31" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); 597 return crypto_int32_y; 598 #else 599 crypto_int32_x >>= 32-6; 600 crypto_int32_x ^= crypto_int32_optblocker; 601 crypto_int32_x >>= 5; 602 return crypto_int32_x; 603 #endif 604 } 605 606 __attribute__((unused)) 607 static inline 608 crypto_int32_unsigned crypto_int32_unsigned_topbit_01(crypto_int32_unsigned crypto_int32_x) { 609 #if defined(__GNUC__) && defined(__x86_64__) 610 __asm__ ("shrl $31,%0" : "+r"(crypto_int32_x) : : "cc"); 611 return crypto_int32_x; 612 #elif defined(__GNUC__) && defined(__aarch64__) 613 crypto_int32 crypto_int32_y; 614 __asm__ ("lsr %w0,%w1,31" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); 615 return crypto_int32_y; 616 #else 617 crypto_int32_x >>= 32-6; 618 crypto_int32_x ^= crypto_int32_optblocker; 619 crypto_int32_x >>= 5; 620 return crypto_int32_x; 621 #endif 622 } 623 624 __attribute__((unused)) 625 static inline 626 crypto_int32 crypto_int32_negative_01(crypto_int32 crypto_int32_x) { 627 return crypto_int32_unsigned_topbit_01(crypto_int32_x); 628 } 629 630 __attribute__((unused)) 631 static inline 632 crypto_int32 crypto_int32_topbit_mask(crypto_int32 crypto_int32_x) { 633 return crypto_int32_negative_mask(crypto_int32_x); 634 } 635 636 __attribute__((unused)) 637 static inline 638 crypto_int32 crypto_int32_topbit_01(crypto_int32 crypto_int32_x) { 639 return crypto_int32_unsigned_topbit_01(crypto_int32_x); 640 } 641 642 __attribute__((unused)) 643 static inline 644 crypto_int32 crypto_int32_bottombit_mask(crypto_int32 crypto_int32_x) { 645 #if defined(__GNUC__) && defined(__x86_64__) 646 __asm__ ("andl $1,%0" : "+r"(crypto_int32_x) : : "cc"); 647 return -crypto_int32_x; 648 #elif defined(__GNUC__) && defined(__aarch64__) 649 crypto_int32 crypto_int32_y; 650 __asm__ ("sbfx %w0,%w1,0,1" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); 651 return crypto_int32_y; 652 #else 653 crypto_int32_x &= 1 ^ crypto_int32_optblocker; 654 return -crypto_int32_x; 655 #endif 656 } 657 658 __attribute__((unused)) 659 static inline 660 crypto_int32 crypto_int32_bottombit_01(crypto_int32 crypto_int32_x) { 661 #if defined(__GNUC__) && defined(__x86_64__) 662 __asm__ ("andl $1,%0" : "+r"(crypto_int32_x) : : "cc"); 663 return crypto_int32_x; 664 #elif defined(__GNUC__) && defined(__aarch64__) 665 crypto_int32 crypto_int32_y; 666 __asm__ ("ubfx %w0,%w1,0,1" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); 667 return crypto_int32_y; 668 #else 669 crypto_int32_x &= 1 ^ crypto_int32_optblocker; 670 return crypto_int32_x; 671 #endif 672 } 673 674 __attribute__((unused)) 675 static inline 676 crypto_int32 crypto_int32_bitinrangepublicpos_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 677 #if defined(__GNUC__) && defined(__x86_64__) 678 __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); 679 #elif defined(__GNUC__) && defined(__aarch64__) 680 __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); 681 #else 682 crypto_int32_x >>= crypto_int32_s ^ crypto_int32_optblocker; 683 #endif 684 return crypto_int32_bottombit_mask(crypto_int32_x); 685 } 686 687 __attribute__((unused)) 688 static inline 689 crypto_int32 crypto_int32_bitinrangepublicpos_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 690 #if defined(__GNUC__) && defined(__x86_64__) 691 __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); 692 #elif defined(__GNUC__) && defined(__aarch64__) 693 __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); 694 #else 695 crypto_int32_x >>= crypto_int32_s ^ crypto_int32_optblocker; 696 #endif 697 return crypto_int32_bottombit_01(crypto_int32_x); 698 } 699 700 __attribute__((unused)) 701 static inline 702 crypto_int32 crypto_int32_shlmod(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 703 #if defined(__GNUC__) && defined(__x86_64__) 704 __asm__ ("shll %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); 705 #elif defined(__GNUC__) && defined(__aarch64__) 706 __asm__ ("lsl %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); 707 #else 708 int crypto_int32_k, crypto_int32_l; 709 for (crypto_int32_l = 0,crypto_int32_k = 1;crypto_int32_k < 32;++crypto_int32_l,crypto_int32_k *= 2) 710 crypto_int32_x ^= (crypto_int32_x ^ (crypto_int32_x << crypto_int32_k)) & crypto_int32_bitinrangepublicpos_mask(crypto_int32_s,crypto_int32_l); 711 #endif 712 return crypto_int32_x; 713 } 714 715 __attribute__((unused)) 716 static inline 717 crypto_int32 crypto_int32_shrmod(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 718 #if defined(__GNUC__) && defined(__x86_64__) 719 __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); 720 #elif defined(__GNUC__) && defined(__aarch64__) 721 __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); 722 #else 723 int crypto_int32_k, crypto_int32_l; 724 for (crypto_int32_l = 0,crypto_int32_k = 1;crypto_int32_k < 32;++crypto_int32_l,crypto_int32_k *= 2) 725 crypto_int32_x ^= (crypto_int32_x ^ (crypto_int32_x >> crypto_int32_k)) & crypto_int32_bitinrangepublicpos_mask(crypto_int32_s,crypto_int32_l); 726 #endif 727 return crypto_int32_x; 728 } 729 730 __attribute__((unused)) 731 static inline 732 crypto_int32 crypto_int32_bitmod_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 733 crypto_int32_x = crypto_int32_shrmod(crypto_int32_x,crypto_int32_s); 734 return crypto_int32_bottombit_mask(crypto_int32_x); 735 } 736 737 __attribute__((unused)) 738 static inline 739 crypto_int32 crypto_int32_bitmod_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 740 crypto_int32_x = crypto_int32_shrmod(crypto_int32_x,crypto_int32_s); 741 return crypto_int32_bottombit_01(crypto_int32_x); 742 } 743 744 __attribute__((unused)) 745 static inline 746 crypto_int32 crypto_int32_nonzero_mask(crypto_int32 crypto_int32_x) { 747 #if defined(__GNUC__) && defined(__x86_64__) 748 crypto_int32 crypto_int32_q,crypto_int32_z; 749 __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 750 return crypto_int32_z; 751 #elif defined(__GNUC__) && defined(__aarch64__) 752 crypto_int32 crypto_int32_z; 753 __asm__ ("cmp %w1,0\n csetm %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 754 return crypto_int32_z; 755 #else 756 crypto_int32_x |= -crypto_int32_x; 757 return crypto_int32_negative_mask(crypto_int32_x); 758 #endif 759 } 760 761 __attribute__((unused)) 762 static inline 763 crypto_int32 crypto_int32_nonzero_01(crypto_int32 crypto_int32_x) { 764 #if defined(__GNUC__) && defined(__x86_64__) 765 crypto_int32 crypto_int32_q,crypto_int32_z; 766 __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 767 return crypto_int32_z; 768 #elif defined(__GNUC__) && defined(__aarch64__) 769 crypto_int32 crypto_int32_z; 770 __asm__ ("cmp %w1,0\n cset %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 771 return crypto_int32_z; 772 #else 773 crypto_int32_x |= -crypto_int32_x; 774 return crypto_int32_unsigned_topbit_01(crypto_int32_x); 775 #endif 776 } 777 778 __attribute__((unused)) 779 static inline 780 crypto_int32 crypto_int32_positive_mask(crypto_int32 crypto_int32_x) { 781 #if defined(__GNUC__) && defined(__x86_64__) 782 crypto_int32 crypto_int32_q,crypto_int32_z; 783 __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovgl %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 784 return crypto_int32_z; 785 #elif defined(__GNUC__) && defined(__aarch64__) 786 crypto_int32 crypto_int32_z; 787 __asm__ ("cmp %w1,0\n csetm %w0,gt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 788 return crypto_int32_z; 789 #else 790 crypto_int32 crypto_int32_z = -crypto_int32_x; 791 crypto_int32_z ^= crypto_int32_x & crypto_int32_z; 792 return crypto_int32_negative_mask(crypto_int32_z); 793 #endif 794 } 795 796 __attribute__((unused)) 797 static inline 798 crypto_int32 crypto_int32_positive_01(crypto_int32 crypto_int32_x) { 799 #if defined(__GNUC__) && defined(__x86_64__) 800 crypto_int32 crypto_int32_q,crypto_int32_z; 801 __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovgl %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 802 return crypto_int32_z; 803 #elif defined(__GNUC__) && defined(__aarch64__) 804 crypto_int32 crypto_int32_z; 805 __asm__ ("cmp %w1,0\n cset %w0,gt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 806 return crypto_int32_z; 807 #else 808 crypto_int32 crypto_int32_z = -crypto_int32_x; 809 crypto_int32_z ^= crypto_int32_x & crypto_int32_z; 810 return crypto_int32_unsigned_topbit_01(crypto_int32_z); 811 #endif 812 } 813 814 __attribute__((unused)) 815 static inline 816 crypto_int32 crypto_int32_zero_mask(crypto_int32 crypto_int32_x) { 817 #if defined(__GNUC__) && defined(__x86_64__) 818 crypto_int32 crypto_int32_q,crypto_int32_z; 819 __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 820 return crypto_int32_z; 821 #elif defined(__GNUC__) && defined(__aarch64__) 822 crypto_int32 crypto_int32_z; 823 __asm__ ("cmp %w1,0\n csetm %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 824 return crypto_int32_z; 825 #else 826 return ~crypto_int32_nonzero_mask(crypto_int32_x); 827 #endif 828 } 829 830 __attribute__((unused)) 831 static inline 832 crypto_int32 crypto_int32_zero_01(crypto_int32 crypto_int32_x) { 833 #if defined(__GNUC__) && defined(__x86_64__) 834 crypto_int32 crypto_int32_q,crypto_int32_z; 835 __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 836 return crypto_int32_z; 837 #elif defined(__GNUC__) && defined(__aarch64__) 838 crypto_int32 crypto_int32_z; 839 __asm__ ("cmp %w1,0\n cset %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 840 return crypto_int32_z; 841 #else 842 return 1-crypto_int32_nonzero_01(crypto_int32_x); 843 #endif 844 } 845 846 __attribute__((unused)) 847 static inline 848 crypto_int32 crypto_int32_unequal_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 849 #if defined(__GNUC__) && defined(__x86_64__) 850 crypto_int32 crypto_int32_q,crypto_int32_z; 851 __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 852 return crypto_int32_z; 853 #elif defined(__GNUC__) && defined(__aarch64__) 854 crypto_int32 crypto_int32_z; 855 __asm__ ("cmp %w1,%w2\n csetm %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 856 return crypto_int32_z; 857 #else 858 return crypto_int32_nonzero_mask(crypto_int32_x ^ crypto_int32_y); 859 #endif 860 } 861 862 __attribute__((unused)) 863 static inline 864 crypto_int32 crypto_int32_unequal_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 865 #if defined(__GNUC__) && defined(__x86_64__) 866 crypto_int32 crypto_int32_q,crypto_int32_z; 867 __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 868 return crypto_int32_z; 869 #elif defined(__GNUC__) && defined(__aarch64__) 870 crypto_int32 crypto_int32_z; 871 __asm__ ("cmp %w1,%w2\n cset %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 872 return crypto_int32_z; 873 #else 874 return crypto_int32_nonzero_01(crypto_int32_x ^ crypto_int32_y); 875 #endif 876 } 877 878 __attribute__((unused)) 879 static inline 880 crypto_int32 crypto_int32_equal_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 881 #if defined(__GNUC__) && defined(__x86_64__) 882 crypto_int32 crypto_int32_q,crypto_int32_z; 883 __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 884 return crypto_int32_z; 885 #elif defined(__GNUC__) && defined(__aarch64__) 886 crypto_int32 crypto_int32_z; 887 __asm__ ("cmp %w1,%w2\n csetm %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 888 return crypto_int32_z; 889 #else 890 return ~crypto_int32_unequal_mask(crypto_int32_x,crypto_int32_y); 891 #endif 892 } 893 894 __attribute__((unused)) 895 static inline 896 crypto_int32 crypto_int32_equal_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 897 #if defined(__GNUC__) && defined(__x86_64__) 898 crypto_int32 crypto_int32_q,crypto_int32_z; 899 __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 900 return crypto_int32_z; 901 #elif defined(__GNUC__) && defined(__aarch64__) 902 crypto_int32 crypto_int32_z; 903 __asm__ ("cmp %w1,%w2\n cset %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 904 return crypto_int32_z; 905 #else 906 return 1-crypto_int32_unequal_01(crypto_int32_x,crypto_int32_y); 907 #endif 908 } 909 910 __attribute__((unused)) 911 static inline 912 crypto_int32 crypto_int32_min(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 913 #if defined(__GNUC__) && defined(__x86_64__) 914 __asm__ ("cmpl %1,%0\n cmovgl %1,%0" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); 915 return crypto_int32_x; 916 #elif defined(__GNUC__) && defined(__aarch64__) 917 __asm__ ("cmp %w0,%w1\n csel %w0,%w0,%w1,lt" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); 918 return crypto_int32_x; 919 #else 920 crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ (crypto_int64)crypto_int32_x; 921 crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; 922 crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); 923 crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); 924 crypto_int32_z &= crypto_int32_r; 925 return crypto_int32_x ^ crypto_int32_z; 926 #endif 927 } 928 929 __attribute__((unused)) 930 static inline 931 crypto_int32 crypto_int32_max(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 932 #if defined(__GNUC__) && defined(__x86_64__) 933 __asm__ ("cmpl %1,%0\n cmovll %1,%0" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); 934 return crypto_int32_x; 935 #elif defined(__GNUC__) && defined(__aarch64__) 936 __asm__ ("cmp %w0,%w1\n csel %w0,%w1,%w0,lt" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); 937 return crypto_int32_x; 938 #else 939 crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ (crypto_int64)crypto_int32_x; 940 crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; 941 crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); 942 crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); 943 crypto_int32_z &= crypto_int32_r; 944 return crypto_int32_y ^ crypto_int32_z; 945 #endif 946 } 947 948 __attribute__((unused)) 949 static inline 950 void crypto_int32_minmax(crypto_int32 *crypto_int32_p,crypto_int32 *crypto_int32_q) { 951 crypto_int32 crypto_int32_x = *crypto_int32_p; 952 crypto_int32 crypto_int32_y = *crypto_int32_q; 953 #if defined(__GNUC__) && defined(__x86_64__) 954 crypto_int32 crypto_int32_z; 955 __asm__ ("cmpl %2,%1\n movl %1,%0\n cmovgl %2,%1\n cmovgl %0,%2" : "=&r"(crypto_int32_z), "+&r"(crypto_int32_x), "+r"(crypto_int32_y) : : "cc"); 956 *crypto_int32_p = crypto_int32_x; 957 *crypto_int32_q = crypto_int32_y; 958 #elif defined(__GNUC__) && defined(__aarch64__) 959 crypto_int32 crypto_int32_r, crypto_int32_s; 960 __asm__ ("cmp %w2,%w3\n csel %w0,%w2,%w3,lt\n csel %w1,%w3,%w2,lt" : "=&r"(crypto_int32_r), "=r"(crypto_int32_s) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 961 *crypto_int32_p = crypto_int32_r; 962 *crypto_int32_q = crypto_int32_s; 963 #else 964 crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ (crypto_int64)crypto_int32_x; 965 crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; 966 crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); 967 crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); 968 crypto_int32_z &= crypto_int32_r; 969 crypto_int32_x ^= crypto_int32_z; 970 crypto_int32_y ^= crypto_int32_z; 971 *crypto_int32_p = crypto_int32_x; 972 *crypto_int32_q = crypto_int32_y; 973 #endif 974 } 975 976 __attribute__((unused)) 977 static inline 978 crypto_int32 crypto_int32_smaller_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 979 #if defined(__GNUC__) && defined(__x86_64__) 980 crypto_int32 crypto_int32_q,crypto_int32_z; 981 __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovll %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 982 return crypto_int32_z; 983 #elif defined(__GNUC__) && defined(__aarch64__) 984 crypto_int32 crypto_int32_z; 985 __asm__ ("cmp %w1,%w2\n csetm %w0,lt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 986 return crypto_int32_z; 987 #else 988 crypto_int32 crypto_int32_r = crypto_int32_x ^ crypto_int32_y; 989 crypto_int32 crypto_int32_z = crypto_int32_x - crypto_int32_y; 990 crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_x); 991 return crypto_int32_negative_mask(crypto_int32_z); 992 #endif 993 } 994 995 __attribute__((unused)) 996 static inline 997 crypto_int32 crypto_int32_smaller_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 998 #if defined(__GNUC__) && defined(__x86_64__) 999 crypto_int32 crypto_int32_q,crypto_int32_z; 1000 __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovll %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 1001 return crypto_int32_z; 1002 #elif defined(__GNUC__) && defined(__aarch64__) 1003 crypto_int32 crypto_int32_z; 1004 __asm__ ("cmp %w1,%w2\n cset %w0,lt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 1005 return crypto_int32_z; 1006 #else 1007 crypto_int32 crypto_int32_r = crypto_int32_x ^ crypto_int32_y; 1008 crypto_int32 crypto_int32_z = crypto_int32_x - crypto_int32_y; 1009 crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_x); 1010 return crypto_int32_unsigned_topbit_01(crypto_int32_z); 1011 #endif 1012 } 1013 1014 __attribute__((unused)) 1015 static inline 1016 crypto_int32 crypto_int32_leq_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 1017 #if defined(__GNUC__) && defined(__x86_64__) 1018 crypto_int32 crypto_int32_q,crypto_int32_z; 1019 __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovlel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 1020 return crypto_int32_z; 1021 #elif defined(__GNUC__) && defined(__aarch64__) 1022 crypto_int32 crypto_int32_z; 1023 __asm__ ("cmp %w1,%w2\n csetm %w0,le" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 1024 return crypto_int32_z; 1025 #else 1026 return ~crypto_int32_smaller_mask(crypto_int32_y,crypto_int32_x); 1027 #endif 1028 } 1029 1030 __attribute__((unused)) 1031 static inline 1032 crypto_int32 crypto_int32_leq_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 1033 #if defined(__GNUC__) && defined(__x86_64__) 1034 crypto_int32 crypto_int32_q,crypto_int32_z; 1035 __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovlel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 1036 return crypto_int32_z; 1037 #elif defined(__GNUC__) && defined(__aarch64__) 1038 crypto_int32 crypto_int32_z; 1039 __asm__ ("cmp %w1,%w2\n cset %w0,le" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 1040 return crypto_int32_z; 1041 #else 1042 return 1-crypto_int32_smaller_01(crypto_int32_y,crypto_int32_x); 1043 #endif 1044 } 1045 1046 __attribute__((unused)) 1047 static inline 1048 int crypto_int32_ones_num(crypto_int32 crypto_int32_x) { 1049 crypto_int32_unsigned crypto_int32_y = crypto_int32_x; 1050 const crypto_int32 C0 = 0x55555555; 1051 const crypto_int32 C1 = 0x33333333; 1052 const crypto_int32 C2 = 0x0f0f0f0f; 1053 crypto_int32_y -= ((crypto_int32_y >> 1) & C0); 1054 crypto_int32_y = (crypto_int32_y & C1) + ((crypto_int32_y >> 2) & C1); 1055 crypto_int32_y = (crypto_int32_y + (crypto_int32_y >> 4)) & C2; 1056 crypto_int32_y += crypto_int32_y >> 8; 1057 crypto_int32_y = (crypto_int32_y + (crypto_int32_y >> 16)) & 0xff; 1058 return crypto_int32_y; 1059 } 1060 1061 __attribute__((unused)) 1062 static inline 1063 int crypto_int32_bottomzeros_num(crypto_int32 crypto_int32_x) { 1064 #if defined(__GNUC__) && defined(__x86_64__) 1065 crypto_int32 fallback = 32; 1066 __asm__ ("bsfl %0,%0\n cmovel %1,%0" : "+&r"(crypto_int32_x) : "r"(fallback) : "cc"); 1067 return crypto_int32_x; 1068 #elif defined(__GNUC__) && defined(__aarch64__) 1069 int64_t crypto_int32_y; 1070 __asm__ ("rbit %w0,%w1\n clz %w0,%w0" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); 1071 return crypto_int32_y; 1072 #else 1073 crypto_int32 crypto_int32_y = crypto_int32_x ^ (crypto_int32_x-1); 1074 crypto_int32_y = ((crypto_int32) crypto_int32_y) >> 1; 1075 crypto_int32_y &= ~(crypto_int32_x & (((crypto_int32) 1) << (32-1))); 1076 return crypto_int32_ones_num(crypto_int32_y); 1077 #endif 1078 } 1079 1080 #endif 1081 1082 /* from supercop-20240808/cryptoint/crypto_int64.h */ 1083 /* auto-generated: cd cryptoint; ./autogen */ 1084 /* cryptoint 20240806 */ 1085 1086 #ifndef crypto_int64_h 1087 #define crypto_int64_h 1088 1089 #define crypto_int64 int64_t 1090 #define crypto_int64_unsigned uint64_t 1091 1092 1093 1094 __attribute__((unused)) 1095 static inline 1096 crypto_int64 crypto_int64_load(const unsigned char *crypto_int64_s) { 1097 crypto_int64 crypto_int64_z = 0; 1098 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 0; 1099 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 8; 1100 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 16; 1101 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 24; 1102 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 32; 1103 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 40; 1104 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 48; 1105 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 56; 1106 return crypto_int64_z; 1107 } 1108 1109 __attribute__((unused)) 1110 static inline 1111 void crypto_int64_store(unsigned char *crypto_int64_s,crypto_int64 crypto_int64_x) { 1112 *crypto_int64_s++ = crypto_int64_x >> 0; 1113 *crypto_int64_s++ = crypto_int64_x >> 8; 1114 *crypto_int64_s++ = crypto_int64_x >> 16; 1115 *crypto_int64_s++ = crypto_int64_x >> 24; 1116 *crypto_int64_s++ = crypto_int64_x >> 32; 1117 *crypto_int64_s++ = crypto_int64_x >> 40; 1118 *crypto_int64_s++ = crypto_int64_x >> 48; 1119 *crypto_int64_s++ = crypto_int64_x >> 56; 1120 } 1121 1122 __attribute__((unused)) 1123 static inline 1124 crypto_int64 crypto_int64_negative_mask(crypto_int64 crypto_int64_x) { 1125 #if defined(__GNUC__) && defined(__x86_64__) 1126 __asm__ ("sarq $63,%0" : "+r"(crypto_int64_x) : : "cc"); 1127 return crypto_int64_x; 1128 #elif defined(__GNUC__) && defined(__aarch64__) 1129 crypto_int64 crypto_int64_y; 1130 __asm__ ("asr %0,%1,63" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); 1131 return crypto_int64_y; 1132 #else 1133 crypto_int64_x >>= 64-6; 1134 crypto_int64_x ^= crypto_int64_optblocker; 1135 crypto_int64_x >>= 5; 1136 return crypto_int64_x; 1137 #endif 1138 } 1139 1140 __attribute__((unused)) 1141 static inline 1142 crypto_int64_unsigned crypto_int64_unsigned_topbit_01(crypto_int64_unsigned crypto_int64_x) { 1143 #if defined(__GNUC__) && defined(__x86_64__) 1144 __asm__ ("shrq $63,%0" : "+r"(crypto_int64_x) : : "cc"); 1145 return crypto_int64_x; 1146 #elif defined(__GNUC__) && defined(__aarch64__) 1147 crypto_int64 crypto_int64_y; 1148 __asm__ ("lsr %0,%1,63" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); 1149 return crypto_int64_y; 1150 #else 1151 crypto_int64_x >>= 64-6; 1152 crypto_int64_x ^= crypto_int64_optblocker; 1153 crypto_int64_x >>= 5; 1154 return crypto_int64_x; 1155 #endif 1156 } 1157 1158 __attribute__((unused)) 1159 static inline 1160 crypto_int64 crypto_int64_negative_01(crypto_int64 crypto_int64_x) { 1161 return crypto_int64_unsigned_topbit_01(crypto_int64_x); 1162 } 1163 1164 __attribute__((unused)) 1165 static inline 1166 crypto_int64 crypto_int64_topbit_mask(crypto_int64 crypto_int64_x) { 1167 return crypto_int64_negative_mask(crypto_int64_x); 1168 } 1169 1170 __attribute__((unused)) 1171 static inline 1172 crypto_int64 crypto_int64_topbit_01(crypto_int64 crypto_int64_x) { 1173 return crypto_int64_unsigned_topbit_01(crypto_int64_x); 1174 } 1175 1176 __attribute__((unused)) 1177 static inline 1178 crypto_int64 crypto_int64_bottombit_mask(crypto_int64 crypto_int64_x) { 1179 #if defined(__GNUC__) && defined(__x86_64__) 1180 __asm__ ("andq $1,%0" : "+r"(crypto_int64_x) : : "cc"); 1181 return -crypto_int64_x; 1182 #elif defined(__GNUC__) && defined(__aarch64__) 1183 crypto_int64 crypto_int64_y; 1184 __asm__ ("sbfx %0,%1,0,1" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); 1185 return crypto_int64_y; 1186 #else 1187 crypto_int64_x &= 1 ^ crypto_int64_optblocker; 1188 return -crypto_int64_x; 1189 #endif 1190 } 1191 1192 __attribute__((unused)) 1193 static inline 1194 crypto_int64 crypto_int64_bottombit_01(crypto_int64 crypto_int64_x) { 1195 #if defined(__GNUC__) && defined(__x86_64__) 1196 __asm__ ("andq $1,%0" : "+r"(crypto_int64_x) : : "cc"); 1197 return crypto_int64_x; 1198 #elif defined(__GNUC__) && defined(__aarch64__) 1199 crypto_int64 crypto_int64_y; 1200 __asm__ ("ubfx %0,%1,0,1" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); 1201 return crypto_int64_y; 1202 #else 1203 crypto_int64_x &= 1 ^ crypto_int64_optblocker; 1204 return crypto_int64_x; 1205 #endif 1206 } 1207 1208 __attribute__((unused)) 1209 static inline 1210 crypto_int64 crypto_int64_bitinrangepublicpos_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1211 #if defined(__GNUC__) && defined(__x86_64__) 1212 __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); 1213 #elif defined(__GNUC__) && defined(__aarch64__) 1214 __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); 1215 #else 1216 crypto_int64_x >>= crypto_int64_s ^ crypto_int64_optblocker; 1217 #endif 1218 return crypto_int64_bottombit_mask(crypto_int64_x); 1219 } 1220 1221 __attribute__((unused)) 1222 static inline 1223 crypto_int64 crypto_int64_bitinrangepublicpos_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1224 #if defined(__GNUC__) && defined(__x86_64__) 1225 __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); 1226 #elif defined(__GNUC__) && defined(__aarch64__) 1227 __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); 1228 #else 1229 crypto_int64_x >>= crypto_int64_s ^ crypto_int64_optblocker; 1230 #endif 1231 return crypto_int64_bottombit_01(crypto_int64_x); 1232 } 1233 1234 __attribute__((unused)) 1235 static inline 1236 crypto_int64 crypto_int64_shlmod(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1237 #if defined(__GNUC__) && defined(__x86_64__) 1238 __asm__ ("shlq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); 1239 #elif defined(__GNUC__) && defined(__aarch64__) 1240 __asm__ ("lsl %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); 1241 #else 1242 int crypto_int64_k, crypto_int64_l; 1243 for (crypto_int64_l = 0,crypto_int64_k = 1;crypto_int64_k < 64;++crypto_int64_l,crypto_int64_k *= 2) 1244 crypto_int64_x ^= (crypto_int64_x ^ (crypto_int64_x << crypto_int64_k)) & crypto_int64_bitinrangepublicpos_mask(crypto_int64_s,crypto_int64_l); 1245 #endif 1246 return crypto_int64_x; 1247 } 1248 1249 __attribute__((unused)) 1250 static inline 1251 crypto_int64 crypto_int64_shrmod(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1252 #if defined(__GNUC__) && defined(__x86_64__) 1253 __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); 1254 #elif defined(__GNUC__) && defined(__aarch64__) 1255 __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); 1256 #else 1257 int crypto_int64_k, crypto_int64_l; 1258 for (crypto_int64_l = 0,crypto_int64_k = 1;crypto_int64_k < 64;++crypto_int64_l,crypto_int64_k *= 2) 1259 crypto_int64_x ^= (crypto_int64_x ^ (crypto_int64_x >> crypto_int64_k)) & crypto_int64_bitinrangepublicpos_mask(crypto_int64_s,crypto_int64_l); 1260 #endif 1261 return crypto_int64_x; 1262 } 1263 1264 __attribute__((unused)) 1265 static inline 1266 crypto_int64 crypto_int64_bitmod_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1267 crypto_int64_x = crypto_int64_shrmod(crypto_int64_x,crypto_int64_s); 1268 return crypto_int64_bottombit_mask(crypto_int64_x); 1269 } 1270 1271 __attribute__((unused)) 1272 static inline 1273 crypto_int64 crypto_int64_bitmod_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1274 crypto_int64_x = crypto_int64_shrmod(crypto_int64_x,crypto_int64_s); 1275 return crypto_int64_bottombit_01(crypto_int64_x); 1276 } 1277 1278 __attribute__((unused)) 1279 static inline 1280 crypto_int64 crypto_int64_nonzero_mask(crypto_int64 crypto_int64_x) { 1281 #if defined(__GNUC__) && defined(__x86_64__) 1282 crypto_int64 crypto_int64_q,crypto_int64_z; 1283 __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1284 return crypto_int64_z; 1285 #elif defined(__GNUC__) && defined(__aarch64__) 1286 crypto_int64 crypto_int64_z; 1287 __asm__ ("cmp %1,0\n csetm %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1288 return crypto_int64_z; 1289 #else 1290 crypto_int64_x |= -crypto_int64_x; 1291 return crypto_int64_negative_mask(crypto_int64_x); 1292 #endif 1293 } 1294 1295 __attribute__((unused)) 1296 static inline 1297 crypto_int64 crypto_int64_nonzero_01(crypto_int64 crypto_int64_x) { 1298 #if defined(__GNUC__) && defined(__x86_64__) 1299 crypto_int64 crypto_int64_q,crypto_int64_z; 1300 __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1301 return crypto_int64_z; 1302 #elif defined(__GNUC__) && defined(__aarch64__) 1303 crypto_int64 crypto_int64_z; 1304 __asm__ ("cmp %1,0\n cset %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1305 return crypto_int64_z; 1306 #else 1307 crypto_int64_x |= -crypto_int64_x; 1308 return crypto_int64_unsigned_topbit_01(crypto_int64_x); 1309 #endif 1310 } 1311 1312 __attribute__((unused)) 1313 static inline 1314 crypto_int64 crypto_int64_positive_mask(crypto_int64 crypto_int64_x) { 1315 #if defined(__GNUC__) && defined(__x86_64__) 1316 crypto_int64 crypto_int64_q,crypto_int64_z; 1317 __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmovgq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1318 return crypto_int64_z; 1319 #elif defined(__GNUC__) && defined(__aarch64__) 1320 crypto_int64 crypto_int64_z; 1321 __asm__ ("cmp %1,0\n csetm %0,gt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1322 return crypto_int64_z; 1323 #else 1324 crypto_int64 crypto_int64_z = -crypto_int64_x; 1325 crypto_int64_z ^= crypto_int64_x & crypto_int64_z; 1326 return crypto_int64_negative_mask(crypto_int64_z); 1327 #endif 1328 } 1329 1330 __attribute__((unused)) 1331 static inline 1332 crypto_int64 crypto_int64_positive_01(crypto_int64 crypto_int64_x) { 1333 #if defined(__GNUC__) && defined(__x86_64__) 1334 crypto_int64 crypto_int64_q,crypto_int64_z; 1335 __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmovgq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1336 return crypto_int64_z; 1337 #elif defined(__GNUC__) && defined(__aarch64__) 1338 crypto_int64 crypto_int64_z; 1339 __asm__ ("cmp %1,0\n cset %0,gt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1340 return crypto_int64_z; 1341 #else 1342 crypto_int64 crypto_int64_z = -crypto_int64_x; 1343 crypto_int64_z ^= crypto_int64_x & crypto_int64_z; 1344 return crypto_int64_unsigned_topbit_01(crypto_int64_z); 1345 #endif 1346 } 1347 1348 __attribute__((unused)) 1349 static inline 1350 crypto_int64 crypto_int64_zero_mask(crypto_int64 crypto_int64_x) { 1351 #if defined(__GNUC__) && defined(__x86_64__) 1352 crypto_int64 crypto_int64_q,crypto_int64_z; 1353 __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1354 return crypto_int64_z; 1355 #elif defined(__GNUC__) && defined(__aarch64__) 1356 crypto_int64 crypto_int64_z; 1357 __asm__ ("cmp %1,0\n csetm %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1358 return crypto_int64_z; 1359 #else 1360 return ~crypto_int64_nonzero_mask(crypto_int64_x); 1361 #endif 1362 } 1363 1364 __attribute__((unused)) 1365 static inline 1366 crypto_int64 crypto_int64_zero_01(crypto_int64 crypto_int64_x) { 1367 #if defined(__GNUC__) && defined(__x86_64__) 1368 crypto_int64 crypto_int64_q,crypto_int64_z; 1369 __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1370 return crypto_int64_z; 1371 #elif defined(__GNUC__) && defined(__aarch64__) 1372 crypto_int64 crypto_int64_z; 1373 __asm__ ("cmp %1,0\n cset %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1374 return crypto_int64_z; 1375 #else 1376 return 1-crypto_int64_nonzero_01(crypto_int64_x); 1377 #endif 1378 } 1379 1380 __attribute__((unused)) 1381 static inline 1382 crypto_int64 crypto_int64_unequal_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1383 #if defined(__GNUC__) && defined(__x86_64__) 1384 crypto_int64 crypto_int64_q,crypto_int64_z; 1385 __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1386 return crypto_int64_z; 1387 #elif defined(__GNUC__) && defined(__aarch64__) 1388 crypto_int64 crypto_int64_z; 1389 __asm__ ("cmp %1,%2\n csetm %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1390 return crypto_int64_z; 1391 #else 1392 return crypto_int64_nonzero_mask(crypto_int64_x ^ crypto_int64_y); 1393 #endif 1394 } 1395 1396 __attribute__((unused)) 1397 static inline 1398 crypto_int64 crypto_int64_unequal_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1399 #if defined(__GNUC__) && defined(__x86_64__) 1400 crypto_int64 crypto_int64_q,crypto_int64_z; 1401 __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1402 return crypto_int64_z; 1403 #elif defined(__GNUC__) && defined(__aarch64__) 1404 crypto_int64 crypto_int64_z; 1405 __asm__ ("cmp %1,%2\n cset %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1406 return crypto_int64_z; 1407 #else 1408 return crypto_int64_nonzero_01(crypto_int64_x ^ crypto_int64_y); 1409 #endif 1410 } 1411 1412 __attribute__((unused)) 1413 static inline 1414 crypto_int64 crypto_int64_equal_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1415 #if defined(__GNUC__) && defined(__x86_64__) 1416 crypto_int64 crypto_int64_q,crypto_int64_z; 1417 __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1418 return crypto_int64_z; 1419 #elif defined(__GNUC__) && defined(__aarch64__) 1420 crypto_int64 crypto_int64_z; 1421 __asm__ ("cmp %1,%2\n csetm %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1422 return crypto_int64_z; 1423 #else 1424 return ~crypto_int64_unequal_mask(crypto_int64_x,crypto_int64_y); 1425 #endif 1426 } 1427 1428 __attribute__((unused)) 1429 static inline 1430 crypto_int64 crypto_int64_equal_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1431 #if defined(__GNUC__) && defined(__x86_64__) 1432 crypto_int64 crypto_int64_q,crypto_int64_z; 1433 __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1434 return crypto_int64_z; 1435 #elif defined(__GNUC__) && defined(__aarch64__) 1436 crypto_int64 crypto_int64_z; 1437 __asm__ ("cmp %1,%2\n cset %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1438 return crypto_int64_z; 1439 #else 1440 return 1-crypto_int64_unequal_01(crypto_int64_x,crypto_int64_y); 1441 #endif 1442 } 1443 1444 __attribute__((unused)) 1445 static inline 1446 crypto_int64 crypto_int64_min(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1447 #if defined(__GNUC__) && defined(__x86_64__) 1448 __asm__ ("cmpq %1,%0\n cmovgq %1,%0" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); 1449 return crypto_int64_x; 1450 #elif defined(__GNUC__) && defined(__aarch64__) 1451 __asm__ ("cmp %0,%1\n csel %0,%0,%1,lt" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); 1452 return crypto_int64_x; 1453 #else 1454 crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x; 1455 crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x; 1456 crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y); 1457 crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z); 1458 crypto_int64_z &= crypto_int64_r; 1459 return crypto_int64_x ^ crypto_int64_z; 1460 #endif 1461 } 1462 1463 __attribute__((unused)) 1464 static inline 1465 crypto_int64 crypto_int64_max(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1466 #if defined(__GNUC__) && defined(__x86_64__) 1467 __asm__ ("cmpq %1,%0\n cmovlq %1,%0" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); 1468 return crypto_int64_x; 1469 #elif defined(__GNUC__) && defined(__aarch64__) 1470 __asm__ ("cmp %0,%1\n csel %0,%1,%0,lt" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); 1471 return crypto_int64_x; 1472 #else 1473 crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x; 1474 crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x; 1475 crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y); 1476 crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z); 1477 crypto_int64_z &= crypto_int64_r; 1478 return crypto_int64_y ^ crypto_int64_z; 1479 #endif 1480 } 1481 1482 __attribute__((unused)) 1483 static inline 1484 void crypto_int64_minmax(crypto_int64 *crypto_int64_p,crypto_int64 *crypto_int64_q) { 1485 crypto_int64 crypto_int64_x = *crypto_int64_p; 1486 crypto_int64 crypto_int64_y = *crypto_int64_q; 1487 #if defined(__GNUC__) && defined(__x86_64__) 1488 crypto_int64 crypto_int64_z; 1489 __asm__ ("cmpq %2,%1\n movq %1,%0\n cmovgq %2,%1\n cmovgq %0,%2" : "=&r"(crypto_int64_z), "+&r"(crypto_int64_x), "+r"(crypto_int64_y) : : "cc"); 1490 *crypto_int64_p = crypto_int64_x; 1491 *crypto_int64_q = crypto_int64_y; 1492 #elif defined(__GNUC__) && defined(__aarch64__) 1493 crypto_int64 crypto_int64_r, crypto_int64_s; 1494 __asm__ ("cmp %2,%3\n csel %0,%2,%3,lt\n csel %1,%3,%2,lt" : "=&r"(crypto_int64_r), "=r"(crypto_int64_s) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1495 *crypto_int64_p = crypto_int64_r; 1496 *crypto_int64_q = crypto_int64_s; 1497 #else 1498 crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x; 1499 crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x; 1500 crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y); 1501 crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z); 1502 crypto_int64_z &= crypto_int64_r; 1503 crypto_int64_x ^= crypto_int64_z; 1504 crypto_int64_y ^= crypto_int64_z; 1505 *crypto_int64_p = crypto_int64_x; 1506 *crypto_int64_q = crypto_int64_y; 1507 #endif 1508 } 1509 1510 __attribute__((unused)) 1511 static inline 1512 crypto_int64 crypto_int64_smaller_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1513 #if defined(__GNUC__) && defined(__x86_64__) 1514 crypto_int64 crypto_int64_q,crypto_int64_z; 1515 __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovlq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1516 return crypto_int64_z; 1517 #elif defined(__GNUC__) && defined(__aarch64__) 1518 crypto_int64 crypto_int64_z; 1519 __asm__ ("cmp %1,%2\n csetm %0,lt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1520 return crypto_int64_z; 1521 #else 1522 crypto_int64 crypto_int64_r = crypto_int64_x ^ crypto_int64_y; 1523 crypto_int64 crypto_int64_z = crypto_int64_x - crypto_int64_y; 1524 crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_x); 1525 return crypto_int64_negative_mask(crypto_int64_z); 1526 #endif 1527 } 1528 1529 __attribute__((unused)) 1530 static inline 1531 crypto_int64 crypto_int64_smaller_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1532 #if defined(__GNUC__) && defined(__x86_64__) 1533 crypto_int64 crypto_int64_q,crypto_int64_z; 1534 __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovlq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1535 return crypto_int64_z; 1536 #elif defined(__GNUC__) && defined(__aarch64__) 1537 crypto_int64 crypto_int64_z; 1538 __asm__ ("cmp %1,%2\n cset %0,lt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1539 return crypto_int64_z; 1540 #else 1541 crypto_int64 crypto_int64_r = crypto_int64_x ^ crypto_int64_y; 1542 crypto_int64 crypto_int64_z = crypto_int64_x - crypto_int64_y; 1543 crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_x); 1544 return crypto_int64_unsigned_topbit_01(crypto_int64_z); 1545 #endif 1546 } 1547 1548 __attribute__((unused)) 1549 static inline 1550 crypto_int64 crypto_int64_leq_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1551 #if defined(__GNUC__) && defined(__x86_64__) 1552 crypto_int64 crypto_int64_q,crypto_int64_z; 1553 __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovleq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1554 return crypto_int64_z; 1555 #elif defined(__GNUC__) && defined(__aarch64__) 1556 crypto_int64 crypto_int64_z; 1557 __asm__ ("cmp %1,%2\n csetm %0,le" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1558 return crypto_int64_z; 1559 #else 1560 return ~crypto_int64_smaller_mask(crypto_int64_y,crypto_int64_x); 1561 #endif 1562 } 1563 1564 __attribute__((unused)) 1565 static inline 1566 crypto_int64 crypto_int64_leq_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1567 #if defined(__GNUC__) && defined(__x86_64__) 1568 crypto_int64 crypto_int64_q,crypto_int64_z; 1569 __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovleq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1570 return crypto_int64_z; 1571 #elif defined(__GNUC__) && defined(__aarch64__) 1572 crypto_int64 crypto_int64_z; 1573 __asm__ ("cmp %1,%2\n cset %0,le" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1574 return crypto_int64_z; 1575 #else 1576 return 1-crypto_int64_smaller_01(crypto_int64_y,crypto_int64_x); 1577 #endif 1578 } 1579 1580 __attribute__((unused)) 1581 static inline 1582 int crypto_int64_ones_num(crypto_int64 crypto_int64_x) { 1583 crypto_int64_unsigned crypto_int64_y = crypto_int64_x; 1584 const crypto_int64 C0 = 0x5555555555555555; 1585 const crypto_int64 C1 = 0x3333333333333333; 1586 const crypto_int64 C2 = 0x0f0f0f0f0f0f0f0f; 1587 crypto_int64_y -= ((crypto_int64_y >> 1) & C0); 1588 crypto_int64_y = (crypto_int64_y & C1) + ((crypto_int64_y >> 2) & C1); 1589 crypto_int64_y = (crypto_int64_y + (crypto_int64_y >> 4)) & C2; 1590 crypto_int64_y += crypto_int64_y >> 8; 1591 crypto_int64_y += crypto_int64_y >> 16; 1592 crypto_int64_y = (crypto_int64_y + (crypto_int64_y >> 32)) & 0xff; 1593 return crypto_int64_y; 1594 } 1595 1596 __attribute__((unused)) 1597 static inline 1598 int crypto_int64_bottomzeros_num(crypto_int64 crypto_int64_x) { 1599 #if defined(__GNUC__) && defined(__x86_64__) 1600 crypto_int64 fallback = 64; 1601 __asm__ ("bsfq %0,%0\n cmoveq %1,%0" : "+&r"(crypto_int64_x) : "r"(fallback) : "cc"); 1602 return crypto_int64_x; 1603 #elif defined(__GNUC__) && defined(__aarch64__) 1604 int64_t crypto_int64_y; 1605 __asm__ ("rbit %0,%1\n clz %0,%0" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); 1606 return crypto_int64_y; 1607 #else 1608 crypto_int64 crypto_int64_y = crypto_int64_x ^ (crypto_int64_x-1); 1609 crypto_int64_y = ((crypto_int64) crypto_int64_y) >> 1; 1610 crypto_int64_y &= ~(crypto_int64_x & (((crypto_int64) 1) << (64-1))); 1611 return crypto_int64_ones_num(crypto_int64_y); 1612 #endif 1613 } 1614 1615 #endif 1616 1617 /* from supercop-20240808/crypto_sort/int32/portable4/sort.c */ 1618 #define int32_MINMAX(a,b) crypto_int32_minmax(&a,&b) 1619 1620 static void crypto_sort_int32(void *array,long long n) 1621 { 1622 long long top,p,q,r,i,j; 1623 int32 *x = array; 1624 1625 if (n < 2) return; 1626 top = 1; 1627 while (top < n - top) top += top; 1628 1629 for (p = top;p >= 1;p >>= 1) { 1630 i = 0; 1631 while (i + 2 * p <= n) { 1632 for (j = i;j < i + p;++j) 1633 int32_MINMAX(x[j],x[j+p]); 1634 i += 2 * p; 1635 } 1636 for (j = i;j < n - p;++j) 1637 int32_MINMAX(x[j],x[j+p]); 1638 1639 i = 0; 1640 j = 0; 1641 for (q = top;q > p;q >>= 1) { 1642 if (j != i) for (;;) { 1643 if (j == n - q) goto done; 1644 int32 a = x[j + p]; 1645 for (r = q;r > p;r >>= 1) 1646 int32_MINMAX(a,x[j + r]); 1647 x[j + p] = a; 1648 ++j; 1649 if (j == i + p) { 1650 i += 2 * p; 1651 break; 1652 } 1653 } 1654 while (i + p <= n - q) { 1655 for (j = i;j < i + p;++j) { 1656 int32 a = x[j + p]; 1657 for (r = q;r > p;r >>= 1) 1658 int32_MINMAX(a,x[j+r]); 1659 x[j + p] = a; 1660 } 1661 i += 2 * p; 1662 } 1663 /* now i + p > n - q */ 1664 j = i; 1665 while (j < n - q) { 1666 int32 a = x[j + p]; 1667 for (r = q;r > p;r >>= 1) 1668 int32_MINMAX(a,x[j+r]); 1669 x[j + p] = a; 1670 ++j; 1671 } 1672 1673 done: ; 1674 } 1675 } 1676 } 1677 1678 /* from supercop-20240808/crypto_sort/uint32/useint32/sort.c */ 1679 1680 /* can save time by vectorizing xor loops */ 1681 /* can save time by integrating xor loops with int32_sort */ 1682 1683 static void crypto_sort_uint32(void *array,long long n) 1684 { 1685 crypto_uint32 *x = array; 1686 long long j; 1687 for (j = 0;j < n;++j) x[j] ^= 0x80000000; 1688 crypto_sort_int32(array,n); 1689 for (j = 0;j < n;++j) x[j] ^= 0x80000000; 1690 } 1691 1692 /* from supercop-20240808/crypto_kem/sntrup761/compact/kem.c */ 1693 // 20240806 djb: some automated conversion to cryptoint 1694 1695 #define p 761 1696 #define q 4591 1697 #define w 286 1698 #define q12 ((q - 1) / 2) 1699 typedef int8_t small; 1700 typedef int16_t Fq; 1701 #define Hash_bytes 32 1702 #define Small_bytes ((p + 3) / 4) 1703 typedef small Inputs[p]; 1704 #define SecretKeys_bytes (2 * Small_bytes) 1705 #define Confirm_bytes 32 1706 1707 static small F3_freeze(int16_t x) { return x - 3 * ((10923 * x + 16384) >> 15); } 1708 1709 static Fq Fq_freeze(int32_t x) { 1710 const int32_t q16 = (0x10000 + q / 2) / q; 1711 const int32_t q20 = (0x100000 + q / 2) / q; 1712 const int32_t q28 = (0x10000000 + q / 2) / q; 1713 x -= q * ((q16 * x) >> 16); 1714 x -= q * ((q20 * x) >> 20); 1715 return x - q * ((q28 * x + 0x8000000) >> 28); 1716 } 1717 1718 static int Weightw_mask(small *r) { 1719 int i, weight = 0; 1720 for (i = 0; i < p; ++i) weight += crypto_int64_bottombit_01(r[i]); 1721 return crypto_int16_nonzero_mask(weight - w); 1722 } 1723 1724 static void uint32_divmod_uint14(uint32_t *Q, uint16_t *r, uint32_t x, uint16_t m) { 1725 uint32_t qpart, mask, v = 0x80000000 / m; 1726 qpart = (x * (uint64_t)v) >> 31; 1727 x -= qpart * m; 1728 *Q = qpart; 1729 qpart = (x * (uint64_t)v) >> 31; 1730 x -= qpart * m; 1731 *Q += qpart; 1732 x -= m; 1733 *Q += 1; 1734 mask = crypto_int32_negative_mask(x); 1735 x += mask & (uint32_t)m; 1736 *Q += mask; 1737 *r = x; 1738 } 1739 1740 static uint16_t uint32_mod_uint14(uint32_t x, uint16_t m) { 1741 uint32_t Q; 1742 uint16_t r; 1743 uint32_divmod_uint14(&Q, &r, x, m); 1744 return r; 1745 } 1746 1747 static void Encode(unsigned char *out, const uint16_t *R, const uint16_t *M, long long len) { 1748 if (len == 1) { 1749 uint16_t r = R[0], m = M[0]; 1750 while (m > 1) { 1751 *out++ = r; 1752 r >>= 8; 1753 m = (m + 255) >> 8; 1754 } 1755 } 1756 if (len > 1) { 1757 uint16_t R2[(len + 1) / 2], M2[(len + 1) / 2]; 1758 long long i; 1759 for (i = 0; i < len - 1; i += 2) { 1760 uint32_t m0 = M[i]; 1761 uint32_t r = R[i] + R[i + 1] * m0; 1762 uint32_t m = M[i + 1] * m0; 1763 while (m >= 16384) { 1764 *out++ = r; 1765 r >>= 8; 1766 m = (m + 255) >> 8; 1767 } 1768 R2[i / 2] = r; 1769 M2[i / 2] = m; 1770 } 1771 if (i < len) { 1772 R2[i / 2] = R[i]; 1773 M2[i / 2] = M[i]; 1774 } 1775 Encode(out, R2, M2, (len + 1) / 2); 1776 } 1777 } 1778 1779 static void Decode(uint16_t *out, const unsigned char *S, const uint16_t *M, long long len) { 1780 if (len == 1) { 1781 if (M[0] == 1) 1782 *out = 0; 1783 else if (M[0] <= 256) 1784 *out = uint32_mod_uint14(S[0], M[0]); 1785 else 1786 *out = uint32_mod_uint14(S[0] + (((uint16_t)S[1]) << 8), M[0]); 1787 } 1788 if (len > 1) { 1789 uint16_t R2[(len + 1) / 2], M2[(len + 1) / 2], bottomr[len / 2]; 1790 uint32_t bottomt[len / 2]; 1791 long long i; 1792 for (i = 0; i < len - 1; i += 2) { 1793 uint32_t m = M[i] * (uint32_t)M[i + 1]; 1794 if (m > 256 * 16383) { 1795 bottomt[i / 2] = 256 * 256; 1796 bottomr[i / 2] = S[0] + 256 * S[1]; 1797 S += 2; 1798 M2[i / 2] = (((m + 255) >> 8) + 255) >> 8; 1799 } else if (m >= 16384) { 1800 bottomt[i / 2] = 256; 1801 bottomr[i / 2] = S[0]; 1802 S += 1; 1803 M2[i / 2] = (m + 255) >> 8; 1804 } else { 1805 bottomt[i / 2] = 1; 1806 bottomr[i / 2] = 0; 1807 M2[i / 2] = m; 1808 } 1809 } 1810 if (i < len) M2[i / 2] = M[i]; 1811 Decode(R2, S, M2, (len + 1) / 2); 1812 for (i = 0; i < len - 1; i += 2) { 1813 uint32_t r1, r = bottomr[i / 2]; 1814 uint16_t r0; 1815 r += bottomt[i / 2] * R2[i / 2]; 1816 uint32_divmod_uint14(&r1, &r0, r, M[i]); 1817 r1 = uint32_mod_uint14(r1, M[i + 1]); 1818 *out++ = r0; 1819 *out++ = r1; 1820 } 1821 if (i < len) *out++ = R2[i / 2]; 1822 } 1823 } 1824 1825 static void R3_fromRq(small *out, const Fq *r) { 1826 int i; 1827 for (i = 0; i < p; ++i) out[i] = F3_freeze(r[i]); 1828 } 1829 1830 static void R3_mult(small *h, const small *f, const small *g) { 1831 int16_t fg[p + p - 1]; 1832 int i, j; 1833 for (i = 0; i < p + p - 1; ++i) fg[i] = 0; 1834 for (i = 0; i < p; ++i) 1835 for (j = 0; j < p; ++j) fg[i + j] += f[i] * (int16_t)g[j]; 1836 for (i = p; i < p + p - 1; ++i) fg[i - p] += fg[i]; 1837 for (i = p; i < p + p - 1; ++i) fg[i - p + 1] += fg[i]; 1838 for (i = 0; i < p; ++i) h[i] = F3_freeze(fg[i]); 1839 } 1840 1841 static int R3_recip(small *out, const small *in) { 1842 small f[p + 1], g[p + 1], v[p + 1], r[p + 1]; 1843 int sign, swap, t, i, loop, delta = 1; 1844 for (i = 0; i < p + 1; ++i) v[i] = 0; 1845 for (i = 0; i < p + 1; ++i) r[i] = 0; 1846 r[0] = 1; 1847 for (i = 0; i < p; ++i) f[i] = 0; 1848 f[0] = 1; 1849 f[p - 1] = f[p] = -1; 1850 for (i = 0; i < p; ++i) g[p - 1 - i] = in[i]; 1851 g[p] = 0; 1852 for (loop = 0; loop < 2 * p - 1; ++loop) { 1853 for (i = p; i > 0; --i) v[i] = v[i - 1]; 1854 v[0] = 0; 1855 sign = -g[0] * f[0]; 1856 swap = crypto_int16_negative_mask(-delta) & crypto_int16_nonzero_mask(g[0]); 1857 delta ^= swap & (delta ^ -delta); 1858 delta += 1; 1859 for (i = 0; i < p + 1; ++i) { 1860 t = swap & (f[i] ^ g[i]); 1861 f[i] ^= t; 1862 g[i] ^= t; 1863 t = swap & (v[i] ^ r[i]); 1864 v[i] ^= t; 1865 r[i] ^= t; 1866 } 1867 for (i = 0; i < p + 1; ++i) g[i] = F3_freeze(g[i] + sign * f[i]); 1868 for (i = 0; i < p + 1; ++i) r[i] = F3_freeze(r[i] + sign * v[i]); 1869 for (i = 0; i < p; ++i) g[i] = g[i + 1]; 1870 g[p] = 0; 1871 } 1872 sign = f[0]; 1873 for (i = 0; i < p; ++i) out[i] = sign * v[p - 1 - i]; 1874 return crypto_int16_nonzero_mask(delta); 1875 } 1876 1877 static void Rq_mult_small(Fq *h, const Fq *f, const small *g) { 1878 int32_t fg[p + p - 1]; 1879 int i, j; 1880 for (i = 0; i < p + p - 1; ++i) fg[i] = 0; 1881 for (i = 0; i < p; ++i) 1882 for (j = 0; j < p; ++j) fg[i + j] += f[i] * (int32_t)g[j]; 1883 for (i = p; i < p + p - 1; ++i) fg[i - p] += fg[i]; 1884 for (i = p; i < p + p - 1; ++i) fg[i - p + 1] += fg[i]; 1885 for (i = 0; i < p; ++i) h[i] = Fq_freeze(fg[i]); 1886 } 1887 1888 static void Rq_mult3(Fq *h, const Fq *f) { 1889 int i; 1890 for (i = 0; i < p; ++i) h[i] = Fq_freeze(3 * f[i]); 1891 } 1892 1893 static Fq Fq_recip(Fq a1) { 1894 int i = 1; 1895 Fq ai = a1; 1896 while (i < q - 2) { 1897 ai = Fq_freeze(a1 * (int32_t)ai); 1898 i += 1; 1899 } 1900 return ai; 1901 } 1902 1903 static int Rq_recip3(Fq *out, const small *in) { 1904 Fq f[p + 1], g[p + 1], v[p + 1], r[p + 1], scale; 1905 int swap, t, i, loop, delta = 1; 1906 int32_t f0, g0; 1907 for (i = 0; i < p + 1; ++i) v[i] = 0; 1908 for (i = 0; i < p + 1; ++i) r[i] = 0; 1909 r[0] = Fq_recip(3); 1910 for (i = 0; i < p; ++i) f[i] = 0; 1911 f[0] = 1; 1912 f[p - 1] = f[p] = -1; 1913 for (i = 0; i < p; ++i) g[p - 1 - i] = in[i]; 1914 g[p] = 0; 1915 for (loop = 0; loop < 2 * p - 1; ++loop) { 1916 for (i = p; i > 0; --i) v[i] = v[i - 1]; 1917 v[0] = 0; 1918 swap = crypto_int16_negative_mask(-delta) & crypto_int16_nonzero_mask(g[0]); 1919 delta ^= swap & (delta ^ -delta); 1920 delta += 1; 1921 for (i = 0; i < p + 1; ++i) { 1922 t = swap & (f[i] ^ g[i]); 1923 f[i] ^= t; 1924 g[i] ^= t; 1925 t = swap & (v[i] ^ r[i]); 1926 v[i] ^= t; 1927 r[i] ^= t; 1928 } 1929 f0 = f[0]; 1930 g0 = g[0]; 1931 for (i = 0; i < p + 1; ++i) g[i] = Fq_freeze(f0 * g[i] - g0 * f[i]); 1932 for (i = 0; i < p + 1; ++i) r[i] = Fq_freeze(f0 * r[i] - g0 * v[i]); 1933 for (i = 0; i < p; ++i) g[i] = g[i + 1]; 1934 g[p] = 0; 1935 } 1936 scale = Fq_recip(f[0]); 1937 for (i = 0; i < p; ++i) out[i] = Fq_freeze(scale * (int32_t)v[p - 1 - i]); 1938 return crypto_int16_nonzero_mask(delta); 1939 } 1940 1941 static void Round(Fq *out, const Fq *a) { 1942 int i; 1943 for (i = 0; i < p; ++i) out[i] = a[i] - F3_freeze(a[i]); 1944 } 1945 1946 static void Short_fromlist(small *out, const uint32_t *in) { 1947 uint32_t L[p]; 1948 int i; 1949 for (i = 0; i < w; ++i) L[i] = in[i] & (uint32_t)-2; 1950 for (i = w; i < p; ++i) L[i] = (in[i] & (uint32_t)-3) | 1; 1951 crypto_sort_uint32(L, p); 1952 for (i = 0; i < p; ++i) out[i] = (L[i] & 3) - 1; 1953 } 1954 1955 static void Hash_prefix(unsigned char *out, int b, const unsigned char *in, int inlen) { 1956 unsigned char x[inlen + 1], h[64]; 1957 int i; 1958 x[0] = b; 1959 for (i = 0; i < inlen; ++i) x[i + 1] = in[i]; 1960 crypto_hash_sha512(h, x, inlen + 1); 1961 for (i = 0; i < 32; ++i) out[i] = h[i]; 1962 } 1963 1964 static uint32_t urandom32(void) { 1965 unsigned char c[4]; 1966 uint32_t result = 0; 1967 int i; 1968 randombytes(c, 4); 1969 for (i = 0; i < 4; ++i) result += ((uint32_t)c[i]) << (8 * i); 1970 return result; 1971 } 1972 1973 static void Short_random(small *out) { 1974 uint32_t L[p]; 1975 int i; 1976 for (i = 0; i < p; ++i) L[i] = urandom32(); 1977 Short_fromlist(out, L); 1978 } 1979 1980 static void Small_random(small *out) { 1981 int i; 1982 for (i = 0; i < p; ++i) out[i] = (((urandom32() & 0x3fffffff) * 3) >> 30) - 1; 1983 } 1984 1985 static void KeyGen(Fq *h, small *f, small *ginv) { 1986 small g[p]; 1987 Fq finv[p]; 1988 for (;;) { 1989 int result; 1990 Small_random(g); 1991 result = R3_recip(ginv, g); 1992 crypto_declassify(&result, sizeof result); 1993 if (result == 0) break; 1994 } 1995 Short_random(f); 1996 Rq_recip3(finv, f); 1997 Rq_mult_small(h, finv, g); 1998 } 1999 2000 static void Encrypt(Fq *c, const small *r, const Fq *h) { 2001 Fq hr[p]; 2002 Rq_mult_small(hr, h, r); 2003 Round(c, hr); 2004 } 2005 2006 static void Decrypt(small *r, const Fq *c, const small *f, const small *ginv) { 2007 Fq cf[p], cf3[p]; 2008 small e[p], ev[p]; 2009 int mask, i; 2010 Rq_mult_small(cf, c, f); 2011 Rq_mult3(cf3, cf); 2012 R3_fromRq(e, cf3); 2013 R3_mult(ev, e, ginv); 2014 mask = Weightw_mask(ev); 2015 for (i = 0; i < w; ++i) r[i] = ((ev[i] ^ 1) & ~mask) ^ 1; 2016 for (i = w; i < p; ++i) r[i] = ev[i] & ~mask; 2017 } 2018 2019 static void Small_encode(unsigned char *s, const small *f) { 2020 int i, j; 2021 for (i = 0; i < p / 4; ++i) { 2022 small x = 0; 2023 for (j = 0;j < 4;++j) x += (*f++ + 1) << (2 * j); 2024 *s++ = x; 2025 } 2026 *s = *f++ + 1; 2027 } 2028 2029 static void Small_decode(small *f, const unsigned char *s) { 2030 int i, j; 2031 for (i = 0; i < p / 4; ++i) { 2032 unsigned char x = *s++; 2033 for (j = 0;j < 4;++j) *f++ = ((small)((x >> (2 * j)) & 3)) - 1; 2034 } 2035 *f++ = ((small)(*s & 3)) - 1; 2036 } 2037 2038 static void Rq_encode(unsigned char *s, const Fq *r) { 2039 uint16_t R[p], M[p]; 2040 int i; 2041 for (i = 0; i < p; ++i) R[i] = r[i] + q12; 2042 for (i = 0; i < p; ++i) M[i] = q; 2043 Encode(s, R, M, p); 2044 } 2045 2046 static void Rq_decode(Fq *r, const unsigned char *s) { 2047 uint16_t R[p], M[p]; 2048 int i; 2049 for (i = 0; i < p; ++i) M[i] = q; 2050 Decode(R, s, M, p); 2051 for (i = 0; i < p; ++i) r[i] = ((Fq)R[i]) - q12; 2052 } 2053 2054 static void Rounded_encode(unsigned char *s, const Fq *r) { 2055 uint16_t R[p], M[p]; 2056 int i; 2057 for (i = 0; i < p; ++i) R[i] = ((r[i] + q12) * 10923) >> 15; 2058 for (i = 0; i < p; ++i) M[i] = (q + 2) / 3; 2059 Encode(s, R, M, p); 2060 } 2061 2062 static void Rounded_decode(Fq *r, const unsigned char *s) { 2063 uint16_t R[p], M[p]; 2064 int i; 2065 for (i = 0; i < p; ++i) M[i] = (q + 2) / 3; 2066 Decode(R, s, M, p); 2067 for (i = 0; i < p; ++i) r[i] = R[i] * 3 - q12; 2068 } 2069 2070 static void ZKeyGen(unsigned char *pk, unsigned char *sk) { 2071 Fq h[p]; 2072 small f[p], v[p]; 2073 KeyGen(h, f, v); 2074 Rq_encode(pk, h); 2075 Small_encode(sk, f); 2076 Small_encode(sk + Small_bytes, v); 2077 } 2078 2079 static void ZEncrypt(unsigned char *C, const Inputs r, const unsigned char *pk) { 2080 Fq h[p], c[p]; 2081 Rq_decode(h, pk); 2082 Encrypt(c, r, h); 2083 Rounded_encode(C, c); 2084 } 2085 2086 static void ZDecrypt(Inputs r, const unsigned char *C, const unsigned char *sk) { 2087 small f[p], v[p]; 2088 Fq c[p]; 2089 Small_decode(f, sk); 2090 Small_decode(v, sk + Small_bytes); 2091 Rounded_decode(c, C); 2092 Decrypt(r, c, f, v); 2093 } 2094 2095 static void HashConfirm(unsigned char *h, const unsigned char *r, const unsigned char *cache) { 2096 unsigned char x[Hash_bytes * 2]; 2097 int i; 2098 Hash_prefix(x, 3, r, Small_bytes); 2099 for (i = 0; i < Hash_bytes; ++i) x[Hash_bytes + i] = cache[i]; 2100 Hash_prefix(h, 2, x, sizeof x); 2101 } 2102 2103 static void HashSession(unsigned char *k, int b, const unsigned char *y, const unsigned char *z) { 2104 unsigned char x[Hash_bytes + crypto_kem_sntrup761_CIPHERTEXTBYTES]; 2105 int i; 2106 Hash_prefix(x, 3, y, Small_bytes); 2107 for (i = 0; i < crypto_kem_sntrup761_CIPHERTEXTBYTES; ++i) x[Hash_bytes + i] = z[i]; 2108 Hash_prefix(k, b, x, sizeof x); 2109 } 2110 2111 int crypto_kem_sntrup761_keypair(unsigned char *pk, unsigned char *sk) { 2112 int i; 2113 ZKeyGen(pk, sk); 2114 sk += SecretKeys_bytes; 2115 for (i = 0; i < crypto_kem_sntrup761_PUBLICKEYBYTES; ++i) *sk++ = pk[i]; 2116 randombytes(sk, Small_bytes); 2117 Hash_prefix(sk + Small_bytes, 4, pk, crypto_kem_sntrup761_PUBLICKEYBYTES); 2118 return 0; 2119 } 2120 2121 static void Hide(unsigned char *c, unsigned char *r_enc, const Inputs r, const unsigned char *pk, const unsigned char *cache) { 2122 Small_encode(r_enc, r); 2123 ZEncrypt(c, r, pk); 2124 HashConfirm(c + crypto_kem_sntrup761_CIPHERTEXTBYTES - Confirm_bytes, r_enc, cache); 2125 } 2126 2127 int crypto_kem_sntrup761_enc(unsigned char *c, unsigned char *k, const unsigned char *pk) { 2128 Inputs r; 2129 unsigned char r_enc[Small_bytes], cache[Hash_bytes]; 2130 Hash_prefix(cache, 4, pk, crypto_kem_sntrup761_PUBLICKEYBYTES); 2131 Short_random(r); 2132 Hide(c, r_enc, r, pk, cache); 2133 HashSession(k, 1, r_enc, c); 2134 return 0; 2135 } 2136 2137 static int Ciphertexts_diff_mask(const unsigned char *c, const unsigned char *c2) { 2138 uint16_t differentbits = 0; 2139 int len = crypto_kem_sntrup761_CIPHERTEXTBYTES; 2140 while (len-- > 0) differentbits |= (*c++) ^ (*c2++); 2141 return (crypto_int64_bitmod_01((differentbits - 1),8)) - 1; 2142 } 2143 2144 int crypto_kem_sntrup761_dec(unsigned char *k, const unsigned char *c, const unsigned char *sk) { 2145 const unsigned char *pk = sk + SecretKeys_bytes; 2146 const unsigned char *rho = pk + crypto_kem_sntrup761_PUBLICKEYBYTES; 2147 const unsigned char *cache = rho + Small_bytes; 2148 Inputs r; 2149 unsigned char r_enc[Small_bytes], cnew[crypto_kem_sntrup761_CIPHERTEXTBYTES]; 2150 int mask, i; 2151 ZDecrypt(r, c, sk); 2152 Hide(cnew, r_enc, r, pk, cache); 2153 mask = Ciphertexts_diff_mask(c, cnew); 2154 for (i = 0; i < Small_bytes; ++i) r_enc[i] ^= mask & (r_enc[i] ^ rho[i]); 2155 HashSession(k, 1 + mask, r_enc, c); 2156 return 0; 2157 } 2158 2159 #endif /* USE_SNTRUP761X25519 */ 2160