/* Do not modify. This file is auto-generated from rsaz-3k-avx512.pl. */
/*
 * NOTE(review): this copy differs from the generator output in one place:
 * a vzeroupper was added before the return of
 * ossl_extract_multiplier_2x30_win5.  The same change should be made in
 * rsaz-3k-avx512.pl, otherwise it is lost on regeneration.
 *
 * Syntax: GNU as, AT&T operand order (source first, destination last).
 * Arguments are taken from %rdi, %rsi, %rdx, %rcx, %r8, i.e. the System V
 * AMD64 integer argument registers.
 */
.text

/*
 * ossl_rsaz_amm52x30_x1_ifma256
 *
 * In:   %rdi = result buffer, 32 qwords (256 bytes) are written
 *       %rsi = operand a, 32 qwords are read
 *       %rdx = operand b, 30 qwords are read, one per step
 *       %rcx = operand m, 32 qwords are read
 *       %r8  = 64-bit constant k0, passed by value
 * Out:  result stored at 0..255(%rdi), each qword masked to 52 bits
 * Clobbers: %rax, %rcx, %rdx, %r8-%r11, %ymm0-%ymm10, %ymm19-%ymm23,
 *       %k1-%k7, flags.  %rbx, %rbp, %r12-%r15 are saved and restored.
 *
 * Each step i does the following:
 *   acc += a * b[i]                       (52-bit low/high partial products)
 *   y    = (k0 * acc[0]) & (2^52 - 1)
 *   acc += m * y
 *   acc  = acc shifted down by one 52-bit digit
 * The lowest digit is tracked in the scalar register %r9, the rest in
 * %ymm3-%ymm10.  The name suggests an "almost Montgomery multiplication"
 * on 30 digits of 52 bits; confirm against the generator script.
 */
.globl	ossl_rsaz_amm52x30_x1_ifma256
.type	ossl_rsaz_amm52x30_x1_ifma256,@function
.align	32
ossl_rsaz_amm52x30_x1_ifma256:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	/* %ymm0 stays zero during the loop; %ymm3-%ymm10 = accumulator = 0 */
	vpxord	%ymm0,%ymm0,%ymm0
	vmovdqa64	%ymm0,%ymm3
	vmovdqa64	%ymm0,%ymm4
	vmovdqa64	%ymm0,%ymm5
	vmovdqa64	%ymm0,%ymm6
	vmovdqa64	%ymm0,%ymm7
	vmovdqa64	%ymm0,%ymm8
	vmovdqa64	%ymm0,%ymm9
	vmovdqa64	%ymm0,%ymm10

	xorl	%r9d,%r9d		/* scalar low-digit accumulator = 0 */

	movq	%rdx,%r11		/* %r11 = b pointer; %rdx is needed by mulx */
	movq	$0xfffffffffffff,%rax	/* %rax = 52-bit digit mask */


	movl	$7,%ebx			/* 7 passes x 4 unrolled steps = 28 steps */

.align	32
.Lloop7:
	/* ---- unrolled step 1 of 4: b word at 0(%r11) ---- */
	movq	0(%r11),%r13

	vpbroadcastq	%r13,%ymm1	/* %ymm1 = b[i] in every lane */
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12		/* %r12:%r13 = a[0] * b[i] */
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10			/* %r10:%r9 = running 128-bit low sum */

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13		/* y = (k0 * acc0) mod 2^52 */

	vpbroadcastq	%r13,%ymm2	/* %ymm2 = y in every lane */
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12		/* %r12:%r13 = m[0] * y */
	addq	%r13,%r9
	adcq	%r12,%r10

	/* %r9 = (%r10:%r9) >> 52 : drop the lowest digit */
	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	/* acc += low 52 bits of a[j]*b[i] */
	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10

	/* acc += low 52 bits of m[j]*y */
	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10


	/* shift the 32-lane accumulator down by one qword, zero enters on top */
	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm0,%ymm10

	vmovq	%xmm3,%r13
	addq	%r13,%r9		/* fold new lowest lane into scalar acc */

	/* acc += high 52 bits of a[j]*b[i] (one digit up, hence after shift) */
	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10

	/* acc += high 52 bits of m[j]*y */
	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	/* ---- unrolled step 2 of 4: b word at 8(%r11) ---- */
	movq	8(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10


	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm0,%ymm10

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	/* ---- unrolled step 3 of 4: b word at 16(%r11) ---- */
	movq	16(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10


	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm0,%ymm10

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	/* ---- unrolled step 4 of 4: b word at 24(%r11) ---- */
	movq	24(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10


	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm0,%ymm10

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	leaq	32(%r11),%r11		/* advance b by the 4 words consumed */
	decl	%ebx
	jne	.Lloop7
	/* ---- tail step 29: b word at 0(%r11) ---- */
	movq	0(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10


	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm0,%ymm10

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	/* ---- tail step 30: b word at 8(%r11) ---- */
	movq	8(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10


	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm0,%ymm10

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10

	/*
	 * Normalization: bring every lane back to 52 bits.
	 * First put the scalar low digit into lane 0 of %ymm3.
	 */
	vpbroadcastq	%r9,%ymm0
	vpblendd	$3,%ymm0,%ymm3,%ymm3



	/* carries = bits above bit 51 of each lane */
	vpsrlq	$52,%ymm3,%ymm0
	vpsrlq	$52,%ymm4,%ymm1
	vpsrlq	$52,%ymm5,%ymm2
	vpsrlq	$52,%ymm6,%ymm19
	vpsrlq	$52,%ymm7,%ymm20
	vpsrlq	$52,%ymm8,%ymm21
	vpsrlq	$52,%ymm9,%ymm22
	vpsrlq	$52,%ymm10,%ymm23


	/* move each carry one lane up; zero enters lane 0 */
	valignq	$3,%ymm22,%ymm23,%ymm23
	valignq	$3,%ymm21,%ymm22,%ymm22
	valignq	$3,%ymm20,%ymm21,%ymm21
	valignq	$3,%ymm19,%ymm20,%ymm20
	valignq	$3,%ymm2,%ymm19,%ymm19
	valignq	$3,%ymm1,%ymm2,%ymm2
	valignq	$3,%ymm0,%ymm1,%ymm1
	valignq	$3,.Lzeros(%rip),%ymm0,%ymm0


	/* keep the low 52 bits of each lane */
	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm4,%ymm4
	vpandq	.Lmask52x4(%rip),%ymm5,%ymm5
	vpandq	.Lmask52x4(%rip),%ymm6,%ymm6
	vpandq	.Lmask52x4(%rip),%ymm7,%ymm7
	vpandq	.Lmask52x4(%rip),%ymm8,%ymm8
	vpandq	.Lmask52x4(%rip),%ymm9,%ymm9
	vpandq	.Lmask52x4(%rip),%ymm10,%ymm10


	/* add the shifted carries */
	vpaddq	%ymm0,%ymm3,%ymm3
	vpaddq	%ymm1,%ymm4,%ymm4
	vpaddq	%ymm2,%ymm5,%ymm5
	vpaddq	%ymm19,%ymm6,%ymm6
	vpaddq	%ymm20,%ymm7,%ymm7
	vpaddq	%ymm21,%ymm8,%ymm8
	vpaddq	%ymm22,%ymm9,%ymm9
	vpaddq	%ymm23,%ymm10,%ymm10



	/*
	 * Predicate 6 = unsigned greater-than: lanes that now exceed
	 * 2^52-1 and so produce a carry.  Two 4-bit masks are packed per
	 * byte, giving a 32-bit map in %r14b,%r13b,%r12b,%r11b.
	 */
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm4,%k2
	kmovb	%k1,%r14d
	kmovb	%k2,%r13d
	shlb	$4,%r13b
	orb	%r13b,%r14b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm5,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm6,%k2
	kmovb	%k1,%r13d
	kmovb	%k2,%r12d
	shlb	$4,%r12b
	orb	%r12b,%r13b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm7,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm8,%k2
	kmovb	%k1,%r12d
	kmovb	%k2,%r11d
	shlb	$4,%r11b
	orb	%r11b,%r12b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm9,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm10,%k2
	kmovb	%k1,%r11d
	kmovb	%k2,%r10d
	shlb	$4,%r10b
	orb	%r10b,%r11b

	/* shift the 32-bit carry map left by one lane (add/adc chain) */
	addb	%r14b,%r14b
	adcb	%r13b,%r13b
	adcb	%r12b,%r12b
	adcb	%r11b,%r11b


	/*
	 * Predicate 0 = equal: lanes equal to 2^52-1, which pass an
	 * incoming carry on.  Map packed in %r9b,%r8b,%dl,%cl.
	 */
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm4,%k2
	kmovb	%k1,%r9d
	kmovb	%k2,%r8d
	shlb	$4,%r8b
	orb	%r8b,%r9b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm5,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm6,%k2
	kmovb	%k1,%r8d
	kmovb	%k2,%edx
	shlb	$4,%dl
	orb	%dl,%r8b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm7,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm8,%k2
	kmovb	%k1,%edx
	kmovb	%k2,%ecx
	shlb	$4,%cl
	orb	%cl,%dl

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm9,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm10,%k2
	kmovb	%k1,%ecx
	kmovb	%k2,%ebx
	shlb	$4,%bl
	orb	%bl,%cl

	/* integer add ripples carries through runs of "equal" lanes */
	addb	%r9b,%r14b
	adcb	%r8b,%r13b
	adcb	%dl,%r12b
	adcb	%cl,%r11b

	/* xor with the "equal" map leaves the lanes that receive a carry */
	xorb	%r9b,%r14b
	xorb	%r8b,%r13b
	xorb	%dl,%r12b
	xorb	%cl,%r11b

	/* unpack the map into one 4-bit opmask per vector */
	kmovb	%r14d,%k1
	shrb	$4,%r14b
	kmovb	%r14d,%k2
	kmovb	%r13d,%k3
	shrb	$4,%r13b
	kmovb	%r13d,%k4
	kmovb	%r12d,%k5
	shrb	$4,%r12b
	kmovb	%r12d,%k6
	kmovb	%r11d,%k7

	/* subtracting 2^52-1 and masking equals adding 1 modulo 2^52 */
	vpsubq	.Lmask52x4(%rip),%ymm3,%ymm3{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm4,%ymm4{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm5,%ymm5{%k3}
	vpsubq	.Lmask52x4(%rip),%ymm6,%ymm6{%k4}
	vpsubq	.Lmask52x4(%rip),%ymm7,%ymm7{%k5}
	vpsubq	.Lmask52x4(%rip),%ymm8,%ymm8{%k6}
	vpsubq	.Lmask52x4(%rip),%ymm9,%ymm9{%k7}

	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm4,%ymm4
	vpandq	.Lmask52x4(%rip),%ymm5,%ymm5
	vpandq	.Lmask52x4(%rip),%ymm6,%ymm6
	vpandq	.Lmask52x4(%rip),%ymm7,%ymm7
	vpandq	.Lmask52x4(%rip),%ymm8,%ymm8
	vpandq	.Lmask52x4(%rip),%ymm9,%ymm9

	/* eighth vector: only %k1-%k7 are usable, so %k1 is reused */
	shrb	$4,%r11b
	kmovb	%r11d,%k1

	vpsubq	.Lmask52x4(%rip),%ymm10,%ymm10{%k1}

	vpandq	.Lmask52x4(%rip),%ymm10,%ymm10

	/* store the 32-qword result */
	vmovdqu64	%ymm3,0(%rdi)
	vmovdqu64	%ymm4,32(%rdi)
	vmovdqu64	%ymm5,64(%rdi)
	vmovdqu64	%ymm6,96(%rdi)
	vmovdqu64	%ymm7,128(%rdi)
	vmovdqu64	%ymm8,160(%rdi)
	vmovdqu64	%ymm9,192(%rdi)
	vmovdqu64	%ymm10,224(%rdi)

	vzeroupper
	/* restore callee-saved registers in reverse push order */
	leaq	(%rsp),%rax
.cfi_def_cfa_register	%rax
	movq	0(%rax),%r15
.cfi_restore	%r15
	movq	8(%rax),%r14
.cfi_restore	%r14
	movq	16(%rax),%r13
.cfi_restore	%r13
	movq	24(%rax),%r12
.cfi_restore	%r12
	movq	32(%rax),%rbp
.cfi_restore	%rbp
	movq	40(%rax),%rbx
.cfi_restore	%rbx
	leaq	48(%rax),%rsp
.cfi_def_cfa	%rsp,8
.Lossl_rsaz_amm52x30_x1_ifma256_epilogue:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	ossl_rsaz_amm52x30_x1_ifma256, .-ossl_rsaz_amm52x30_x1_ifma256
.section	.rodata
.align	32
/* 2^52 - 1 in each of four qword lanes */
.Lmask52x4:
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.text

/*
 * ossl_rsaz_amm52x30_x2_ifma256
 *
 * Two independent instances of the x1 computation, interleaved step by
 * step.  The second instance uses the data 256 bytes after the first in
 * every buffer.
 *
 * In:   %rdi = result buffer, 64 qwords (512 bytes) are written
 *       %rsi = operand a, first instance at 0, second at 256
 *       %rdx = operand b, 30 qwords read at 0 and 30 qwords at 256
 *       %rcx = operand m, first instance at 0, second at 256
 *       %r8  = pointer to two 64-bit constants: k0 at 0(%r8) and 8(%r8)
 * Out:  results at 0..255(%rdi) and 256..511(%rdi), lanes masked to 52 bits
 * Clobbers: %rax, %rcx, %rdx, %r8-%r11, %ymm0-%ymm23, %k1-%k7, flags.
 *       %rbx, %rbp, %r12-%r15 are saved and restored.
 *
 * Scalar low-digit accumulators: %r9 (first), %r15 (second).
 * Vector accumulators: %ymm3-%ymm10 (first), %ymm11-%ymm18 (second).
 */
.globl	ossl_rsaz_amm52x30_x2_ifma256
.type	ossl_rsaz_amm52x30_x2_ifma256,@function
.align	32
ossl_rsaz_amm52x30_x2_ifma256:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	/* zero both vector accumulators; %ymm0 stays zero during the loop */
	vpxord	%ymm0,%ymm0,%ymm0
	vmovdqa64	%ymm0,%ymm3
	vmovdqa64	%ymm0,%ymm4
	vmovdqa64	%ymm0,%ymm5
	vmovdqa64	%ymm0,%ymm6
	vmovdqa64	%ymm0,%ymm7
	vmovdqa64	%ymm0,%ymm8
	vmovdqa64	%ymm0,%ymm9
	vmovdqa64	%ymm0,%ymm10

	vmovdqa64	%ymm0,%ymm11
	vmovdqa64	%ymm0,%ymm12
	vmovdqa64	%ymm0,%ymm13
	vmovdqa64	%ymm0,%ymm14
	vmovdqa64	%ymm0,%ymm15
	vmovdqa64	%ymm0,%ymm16
	vmovdqa64	%ymm0,%ymm17
	vmovdqa64	%ymm0,%ymm18


	xorl	%r9d,%r9d		/* scalar acc, first instance */
	xorl	%r15d,%r15d		/* scalar acc, second instance */

	movq	%rdx,%r11		/* %r11 = b pointer; %rdx is needed by mulx */
	movq	$0xfffffffffffff,%rax	/* %rax = 52-bit digit mask */

	movl	$30,%ebx		/* 30 steps, not unrolled */

.align	32
.Lloop30:
	/* ---- first instance: b word at 0(%r11), k0 at (%r8) ---- */
	movq	0(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12		/* %r12:%r13 = a[0] * b[i] */
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	(%r8),%r13
	imulq	%r9,%r13
	andq	%rax,%r13		/* y = (k0 * acc0) mod 2^52 */

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12		/* %r12:%r13 = m[0] * y */
	addq	%r13,%r9
	adcq	%r12,%r10

	/* %r9 = (%r10:%r9) >> 52 */
	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10


	/* shift accumulator down by one qword lane */
	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm0,%ymm10

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	/* ---- second instance: b word at 256(%r11), k0 at 8(%r8) ---- */
	movq	256(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	256(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r15
	movq	%r12,%r10
	adcq	$0,%r10

	movq	8(%r8),%r13
	imulq	%r15,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	256(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r15
	adcq	%r12,%r10

	/* %r15 = (%r10:%r15) >> 52 */
	shrq	$52,%r15
	salq	$12,%r10
	orq	%r10,%r15

	vpmadd52luq	256(%rsi),%ymm1,%ymm11
	vpmadd52luq	288(%rsi),%ymm1,%ymm12
	vpmadd52luq	320(%rsi),%ymm1,%ymm13
	vpmadd52luq	352(%rsi),%ymm1,%ymm14
	vpmadd52luq	384(%rsi),%ymm1,%ymm15
	vpmadd52luq	416(%rsi),%ymm1,%ymm16
	vpmadd52luq	448(%rsi),%ymm1,%ymm17
	vpmadd52luq	480(%rsi),%ymm1,%ymm18

	vpmadd52luq	256(%rcx),%ymm2,%ymm11
	vpmadd52luq	288(%rcx),%ymm2,%ymm12
	vpmadd52luq	320(%rcx),%ymm2,%ymm13
	vpmadd52luq	352(%rcx),%ymm2,%ymm14
	vpmadd52luq	384(%rcx),%ymm2,%ymm15
	vpmadd52luq	416(%rcx),%ymm2,%ymm16
	vpmadd52luq	448(%rcx),%ymm2,%ymm17
	vpmadd52luq	480(%rcx),%ymm2,%ymm18


	valignq	$1,%ymm11,%ymm12,%ymm11
	valignq	$1,%ymm12,%ymm13,%ymm12
	valignq	$1,%ymm13,%ymm14,%ymm13
	valignq	$1,%ymm14,%ymm15,%ymm14
	valignq	$1,%ymm15,%ymm16,%ymm15
	valignq	$1,%ymm16,%ymm17,%ymm16
	valignq	$1,%ymm17,%ymm18,%ymm17
	valignq	$1,%ymm18,%ymm0,%ymm18

	vmovq	%xmm11,%r13
	addq	%r13,%r15

	vpmadd52huq	256(%rsi),%ymm1,%ymm11
	vpmadd52huq	288(%rsi),%ymm1,%ymm12
	vpmadd52huq	320(%rsi),%ymm1,%ymm13
	vpmadd52huq	352(%rsi),%ymm1,%ymm14
	vpmadd52huq	384(%rsi),%ymm1,%ymm15
	vpmadd52huq	416(%rsi),%ymm1,%ymm16
	vpmadd52huq	448(%rsi),%ymm1,%ymm17
	vpmadd52huq	480(%rsi),%ymm1,%ymm18

	vpmadd52huq	256(%rcx),%ymm2,%ymm11
	vpmadd52huq	288(%rcx),%ymm2,%ymm12
	vpmadd52huq	320(%rcx),%ymm2,%ymm13
	vpmadd52huq	352(%rcx),%ymm2,%ymm14
	vpmadd52huq	384(%rcx),%ymm2,%ymm15
	vpmadd52huq	416(%rcx),%ymm2,%ymm16
	vpmadd52huq	448(%rcx),%ymm2,%ymm17
	vpmadd52huq	480(%rcx),%ymm2,%ymm18
	leaq	8(%r11),%r11		/* next b word for both instances */
	decl	%ebx
	jne	.Lloop30

	/*
	 * Normalization of the first result (%ymm3-%ymm10, low digit in
	 * %r9).  Same sequence as in ossl_rsaz_amm52x30_x1_ifma256.  %r15
	 * is not touched here, so it survives for the second pass.
	 */
	vpbroadcastq	%r9,%ymm0
	vpblendd	$3,%ymm0,%ymm3,%ymm3



	vpsrlq	$52,%ymm3,%ymm0
	vpsrlq	$52,%ymm4,%ymm1
	vpsrlq	$52,%ymm5,%ymm2
	vpsrlq	$52,%ymm6,%ymm19
	vpsrlq	$52,%ymm7,%ymm20
	vpsrlq	$52,%ymm8,%ymm21
	vpsrlq	$52,%ymm9,%ymm22
	vpsrlq	$52,%ymm10,%ymm23


	valignq	$3,%ymm22,%ymm23,%ymm23
	valignq	$3,%ymm21,%ymm22,%ymm22
	valignq	$3,%ymm20,%ymm21,%ymm21
	valignq	$3,%ymm19,%ymm20,%ymm20
	valignq	$3,%ymm2,%ymm19,%ymm19
	valignq	$3,%ymm1,%ymm2,%ymm2
	valignq	$3,%ymm0,%ymm1,%ymm1
	valignq	$3,.Lzeros(%rip),%ymm0,%ymm0


	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm4,%ymm4
	vpandq	.Lmask52x4(%rip),%ymm5,%ymm5
	vpandq	.Lmask52x4(%rip),%ymm6,%ymm6
	vpandq	.Lmask52x4(%rip),%ymm7,%ymm7
	vpandq	.Lmask52x4(%rip),%ymm8,%ymm8
	vpandq	.Lmask52x4(%rip),%ymm9,%ymm9
	vpandq	.Lmask52x4(%rip),%ymm10,%ymm10


	vpaddq	%ymm0,%ymm3,%ymm3
	vpaddq	%ymm1,%ymm4,%ymm4
	vpaddq	%ymm2,%ymm5,%ymm5
	vpaddq	%ymm19,%ymm6,%ymm6
	vpaddq	%ymm20,%ymm7,%ymm7
	vpaddq	%ymm21,%ymm8,%ymm8
	vpaddq	%ymm22,%ymm9,%ymm9
	vpaddq	%ymm23,%ymm10,%ymm10



	/* lanes greater than 2^52-1 (carry producers) */
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm4,%k2
	kmovb	%k1,%r14d
	kmovb	%k2,%r13d
	shlb	$4,%r13b
	orb	%r13b,%r14b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm5,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm6,%k2
	kmovb	%k1,%r13d
	kmovb	%k2,%r12d
	shlb	$4,%r12b
	orb	%r12b,%r13b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm7,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm8,%k2
	kmovb	%k1,%r12d
	kmovb	%k2,%r11d
	shlb	$4,%r11b
	orb	%r11b,%r12b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm9,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm10,%k2
	kmovb	%k1,%r11d
	kmovb	%k2,%r10d
	shlb	$4,%r10b
	orb	%r10b,%r11b

	addb	%r14b,%r14b
	adcb	%r13b,%r13b
	adcb	%r12b,%r12b
	adcb	%r11b,%r11b


	/* lanes equal to 2^52-1 (carry propagators) */
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm4,%k2
	kmovb	%k1,%r9d
	kmovb	%k2,%r8d
	shlb	$4,%r8b
	orb	%r8b,%r9b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm5,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm6,%k2
	kmovb	%k1,%r8d
	kmovb	%k2,%edx
	shlb	$4,%dl
	orb	%dl,%r8b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm7,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm8,%k2
	kmovb	%k1,%edx
	kmovb	%k2,%ecx
	shlb	$4,%cl
	orb	%cl,%dl

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm9,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm10,%k2
	kmovb	%k1,%ecx
	kmovb	%k2,%ebx
	shlb	$4,%bl
	orb	%bl,%cl

	addb	%r9b,%r14b
	adcb	%r8b,%r13b
	adcb	%dl,%r12b
	adcb	%cl,%r11b

	xorb	%r9b,%r14b
	xorb	%r8b,%r13b
	xorb	%dl,%r12b
	xorb	%cl,%r11b

	kmovb	%r14d,%k1
	shrb	$4,%r14b
	kmovb	%r14d,%k2
	kmovb	%r13d,%k3
	shrb	$4,%r13b
	kmovb	%r13d,%k4
	kmovb	%r12d,%k5
	shrb	$4,%r12b
	kmovb	%r12d,%k6
	kmovb	%r11d,%k7

	vpsubq	.Lmask52x4(%rip),%ymm3,%ymm3{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm4,%ymm4{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm5,%ymm5{%k3}
	vpsubq	.Lmask52x4(%rip),%ymm6,%ymm6{%k4}
	vpsubq	.Lmask52x4(%rip),%ymm7,%ymm7{%k5}
	vpsubq	.Lmask52x4(%rip),%ymm8,%ymm8{%k6}
	vpsubq	.Lmask52x4(%rip),%ymm9,%ymm9{%k7}

	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm4,%ymm4
	vpandq	.Lmask52x4(%rip),%ymm5,%ymm5
	vpandq	.Lmask52x4(%rip),%ymm6,%ymm6
	vpandq	.Lmask52x4(%rip),%ymm7,%ymm7
	vpandq	.Lmask52x4(%rip),%ymm8,%ymm8
	vpandq	.Lmask52x4(%rip),%ymm9,%ymm9

	shrb	$4,%r11b
	kmovb	%r11d,%k1

	vpsubq	.Lmask52x4(%rip),%ymm10,%ymm10{%k1}

	vpandq	.Lmask52x4(%rip),%ymm10,%ymm10

	/*
	 * Normalization of the second result (%ymm11-%ymm18, low digit in
	 * %r15).  Same sequence with the register set shifted by eight.
	 */
	vpbroadcastq	%r15,%ymm0
	vpblendd	$3,%ymm0,%ymm11,%ymm11



	vpsrlq	$52,%ymm11,%ymm0
	vpsrlq	$52,%ymm12,%ymm1
	vpsrlq	$52,%ymm13,%ymm2
	vpsrlq	$52,%ymm14,%ymm19
	vpsrlq	$52,%ymm15,%ymm20
	vpsrlq	$52,%ymm16,%ymm21
	vpsrlq	$52,%ymm17,%ymm22
	vpsrlq	$52,%ymm18,%ymm23


	valignq	$3,%ymm22,%ymm23,%ymm23
	valignq	$3,%ymm21,%ymm22,%ymm22
	valignq	$3,%ymm20,%ymm21,%ymm21
	valignq	$3,%ymm19,%ymm20,%ymm20
	valignq	$3,%ymm2,%ymm19,%ymm19
	valignq	$3,%ymm1,%ymm2,%ymm2
	valignq	$3,%ymm0,%ymm1,%ymm1
	valignq	$3,.Lzeros(%rip),%ymm0,%ymm0


	vpandq	.Lmask52x4(%rip),%ymm11,%ymm11
	vpandq	.Lmask52x4(%rip),%ymm12,%ymm12
	vpandq	.Lmask52x4(%rip),%ymm13,%ymm13
	vpandq	.Lmask52x4(%rip),%ymm14,%ymm14
	vpandq	.Lmask52x4(%rip),%ymm15,%ymm15
	vpandq	.Lmask52x4(%rip),%ymm16,%ymm16
	vpandq	.Lmask52x4(%rip),%ymm17,%ymm17
	vpandq	.Lmask52x4(%rip),%ymm18,%ymm18


	vpaddq	%ymm0,%ymm11,%ymm11
	vpaddq	%ymm1,%ymm12,%ymm12
	vpaddq	%ymm2,%ymm13,%ymm13
	vpaddq	%ymm19,%ymm14,%ymm14
	vpaddq	%ymm20,%ymm15,%ymm15
	vpaddq	%ymm21,%ymm16,%ymm16
	vpaddq	%ymm22,%ymm17,%ymm17
	vpaddq	%ymm23,%ymm18,%ymm18



	vpcmpuq	$6,.Lmask52x4(%rip),%ymm11,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm12,%k2
	kmovb	%k1,%r14d
	kmovb	%k2,%r13d
	shlb	$4,%r13b
	orb	%r13b,%r14b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm13,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm14,%k2
	kmovb	%k1,%r13d
	kmovb	%k2,%r12d
	shlb	$4,%r12b
	orb	%r12b,%r13b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm15,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm16,%k2
	kmovb	%k1,%r12d
	kmovb	%k2,%r11d
	shlb	$4,%r11b
	orb	%r11b,%r12b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm17,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm18,%k2
	kmovb	%k1,%r11d
	kmovb	%k2,%r10d
	shlb	$4,%r10b
	orb	%r10b,%r11b

	addb	%r14b,%r14b
	adcb	%r13b,%r13b
	adcb	%r12b,%r12b
	adcb	%r11b,%r11b


	vpcmpuq	$0,.Lmask52x4(%rip),%ymm11,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm12,%k2
	kmovb	%k1,%r9d
	kmovb	%k2,%r8d
	shlb	$4,%r8b
	orb	%r8b,%r9b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm13,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm14,%k2
	kmovb	%k1,%r8d
	kmovb	%k2,%edx
	shlb	$4,%dl
	orb	%dl,%r8b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm15,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm16,%k2
	kmovb	%k1,%edx
	kmovb	%k2,%ecx
	shlb	$4,%cl
	orb	%cl,%dl

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm17,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm18,%k2
	kmovb	%k1,%ecx
	kmovb	%k2,%ebx
	shlb	$4,%bl
	orb	%bl,%cl

	addb	%r9b,%r14b
	adcb	%r8b,%r13b
	adcb	%dl,%r12b
	adcb	%cl,%r11b

	xorb	%r9b,%r14b
	xorb	%r8b,%r13b
	xorb	%dl,%r12b
	xorb	%cl,%r11b

	kmovb	%r14d,%k1
	shrb	$4,%r14b
	kmovb	%r14d,%k2
	kmovb	%r13d,%k3
	shrb	$4,%r13b
	kmovb	%r13d,%k4
	kmovb	%r12d,%k5
	shrb	$4,%r12b
	kmovb	%r12d,%k6
	kmovb	%r11d,%k7

	vpsubq	.Lmask52x4(%rip),%ymm11,%ymm11{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm12,%ymm12{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm13,%ymm13{%k3}
	vpsubq	.Lmask52x4(%rip),%ymm14,%ymm14{%k4}
	vpsubq	.Lmask52x4(%rip),%ymm15,%ymm15{%k5}
	vpsubq	.Lmask52x4(%rip),%ymm16,%ymm16{%k6}
	vpsubq	.Lmask52x4(%rip),%ymm17,%ymm17{%k7}

	vpandq	.Lmask52x4(%rip),%ymm11,%ymm11
	vpandq	.Lmask52x4(%rip),%ymm12,%ymm12
	vpandq	.Lmask52x4(%rip),%ymm13,%ymm13
	vpandq	.Lmask52x4(%rip),%ymm14,%ymm14
	vpandq	.Lmask52x4(%rip),%ymm15,%ymm15
	vpandq	.Lmask52x4(%rip),%ymm16,%ymm16
	vpandq	.Lmask52x4(%rip),%ymm17,%ymm17

	shrb	$4,%r11b
	kmovb	%r11d,%k1

	vpsubq	.Lmask52x4(%rip),%ymm18,%ymm18{%k1}

	vpandq	.Lmask52x4(%rip),%ymm18,%ymm18

	/* store first result */
	vmovdqu64	%ymm3,0(%rdi)
	vmovdqu64	%ymm4,32(%rdi)
	vmovdqu64	%ymm5,64(%rdi)
	vmovdqu64	%ymm6,96(%rdi)
	vmovdqu64	%ymm7,128(%rdi)
	vmovdqu64	%ymm8,160(%rdi)
	vmovdqu64	%ymm9,192(%rdi)
	vmovdqu64	%ymm10,224(%rdi)

	/* store second result */
	vmovdqu64	%ymm11,256(%rdi)
	vmovdqu64	%ymm12,288(%rdi)
	vmovdqu64	%ymm13,320(%rdi)
	vmovdqu64	%ymm14,352(%rdi)
	vmovdqu64	%ymm15,384(%rdi)
	vmovdqu64	%ymm16,416(%rdi)
	vmovdqu64	%ymm17,448(%rdi)
	vmovdqu64	%ymm18,480(%rdi)

	vzeroupper
	/* restore callee-saved registers in reverse push order */
	leaq	(%rsp),%rax
.cfi_def_cfa_register	%rax
	movq	0(%rax),%r15
.cfi_restore	%r15
	movq	8(%rax),%r14
.cfi_restore	%r14
	movq	16(%rax),%r13
.cfi_restore	%r13
	movq	24(%rax),%r12
.cfi_restore	%r12
	movq	32(%rax),%rbp
.cfi_restore	%rbp
	movq	40(%rax),%rbx
.cfi_restore	%rbx
	leaq	48(%rax),%rsp
.cfi_def_cfa	%rsp,8
.Lossl_rsaz_amm52x30_x2_ifma256_epilogue:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	ossl_rsaz_amm52x30_x2_ifma256, .-ossl_rsaz_amm52x30_x2_ifma256
.text

/*
 * ossl_extract_multiplier_2x30_win5
 *
 * In:   %rdi = output buffer, 512 bytes are written
 *       %rsi = table of 32 entries, 512 bytes each (16384 bytes in total)
 *       %rdx = index selecting bytes 0..255 of the output
 *       %rcx = index selecting bytes 256..511 of the output
 * Out:  output[0..255]   = bytes 0..255   of entry number %rdx
 *       output[256..511] = bytes 256..511 of entry number %rcx
 *       If an index matches no entry the corresponding half is zero.
 * Clobbers: %rax, %rsi, %ymm0-%ymm5, %ymm16-%ymm30, %k1, %k2, flags.
 *
 * Every table entry is loaded on every call; the indices only drive the
 * blend masks, never an address.
 */
.align	32
.globl	ossl_extract_multiplier_2x30_win5
.type	ossl_extract_multiplier_2x30_win5,@function
ossl_extract_multiplier_2x30_win5:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
	vmovdqa64	.Lones(%rip),%ymm30	/* counter increment */
	vpbroadcastq	%rdx,%ymm28		/* first index in every lane */
	vpbroadcastq	%rcx,%ymm29		/* second index in every lane */
	leaq	16384(%rsi),%rax		/* %rax = end of table */


	/* %ymm27 = running entry counter; the rest = selected entry, all 0 */
	vpxor	%xmm0,%xmm0,%xmm0
	vmovdqa64	%ymm0,%ymm27
	vmovdqa64	%ymm0,%ymm1
	vmovdqa64	%ymm0,%ymm2
	vmovdqa64	%ymm0,%ymm3
	vmovdqa64	%ymm0,%ymm4
	vmovdqa64	%ymm0,%ymm5
	vmovdqa64	%ymm0,%ymm16
	vmovdqa64	%ymm0,%ymm17
	vmovdqa64	%ymm0,%ymm18
	vmovdqa64	%ymm0,%ymm19
	vmovdqa64	%ymm0,%ymm20
	vmovdqa64	%ymm0,%ymm21
	vmovdqa64	%ymm0,%ymm22
	vmovdqa64	%ymm0,%ymm23
	vmovdqa64	%ymm0,%ymm24
	vmovdqa64	%ymm0,%ymm25

.align	32
.Lloop:
	vpcmpq	$0,%ymm27,%ymm28,%k1	/* %k1 = all ones iff counter == idx1 */
	vpcmpq	$0,%ymm27,%ymm29,%k2	/* %k2 = all ones iff counter == idx2 */
	/* first half of the entry: keep loaded data only where %k1 is set */
	vmovdqu64	0(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm0,%ymm0{%k1}
	vmovdqu64	32(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm1,%ymm1{%k1}
	vmovdqu64	64(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm2,%ymm2{%k1}
	vmovdqu64	96(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm3,%ymm3{%k1}
	vmovdqu64	128(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm4,%ymm4{%k1}
	vmovdqu64	160(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm5,%ymm5{%k1}
	vmovdqu64	192(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm16,%ymm16{%k1}
	vmovdqu64	224(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm17,%ymm17{%k1}
	/* second half of the entry: same under %k2 */
	vmovdqu64	256(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm18,%ymm18{%k2}
	vmovdqu64	288(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm19,%ymm19{%k2}
	vmovdqu64	320(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm20,%ymm20{%k2}
	vmovdqu64	352(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm21,%ymm21{%k2}
	vmovdqu64	384(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm22,%ymm22{%k2}
	vmovdqu64	416(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm23,%ymm23{%k2}
	vmovdqu64	448(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm24,%ymm24{%k2}
	vmovdqu64	480(%rsi),%ymm26
	vpblendmq	%ymm26,%ymm25,%ymm25{%k2}
	vpaddq	%ymm30,%ymm27,%ymm27	/* counter += 1 */
	addq	$512,%rsi		/* next table entry */
	cmpq	%rsi,%rax
	jne	.Lloop
	vmovdqu64	%ymm0,0(%rdi)
	vmovdqu64	%ymm1,32(%rdi)
	vmovdqu64	%ymm2,64(%rdi)
	vmovdqu64	%ymm3,96(%rdi)
	vmovdqu64	%ymm4,128(%rdi)
	vmovdqu64	%ymm5,160(%rdi)
	vmovdqu64	%ymm16,192(%rdi)
	vmovdqu64	%ymm17,224(%rdi)
	vmovdqu64	%ymm18,256(%rdi)
	vmovdqu64	%ymm19,288(%rdi)
	vmovdqu64	%ymm20,320(%rdi)
	vmovdqu64	%ymm21,352(%rdi)
	vmovdqu64	%ymm22,384(%rdi)
	vmovdqu64	%ymm23,416(%rdi)
	vmovdqu64	%ymm24,448(%rdi)
	vmovdqu64	%ymm25,480(%rdi)

	/*
	 * %ymm0-%ymm5 were written with 256-bit data above.  Clear the
	 * upper halves before returning, as both multiplication routines
	 * in this file do, so that callers running legacy SSE code do not
	 * start with dirty upper state.
	 */
	vzeroupper
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	ossl_extract_multiplier_2x30_win5, .-ossl_extract_multiplier_2x30_win5
.section	.rodata
.align	32
.Lones:
.quad	1,1,1,1
.Lzeros:
.quad	0,0,0,0
	/*
	 * ELF property note.  Type 5 with property 0xc0000002 and value 3;
	 * presumably NT_GNU_PROPERTY_TYPE_0 / GNU_PROPERTY_X86_FEATURE_1_AND
	 * with IBT and SHSTK set - confirm against the x86-64 psABI.
	 */
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 3
4: