/* Do not modify. This file is auto-generated from ghash-x86_64.pl. */
.text


.globl	gcm_gmult_4bit
.type	gcm_gmult_4bit,@function
.align	16
gcm_gmult_4bit:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$280,%rsp
.cfi_adjust_cfa_offset	280
.Lgmult_prologue:

	movzbq	15(%rdi),%r8
	leaq	.Lrem_4bit(%rip),%r11
	xorq	%rax,%rax
	xorq	%rbx,%rbx
	movb	%r8b,%al
	movb	%r8b,%bl
	shlb	$4,%al
	movq	$14,%rcx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	andb	$0xf0,%bl
	movq	%r8,%rdx
	jmp	.Loop1

.align	16
.Loop1:
	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	movb	(%rdi,%rcx,1),%al
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	movb	%al,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	shlb	$4,%al
	xorq	%r10,%r8
	decq	%rcx
	js	.Lbreak1

	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$0xf0,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8
	jmp	.Loop1

.align	16
.Lbreak1:
	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$0xf0,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8

	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	xorq	%r10,%r8
	xorq	(%r11,%rdx,8),%r9

	bswapq	%r8
	bswapq	%r9
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	leaq	280+48(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lgmult_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_gmult_4bit,.-gcm_gmult_4bit
.globl	gcm_ghash_4bit
.type	gcm_ghash_4bit,@function
.align	16
gcm_ghash_4bit:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$280,%rsp
.cfi_adjust_cfa_offset	280
.Lghash_prologue:
	movq	%rdx,%r14
	movq	%rcx,%r15
	subq	$-128,%rsi
	leaq	16+128(%rsp),%rbp
	xorl	%edx,%edx
	movq	0+0-128(%rsi),%r8
	movq	0+8-128(%rsi),%rax
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	16+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	16+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,0(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,0(%rbp)
	movq	32+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,0-128(%rbp)
	movq	32+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,1(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,8(%rbp)
	movq	48+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,8-128(%rbp)
	movq	48+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,2(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,16(%rbp)
	movq	64+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,16-128(%rbp)
	movq	64+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,3(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,24(%rbp)
	movq	80+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,24-128(%rbp)
	movq	80+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,4(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,32(%rbp)
	movq	96+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,32-128(%rbp)
	movq	96+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,5(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,40(%rbp)
	movq	112+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,40-128(%rbp)
	movq	112+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,6(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,48(%rbp)
	movq	128+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,48-128(%rbp)
	movq	128+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,7(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,56(%rbp)
	movq	144+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,56-128(%rbp)
	movq	144+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,8(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,64(%rbp)
	movq	160+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,64-128(%rbp)
	movq	160+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,9(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,72(%rbp)
	movq	176+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,72-128(%rbp)
	movq	176+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,10(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,80(%rbp)
	movq	192+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,80-128(%rbp)
	movq	192+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,11(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,88(%rbp)
	movq	208+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,88-128(%rbp)
	movq	208+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,12(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,96(%rbp)
	movq	224+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,96-128(%rbp)
	movq	224+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,13(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,104(%rbp)
	movq	240+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,104-128(%rbp)
	movq	240+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,14(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,112(%rbp)
	shlb	$4,%dl
	movq	%rax,112-128(%rbp)
	shlq	$60,%r10
	movb	%dl,15(%rsp)
	orq	%r10,%rbx
	movq	%r9,120(%rbp)
	movq	%rbx,120-128(%rbp)
	addq	$-128,%rsi
	movq	8(%rdi),%r8
	movq	0(%rdi),%r9
	addq	%r14,%r15
	leaq	.Lrem_8bit(%rip),%r11
	jmp	.Louter_loop
.align	16
.Louter_loop:
	xorq	(%r14),%r9
	movq	8(%r14),%rdx
	leaq	16(%r14),%r14
	xorq	%r8,%rdx
	movq	%r9,(%rdi)
	movq	%rdx,8(%rdi)
	shrq	$32,%rdx
	xorq	%rax,%rax
	roll	$8,%edx
	movb	%dl,%al
	movzbl	%dl,%ebx
	shlb	$4,%al
	shrl	$4,%ebx
	roll	$8,%edx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	xorq	%r8,%r12
	movq	%r9,%r10
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	8(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	0(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	andl	$240,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	-4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	movzwq	(%r11,%r12,2),%r12
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	shlq	$48,%r12
	xorq	%r10,%r8
	xorq	%r12,%r9
	movzbq	%r8b,%r13
	shrq	$4,%r8
	movq	%r9,%r10
	shlb	$4,%r13b
	shrq	$4,%r9
	xorq	8(%rsi,%rcx,1),%r8
	movzwq	(%r11,%r13,2),%r13
	shlq	$60,%r10
	xorq	(%rsi,%rcx,1),%r9
	xorq	%r10,%r8
	shlq	$48,%r13
	bswapq	%r8
	xorq	%r13,%r9
	bswapq	%r9
	cmpq	%r15,%r14
	jb	.Louter_loop
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	leaq	280+48(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	0(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lghash_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_ghash_4bit,.-gcm_ghash_4bit
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2


	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2


	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%rdi)
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,80(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_init_clmul,.-gcm_init_clmul
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm4
.byte	102,15,56,0,197
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_gmult_clmul,.-gcm_gmult_clmul
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm7
.byte	102,65,15,56,0,194

	subq	$0x10,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm6
	movl	OPENSSL_ia32cap_P+4(%rip),%eax
	cmpq	$0x30,%rcx
	jb	.Lskip4x

	andl	$71303168,%eax
	cmpl	$4194304,%eax
	je	.Lskip4x

	subq	$0x30,%rcx
	movq	$0xA040608020C0E000,%rax
	movdqu	48(%rsi),%xmm14
	movdqu	64(%rsi),%xmm15




	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218
.byte	102,69,15,56,0,218
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0
.byte	102,68,15,58,68,238,17
.byte	102,68,15,58,68,231,16
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
	xorps	%xmm12,%xmm4

	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218
.byte	102,69,15,56,0,194
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17
.byte	102,68,15,58,68,231,0
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jc	.Ltail4x

	jmp	.Lmod4_loop
.align	32
.Lmod4_loop:
.byte	102,65,15,58,68,199,0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,65,15,58,68,207,17
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	.L7_mask(%rip),%xmm8
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,15,58,68,231,16
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jnc	.Lmod4_loop

.Ltail4x:
.byte	102,65,15,58,68,199,0
.byte	102,65,15,58,68,207,17
.byte	102,68,15,58,68,199,16
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	addq	$0x40,%rcx
	jz	.Ldone
	movdqu	32(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Lodd_tail
.Lskip4x:





	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194
.byte	102,65,15,56,0,218
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	leaq	32(%rdx),%rdx
	nop
	subq	$0x20,%rcx
	jbe	.Leven_tail
	nop
	jmp	.Lmod_loop

.align	32
.Lmod_loop:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0
	pxor	%xmm1,%xmm0

	subq	$0x20,%rcx
	ja	.Lmod_loop

.Leven_tail:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

.Lodd_tail:
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,223,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.Ldone:
.byte	102,65,15,56,0,194
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_ghash_clmul,.-gcm_ghash_clmul
.globl	gcm_init_avx
.type	gcm_init_avx,@function
.align	32
gcm_init_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rsi),%xmm2
	vpshufd	$78,%xmm2,%xmm2


	vpshufd	$255,%xmm2,%xmm4
	vpsrlq	$63,%xmm2,%xmm3
	vpsllq	$1,%xmm2,%xmm2
	vpxor	%xmm5,%xmm5,%xmm5
	vpcmpgtd	%xmm4,%xmm5,%xmm5
	vpslldq	$8,%xmm3,%xmm3
	vpor	%xmm3,%xmm2,%xmm2


	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor	%xmm5,%xmm2,%xmm2

	vpunpckhqdq	%xmm2,%xmm2,%xmm6
	vmovdqa	%xmm2,%xmm0
	vpxor	%xmm2,%xmm6,%xmm6
	movq	$4,%r10
	jmp	.Linit_start_avx
.align	32
.Linit_loop_avx:
	vpalignr	$8,%xmm3,%xmm4,%xmm5
	vmovdqu	%xmm5,-16(%rdi)
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
.Linit_start_avx:
	vmovdqa	%xmm0,%xmm5
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
	vpshufd	$78,%xmm5,%xmm3
	vpshufd	$78,%xmm0,%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqu	%xmm5,0(%rdi)
	vpxor	%xmm0,%xmm4,%xmm4
	vmovdqu	%xmm0,16(%rdi)
	leaq	48(%rdi),%rdi
	subq	$1,%r10
	jnz	.Linit_loop_avx

	vpalignr	$8,%xmm4,%xmm3,%xmm5
	vmovdqu	%xmm5,-16(%rdi)

	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_init_avx,.-gcm_init_avx
.globl	gcm_gmult_avx
.type	gcm_gmult_avx,@function
.align	32
gcm_gmult_avx:
.cfi_startproc
	jmp	.L_gmult_clmul
.cfi_endproc
.size	gcm_gmult_avx,.-gcm_gmult_avx
.globl	gcm_ghash_avx
.type	gcm_ghash_avx,@function
.align	32
gcm_ghash_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rdi),%xmm10
	leaq	.L0x1c2_polynomial(%rip),%r10
	leaq	64(%rsi),%rsi
	vmovdqu	.Lbswap_mask(%rip),%xmm13
	vpshufb	%xmm13,%xmm10,%xmm10
	cmpq	$0x80,%rcx
	jb	.Lshort_avx
	subq	$0x80,%rcx

	vmovdqu	112(%rdx),%xmm14
	vmovdqu	0-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vmovdqu	32-64(%rsi),%xmm7

	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm14,%xmm9,%xmm9
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	80(%rdx),%xmm14
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	48-64(%rsi),%xmm6
	vpxor	%xmm14,%xmm9,%xmm9
	vmovdqu	64(%rdx),%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7

	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	48(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	32(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	16(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2

	leaq	128(%rdx),%rdx
	cmpq	$0x80,%rcx
	jb	.Ltail_avx

	vpxor	%xmm10,%xmm15,%xmm15
	subq	$0x80,%rcx
	jmp	.Loop8x_avx

.align	32
.Loop8x_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	112(%rdx),%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpxor	%xmm15,%xmm8,%xmm8
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
	vmovdqu	0-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
	vmovdqu	32-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm3,%xmm10,%xmm10
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vxorps	%xmm4,%xmm11,%xmm11
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm5,%xmm12,%xmm12
	vxorps	%xmm15,%xmm8,%xmm8

	vmovdqu	80(%rdx),%xmm14
	vpxor	%xmm10,%xmm12,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm11,%xmm12,%xmm12
	vpslldq	$8,%xmm12,%xmm9
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vpsrldq	$8,%xmm12,%xmm12
	vpxor	%xmm9,%xmm10,%xmm10
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vxorps	%xmm12,%xmm11,%xmm11
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	64(%rdx),%xmm15
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vxorps	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2

	vmovdqu	48(%rdx),%xmm14
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	32(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2
	vxorps	%xmm12,%xmm10,%xmm10

	vmovdqu	16(%rdx),%xmm14
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vxorps	%xmm11,%xmm12,%xmm12
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm12,%xmm15,%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
	vpxor	%xmm10,%xmm15,%xmm15

	leaq	128(%rdx),%rdx
	subq	$0x80,%rcx
	jnc	.Loop8x_avx

	addq	$0x80,%rcx
	jmp	.Ltail_no_xor_avx

.align	32
.Lshort_avx:
	vmovdqu	-16(%rdx,%rcx,1),%xmm14
	leaq	(%rdx,%rcx,1),%rdx
	vmovdqu	0-64(%rsi),%xmm6
	vmovdqu	32-64(%rsi),%xmm7
	vpshufb	%xmm13,%xmm14,%xmm15

	vmovdqa	%xmm0,%xmm3
	vmovdqa	%xmm1,%xmm4
	vmovdqa	%xmm2,%xmm5
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-32(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-48(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	80-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-64(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-80(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	96-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	128-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-96(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-112(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	144-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovq	184-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jmp	.Ltail_avx

.align	32
.Ltail_avx:
	vpxor	%xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2

	vmovdqu	(%r10),%xmm12

	vpxor	%xmm0,%xmm3,%xmm10
	vpxor	%xmm1,%xmm4,%xmm11
	vpxor	%xmm2,%xmm5,%xmm5

	vpxor	%xmm10,%xmm5,%xmm5
	vpxor	%xmm11,%xmm5,%xmm5
	vpslldq	$8,%xmm5,%xmm9
	vpsrldq	$8,%xmm5,%xmm5
	vpxor	%xmm9,%xmm10,%xmm10
	vpxor	%xmm5,%xmm11,%xmm11

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm11,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	cmpq	$0,%rcx
	jne	.Lshort_avx

	vpshufb	%xmm13,%xmm10,%xmm10
	vmovdqu	%xmm10,(%rdi)
	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_ghash_avx,.-gcm_ghash_avx
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long	7,0,7,0
.L7_mask_poly:
.long	7,0,450,0
.align	64
.type	.Lrem_4bit,@object
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type	.Lrem_8bit,@object
.Lrem_8bit:
.value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64