/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from ghash-x86_64.pl. */
.text


.globl gcm_gmult_4bit
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
.cfi_startproc
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lgmult_prologue:

	movzbq 15(%rdi),%r8
	leaq .Lrem_4bit(%rip),%r11
	xorq %rax,%rax
	xorq %rbx,%rbx
	movb %r8b,%al
	movb %r8b,%bl
	shlb $4,%al
	movq $14,%rcx
	movq 8(%rsi,%rax,1),%r8
	movq (%rsi,%rax,1),%r9
	andb $0xf0,%bl
	movq %r8,%rdx
	jmp .Loop1

.align 16
.Loop1:
	shrq $4,%r8
	andq $0xf,%rdx
	movq %r9,%r10
	movb (%rdi,%rcx,1),%al
	shrq $4,%r9
	xorq 8(%rsi,%rbx,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rbx,1),%r9
	movb %al,%bl
	xorq (%r11,%rdx,8),%r9
	movq %r8,%rdx
	shlb $4,%al
	xorq %r10,%r8
	decq %rcx
	js .Lbreak1

	shrq $4,%r8
	andq $0xf,%rdx
	movq %r9,%r10
	shrq $4,%r9
	xorq 8(%rsi,%rax,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rax,1),%r9
	andb $0xf0,%bl
	xorq (%r11,%rdx,8),%r9
	movq %r8,%rdx
	xorq %r10,%r8
	jmp .Loop1

.align 16
.Lbreak1:
	shrq $4,%r8
	andq $0xf,%rdx
	movq %r9,%r10
	shrq $4,%r9
	xorq 8(%rsi,%rax,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rax,1),%r9
	andb $0xf0,%bl
	xorq (%r11,%rdx,8),%r9
	movq %r8,%rdx
	xorq %r10,%r8

	shrq $4,%r8
	andq $0xf,%rdx
	movq %r9,%r10
	shrq $4,%r9
	xorq 8(%rsi,%rbx,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rbx,1),%r9
	xorq %r10,%r8
	xorq (%r11,%rdx,8),%r9

	bswapq %r8
	bswapq %r9
	movq %r8,8(%rdi)
	movq %r9,(%rdi)

	leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -8(%rsi),%rbx
.cfi_restore %rbx
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
.globl gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
.cfi_startproc
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lghash_prologue:
	movq %rdx,%r14
	movq %rcx,%r15
	subq $-128,%rsi
	leaq 16+128(%rsp),%rbp
	xorl %edx,%edx
	movq 0+0-128(%rsi),%r8
	movq 0+8-128(%rsi),%rax
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq 16+0-128(%rsi),%r9
	shlb $4,%dl
	movq 16+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,0(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,0(%rbp)
	movq 32+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,0-128(%rbp)
	movq 32+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,1(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,8(%rbp)
	movq 48+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,8-128(%rbp)
	movq 48+8-128(%rsi),%rbx
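/*
 * Note (added commentary, not produced by ghash-x86_64.pl): the unrolled
 * stretch continuing below appears to expand the 16-entry Htable for the
 * main loop, collecting the low nibble of each entry in the 16 bytes at
 * (%rsp) and storing 4-bit-shifted copies of the entries around %rbp.
 */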
	shlq $60,%r10
	movb %dl,2(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,16(%rbp)
	movq 64+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,16-128(%rbp)
	movq 64+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,3(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,24(%rbp)
	movq 80+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,24-128(%rbp)
	movq 80+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,4(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,32(%rbp)
	movq 96+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,32-128(%rbp)
	movq 96+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,5(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,40(%rbp)
	movq 112+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,40-128(%rbp)
	movq 112+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,6(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,48(%rbp)
	movq 128+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,48-128(%rbp)
	movq 128+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,7(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,56(%rbp)
	movq 144+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,56-128(%rbp)
	movq 144+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,8(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,64(%rbp)
	movq 160+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,64-128(%rbp)
	movq 160+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,9(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,72(%rbp)
	movq 176+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,72-128(%rbp)
	movq 176+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,10(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,80(%rbp)
	movq 192+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,80-128(%rbp)
	movq 192+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,11(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,88(%rbp)
	movq 208+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,88-128(%rbp)
	movq 208+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,12(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,96(%rbp)
	movq 224+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,96-128(%rbp)
	movq 224+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,13(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,104(%rbp)
	movq 240+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,104-128(%rbp)
	movq 240+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,14(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,112(%rbp)
	shlb $4,%dl
	movq %rax,112-128(%rbp)
	shlq $60,%r10
	movb %dl,15(%rsp)
	orq %r10,%rbx
	movq %r9,120(%rbp)
	movq %rbx,120-128(%rbp)
	addq $-128,%rsi
	movq 8(%rdi),%r8
	movq 0(%rdi),%r9
	addq %r14,%r15
	leaq .Lrem_8bit(%rip),%r11
	jmp .Louter_loop
.align 16
.Louter_loop:
	xorq (%r14),%r9
	movq 8(%r14),%rdx
	leaq 16(%r14),%r14
	xorq %r8,%rdx
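/*
 * Note (added commentary): Xi has just been XORed with the next 16-byte
 * input block; the unrolled code below appears to multiply the result by H
 * in GF(2^128) one byte at a time, folding partial products through the
 * .Lrem_8bit reduction table addressed via %r11.
 */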
	movq %r9,(%rdi)
	movq %rdx,8(%rdi)
	shrq $32,%rdx
	xorq %rax,%rax
	roll $8,%edx
	movb %dl,%al
	movzbl %dl,%ebx
	shlb $4,%al
	shrl $4,%ebx
	roll $8,%edx
	movq 8(%rsi,%rax,1),%r8
	movq (%rsi,%rax,1),%r9
	movb %dl,%al
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	xorq %r8,%r12
	movq %r9,%r10
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl 8(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl 4(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl 0(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	andl $240,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl -4(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	movzwq (%r11,%r12,2),%r12
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	shlq $48,%r12
	xorq %r10,%r8
	xorq %r12,%r9
	movzbq %r8b,%r13
	shrq $4,%r8
	movq %r9,%r10
	shlb $4,%r13b
	shrq $4,%r9
	xorq 8(%rsi,%rcx,1),%r8
	movzwq (%r11,%r13,2),%r13
	shlq $60,%r10
	xorq (%rsi,%rcx,1),%r9
	xorq %r10,%r8
	shlq $48,%r13
	bswapq %r8
	xorq %r13,%r9
	bswapq %r9
	cmpq %r15,%r14
	jb .Louter_loop
	movq %r8,8(%rdi)
	movq %r9,(%rdi)

	leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbp
.cfi_restore %rbp
	movq -8(%rsi),%rbx
.cfi_restore %rbx
	leaq 0(%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lghash_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
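/*
 * Note (added commentary): the *_clmul routines below form the PCLMULQDQ
 * path. gcm_init_clmul appears to derive H, H^2, H^3 and H^4 (plus their
 * Karatsuba helper halves) from the raw hash key and store them in Htable;
 * the .byte 102,15,58,68,... sequences are hand-encoded pclmulqdq
 * instructions.
 */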
.globl gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
	movdqu (%rsi),%xmm2
	pshufd $78,%xmm2,%xmm2


	pshufd $255,%xmm2,%xmm4
	movdqa %xmm2,%xmm3
	psllq $1,%xmm2
	pxor %xmm5,%xmm5
	psrlq $63,%xmm3
	pcmpgtd %xmm4,%xmm5
	pslldq $8,%xmm3
	por %xmm3,%xmm2


	pand .L0x1c2_polynomial(%rip),%xmm5
	pxor %xmm5,%xmm2


	pshufd $78,%xmm2,%xmm6
	movdqa %xmm2,%xmm0
	pxor %xmm2,%xmm6
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	pshufd $78,%xmm2,%xmm3
	pshufd $78,%xmm0,%xmm4
	pxor %xmm2,%xmm3
	movdqu %xmm2,0(%rdi)
	pxor %xmm0,%xmm4
	movdqu %xmm0,16(%rdi)
.byte 102,15,58,15,227,8
	movdqu %xmm4,32(%rdi)
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	movdqa %xmm0,%xmm5
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	pshufd $78,%xmm5,%xmm3
	pshufd $78,%xmm0,%xmm4
	pxor %xmm5,%xmm3
	movdqu %xmm5,48(%rdi)
	pxor %xmm0,%xmm4
	movdqu %xmm0,64(%rdi)
.byte 102,15,58,15,227,8
	movdqu %xmm4,80(%rdi)
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_clmul,.-gcm_init_clmul
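/*
 * Note (added commentary): gcm_gmult_clmul computes a single Xi = Xi * H
 * with pclmulqdq, byte-reversing Xi through .Lbswap_mask on entry and exit.
 * gcm_ghash_clmul below hashes a whole buffer the same way, and on larger
 * inputs appears to aggregate up to four blocks per .Lmod4_loop iteration.
 */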
.globl gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
	movdqu (%rdi),%xmm0
	movdqa .Lbswap_mask(%rip),%xmm5
	movdqu (%rsi),%xmm2
	movdqu 32(%rsi),%xmm4
.byte 102,15,56,0,197
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
.byte 102,15,56,0,197
	movdqu %xmm0,(%rdi)
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
.globl gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
	movdqa .Lbswap_mask(%rip),%xmm10

	movdqu (%rdi),%xmm0
	movdqu (%rsi),%xmm2
	movdqu 32(%rsi),%xmm7
.byte 102,65,15,56,0,194

	subq $0x10,%rcx
	jz .Lodd_tail

	movdqu 16(%rsi),%xmm6
	movl OPENSSL_ia32cap_P+4(%rip),%eax
	cmpq $0x30,%rcx
	jb .Lskip4x

	andl $71303168,%eax
	cmpl $4194304,%eax
	je .Lskip4x

	subq $0x30,%rcx
	movq $0xA040608020C0E000,%rax
	movdqu 48(%rsi),%xmm14
	movdqu 64(%rsi),%xmm15




	movdqu 48(%rdx),%xmm3
	movdqu 32(%rdx),%xmm11
.byte 102,65,15,56,0,218
.byte 102,69,15,56,0,218
	movdqa %xmm3,%xmm5
	pshufd $78,%xmm3,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

	movdqa %xmm11,%xmm13
	pshufd $78,%xmm11,%xmm12
	pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
.byte 102,68,15,58,68,231,16
	xorps %xmm11,%xmm3
	xorps %xmm13,%xmm5
	movups 80(%rsi),%xmm7
	xorps %xmm12,%xmm4

	movdqu 16(%rdx),%xmm11
	movdqu 0(%rdx),%xmm8
.byte 102,69,15,56,0,218
.byte 102,69,15,56,0,194
	movdqa %xmm11,%xmm13
	pshufd $78,%xmm11,%xmm12
	pxor %xmm8,%xmm0
	pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm8
	pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
.byte 102,68,15,58,68,231,0
	xorps %xmm11,%xmm3
	xorps %xmm13,%xmm5

	leaq 64(%rdx),%rdx
	subq $0x40,%rcx
	jc .Ltail4x

	jmp .Lmod4_loop
.align 32
.Lmod4_loop:
.byte 102,65,15,58,68,199,0
	xorps %xmm12,%xmm4
	movdqu 48(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,65,15,58,68,207,17
	xorps %xmm3,%xmm0
	movdqu 32(%rdx),%xmm3
	movdqa %xmm11,%xmm13
.byte 102,68,15,58,68,199,16
	pshufd $78,%xmm11,%xmm12
	xorps %xmm5,%xmm1
	pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
	movups 32(%rsi),%xmm7
	xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
	pshufd $78,%xmm3,%xmm4

	pxor %xmm0,%xmm8
	movdqa %xmm3,%xmm5
	pxor %xmm1,%xmm8
	pxor %xmm3,%xmm4
	movdqa %xmm8,%xmm9
.byte 102,68,15,58,68,234,17
	pslldq $8,%xmm8
	psrldq $8,%xmm9
	pxor %xmm8,%xmm0
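/*
 * Note (added commentary): the .L7_mask / psllq $57 / psrlq $1,$5,$1
 * sequence interleaved below appears to be the reduction of the 256-bit
 * product modulo the GHASH polynomial, overlapped with the pclmulqdq
 * multiplications for the next four input blocks.
 */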
	movdqa .L7_mask(%rip),%xmm8
	pxor %xmm9,%xmm1
.byte 102,76,15,110,200

	pand %xmm0,%xmm8
.byte 102,69,15,56,0,200
	pxor %xmm0,%xmm9
.byte 102,68,15,58,68,231,0
	psllq $57,%xmm9
	movdqa %xmm9,%xmm8
	pslldq $8,%xmm9
.byte 102,15,58,68,222,0
	psrldq $8,%xmm8
	pxor %xmm9,%xmm0
	pxor %xmm8,%xmm1
	movdqu 0(%rdx),%xmm8

	movdqa %xmm0,%xmm9
	psrlq $1,%xmm0
.byte 102,15,58,68,238,17
	xorps %xmm11,%xmm3
	movdqu 16(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,15,58,68,231,16
	xorps %xmm13,%xmm5
	movups 80(%rsi),%xmm7
.byte 102,69,15,56,0,194
	pxor %xmm9,%xmm1
	pxor %xmm0,%xmm9
	psrlq $5,%xmm0

	movdqa %xmm11,%xmm13
	pxor %xmm12,%xmm4
	pshufd $78,%xmm11,%xmm12
	pxor %xmm9,%xmm0
	pxor %xmm8,%xmm1
	pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
	xorps %xmm11,%xmm3
	pshufd $78,%xmm0,%xmm8
	pxor %xmm0,%xmm8

.byte 102,68,15,58,68,231,0
	xorps %xmm13,%xmm5

	leaq 64(%rdx),%rdx
	subq $0x40,%rcx
	jnc .Lmod4_loop

.Ltail4x:
.byte 102,65,15,58,68,199,0
.byte 102,65,15,58,68,207,17
.byte 102,68,15,58,68,199,16
	xorps %xmm12,%xmm4
	xorps %xmm3,%xmm0
	xorps %xmm5,%xmm1
	pxor %xmm0,%xmm1
	pxor %xmm4,%xmm8

	pxor %xmm1,%xmm8
	pxor %xmm0,%xmm1

	movdqa %xmm8,%xmm9
	psrldq $8,%xmm8
	pslldq $8,%xmm9
	pxor %xmm8,%xmm1
	pxor %xmm9,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	addq $0x40,%rcx
	jz .Ldone
	movdqu 32(%rsi),%xmm7
	subq $0x10,%rcx
	jz .Lodd_tail
.Lskip4x:





	movdqu (%rdx),%xmm8
	movdqu 16(%rdx),%xmm3
.byte 102,69,15,56,0,194
.byte 102,65,15,56,0,218
	pxor %xmm8,%xmm0

	movdqa %xmm3,%xmm5
	pshufd $78,%xmm3,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

	leaq 32(%rdx),%rdx
	nop
	subq $0x20,%rcx
	jbe .Leven_tail
	nop
	jmp .Lmod_loop

.align 32
.Lmod_loop:
	movdqa %xmm0,%xmm1
	movdqa %xmm4,%xmm8
	pshufd $78,%xmm0,%xmm4
	pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

	pxor %xmm3,%xmm0
	pxor %xmm5,%xmm1
	movdqu (%rdx),%xmm9
	pxor %xmm0,%xmm8
.byte 102,69,15,56,0,202
	movdqu 16(%rdx),%xmm3

	pxor %xmm1,%xmm8
	pxor %xmm9,%xmm1
	pxor %xmm8,%xmm4
.byte 102,65,15,56,0,218
	movdqa %xmm4,%xmm8
	psrldq $8,%xmm8
	pslldq $8,%xmm4
	pxor %xmm8,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm3,%xmm5

	movdqa %xmm0,%xmm9
	movdqa %xmm0,%xmm8
	psllq $5,%xmm0
	pxor %xmm0,%xmm8
.byte 102,15,58,68,218,0
	psllq $1,%xmm0
	pxor %xmm8,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm8
	pslldq $8,%xmm0
	psrldq $8,%xmm8
	pxor %xmm9,%xmm0
	pshufd $78,%xmm5,%xmm4
	pxor %xmm8,%xmm1
	pxor %xmm5,%xmm4

	movdqa %xmm0,%xmm9
	psrlq $1,%xmm0
.byte 102,15,58,68,234,17
	pxor %xmm9,%xmm1
	pxor %xmm0,%xmm9
	psrlq $5,%xmm0
	pxor %xmm9,%xmm0
	leaq 32(%rdx),%rdx
	psrlq $1,%xmm0
.byte 102,15,58,68,231,0
	pxor %xmm1,%xmm0

	subq $0x20,%rcx
	ja .Lmod_loop

.Leven_tail:
	movdqa %xmm0,%xmm1
	movdqa %xmm4,%xmm8
	pshufd $78,%xmm0,%xmm4
	pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

	pxor %xmm3,%xmm0
	pxor %xmm5,%xmm1
	pxor %xmm0,%xmm8
	pxor %xmm1,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm8
	psrldq $8,%xmm8
	pslldq $8,%xmm4
	pxor %xmm8,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	testq %rcx,%rcx
	jnz .Ldone

.Lodd_tail:
	movdqu (%rdx),%xmm8
.byte 102,69,15,56,0,194
	pxor %xmm8,%xmm0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,223,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
.Ldone:
.byte 102,65,15,56,0,194
	movdqu %xmm0,(%rdi)
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_clmul,.-gcm_ghash_clmul
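/*
 * Note (added commentary): the AVX path. gcm_init_avx appears to expand the
 * hash key into eight powers of H (plus Karatsuba helper words) for
 * gcm_ghash_avx, which aggregates eight blocks per .Loop8x_avx iteration;
 * gcm_gmult_avx simply reuses the SSE .L_gmult_clmul code.
 */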
.globl gcm_init_avx
.type gcm_init_avx,@function
.align 32
gcm_init_avx:
.cfi_startproc
	vzeroupper

	vmovdqu (%rsi),%xmm2
	vpshufd $78,%xmm2,%xmm2


	vpshufd $255,%xmm2,%xmm4
	vpsrlq $63,%xmm2,%xmm3
	vpsllq $1,%xmm2,%xmm2
	vpxor %xmm5,%xmm5,%xmm5
	vpcmpgtd %xmm4,%xmm5,%xmm5
	vpslldq $8,%xmm3,%xmm3
	vpor %xmm3,%xmm2,%xmm2


	vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor %xmm5,%xmm2,%xmm2

	vpunpckhqdq %xmm2,%xmm2,%xmm6
	vmovdqa %xmm2,%xmm0
	vpxor %xmm2,%xmm6,%xmm6
	movq $4,%r10
	jmp .Linit_start_avx
.align 32
.Linit_loop_avx:
	vpalignr $8,%xmm3,%xmm4,%xmm5
	vmovdqu %xmm5,-16(%rdi)
	vpunpckhqdq %xmm0,%xmm0,%xmm3
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
	vpxor %xmm0,%xmm1,%xmm4
	vpxor %xmm4,%xmm3,%xmm3

	vpslldq $8,%xmm3,%xmm4
	vpsrldq $8,%xmm3,%xmm3
	vpxor %xmm4,%xmm0,%xmm0
	vpxor %xmm3,%xmm1,%xmm1
	vpsllq $57,%xmm0,%xmm3
	vpsllq $62,%xmm0,%xmm4
	vpxor %xmm3,%xmm4,%xmm4
	vpsllq $63,%xmm0,%xmm3
	vpxor %xmm3,%xmm4,%xmm4
	vpslldq $8,%xmm4,%xmm3
	vpsrldq $8,%xmm4,%xmm4
	vpxor %xmm3,%xmm0,%xmm0
	vpxor %xmm4,%xmm1,%xmm1

	vpsrlq $1,%xmm0,%xmm4
	vpxor %xmm0,%xmm1,%xmm1
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $5,%xmm4,%xmm4
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $1,%xmm0,%xmm0
	vpxor %xmm1,%xmm0,%xmm0
.Linit_start_avx:
	vmovdqa %xmm0,%xmm5
	vpunpckhqdq %xmm0,%xmm0,%xmm3
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
	vpxor %xmm0,%xmm1,%xmm4
	vpxor %xmm4,%xmm3,%xmm3

	vpslldq $8,%xmm3,%xmm4
	vpsrldq $8,%xmm3,%xmm3
	vpxor %xmm4,%xmm0,%xmm0
	vpxor %xmm3,%xmm1,%xmm1
	vpsllq $57,%xmm0,%xmm3
	vpsllq $62,%xmm0,%xmm4
	vpxor %xmm3,%xmm4,%xmm4
	vpsllq $63,%xmm0,%xmm3
	vpxor %xmm3,%xmm4,%xmm4
	vpslldq $8,%xmm4,%xmm3
	vpsrldq $8,%xmm4,%xmm4
	vpxor %xmm3,%xmm0,%xmm0
	vpxor %xmm4,%xmm1,%xmm1

	vpsrlq $1,%xmm0,%xmm4
	vpxor %xmm0,%xmm1,%xmm1
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $5,%xmm4,%xmm4
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $1,%xmm0,%xmm0
	vpxor %xmm1,%xmm0,%xmm0
	vpshufd $78,%xmm5,%xmm3
	vpshufd $78,%xmm0,%xmm4
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqu %xmm5,0(%rdi)
	vpxor %xmm0,%xmm4,%xmm4
	vmovdqu %xmm0,16(%rdi)
	leaq 48(%rdi),%rdi
	subq $1,%r10
	jnz .Linit_loop_avx

	vpalignr $8,%xmm4,%xmm3,%xmm5
	vmovdqu %xmm5,-16(%rdi)

	vzeroupper
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_avx,.-gcm_init_avx
.globl gcm_gmult_avx
.type gcm_gmult_avx,@function
.align 32
gcm_gmult_avx:
.cfi_startproc
	jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
.globl gcm_ghash_avx
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
.cfi_startproc
	vzeroupper

	vmovdqu (%rdi),%xmm10
	leaq .L0x1c2_polynomial(%rip),%r10
	leaq 64(%rsi),%rsi
	vmovdqu .Lbswap_mask(%rip),%xmm13
	vpshufb %xmm13,%xmm10,%xmm10
	cmpq $0x80,%rcx
	jb .Lshort_avx
	subq $0x80,%rcx

	vmovdqu 112(%rdx),%xmm14
	vmovdqu 0-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm14
	vmovdqu 32-64(%rsi),%xmm7

	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vmovdqu 96(%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm14,%xmm9,%xmm9
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 16-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vmovdqu 80(%rdx),%xmm14
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8

	vpshufb %xmm13,%xmm14,%xmm14
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 48-64(%rsi),%xmm6
	vpxor %xmm14,%xmm9,%xmm9
	vmovdqu 64(%rdx),%xmm15
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 80-64(%rsi),%xmm7

	vpshufb %xmm13,%xmm15,%xmm15
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm1,%xmm4,%xmm4
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 64-64(%rsi),%xmm6
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8

	vmovdqu 48(%rdx),%xmm14
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpxor %xmm4,%xmm1,%xmm1
	vpshufb %xmm13,%xmm14,%xmm14
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 96-64(%rsi),%xmm6
	vpxor %xmm5,%xmm2,%xmm2
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 128-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9

	vmovdqu 32(%rdx),%xmm15
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm1,%xmm4,%xmm4
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 112-64(%rsi),%xmm6
	vpxor %xmm2,%xmm5,%xmm5
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8

	vmovdqu 16(%rdx),%xmm14
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpxor %xmm4,%xmm1,%xmm1
	vpshufb %xmm13,%xmm14,%xmm14
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 144-64(%rsi),%xmm6
	vpxor %xmm5,%xmm2,%xmm2
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 176-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9

	vmovdqu (%rdx),%xmm15
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm1,%xmm4,%xmm4
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 160-64(%rsi),%xmm6
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2

	leaq 128(%rdx),%rdx
	cmpq $0x80,%rcx
	jb .Ltail_avx

	vpxor %xmm10,%xmm15,%xmm15
	subq $0x80,%rcx
	jmp .Loop8x_avx

.align 32
.Loop8x_avx:
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vmovdqu 112(%rdx),%xmm14
	vpxor %xmm0,%xmm3,%xmm3
	vpxor %xmm15,%xmm8,%xmm8
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
	vpshufb %xmm13,%xmm14,%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
	vmovdqu 0-64(%rsi),%xmm6
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
	vmovdqu 32-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9

	vmovdqu 96(%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm3,%xmm10,%xmm10
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vxorps %xmm4,%xmm11,%xmm11
	vmovdqu 16-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm5,%xmm12,%xmm12
	vxorps %xmm15,%xmm8,%xmm8

	vmovdqu 80(%rdx),%xmm14
	vpxor %xmm10,%xmm12,%xmm12
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpxor %xmm11,%xmm12,%xmm12
	vpslldq $8,%xmm12,%xmm9
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vpsrldq $8,%xmm12,%xmm12
	vpxor %xmm9,%xmm10,%xmm10
	vmovdqu 48-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm14
	vxorps %xmm12,%xmm11,%xmm11
	vpxor %xmm1,%xmm4,%xmm4
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 80-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9
	vpxor %xmm2,%xmm5,%xmm5

	vmovdqu 64(%rdx),%xmm15
	vpalignr $8,%xmm10,%xmm10,%xmm12
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpshufb %xmm13,%xmm15,%xmm15
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 64-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm4,%xmm1,%xmm1
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vxorps %xmm15,%xmm8,%xmm8
	vpxor %xmm5,%xmm2,%xmm2

	vmovdqu 48(%rdx),%xmm14
	vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpshufb %xmm13,%xmm14,%xmm14
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 96-64(%rsi),%xmm6
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 128-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9
	vpxor %xmm2,%xmm5,%xmm5

	vmovdqu 32(%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpshufb %xmm13,%xmm15,%xmm15
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 112-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm4,%xmm1,%xmm1
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8
	vpxor %xmm5,%xmm2,%xmm2
	vxorps %xmm12,%xmm10,%xmm10

	vmovdqu 16(%rdx),%xmm14
	vpalignr $8,%xmm10,%xmm10,%xmm12
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpshufb %xmm13,%xmm14,%xmm14
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 144-64(%rsi),%xmm6
	vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
	vxorps %xmm11,%xmm12,%xmm12
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 176-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9
	vpxor %xmm2,%xmm5,%xmm5

	vmovdqu (%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 160-64(%rsi),%xmm6
	vpxor %xmm12,%xmm15,%xmm15
	vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
	vpxor %xmm10,%xmm15,%xmm15

	leaq 128(%rdx),%rdx
	subq $0x80,%rcx
	jnc .Loop8x_avx

	addq $0x80,%rcx
	jmp .Ltail_no_xor_avx

.align 32
.Lshort_avx:
	vmovdqu -16(%rdx,%rcx,1),%xmm14
	leaq (%rdx,%rcx,1),%rdx
	vmovdqu 0-64(%rsi),%xmm6
	vmovdqu 32-64(%rsi),%xmm7
	vpshufb %xmm13,%xmm14,%xmm15

	vmovdqa %xmm0,%xmm3
	vmovdqa %xmm1,%xmm4
	vmovdqa %xmm2,%xmm5
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -32(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 16-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vpsrldq $8,%xmm7,%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -48(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 48-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vmovdqu 80-64(%rsi),%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -64(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 64-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vpsrldq $8,%xmm7,%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -80(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 96-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vmovdqu 128-64(%rsi),%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -96(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 112-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vpsrldq $8,%xmm7,%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -112(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 144-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vmovq 184-64(%rsi),%xmm7
	subq $0x10,%rcx
	jmp .Ltail_avx

.align 32
.Ltail_avx:
	vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2

	vmovdqu (%r10),%xmm12

	vpxor %xmm0,%xmm3,%xmm10
	vpxor %xmm1,%xmm4,%xmm11
	vpxor %xmm2,%xmm5,%xmm5

	vpxor %xmm10,%xmm5,%xmm5
	vpxor %xmm11,%xmm5,%xmm5
	vpslldq $8,%xmm5,%xmm9
	vpsrldq $8,%xmm5,%xmm5
	vpxor %xmm9,%xmm10,%xmm10
	vpxor %xmm5,%xmm11,%xmm11

	vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
	vpalignr $8,%xmm10,%xmm10,%xmm10
	vpxor %xmm9,%xmm10,%xmm10

	vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
	vpalignr $8,%xmm10,%xmm10,%xmm10
	vpxor %xmm11,%xmm10,%xmm10
	vpxor %xmm9,%xmm10,%xmm10

	cmpq $0,%rcx
	jne .Lshort_avx

	vpshufb %xmm13,%xmm10,%xmm10
	vmovdqu %xmm10,(%rdi)
	vzeroupper
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
.align 64
.type .Lrem_4bit,@object
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type .Lrem_8bit,@object
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
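/*
 * Reference sketch (added commentary, not generated output): every routine
 * above accelerates the same bit-reflected GF(2^128) multiplication defined
 * in NIST SP 800-38D. A plain-C rendering with hypothetical names, for
 * orientation only:
 *
 *	#include <stdint.h>
 *
 *	typedef struct { uint64_t hi, lo; } u128;	// big-endian halves of a block
 *
 *	static u128 gf128_mul(u128 X, u128 H)		// returns X * H
 *	{
 *		u128 Z = { 0, 0 }, V = H;
 *		for (int i = 0; i < 128; i++) {
 *			// bit i of X, most-significant bit first
 *			uint64_t xi = (i < 64) ? (X.hi >> (63 - i)) & 1
 *					       : (X.lo >> (127 - i)) & 1;
 *			if (xi) { Z.hi ^= V.hi; Z.lo ^= V.lo; }
 *			// right-shift V; if a set bit falls off, fold in the
 *			// reduction constant (0xE1 in the top byte)
 *			uint64_t lsb = V.lo & 1;
 *			V.lo = (V.lo >> 1) | (V.hi << 63);
 *			V.hi >>= 1;
 *			if (lsb) V.hi ^= 0xE100000000000000ULL;
 *		}
 *		return Z;
 *	}
 *
 * GHASH itself is then Xi = (Xi ^ block[i]) * H over the whole padded input,
 * which is what gcm_ghash_4bit, gcm_ghash_clmul and gcm_ghash_avx compute.
 */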