/* Do not modify. This file is auto-generated from ghash-x86_64.pl. */
.text


.globl gcm_gmult_4bit
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
.cfi_startproc
.byte 243,15,30,250
 pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
 pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
 pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
 pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
 pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
 pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
 subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lgmult_prologue:

 movzbq 15(%rdi),%r8
 leaq .Lrem_4bit(%rip),%r11
 xorq %rax,%rax
 xorq %rbx,%rbx
 movb %r8b,%al
 movb %r8b,%bl
 shlb $4,%al
 movq $14,%rcx
 movq 8(%rsi,%rax,1),%r8
 movq (%rsi,%rax,1),%r9
 andb $0xf0,%bl
 movq %r8,%rdx
 jmp .Loop1

.align 16
.Loop1:
 shrq $4,%r8
 andq $0xf,%rdx
 movq %r9,%r10
 movb (%rdi,%rcx,1),%al
 shrq $4,%r9
 xorq 8(%rsi,%rbx,1),%r8
 shlq $60,%r10
 xorq (%rsi,%rbx,1),%r9
 movb %al,%bl
 xorq (%r11,%rdx,8),%r9
 movq %r8,%rdx
 shlb $4,%al
 xorq %r10,%r8
 decq %rcx
 js .Lbreak1

 shrq $4,%r8
 andq $0xf,%rdx
 movq %r9,%r10
 shrq $4,%r9
 xorq 8(%rsi,%rax,1),%r8
 shlq $60,%r10
 xorq (%rsi,%rax,1),%r9
 andb $0xf0,%bl
 xorq (%r11,%rdx,8),%r9
 movq %r8,%rdx
 xorq %r10,%r8
 jmp .Loop1

.align 16
.Lbreak1:
 shrq $4,%r8
 andq $0xf,%rdx
 movq %r9,%r10
 shrq $4,%r9
 xorq 8(%rsi,%rax,1),%r8
 shlq $60,%r10
 xorq (%rsi,%rax,1),%r9
 andb $0xf0,%bl
 xorq (%r11,%rdx,8),%r9
 movq %r8,%rdx
 xorq %r10,%r8

 shrq $4,%r8
 andq $0xf,%rdx
 movq %r9,%r10
 shrq $4,%r9
 xorq 8(%rsi,%rbx,1),%r8
 shlq $60,%r10
 xorq (%rsi,%rbx,1),%r9
 xorq %r10,%r8
 xorq (%r11,%rdx,8),%r9

 bswapq %r8
 bswapq %r9
 movq %r8,8(%rdi)
 movq %r9,(%rdi)

 leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
 movq -8(%rsi),%rbx
.cfi_restore %rbx
 leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
.globl gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
.cfi_startproc
.byte 243,15,30,250
 pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
 pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
 pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
 pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
 pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
 pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
 subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lghash_prologue:
 movq %rdx,%r14
 movq %rcx,%r15
 subq $-128,%rsi
 leaq 16+128(%rsp),%rbp
 xorl %edx,%edx
 movq 0+0-128(%rsi),%r8
 movq 0+8-128(%rsi),%rax
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq 16+0-128(%rsi),%r9
 shlb $4,%dl
 movq 16+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,0(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,0(%rbp)
 movq 32+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,0-128(%rbp)
 movq 32+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,1(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,8(%rbp)
 movq 48+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,8-128(%rbp)
 movq 48+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,2(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,16(%rbp)
 movq 64+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,16-128(%rbp)
 movq 64+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,3(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,24(%rbp)
 movq 80+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,24-128(%rbp)
 movq 80+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,4(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,32(%rbp)
 movq 96+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,32-128(%rbp)
 movq 96+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,5(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,40(%rbp)
 movq 112+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,40-128(%rbp)
 movq 112+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,6(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,48(%rbp)
 movq 128+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,48-128(%rbp)
 movq 128+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,7(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,56(%rbp)
 movq 144+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,56-128(%rbp)
 movq 144+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,8(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,64(%rbp)
 movq 160+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,64-128(%rbp)
 movq 160+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,9(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,72(%rbp)
 movq 176+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,72-128(%rbp)
 movq 176+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,10(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,80(%rbp)
 movq 192+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,80-128(%rbp)
 movq 192+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,11(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,88(%rbp)
 movq 208+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,88-128(%rbp)
 movq 208+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,12(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,96(%rbp)
 movq 224+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,96-128(%rbp)
 movq 224+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,13(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,104(%rbp)
 movq 240+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,104-128(%rbp)
 movq 240+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,14(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,112(%rbp)
 shlb $4,%dl
 movq %rax,112-128(%rbp)
 shlq $60,%r10
 movb %dl,15(%rsp)
 orq %r10,%rbx
 movq %r9,120(%rbp)
 movq %rbx,120-128(%rbp)
 addq $-128,%rsi
 movq 8(%rdi),%r8
 movq 0(%rdi),%r9
 addq %r14,%r15
 leaq .Lrem_8bit(%rip),%r11
 jmp .Louter_loop
.align 16
.Louter_loop:
 xorq (%r14),%r9
 movq 8(%r14),%rdx
 leaq 16(%r14),%r14
 xorq %r8,%rdx
 movq %r9,(%rdi)
 movq %rdx,8(%rdi)
 shrq $32,%rdx
 xorq %rax,%rax
 roll $8,%edx
 movb %dl,%al
 movzbl %dl,%ebx
 shlb $4,%al
 shrl $4,%ebx
 roll $8,%edx
 movq 8(%rsi,%rax,1),%r8
 movq (%rsi,%rax,1),%r9
 movb %dl,%al
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 xorq %r8,%r12
 movq %r9,%r10
 shrq $8,%r8
 movzbq %r12b,%r12
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 movl 8(%rdi),%edx
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 movl 4(%rdi),%edx
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 movl 0(%rdi),%edx
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 andl $240,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 movl -4(%rdi),%edx
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 movzwq (%r11,%r12,2),%r12
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 shlq $48,%r12
 xorq %r10,%r8
 xorq %r12,%r9
 movzbq %r8b,%r13
 shrq $4,%r8
 movq %r9,%r10
 shlb $4,%r13b
 shrq $4,%r9
 xorq 8(%rsi,%rcx,1),%r8
 movzwq (%r11,%r13,2),%r13
 shlq $60,%r10
 xorq (%rsi,%rcx,1),%r9
 xorq %r10,%r8
 shlq $48,%r13
 bswapq %r8
 xorq %r13,%r9
 bswapq %r9
 cmpq %r15,%r14
 jb .Louter_loop
 movq %r8,8(%rdi)
 movq %r9,(%rdi)

 leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
 movq -48(%rsi),%r15
.cfi_restore %r15
 movq -40(%rsi),%r14
.cfi_restore %r14
 movq -32(%rsi),%r13
.cfi_restore %r13
 movq -24(%rsi),%r12
.cfi_restore %r12
 movq -16(%rsi),%rbp
.cfi_restore %rbp
 movq -8(%rsi),%rbx
.cfi_restore %rbx
 leaq 0(%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lghash_epilogue:
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
.globl gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.cfi_startproc
.byte 243,15,30,250
.L_init_clmul:
 movdqu (%rsi),%xmm2
 pshufd $78,%xmm2,%xmm2

 pshufd $255,%xmm2,%xmm4
 movdqa %xmm2,%xmm3
 psllq $1,%xmm2
 pxor %xmm5,%xmm5
 psrlq $63,%xmm3
 pcmpgtd %xmm4,%xmm5
 pslldq $8,%xmm3
 por %xmm3,%xmm2

 pand .L0x1c2_polynomial(%rip),%xmm5
 pxor %xmm5,%xmm2

 pshufd $78,%xmm2,%xmm6
 movdqa %xmm2,%xmm0
 pxor %xmm2,%xmm6
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm3
 pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
 pxor %xmm0,%xmm3
 pxor %xmm1,%xmm3

 movdqa %xmm3,%xmm4
 psrldq $8,%xmm3
 pslldq $8,%xmm4
 pxor %xmm3,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1

 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 pshufd $78,%xmm2,%xmm3
 pshufd $78,%xmm0,%xmm4
 pxor %xmm2,%xmm3
 movdqu %xmm2,0(%rdi)
 pxor %xmm0,%xmm4
 movdqu %xmm0,16(%rdi)
.byte 102,15,58,15,227,8
 movdqu %xmm4,32(%rdi)
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm3
 pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
 pxor %xmm0,%xmm3
 pxor %xmm1,%xmm3

 movdqa %xmm3,%xmm4
 psrldq $8,%xmm3
 pslldq $8,%xmm4
 pxor %xmm3,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1

 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 movdqa %xmm0,%xmm5
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm3
 pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
 pxor %xmm0,%xmm3
 pxor %xmm1,%xmm3

 movdqa %xmm3,%xmm4
 psrldq $8,%xmm3
 pslldq $8,%xmm4
 pxor %xmm3,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1

 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 pshufd $78,%xmm5,%xmm3
 pshufd $78,%xmm0,%xmm4
 pxor %xmm5,%xmm3
 movdqu %xmm5,48(%rdi)
 pxor %xmm0,%xmm4
 movdqu %xmm0,64(%rdi)
.byte 102,15,58,15,227,8
 movdqu %xmm4,80(%rdi)
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_clmul,.-gcm_init_clmul
.globl gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.cfi_startproc
.byte 243,15,30,250
.L_gmult_clmul:
 movdqu (%rdi),%xmm0
 movdqa .Lbswap_mask(%rip),%xmm5
 movdqu (%rsi),%xmm2
 movdqu 32(%rsi),%xmm4
.byte 102,15,56,0,197
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm3
 pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
 pxor %xmm0,%xmm3
 pxor %xmm1,%xmm3

 movdqa %xmm3,%xmm4
 psrldq $8,%xmm3
 pslldq $8,%xmm4
 pxor %xmm3,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1

 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
.byte 102,15,56,0,197
 movdqu %xmm0,(%rdi)
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
.globl gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 32
gcm_ghash_clmul:
.cfi_startproc
.byte 243,15,30,250
.L_ghash_clmul:
 movdqa .Lbswap_mask(%rip),%xmm10

 movdqu (%rdi),%xmm0
 movdqu (%rsi),%xmm2
 movdqu 32(%rsi),%xmm7
.byte 102,65,15,56,0,194

 subq $0x10,%rcx
 jz .Lodd_tail

 movdqu 16(%rsi),%xmm6
 movl OPENSSL_ia32cap_P+4(%rip),%eax
 cmpq $0x30,%rcx
 jb .Lskip4x

 andl $71303168,%eax
 cmpl $4194304,%eax
 je .Lskip4x

 subq $0x30,%rcx
 movq $0xA040608020C0E000,%rax
 movdqu 48(%rsi),%xmm14
 movdqu 64(%rsi),%xmm15

 movdqu 48(%rdx),%xmm3
 movdqu 32(%rdx),%xmm11
.byte 102,65,15,56,0,218
.byte 102,69,15,56,0,218
 movdqa %xmm3,%xmm5
 pshufd $78,%xmm3,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

 movdqa %xmm11,%xmm13
 pshufd $78,%xmm11,%xmm12
 pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
.byte 102,68,15,58,68,231,16
 xorps %xmm11,%xmm3
 xorps %xmm13,%xmm5
 movups 80(%rsi),%xmm7
 xorps %xmm12,%xmm4

 movdqu 16(%rdx),%xmm11
 movdqu 0(%rdx),%xmm8
.byte 102,69,15,56,0,218
.byte 102,69,15,56,0,194
 movdqa %xmm11,%xmm13
 pshufd $78,%xmm11,%xmm12
 pxor %xmm8,%xmm0
 pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm8
 pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
.byte 102,68,15,58,68,231,0
 xorps %xmm11,%xmm3
 xorps %xmm13,%xmm5

 leaq 64(%rdx),%rdx
 subq $0x40,%rcx
 jc .Ltail4x

 jmp .Lmod4_loop
.align 32
.Lmod4_loop:
.byte 102,65,15,58,68,199,0
 xorps %xmm12,%xmm4
 movdqu 48(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,65,15,58,68,207,17
 xorps %xmm3,%xmm0
 movdqu 32(%rdx),%xmm3
 movdqa %xmm11,%xmm13
.byte 102,68,15,58,68,199,16
 pshufd $78,%xmm11,%xmm12
 xorps %xmm5,%xmm1
 pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
 movups 32(%rsi),%xmm7
 xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
 pshufd $78,%xmm3,%xmm4

 pxor %xmm0,%xmm8
 movdqa %xmm3,%xmm5
 pxor %xmm1,%xmm8
 pxor %xmm3,%xmm4
 movdqa %xmm8,%xmm9
.byte 102,68,15,58,68,234,17
 pslldq $8,%xmm8
 psrldq $8,%xmm9
 pxor %xmm8,%xmm0
 movdqa .L7_mask(%rip),%xmm8
 pxor %xmm9,%xmm1
.byte 102,76,15,110,200

 pand %xmm0,%xmm8
.byte 102,69,15,56,0,200
 pxor %xmm0,%xmm9
.byte 102,68,15,58,68,231,0
 psllq $57,%xmm9
 movdqa %xmm9,%xmm8
 pslldq $8,%xmm9
.byte 102,15,58,68,222,0
 psrldq $8,%xmm8
 pxor %xmm9,%xmm0
 pxor %xmm8,%xmm1
 movdqu 0(%rdx),%xmm8

 movdqa %xmm0,%xmm9
 psrlq $1,%xmm0
.byte 102,15,58,68,238,17
 xorps %xmm11,%xmm3
 movdqu 16(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,15,58,68,231,16
 xorps %xmm13,%xmm5
 movups 80(%rsi),%xmm7
.byte 102,69,15,56,0,194
 pxor %xmm9,%xmm1
 pxor %xmm0,%xmm9
 psrlq $5,%xmm0

 movdqa %xmm11,%xmm13
 pxor %xmm12,%xmm4
 pshufd $78,%xmm11,%xmm12
 pxor %xmm9,%xmm0
 pxor %xmm8,%xmm1
 pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
 xorps %xmm11,%xmm3
 pshufd $78,%xmm0,%xmm8
 pxor %xmm0,%xmm8

.byte 102,68,15,58,68,231,0
 xorps %xmm13,%xmm5

 leaq 64(%rdx),%rdx
 subq $0x40,%rcx
 jnc .Lmod4_loop

.Ltail4x:
.byte 102,65,15,58,68,199,0
.byte 102,65,15,58,68,207,17
.byte 102,68,15,58,68,199,16
 xorps %xmm12,%xmm4
 xorps %xmm3,%xmm0
 xorps %xmm5,%xmm1
 pxor %xmm0,%xmm1
 pxor %xmm4,%xmm8

 pxor %xmm1,%xmm8
 pxor %xmm0,%xmm1

 movdqa %xmm8,%xmm9
 psrldq $8,%xmm8
 pslldq $8,%xmm9
 pxor %xmm8,%xmm1
 pxor %xmm9,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1

 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 addq $0x40,%rcx
 jz .Ldone
 movdqu 32(%rsi),%xmm7
 subq $0x10,%rcx
 jz .Lodd_tail
.Lskip4x:

 movdqu (%rdx),%xmm8
 movdqu 16(%rdx),%xmm3
.byte 102,69,15,56,0,194
.byte 102,65,15,56,0,218
 pxor %xmm8,%xmm0

 movdqa %xmm3,%xmm5
 pshufd $78,%xmm3,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

 leaq 32(%rdx),%rdx
 nop
 subq $0x20,%rcx
 jbe .Leven_tail
 nop
 jmp .Lmod_loop

.align 32
.Lmod_loop:
 movdqa %xmm0,%xmm1
 movdqa %xmm4,%xmm8
 pshufd $78,%xmm0,%xmm4
 pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

 pxor %xmm3,%xmm0
 pxor %xmm5,%xmm1
 movdqu (%rdx),%xmm9
 pxor %xmm0,%xmm8
.byte 102,69,15,56,0,202
 movdqu 16(%rdx),%xmm3

 pxor %xmm1,%xmm8
 pxor %xmm9,%xmm1
 pxor %xmm8,%xmm4
.byte 102,65,15,56,0,218
 movdqa %xmm4,%xmm8
 psrldq $8,%xmm8
 pslldq $8,%xmm4
 pxor %xmm8,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm3,%xmm5

 movdqa %xmm0,%xmm9
 movdqa %xmm0,%xmm8
 psllq $5,%xmm0
 pxor %xmm0,%xmm8
.byte 102,15,58,68,218,0
 psllq $1,%xmm0
 pxor %xmm8,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm8
 pslldq $8,%xmm0
 psrldq $8,%xmm8
 pxor %xmm9,%xmm0
 pshufd $78,%xmm5,%xmm4
 pxor %xmm8,%xmm1
 pxor %xmm5,%xmm4

 movdqa %xmm0,%xmm9
 psrlq $1,%xmm0
.byte 102,15,58,68,234,17
 pxor %xmm9,%xmm1
 pxor %xmm0,%xmm9
 psrlq $5,%xmm0
 pxor %xmm9,%xmm0
 leaq 32(%rdx),%rdx
 psrlq $1,%xmm0
.byte 102,15,58,68,231,0
 pxor %xmm1,%xmm0

 subq $0x20,%rcx
 ja .Lmod_loop

.Leven_tail:
 movdqa %xmm0,%xmm1
 movdqa %xmm4,%xmm8
 pshufd $78,%xmm0,%xmm4
 pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

 pxor %xmm3,%xmm0
 pxor %xmm5,%xmm1
 pxor %xmm0,%xmm8
 pxor %xmm1,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm8
 psrldq $8,%xmm8
 pslldq $8,%xmm4
 pxor %xmm8,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1

 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 testq %rcx,%rcx
 jnz .Ldone

.Lodd_tail:
 movdqu (%rdx),%xmm8
.byte 102,69,15,56,0,194
 pxor %xmm8,%xmm0
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm3
 pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,223,0
 pxor %xmm0,%xmm3
 pxor %xmm1,%xmm3

 movdqa %xmm3,%xmm4
 psrldq $8,%xmm3
 pslldq $8,%xmm4
 pxor %xmm3,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1

 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
.Ldone:
.byte 102,65,15,56,0,194
 movdqu %xmm0,(%rdi)
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_clmul,.-gcm_ghash_clmul
.globl gcm_init_avx
.type gcm_init_avx,@function
.align 32
gcm_init_avx:
.cfi_startproc
.byte 243,15,30,250
 vzeroupper

 vmovdqu (%rsi),%xmm2
 vpshufd $78,%xmm2,%xmm2

 vpshufd $255,%xmm2,%xmm4
 vpsrlq $63,%xmm2,%xmm3
 vpsllq $1,%xmm2,%xmm2
 vpxor %xmm5,%xmm5,%xmm5
 vpcmpgtd %xmm4,%xmm5,%xmm5
 vpslldq $8,%xmm3,%xmm3
 vpor %xmm3,%xmm2,%xmm2

 vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
 vpxor %xmm5,%xmm2,%xmm2

 vpunpckhqdq %xmm2,%xmm2,%xmm6
 vmovdqa %xmm2,%xmm0
 vpxor %xmm2,%xmm6,%xmm6
 movq $4,%r10
 jmp .Linit_start_avx
.align 32
.Linit_loop_avx:
 vpalignr $8,%xmm3,%xmm4,%xmm5
 vmovdqu %xmm5,-16(%rdi)
 vpunpckhqdq %xmm0,%xmm0,%xmm3
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
 vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
 vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
 vpxor %xmm0,%xmm1,%xmm4
 vpxor %xmm4,%xmm3,%xmm3

 vpslldq $8,%xmm3,%xmm4
 vpsrldq $8,%xmm3,%xmm3
 vpxor %xmm4,%xmm0,%xmm0
 vpxor %xmm3,%xmm1,%xmm1
 vpsllq $57,%xmm0,%xmm3
 vpsllq $62,%xmm0,%xmm4
 vpxor %xmm3,%xmm4,%xmm4
 vpsllq $63,%xmm0,%xmm3
 vpxor %xmm3,%xmm4,%xmm4
 vpslldq $8,%xmm4,%xmm3
 vpsrldq $8,%xmm4,%xmm4
 vpxor %xmm3,%xmm0,%xmm0
 vpxor %xmm4,%xmm1,%xmm1

 vpsrlq $1,%xmm0,%xmm4
 vpxor %xmm0,%xmm1,%xmm1
 vpxor %xmm4,%xmm0,%xmm0
 vpsrlq $5,%xmm4,%xmm4
 vpxor %xmm4,%xmm0,%xmm0
 vpsrlq $1,%xmm0,%xmm0
 vpxor %xmm1,%xmm0,%xmm0
.Linit_start_avx:
 vmovdqa %xmm0,%xmm5
 vpunpckhqdq %xmm0,%xmm0,%xmm3
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
 vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
 vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
 vpxor %xmm0,%xmm1,%xmm4
 vpxor %xmm4,%xmm3,%xmm3

 vpslldq $8,%xmm3,%xmm4
 vpsrldq $8,%xmm3,%xmm3
 vpxor %xmm4,%xmm0,%xmm0
 vpxor %xmm3,%xmm1,%xmm1
 vpsllq $57,%xmm0,%xmm3
 vpsllq $62,%xmm0,%xmm4
 vpxor %xmm3,%xmm4,%xmm4
 vpsllq $63,%xmm0,%xmm3
 vpxor %xmm3,%xmm4,%xmm4
 vpslldq $8,%xmm4,%xmm3
 vpsrldq $8,%xmm4,%xmm4
 vpxor %xmm3,%xmm0,%xmm0
 vpxor %xmm4,%xmm1,%xmm1

 vpsrlq $1,%xmm0,%xmm4
 vpxor %xmm0,%xmm1,%xmm1
 vpxor %xmm4,%xmm0,%xmm0
 vpsrlq $5,%xmm4,%xmm4
 vpxor %xmm4,%xmm0,%xmm0
 vpsrlq $1,%xmm0,%xmm0
 vpxor %xmm1,%xmm0,%xmm0
 vpshufd $78,%xmm5,%xmm3
 vpshufd $78,%xmm0,%xmm4
 vpxor %xmm5,%xmm3,%xmm3
 vmovdqu %xmm5,0(%rdi)
 vpxor %xmm0,%xmm4,%xmm4
 vmovdqu %xmm0,16(%rdi)
 leaq 48(%rdi),%rdi
 subq $1,%r10
 jnz .Linit_loop_avx

 vpalignr $8,%xmm4,%xmm3,%xmm5
 vmovdqu %xmm5,-16(%rdi)

 vzeroupper
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_avx,.-gcm_init_avx
.globl gcm_gmult_avx
.type gcm_gmult_avx,@function
.align 32
gcm_gmult_avx:
.cfi_startproc
.byte 243,15,30,250
 jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
.globl gcm_ghash_avx
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
.cfi_startproc
.byte 243,15,30,250
 vzeroupper

 vmovdqu (%rdi),%xmm10
 leaq .L0x1c2_polynomial(%rip),%r10
 leaq 64(%rsi),%rsi
 vmovdqu .Lbswap_mask(%rip),%xmm13
 vpshufb %xmm13,%xmm10,%xmm10
 cmpq $0x80,%rcx
 jb .Lshort_avx
 subq $0x80,%rcx

 vmovdqu 112(%rdx),%xmm14
 vmovdqu 0-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm14
 vmovdqu 32-64(%rsi),%xmm7

 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vmovdqu 96(%rdx),%xmm15
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpxor %xmm14,%xmm9,%xmm9
 vpshufb %xmm13,%xmm15,%xmm15
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 16-64(%rsi),%xmm6
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vmovdqu 80(%rdx),%xmm14
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vpxor %xmm15,%xmm8,%xmm8

 vpshufb %xmm13,%xmm14,%xmm14
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vmovdqu 48-64(%rsi),%xmm6
 vpxor %xmm14,%xmm9,%xmm9
 vmovdqu 64(%rdx),%xmm15
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 80-64(%rsi),%xmm7

 vpshufb %xmm13,%xmm15,%xmm15
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpxor %xmm1,%xmm4,%xmm4
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 64-64(%rsi),%xmm6
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vpxor %xmm15,%xmm8,%xmm8

 vmovdqu 48(%rdx),%xmm14
 vpxor %xmm3,%xmm0,%xmm0
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpxor %xmm4,%xmm1,%xmm1
 vpshufb %xmm13,%xmm14,%xmm14
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vmovdqu 96-64(%rsi),%xmm6
 vpxor %xmm5,%xmm2,%xmm2
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 128-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9

 vmovdqu 32(%rdx),%xmm15
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpxor %xmm1,%xmm4,%xmm4
 vpshufb %xmm13,%xmm15,%xmm15
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 112-64(%rsi),%xmm6
 vpxor %xmm2,%xmm5,%xmm5
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vpxor %xmm15,%xmm8,%xmm8

 vmovdqu 16(%rdx),%xmm14
 vpxor %xmm3,%xmm0,%xmm0
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpxor %xmm4,%xmm1,%xmm1
 vpshufb %xmm13,%xmm14,%xmm14
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vmovdqu 144-64(%rsi),%xmm6
 vpxor %xmm5,%xmm2,%xmm2
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 176-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9

 vmovdqu (%rdx),%xmm15
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpxor %xmm1,%xmm4,%xmm4
 vpshufb %xmm13,%xmm15,%xmm15
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 160-64(%rsi),%xmm6
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2

 leaq 128(%rdx),%rdx
 cmpq $0x80,%rcx
 jb .Ltail_avx

 vpxor %xmm10,%xmm15,%xmm15
 subq $0x80,%rcx
 jmp .Loop8x_avx

.align 32
.Loop8x_avx:
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vmovdqu 112(%rdx),%xmm14
 vpxor %xmm0,%xmm3,%xmm3
 vpxor %xmm15,%xmm8,%xmm8
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
 vpshufb %xmm13,%xmm14,%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
 vmovdqu 0-64(%rsi),%xmm6
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
 vmovdqu 32-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9

 vmovdqu 96(%rdx),%xmm15
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpxor %xmm3,%xmm10,%xmm10
 vpshufb %xmm13,%xmm15,%xmm15
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vxorps %xmm4,%xmm11,%xmm11
 vmovdqu 16-64(%rsi),%xmm6
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vpxor %xmm5,%xmm12,%xmm12
 vxorps %xmm15,%xmm8,%xmm8

 vmovdqu 80(%rdx),%xmm14
 vpxor %xmm10,%xmm12,%xmm12
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpxor %xmm11,%xmm12,%xmm12
 vpslldq $8,%xmm12,%xmm9
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vpsrldq $8,%xmm12,%xmm12
 vpxor %xmm9,%xmm10,%xmm10
 vmovdqu 48-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm14
 vxorps %xmm12,%xmm11,%xmm11
 vpxor %xmm1,%xmm4,%xmm4
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 80-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9
 vpxor %xmm2,%xmm5,%xmm5

 vmovdqu 64(%rdx),%xmm15
 vpalignr $8,%xmm10,%xmm10,%xmm12
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpshufb %xmm13,%xmm15,%xmm15
 vpxor %xmm3,%xmm0,%xmm0
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 64-64(%rsi),%xmm6
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm4,%xmm1,%xmm1
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vxorps %xmm15,%xmm8,%xmm8
 vpxor %xmm5,%xmm2,%xmm2

 vmovdqu 48(%rdx),%xmm14
 vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpshufb %xmm13,%xmm14,%xmm14
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vmovdqu 96-64(%rsi),%xmm6
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 128-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9
 vpxor %xmm2,%xmm5,%xmm5

 vmovdqu 32(%rdx),%xmm15
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpshufb %xmm13,%xmm15,%xmm15
 vpxor %xmm3,%xmm0,%xmm0
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 112-64(%rsi),%xmm6
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm4,%xmm1,%xmm1
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vpxor %xmm15,%xmm8,%xmm8
 vpxor %xmm5,%xmm2,%xmm2
 vxorps %xmm12,%xmm10,%xmm10

 vmovdqu 16(%rdx),%xmm14
 vpalignr $8,%xmm10,%xmm10,%xmm12
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpshufb %xmm13,%xmm14,%xmm14
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vmovdqu 144-64(%rsi),%xmm6
 vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
 vxorps %xmm11,%xmm12,%xmm12
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 176-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9
 vpxor %xmm2,%xmm5,%xmm5

 vmovdqu (%rdx),%xmm15
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpshufb %xmm13,%xmm15,%xmm15
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 160-64(%rsi),%xmm6
 vpxor %xmm12,%xmm15,%xmm15
 vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
 vpxor %xmm10,%xmm15,%xmm15

 leaq 128(%rdx),%rdx
 subq $0x80,%rcx
 jnc .Loop8x_avx

 addq $0x80,%rcx
 jmp .Ltail_no_xor_avx

.align 32
.Lshort_avx:
 vmovdqu -16(%rdx,%rcx,1),%xmm14
 leaq (%rdx,%rcx,1),%rdx
 vmovdqu 0-64(%rsi),%xmm6
 vmovdqu 32-64(%rsi),%xmm7
 vpshufb %xmm13,%xmm14,%xmm15

 vmovdqa %xmm0,%xmm3
 vmovdqa %xmm1,%xmm4
 vmovdqa %xmm2,%xmm5
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -32(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 16-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vpsrldq $8,%xmm7,%xmm7
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -48(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 48-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vmovdqu 80-64(%rsi),%xmm7
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -64(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 64-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vpsrldq $8,%xmm7,%xmm7
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -80(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 96-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vmovdqu 128-64(%rsi),%xmm7
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -96(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 112-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vpsrldq $8,%xmm7,%xmm7
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -112(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 144-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vmovq 184-64(%rsi),%xmm7
 subq $0x10,%rcx
 jmp .Ltail_avx

.align 32
.Ltail_avx:
 vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2

 vmovdqu (%r10),%xmm12

 vpxor %xmm0,%xmm3,%xmm10
 vpxor %xmm1,%xmm4,%xmm11
 vpxor %xmm2,%xmm5,%xmm5

 vpxor %xmm10,%xmm5,%xmm5
 vpxor %xmm11,%xmm5,%xmm5
 vpslldq $8,%xmm5,%xmm9
 vpsrldq $8,%xmm5,%xmm5
 vpxor %xmm9,%xmm10,%xmm10
 vpxor %xmm5,%xmm11,%xmm11

 vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
 vpalignr $8,%xmm10,%xmm10,%xmm10
 vpxor %xmm9,%xmm10,%xmm10

 vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
 vpalignr $8,%xmm10,%xmm10,%xmm10
 vpxor %xmm11,%xmm10,%xmm10
 vpxor %xmm9,%xmm10,%xmm10

 cmpq $0,%rcx
 jne .Lshort_avx

 vpshufb %xmm13,%xmm10,%xmm10
 vmovdqu %xmm10,(%rdi)
 vzeroupper
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
.section .rodata
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
.align 64
.type .Lrem_4bit,@object
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type .Lrem_8bit,@object
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.previous
 .section ".note.gnu.property", "a"
 .p2align 3
 .long 1f - 0f
 .long 4f - 1f
 .long 5
0:
 # "GNU" encoded with .byte, since .asciz isn't supported
 # on Solaris.
 .byte 0x47
 .byte 0x4e
 .byte 0x55
 .byte 0
1:
 .p2align 3
 .long 0xc0000002
 .long 3f - 2f
2:
 .long 3
3:
 .p2align 3
4: