//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
class X86Subtarget;
class X86TargetMachine;

namespace X86ISD {
// X86 Specific DAG Nodes
enum NodeType : unsigned {
  // Start the numbering where the builtin ops leave off.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  /// Bit scan forward.
  BSF,
  /// Bit scan reverse.
  BSR,

  /// X86 funnel/double shift i16 instructions. These correspond to
  /// X86::SHLDW and X86::SHRDW instructions which have different amt
  /// modulo rules from generic funnel shifts.
  /// NOTE: The operand order matches ISD::FSHL/FSHR, not SHLD/SHRD.
  FSHL,
  FSHR,

  /// Bitwise logical AND of floating point values. This corresponds
  /// to X86::ANDPS or X86::ANDPD.
  FAND,

  /// Bitwise logical OR of floating point values. This corresponds
  /// to X86::ORPS or X86::ORPD.
  FOR,

  /// Bitwise logical XOR of floating point values. This corresponds
  /// to X86::XORPS or X86::XORPD.
  FXOR,

  /// Bitwise logical ANDNOT of floating point values. This
  /// corresponds to X86::ANDNPS or X86::ANDNPD.
  FANDN,

  /// These operations represent an abstract X86 call
  /// instruction, which includes a bunch of information. In particular the
  /// operands of these nodes are:
  ///
  ///     #0 - The incoming token chain
  ///     #1 - The callee
  ///     #2 - The number of arg bytes the caller pushes on the stack.
  ///     #3 - The number of arg bytes the callee pops off the stack.
  ///     #4 - The value to pass in AL/AX/EAX (optional)
  ///     #5 - The value to pass in DL/DX/EDX (optional)
  ///
  /// The result values of these nodes are:
  ///
  ///     #0 - The outgoing token chain
  ///     #1 - The first register result value (optional)
  ///     #2 - The second register result value (optional)
  ///
  CALL,

  /// Same as call except it adds the NoTrack prefix.
  NT_CALL,

  // Pseudo for an OBJC call that gets emitted together with a special
  // marker instruction.
  CALL_RVMARKER,

  /// X86 compare and logical compare instructions.
  CMP,
  FCMP,
  COMI,
  UCOMI,

  /// X86 bit-test instructions.
  BT,

  /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  /// operand, usually produced by a CMP instruction.
  SETCC,

  /// X86 Select
  SELECTS,

  // Same as SETCC except it's materialized with a sbb and the value is all
  // ones or all zeros.
  SETCC_CARRY, // R = carry_bit ? ~0 : 0

  /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  /// Operands are two FP values to compare; result is a mask of
  /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
  FSETCC,

  /// X86 FP SETCC, similar to above, but with output as an i1 mask and
  /// a version with SAE.
  FSETCCM,
  FSETCCM_SAE,

  /// X86 conditional moves. Operand 0 and operand 1 are the two values
  /// to select from. Operand 2 is the condition code, and operand 3 is the
  /// flag operand produced by a CMP or TEST instruction.
  CMOV,

  /// X86 conditional branches. Operand 0 is the chain operand, operand 1
  /// is the block to branch if condition is true, operand 2 is the
  /// condition code, and operand 3 is the flag operand produced by a CMP
  /// or TEST instruction.
  BRCOND,

  /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
  /// operand 1 is the target address.
  NT_BRIND,

  /// Return with a glue operand. Operand 0 is the chain operand, operand
  /// 1 is the number of bytes of stack to pop.
  RET_GLUE,

  /// Return from interrupt. Operand 0 is the number of bytes to pop.
  IRET,

  /// Repeat fill, corresponds to X86::REP_STOSx.
  REP_STOS,

  /// Repeat move, corresponds to X86::REP_MOVSx.
  REP_MOVS,

  /// On Darwin, this node represents the result of the popl
  /// at function entry, used for PIC code.
  GlobalBaseReg,

  /// A wrapper node for TargetConstantPool, TargetJumpTable,
  /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
  /// MCSymbol and TargetBlockAddress.
  Wrapper,

  /// Special wrapper used under X86-64 PIC mode for RIP
  /// relative displacements.
  WrapperRIP,

  /// Copies a 64-bit value from an MMX vector to the low word
  /// of an XMM vector, with the high word zero filled.
  MOVQ2DQ,

  /// Copies a 64-bit value from the low word of an XMM vector
  /// to an MMX vector.
  MOVDQ2Q,

  /// Copies a 32-bit value from the low word of an MMX
  /// vector to a GPR.
  MMX_MOVD2W,

  /// Copies a GPR into the low 32-bit word of an MMX vector
  /// and zeroes out the high word.
  MMX_MOVW2D,

  /// Extract an 8-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRB.
  PEXTRB,

  /// Extract a 16-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRW.
  PEXTRW,

  /// Insert any element of a 4 x float vector into any element
  /// of a destination 4 x float vector.
  INSERTPS,

  /// Insert the lower 8-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRB.
  PINSRB,

  /// Insert the lower 16-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRW.
  PINSRW,

  /// Shuffle 16 8-bit values within a vector.
  PSHUFB,

  /// Compute Sum of Absolute Differences.
  PSADBW,
  /// Compute Double Block Packed Sum-Absolute-Differences
  DBPSADBW,

  /// Bitwise Logical AND NOT of Packed FP values.
  ANDNP,

  /// Blend where the selector is an immediate.
  BLENDI,

  /// Dynamic (non-constant condition) vector blend where only the sign bits
  /// of the condition elements are used. This is used to enforce that the
  /// condition mask is not valid for generic VSELECT optimizations. This
  /// is also used to implement the intrinsics.
  /// Operands are in VSELECT order: MASK, TRUE, FALSE
  BLENDV,
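
  // Illustrative only (a sketch, not taken from this file): for v4f32,
  // element i of the result of BLENDV is
  //   (sign bit of MASK[i] set) ? TRUE[i] : FALSE[i]
  // which is why only the sign bits of the condition operand matter.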

  /// Combined add and sub on an FP vector.
  ADDSUB,

  // FP vector ops with rounding mode.
  FADD_RND,
  FADDS,
  FADDS_RND,
  FSUB_RND,
  FSUBS,
  FSUBS_RND,
  FMUL_RND,
  FMULS,
  FMULS_RND,
  FDIV_RND,
  FDIVS,
  FDIVS_RND,
  FMAX_SAE,
  FMAXS_SAE,
  FMIN_SAE,
  FMINS_SAE,
  FSQRT_RND,
  FSQRTS,
  FSQRTS_RND,

  // FP vector get exponent.
  FGETEXP,
  FGETEXP_SAE,
  FGETEXPS,
  FGETEXPS_SAE,
  // Extract Normalized Mantissas.
  VGETMANT,
  VGETMANT_SAE,
  VGETMANTS,
  VGETMANTS_SAE,
  // FP Scale.
  SCALEF,
  SCALEF_RND,
  SCALEFS,
  SCALEFS_RND,

  /// Integer horizontal add/sub.
  HADD,
  HSUB,

  /// Floating point horizontal add/sub.
  FHADD,
  FHSUB,

  // Detect Conflicts Within a Vector
  CONFLICT,

  /// Floating point max and min.
  FMAX,
  FMIN,

  /// Commutative FMIN and FMAX.
  FMAXC,
  FMINC,

  /// Scalar intrinsic floating point max and min.
  FMAXS,
  FMINS,

  /// Floating point reciprocal-sqrt and reciprocal approximation.
  /// Note that these typically require refinement
  /// in order to obtain suitable precision.
  FRSQRT,
  FRCP,

  // AVX-512 reciprocal approximations with a little more precision.
  RSQRT14,
  RSQRT14S,
  RCP14,
  RCP14S,

  // Thread Local Storage.
  TLSADDR,

  // Thread Local Storage. A call to get the start address
  // of the TLS block for the current module.
  TLSBASEADDR,

  // Thread Local Storage. When calling to an OS provided
  // thunk at the address from an earlier relocation.
  TLSCALL,

  // Thread Local Storage. A descriptor containing pointer to
  // code and to argument to get the TLS offset for the symbol.
  TLSDESC,

  // Exception Handling helpers.
  EH_RETURN,

  // SjLj exception handling setjmp.
  EH_SJLJ_SETJMP,

  // SjLj exception handling longjmp.
  EH_SJLJ_LONGJMP,

  // SjLj exception handling dispatch.
  EH_SJLJ_SETUP_DISPATCH,

  /// Tail call return. See X86TargetLowering::LowerCall for
  /// the list of operands.
  TC_RETURN,

  // Vector move to low scalar and zero higher vector elements.
  VZEXT_MOVL,

  // Vector integer truncate.
  VTRUNC,
  // Vector integer truncate with unsigned/signed saturation.
  VTRUNCUS,
  VTRUNCS,

  // Masked version of the above. Used when less than a 128-bit result is
  // produced since the mask only applies to the lower elements and can't
  // be represented by a select.
  // SRC, PASSTHRU, MASK
  VMTRUNC,
  VMTRUNCUS,
  VMTRUNCS,

  // Vector FP extend.
  VFPEXT,
  VFPEXT_SAE,
  VFPEXTS,
  VFPEXTS_SAE,

  // Vector FP round.
  VFPROUND,
  VFPROUND_RND,
  VFPROUNDS,
  VFPROUNDS_RND,

  // Masked version of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  VMFPROUND,

  // 128-bit vector logical left / right shift
  VSHLDQ,
  VSRLDQ,

  // Vector shift elements
  VSHL,
  VSRL,
  VSRA,

  // Vector variable shift
  VSHLV,
  VSRLV,
  VSRAV,

  // Vector shift elements by immediate
  VSHLI,
  VSRLI,
  VSRAI,

  // Shifts of mask registers.
  KSHIFTL,
  KSHIFTR,

  // Bit rotate by immediate
  VROTLI,
  VROTRI,

  // Vector packed double/float comparison.
  CMPP,

  // Vector integer comparisons.
  PCMPEQ,
  PCMPGT,

  // v8i16 Horizontal minimum and position.
  PHMINPOS,

  MULTISHIFT,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  CMPM,
  // Vector mask comparison generating mask bits for FP values.
  CMPMM,
  // Vector mask comparison with SAE for FP values.
  CMPMM_SAE,

  // Arithmetic operations with FLAGS results.
  ADD,
  SUB,
  ADC,
  SBB,
  SMUL,
  UMUL,
  OR,
  XOR,
  AND,

  // Bit field extract.
  BEXTR,
  BEXTRI,

  // Zero High Bits Starting with Specified Bit Position.
  BZHI,

  // Parallel extract and deposit.
  PDEP,
  PEXT,

  // X86-specific multiply by immediate.
  MUL_IMM,

  // Vector sign bit extraction.
  MOVMSK,

  // Vector bitwise comparisons.
  PTEST,

  // Vector packed fp sign bitwise comparisons.
  TESTP,

  // OR/AND test for masks.
  KORTEST,
  KTEST,

  // ADD for masks.
  KADD,

  // Several flavors of instructions with vector shuffle behaviors.
  // Saturated signed/unsigned packing.
  PACKSS,
  PACKUS,
  // Intra-lane alignr.
  PALIGNR,
  // AVX512 inter-lane alignr.
  VALIGN,
  PSHUFD,
  PSHUFHW,
  PSHUFLW,
  SHUFP,
  // VBMI2 Concat & Shift.
  VSHLD,
  VSHRD,
  VSHLDV,
  VSHRDV,
  // Shuffle Packed Values at 128-bit granularity.
  SHUF128,
  MOVDDUP,
  MOVSHDUP,
  MOVSLDUP,
  MOVLHPS,
  MOVHLPS,
  MOVSD,
  MOVSS,
  MOVSH,
  UNPCKL,
  UNPCKH,
  VPERMILPV,
  VPERMILPI,
  VPERMI,
  VPERM2X128,

  // Variable Permute (VPERM).
  // Res = VPERMV MaskV, V0
  VPERMV,

  // 3-op Variable Permute (VPERMT2).
  // Res = VPERMV3 V0, MaskV, V1
  VPERMV3,

  // Bitwise ternary logic.
  VPTERNLOG,
  // Fix Up Special Packed Float32/64 values.
  VFIXUPIMM,
  VFIXUPIMM_SAE,
  VFIXUPIMMS,
  VFIXUPIMMS_SAE,
  // Range Restriction Calculation For Packed Pairs of Float32/64 values.
  VRANGE,
  VRANGE_SAE,
  VRANGES,
  VRANGES_SAE,
  // Reduce - Perform Reduction Transformation on scalar/packed FP.
  VREDUCE,
  VREDUCE_SAE,
  VREDUCES,
  VREDUCES_SAE,
  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  VRNDSCALE,
  VRNDSCALE_SAE,
  VRNDSCALES,
  VRNDSCALES_SAE,
  // Tests the types of packed FP values.
  VFPCLASS,
  // Tests the types of scalar FP values.
  VFPCLASSS,

  // Broadcast (splat) scalar or element 0 of a vector. If the operand is
  // a vector, this node may change the vector length as part of the splat.
  VBROADCAST,
  // Broadcast mask to vector.
  VBROADCASTM,

  /// SSE4A Extraction and Insertion.
  EXTRQI,
  INSERTQI,

  // XOP arithmetic/logical shifts.
  VPSHA,
  VPSHL,
  // XOP signed/unsigned integer comparisons.
  VPCOM,
  VPCOMU,
  // XOP packed permute bytes.
  VPPERM,
  // XOP two source permutation.
  VPERMIL2,

  // Vector multiply packed unsigned doubleword integers.
  PMULUDQ,
  // Vector multiply packed signed doubleword integers.
  PMULDQ,
  // Vector Multiply Packed Unsigned Integers with Round and Scale.
  MULHRS,

  // Multiply and Add Packed Integers.
  VPMADDUBSW,
  VPMADDWD,

  // AVX512IFMA multiply and add.
  // NOTE: These are different from the instructions and perform
  // op0 x op1 + op2.
  VPMADD52L,
  VPMADD52H,

  // VNNI
  VPDPBUSD,
  VPDPBUSDS,
  VPDPWSSD,
  VPDPWSSDS,

  // FMA nodes.
  // We use the target independent ISD::FMA for the non-inverted case.
  FNMADD,
  FMSUB,
  FNMSUB,
  FMADDSUB,
  FMSUBADD,

  // FMA with rounding mode.
  FMADD_RND,
  FNMADD_RND,
  FMSUB_RND,
  FNMSUB_RND,
  FMADDSUB_RND,
  FMSUBADD_RND,

  // AVX512-FP16 complex addition and multiplication.
  VFMADDC,
  VFMADDC_RND,
  VFCMADDC,
  VFCMADDC_RND,

  VFMULC,
  VFMULC_RND,
  VFCMULC,
  VFCMULC_RND,

  VFMADDCSH,
  VFMADDCSH_RND,
  VFCMADDCSH,
  VFCMADDCSH_RND,

  VFMULCSH,
  VFMULCSH_RND,
  VFCMULCSH,
  VFCMULCSH_RND,

  VPDPBSUD,
  VPDPBSUDS,
  VPDPBUUD,
  VPDPBUUDS,
  VPDPBSSD,
  VPDPBSSDS,

  // Compress and expand.
  COMPRESS,
  EXPAND,

  // Bits shuffle
  VPSHUFBITQMB,

  // Convert Unsigned/Signed Integer to Floating-Point Value with rounding
  // mode.
  SINT_TO_FP_RND,
  UINT_TO_FP_RND,
  SCALAR_SINT_TO_FP,
  SCALAR_UINT_TO_FP,
  SCALAR_SINT_TO_FP_RND,
  SCALAR_UINT_TO_FP_RND,

  // Vector float/double to signed/unsigned integer.
  CVTP2SI,
  CVTP2UI,
  CVTP2SI_RND,
  CVTP2UI_RND,
  // Scalar float/double to signed/unsigned integer.
  CVTS2SI,
  CVTS2UI,
  CVTS2SI_RND,
  CVTS2UI_RND,

  // Vector float/double to signed/unsigned integer with truncation.
  CVTTP2SI,
  CVTTP2UI,
  CVTTP2SI_SAE,
  CVTTP2UI_SAE,
  // Scalar float/double to signed/unsigned integer with truncation.
  CVTTS2SI,
  CVTTS2UI,
  CVTTS2SI_SAE,
  CVTTS2UI_SAE,

  // Vector signed/unsigned integer to float/double.
  CVTSI2P,
  CVTUI2P,

  // Masked versions of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  MCVTP2SI,
  MCVTP2UI,
  MCVTTP2SI,
  MCVTTP2UI,
  MCVTSI2P,
  MCVTUI2P,

  // Vector float to bfloat16.
  // Convert two packed single data to one packed BF16 data.
  CVTNE2PS2BF16,
  // Convert packed single data to packed BF16 data.
  CVTNEPS2BF16,
  // Masked version of above.
  // SRC, PASSTHRU, MASK
  MCVTNEPS2BF16,

  // Dot product of BF16 pairs, accumulated into
  // packed single precision.
  DPBF16PS,

  // A stack checking function call. On Windows it's _chkstk call.
  DYN_ALLOCA,

  // For allocating variable amounts of stack space when using
  // segmented stacks. Check if the current stacklet has enough space, and
  // falls back to heap allocation if not.
  SEG_ALLOCA,

  // For allocating stack space when using stack clash protector.
  // Allocation is performed by block, and each block is probed.
  PROBED_ALLOCA,

  // Memory barriers.
  MFENCE,

  // Get a random integer and indicate whether it is valid in CF.
  RDRAND,

  // Get a NIST SP800-90B & C compliant random integer and
  // indicate whether it is valid in CF.
  RDSEED,

  // Protection keys
  // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
  // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
  // value for ECX.
  RDPKRU,
  WRPKRU,

  // SSE42 string comparisons.
  // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
  // will emit one or two instructions based on which results are used. If
  // both flags and index/mask are used, this allows us to use a single
  // instruction since we won't have to pick an opcode for flags. Instead we
  // can rely on the DAG to CSE everything and decide at isel.
  PCMPISTR,
  PCMPESTR,

  // Test if in transactional execution.
  XTEST,

  // Conversions between float and half-float.
  CVTPS2PH,
  CVTPS2PH_SAE,
  CVTPH2PS,
  CVTPH2PS_SAE,

  // Masked version of above.
  // SRC, RND, PASSTHRU, MASK
  MCVTPS2PH,
  MCVTPS2PH_SAE,

  // Galois Field Arithmetic Instructions
  GF2P8AFFINEINVQB,
  GF2P8AFFINEQB,
  GF2P8MULB,

  // LWP insert record.
  LWPINS,

  // User level wait
  UMWAIT,
  TPAUSE,

  // Enqueue Stores Instructions
  ENQCMD,
  ENQCMDS,

  // For avx512-vp2intersect
  VP2INTERSECT,

  // User level interrupts - testui
  TESTUI,

  // Perform an FP80 add after changing precision control in FPCW.
  FP80_ADD,

  // Conditional compare instructions
  CCMP,
  CTEST,

  /// X86 strict FP compare instructions.
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPS,

  // Vector packed double/float comparison.
  STRICT_CMPP,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  STRICT_CMPM,

  // Vector float/double to signed/unsigned integer with truncation.
  STRICT_CVTTP2SI,
  STRICT_CVTTP2UI,

  // Vector FP extend.
  STRICT_VFPEXT,

  // Vector FP round.
  STRICT_VFPROUND,

  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  STRICT_VRNDSCALE,

  // Vector signed/unsigned integer to float/double.
  STRICT_CVTSI2P,
  STRICT_CVTUI2P,

  // Strict FMA nodes.
  STRICT_FNMADD,
  STRICT_FMSUB,
  STRICT_FNMSUB,

  // Conversions between float and half-float.
  STRICT_CVTPS2PH,
  STRICT_CVTPH2PS,

  // Perform an FP80 add after changing precision control in FPCW.
  STRICT_FP80_ADD,

  // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
  // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.

  // Compare and swap.
  LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LCMPXCHG8_DAG,
  LCMPXCHG16_DAG,
  LCMPXCHG16_SAVE_RBX_DAG,

  /// LOCK-prefixed arithmetic read-modify-write instructions.
  /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
  LADD,
  LSUB,
  LOR,
  LXOR,
  LAND,
  LBTS,
  LBTC,
  LBTR,
  LBTS_RM,
  LBTC_RM,
  LBTR_RM,

  /// RAO arithmetic instructions.
  /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
  AADD,
  AOR,
  AXOR,
  AAND,

  // Load, scalar_to_vector, and zero extend.
  VZEXT_LOAD,

  // extract_vector_elt, store.
  VEXTRACT_STORE,

  // scalar broadcast from memory.
  VBROADCAST_LOAD,

  // subvector broadcast from memory.
  SUBV_BROADCAST_LOAD,

  // Store FP control word into i16 memory.
  FNSTCW16m,

  // Load FP control word from i16 memory.
  FLDCW16m,

  // Store x87 FPU environment into memory.
  FNSTENVm,

  // Load x87 FPU environment from memory.
  FLDENVm,

  /// This instruction implements FP_TO_SINT with the
  /// integer destination in memory and a FP reg source. This corresponds
  /// to the X86::FIST*m instructions and the rounding mode change stuff. It
  /// has two inputs (token chain and address) and two outputs (int value
  /// and token chain). Memory VT specifies the type to store to.
  FP_TO_INT_IN_MEM,

  /// This instruction implements SINT_TO_FP with the
  /// integer source in memory and FP reg result. This corresponds to the
  /// X86::FILD*m instructions. It has two inputs (token chain and address)
  /// and two outputs (FP value and token chain). The integer source type is
  /// specified by the memory VT.
  FILD,

  /// This instruction implements a fp->int store from FP stack
  /// slots. This corresponds to the fist instruction. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FIST,

  /// This instruction implements an extending load to FP stack slots.
  /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
  /// operand, and ptr to load from. The memory VT specifies the type to
  /// load from.
  FLD,

  /// This instruction implements a truncating store from FP stack
  /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FST,

  /// These instructions grab the address of the next argument
  /// from a va_list. (reads and modifies the va_list in memory)
  VAARG_64,
  VAARG_X32,

  // Vector truncating store with unsigned/signed saturation
  VTRUNCSTOREUS,
  VTRUNCSTORES,
  // Vector truncating masked store with unsigned/signed saturation
  VMTRUNCSTOREUS,
  VMTRUNCSTORES,

  // X86 specific gather and scatter
  MGATHER,
  MSCATTER,

  // Key locker nodes that produce flags.
  AESENC128KL,
  AESDEC128KL,
  AESENC256KL,
  AESDEC256KL,
  AESENCWIDE128KL,
  AESDECWIDE128KL,
  AESENCWIDE256KL,
  AESDECWIDE256KL,

  /// Compare and Add if Condition is Met. Compare value in operand 2 with
  /// value in memory of operand 1. If condition of operand 4 is met, add
  /// value operand 3 to m32 and write new value in operand 1. Operand 2 is
  /// always updated with the original value from operand 1.
  CMPCCXADD,

  // Save xmm argument registers to the stack, according to %al. An operator
  // is needed so that this can be expanded with control flow.
  VASTART_SAVE_XMM_REGS,

  // Conditional load/store instructions
  CLOAD,
  CSTORE,

  // WARNING: Do not add anything in the end unless you want the node to
  // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
  // opcodes will be treated as target memory ops!
};
} // end namespace X86ISD

namespace X86 {
/// Current rounding mode is represented in bits 11:10 of FPSR. These
/// values are the same as the corresponding constants for rounding mode used
/// in glibc.
enum RoundingMode {
  rmToNearest = 0,        // FE_TONEAREST
  rmDownward = 1 << 10,   // FE_DOWNWARD
  rmUpward = 2 << 10,     // FE_UPWARD
  rmTowardZero = 3 << 10, // FE_TOWARDZERO
  rmMask = 3 << 10        // Bit mask selecting rounding mode
};
} // end namespace X86
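
// Illustrative sketch (not part of the interface above): the RoundingMode
// enumerators are already shifted into bit positions 11:10, so code that has
// read the control/status value can classify the mode directly. CWValue below
// is a hypothetical variable holding that loaded value:
//   unsigned RM = CWValue & X86::rmMask;
//   bool RoundsTowardZero = (RM == X86::rmTowardZero);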

/// Define some predicates that are used for node matching.
namespace X86 {
/// Returns true if Elt is a constant zero or floating point constant +0.0.
bool isZeroNode(SDValue Elt);

/// Returns true if the given offset can fit into the displacement field of
/// the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                  bool hasSymbolicDisplacement);

/// Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool isCalleePop(CallingConv::ID CallingConv,
                 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

/// If Op is a constant whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatVal.
/// If we have undef bits that don't cover an entire element, we treat these
/// as zero if AllowPartialUndefs is set, else we fail and return false.
bool isConstantSplat(SDValue Op, APInt &SplatVal,
                     bool AllowPartialUndefs = true);

/// Check if Op is a load operation that could be folded into some other x86
/// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                 bool AssumeSingleUse = false);

/// Check if Op is a load operation that could be folded into a vector splat
/// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                     const X86Subtarget &Subtarget,
                                     bool AssumeSingleUse = false);

/// Check if Op is a value that could be used to fold a store into some
/// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
bool mayFoldIntoStore(SDValue Op);

/// Check if Op is an operation that could be folded into a zero extend x86
/// instruction.
bool mayFoldIntoZeroExtend(SDValue Op);

/// True if the target supports the extended frame for async Swift
/// functions.
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
                                        const MachineFunction &MF);
} // end namespace X86
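
// Illustrative use of the predicates above (a sketch, not code from this
// file); N is a hypothetical SDNode and the sign-mask test is only an
// example:
//   APInt SplatVal;
//   if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
//       SplatVal.isSignMask())
//     ...; // operand 1 is a splat of the sign-bit constant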
For example, on X86 1018 /// targets without SSE2 f64 load / store are done with fldl / fstpl which 1019 /// also does type conversion. Note the specified type doesn't have to be 1020 /// legal as the hook is used before type legalization. 1021 bool isSafeMemOpType(MVT VT) const override; 1022 1023 bool isMemoryAccessFast(EVT VT, Align Alignment) const; 1024 1025 /// Returns true if the target allows unaligned memory accesses of the 1026 /// specified type. Returns whether it is "fast" in the last argument. 1027 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, 1028 MachineMemOperand::Flags Flags, 1029 unsigned *Fast) const override; 1030 1031 /// This function returns true if the memory access is aligned or if the 1032 /// target allows this specific unaligned memory access. If the access is 1033 /// allowed, the optional final parameter returns a relative speed of the 1034 /// access (as defined by the target). 1035 bool allowsMemoryAccess( 1036 LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, 1037 Align Alignment, 1038 MachineMemOperand::Flags Flags = MachineMemOperand::MONone, 1039 unsigned *Fast = nullptr) const override; 1040 allowsMemoryAccess(LLVMContext & Context,const DataLayout & DL,EVT VT,const MachineMemOperand & MMO,unsigned * Fast)1041 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, 1042 const MachineMemOperand &MMO, 1043 unsigned *Fast) const { 1044 return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), 1045 MMO.getAlign(), MMO.getFlags(), Fast); 1046 } 1047 1048 /// Provide custom lowering hooks for some operations. 1049 /// 1050 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 1051 1052 /// Replace the results of node with an illegal result 1053 /// type with new values built out of custom code. 1054 /// 1055 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, 1056 SelectionDAG &DAG) const override; 1057 1058 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 1059 1060 bool preferABDSToABSWithNSW(EVT VT) const override; 1061 1062 bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, 1063 EVT ExtVT) const override; 1064 1065 bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond, 1066 EVT VT) const override; 1067 1068 /// Return true if the target has native support for 1069 /// the specified value type and it is 'desirable' to use the type for the 1070 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 1071 /// instruction encodings are longer and some i16 instructions are slow. 1072 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; 1073 1074 /// Return true if the target has native support for the 1075 /// specified value type and it is 'desirable' to use the type. e.g. On x86 1076 /// i16 is legal, but undesirable since i16 instruction encodings are longer 1077 /// and some i16 instructions are slow. 1078 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; 1079 1080 /// Return prefered fold type, Abs if this is a vector, AddAnd if its an 1081 /// integer, None otherwise. 1082 TargetLowering::AndOrSETCCFoldKind 1083 isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, 1084 const SDNode *SETCC0, 1085 const SDNode *SETCC1) const override; 1086 1087 /// Return the newly negated expression if the cost is not expensive and 1088 /// set the cost in \p Cost to indicate that if it is cheaper or neutral to 1089 /// do the negation. 
  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                               bool LegalOperations, bool ForCodeSize,
                               NegatibleCost &Cost,
                               unsigned Depth) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  /// This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  /// Do not merge vector stores after legalization because that may conflict
  /// with x86-specific store splitting optimizations.
  bool mergeStoresAfterLegalization(EVT MemVT) const override {
    return !MemVT.isVector();
  }

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override;

  bool isCheapToSpeculateCttz(Type *Ty) const override;

  bool isCheapToSpeculateCtlz(Type *Ty) const override;

  bool isCtlzFast() const override;

  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // increase one store instruction. There is potentially a more
    // significant benefit because it avoids the float->int domain switch
    // for input value. So it is more likely a win.
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;
    // If the pair only contains int values, we will save two bitwise
    // instructions and increase one store instruction (costing one more
    // store buffer). Since the benefit is less clear, we leave such pairs
    // out until we have a test case proving it is a win.
    return false;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue Y) const override;

  bool hasAndNot(SDValue Y) const override;

  bool hasBitTest(SDValue X, SDValue Y) const override;

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  unsigned preferedOpcodeForCmpEqPiecesOfOperand(
      EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
      const APInt &ShiftOrRotateAmt,
      const std::optional<APInt> &AndMask) const override;

  bool preferScalarizeSplat(SDNode *N) const override;

  CondMergingParams
  getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
                                const Value *Rhs) const override;

  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

  bool
  shouldTransformSignedTruncationCheck(EVT XVT,
                                       unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what MOVS
    // supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldSplatInsEltVarIndex(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
    // Converting to sat variants holds little benefit on X86 as we will just
    // need to saturate the value back using fp arithmetic.
    return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
  }

  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }

  /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
  MVT hasFastEqualityCompare(unsigned NumBits) const override;

  /// Return the value type to use for ISD::SETCC.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  /// Determine which of the bits specified in Mask are known to be either
  /// zero or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  /// Determine the number of bits in the operation that are sign bits.
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                               const APInt &DemandedElts,
                                               APInt &KnownUndef,
                                               APInt &KnownZero,
                                               TargetLoweringOpt &TLO,
                                               unsigned Depth) const override;

  bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                  const APInt &DemandedElts,
                                                  unsigned MaskIndex,
                                                  TargetLoweringOpt &TLO,
                                                  unsigned Depth) const;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &DemandedBits,
                                         const APInt &DemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
      SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
      SelectionDAG &DAG, unsigned Depth) const override;

  bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
      bool PoisonOnly, unsigned Depth) const override;

  bool canCreateUndefOrPoisonForTargetNode(
      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
      bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;

  bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                 APInt &UndefElts, const SelectionDAG &DAG,
                                 unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override {
    // Peek through bitcasts/extracts/inserts to see if we have a broadcast
    // vector from memory.
    while (Op.getOpcode() == ISD::BITCAST ||
           Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
           (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
            Op.getOperand(0).isUndef()))
      Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);

    return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
           TargetLowering::isTargetCanonicalConstantNode(Op);
  }

  const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

  SDValue unwrapAddress(SDValue N) const override;

  SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

  bool ExpandInlineAsm(CallInst *CI) const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &Info,
                                 const char *Constraint) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  /// Lower the specified operand into the Ops vector. If it is invalid, don't
  /// add anything to Ops. If hasMemory is true it means one of the asm
  /// constraints of the inline asm instruction being processed is 'm'.
  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "v")
      return InlineAsm::ConstraintCode::v;
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle Lowering flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  /// Given a physical register constraint
  /// (e.g. {edx}), return the register number and the register class for the
  /// register. This should only be used for C_Register constraints. On
  /// error, this returns a register number of 0.
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  /// Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                             Type *Ty, unsigned AS,
                             Instruction *I = nullptr) const override;

  bool addressingModeSupportsTLS(const GlobalValue &GV) const override;

  /// Return true if the specified immediate is legal
  /// icmp immediate, that is the target has icmp instructions which can
  /// compare a register against the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalICmpImmediate(int64_t Imm) const override;

  /// Return true if the specified immediate is legal
  /// add immediate, that is the target has add instructions which can
  /// add a register and the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const override;

  bool isLegalStoreImmediate(int64_t Imm) const override;

  /// This is used to enable splatted operand transforms for vector shifts
  /// and vector funnel shifts.
  bool isVectorShiftByScalarCheap(Type *Ty) const override;

  /// Add x86-specific opcodes to the default list.
  bool isBinOp(unsigned Opcode) const override;

  /// Returns true if the opcode is a commutative binary operation.
  bool isCommutativeBinOp(unsigned Opcode) const override;

  /// Return true if it's free to truncate a value of
  /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
  /// register EAX to i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

  /// Return true if any actual instruction that defines a
  /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
  /// register. This does not necessarily include registers defined in
  /// unknown ways, such as incoming arguments, or copies from unknown
  /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
  /// does not necessarily apply to truncate instructions. e.g. on x86-64,
  /// all instructions that define 32-bit values implicitly zero-extend the
  /// result out to 64 bits.
  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
  bool shouldConvertPhiType(Type *From, Type *To) const override;

  /// Return true if folding a vector load into ExtVal (a sign, zero, or any
  /// extend node) is profitable.
  bool isVectorLoadExtDesirable(SDValue) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this
  /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;

  /// Return true if it's profitable to narrow operations of type SrcVT to
  /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
  /// from i32 to i16.
  bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Given an intrinsic, checks if on the target the intrinsic will need to
  /// map to a MemIntrinsicNode (touches memory). If this is the case, it
  /// returns true and stores the intrinsic information into the IntrinsicInfo
  /// that was passed to the function.
  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  /// Returns true if the target can instruction select the
  /// specified FP immediate natively. If false, the legalizer will
  /// materialize the FP immediate as a load from a constant pool.
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Targets can use this to indicate that they only support *some*
  /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
  /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
  /// be legal.
  bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
  /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
  /// constant pool entry.
  bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

  /// Returns true if lowering to a jump table is allowed.
  bool areJTsAllowed(const Function *Fn) const override;

  MVT getPreferredSwitchConditionType(LLVMContext &Context,
                                      EVT ConditionVT) const override;

  /// If true, then instruction selection should
  /// seek to shrink the FP constant of the specified type to a smaller type
  /// in order to save space and / or reduce runtime.
  bool ShouldShrinkFPConstant(EVT VT) const override;

  /// Return true if we believe it is correct and profitable to reduce the
  /// load node to a smaller type.
  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  /// Return true if the specified scalar FP type is computed in an SSE
  /// register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

  bool convertSelectOfConstantsToMath(EVT VT) const override;

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  /// Scalar ops always have equal or better analysis/performance/power than
  /// the vector equivalent, so this always makes sense if the scalar op is
  /// supported.
  bool shouldScalarizeBinop(SDValue) const override;

  /// Extract of a scalar FP value from index 0 of a vector is free.
  bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
    EVT EltVT = VT.getScalarType();
    return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
  }

  /// Overflow nodes should get combined/lowered to optimal instructions
  /// (they should allow eliminating explicit compares by getting flags from
  /// math ops).
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override;
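
  // Illustrative only (a sketch, not part of the interface): for unsigned
  // addition the overflow test is
  //   Sum = X + Y;  Overflow = (Sum < X);
  // and forming ISD::UADDO lets both results come from a single ADD via
  // EFLAGS (SETB/JB) instead of an ADD followed by a separate CMP.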

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // If we can replace more than 2 scalar stores, there will be a reduction
    // in instructions even after we add a vector constant load.
    return IsZero || NumElem > 2;
  }

  bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                               const SelectionDAG &DAG,
                               const MachineMemOperand &MMO) const override;

  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  bool needsFixedCatchObjects() const override;

  /// This method returns a target specific FastISel object,
  /// or null if the target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  bool useLoadStackGuardNode() const override;
  bool useStackGuardXorFP() const override;
  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;
  SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                              const SDLoc &DL) const override;

  /// Return true if the target stores SafeStack pointer at a fixed offset in
  /// some non-standard address space, and populates the address space and
  /// offset as appropriate.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
                                        SDValue Chain, SDValue Pointer,
                                        MachinePointerInfo PtrInfo,
                                        Align Alignment,
                                        SelectionDAG &DAG) const;

  /// Customize the preferred legalization strategy for certain types.
  LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

  bool softPromoteHalfType() const override { return true; }

  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;

  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(
      LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
      unsigned &NumIntermediates, MVT &RegisterVT) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool supportSwiftError() const override;

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  bool hasStackProbeSymbol(const MachineFunction &MF) const override;
  bool hasInlineStackProbe(const MachineFunction &MF) const override;
  StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;

  unsigned getStackProbeSize(const MachineFunction &MF) const;

  bool hasVectorBlend() const override { return true; }

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
                               unsigned OpNo) const override;

  SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                          MachineMemOperand *MMO, SDValue &NewLoad,
                          SDValue Ptr, SDValue PassThru,
                          SDValue Mask) const override;
  SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                           MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
                           SDValue Mask) const override;

  /// Lower interleaved load(s) into target specific
  /// instructions/intrinsics.
  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;

  /// Lower interleaved store(s) into target specific
  /// instructions/intrinsics.
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
                                 int JTI, SelectionDAG &DAG) const override;

  Align getPrefLoopAlignment(MachineLoop *ML) const override;

  EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
    if (VT == MVT::f80)
      return EVT::getIntegerVT(Context, 96);
    return TargetLoweringBase::getTypeToTransformTo(Context, VT);
  }

protected:
  std::pair<const TargetRegisterClass *, uint8_t>
  findRepresentativeClass(const TargetRegisterInfo *TRI,
                          MVT VT) const override;

private:
  /// Keep a reference to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget &Subtarget;

  /// A list of legal FP immediates.
  std::vector<APFloat> LegalFPImmediates;

  /// Indicate that this x86 target can instruction
  /// select the specified FP immediate natively.
  void addLegalFPImmediate(const APFloat& Imm) {
    LegalFPImmediates.push_back(Imm);
  }

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &dl, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals,
                          uint32_t *RegMask) const;
  SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                           const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA, MachineFrameInfo &MFI,
                           unsigned i) const;
  SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA,
                           ISD::ArgFlagsTy Flags, bool isByval) const;

  // Call lowering helpers.

  /// Check whether the call is eligible for tail call optimization. Targets
  /// that want to do tail call optimization should implement this function.
  bool IsEligibleForTailCallOptimization(
      TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
      SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;
  SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                  SDValue Chain, bool IsTailCall,
                                  bool Is64Bit, int FPDiff,
                                  const SDLoc &dl) const;

  unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                       SelectionDAG &DAG) const;

  unsigned getAddressSpace() const;

  SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                          SDValue &Chain) const;
  SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

  unsigned getGlobalWrapperKind(const GlobalValue *GV,
                                const unsigned char OpFlags) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

  /// Creates target global address or external symbol nodes for calls or
  /// other uses.
  SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                bool ForCall) const;

  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                  SDValue &Chain) const;
  SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;

  SDValue
  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       const SDLoc &dl, SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals,
                      const SDLoc &dl, SelectionDAG &DAG) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;

    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
    ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
    void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
    void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    /// Utility function to lower a pair of adjacent select (CMOV) pseudos as
    /// a single cascaded select.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
                                              MachineBasicBlock *MBB) const;
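
    /// NOTE: The Emit*/emit* custom-inserter helpers above are reached from
    /// the EmitInstrWithCustomInserter() override, which dispatches on the
    /// pseudo-instruction opcode. A minimal sketch of that dispatch, for
    /// illustration only (the real switch in X86ISelLowering.cpp covers many
    /// more opcodes):
    /// \code
    ///   switch (MI.getOpcode()) {
    ///   case X86::EH_SjLj_SetJmp32:
    ///   case X86::EH_SjLj_SetJmp64:
    ///     return emitEHSjLjSetJmp(MI, MBB);
    ///   case X86::EH_SjLj_LongJmp32:
    ///   case X86::EH_SjLj_LongJmp64:
    ///     return emitEHSjLjLongJmp(MI, MBB);
    ///   }
    /// \endcode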

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;

    bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                             SDValue IntPow2) const override;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
                    SDValue V2) const;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility and should never be directly created.
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getScale() const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H