//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
class X86Subtarget;
class X86TargetMachine;

namespace X86ISD {
// X86 Specific DAG Nodes
enum NodeType : unsigned {
  // Start the numbering where the builtin ops leave off.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  /// Bit scan forward.
  BSF,
  /// Bit scan reverse.
  BSR,

  /// X86 funnel/double shift i16 instructions. These correspond to
  /// X86::SHLDW and X86::SHRDW instructions which have different amount
  /// modulo rules than generic funnel shifts.
  /// NOTE: The operand order matches ISD::FSHL/FSHR, not SHLD/SHRD.
  FSHL,
  FSHR,

  /// Bitwise logical AND of floating point values. This corresponds
  /// to X86::ANDPS or X86::ANDPD.
  FAND,

  /// Bitwise logical OR of floating point values. This corresponds
  /// to X86::ORPS or X86::ORPD.
  FOR,

  /// Bitwise logical XOR of floating point values. This corresponds
  /// to X86::XORPS or X86::XORPD.
  FXOR,

  /// Bitwise logical ANDNOT of floating point values. This
  /// corresponds to X86::ANDNPS or X86::ANDNPD.
  FANDN,

  /// These operations represent an abstract X86 call
  /// instruction, which includes a bunch of information. In particular the
  /// operands of these nodes are:
  ///
  ///     #0 - The incoming token chain
  ///     #1 - The callee
  ///     #2 - The number of arg bytes the caller pushes on the stack.
  ///     #3 - The number of arg bytes the callee pops off the stack.
  ///     #4 - The value to pass in AL/AX/EAX (optional)
  ///     #5 - The value to pass in DL/DX/EDX (optional)
  ///
  /// The result values of these nodes are:
  ///
  ///     #0 - The outgoing token chain
  ///     #1 - The first register result value (optional)
  ///     #2 - The second register result value (optional)
  ///
  CALL,

  /// Same as call except it adds the NoTrack prefix.
  NT_CALL,

  // Pseudo for an ObjC call that gets emitted together with a special
  // marker instruction.
  CALL_RVMARKER,

  /// X86 compare and logical compare instructions.
  CMP,
  FCMP,
  COMI,
  UCOMI,

  /// X86 bit-test instructions.
  BT,

  /// X86 SetCC. Operand 0 is the condition code, and operand 1 is the EFLAGS
  /// operand, usually produced by a CMP instruction.
  SETCC,

  /// X86 Select
  SELECTS,

  // Same as SETCC except it's materialized with a sbb and the value is all
  // ones or all zeros.
  SETCC_CARRY, // R = carry_bit ? ~0 : 0

  /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  /// Operands are two FP values to compare; result is a mask of
  /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
  FSETCC,
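
  // Illustrative sketch (an editorial example, not an additional node
  // definition): a scalar compare-and-select is typically modelled with the
  // nodes above and below roughly as
  //   EFLAGS = X86ISD::CMP   Val0, Val1
  //   Cond   = X86ISD::SETCC <X86 condition code>, EFLAGS
  //   Result = X86ISD::CMOV  Val0, Val1, <X86 condition code>, EFLAGS
  // The exact operand encodings are defined by the lowering code in
  // X86ISelLowering.cpp, not by this header.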

  /// X86 FP SETCC, similar to above, but with output as an i1 mask, and
  /// a version with SAE.
  FSETCCM,
  FSETCCM_SAE,

  /// X86 conditional moves. Operand 0 and operand 1 are the two values
  /// to select from. Operand 2 is the condition code, and operand 3 is the
  /// flag operand produced by a CMP or TEST instruction.
  CMOV,

  /// X86 conditional branches. Operand 0 is the chain operand, operand 1
  /// is the block to branch to if the condition is true, operand 2 is the
  /// condition code, and operand 3 is the flag operand produced by a CMP
  /// or TEST instruction.
  BRCOND,

  /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
  /// operand 1 is the target address.
  NT_BRIND,

  /// Return with a glue operand. Operand 0 is the chain operand, operand
  /// 1 is the number of bytes of stack to pop.
  RET_GLUE,

  /// Return from interrupt. Operand 0 is the number of bytes to pop.
  IRET,

  /// Repeat fill, corresponds to X86::REP_STOSx.
  REP_STOS,

  /// Repeat move, corresponds to X86::REP_MOVSx.
  REP_MOVS,

  /// On Darwin, this node represents the result of the popl
  /// at function entry, used for PIC code.
  GlobalBaseReg,

  /// A wrapper node for TargetConstantPool, TargetJumpTable,
  /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
  /// MCSymbol and TargetBlockAddress.
  Wrapper,

  /// Special wrapper used under X86-64 PIC mode for RIP
  /// relative displacements.
  WrapperRIP,

  /// Copies a 64-bit value from an MMX vector to the low word
  /// of an XMM vector, with the high word zero filled.
  MOVQ2DQ,

  /// Copies a 64-bit value from the low word of an XMM vector
  /// to an MMX vector.
  MOVDQ2Q,

  /// Copies a 32-bit value from the low word of an MMX
  /// vector to a GPR.
  MMX_MOVD2W,

  /// Copies a GPR into the low 32-bit word of an MMX vector
  /// and zeroes out the high word.
  MMX_MOVW2D,

  /// Extract an 8-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRB.
  PEXTRB,

  /// Extract a 16-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRW.
  PEXTRW,

  /// Insert any element of a 4 x float vector into any element
  /// of a destination 4 x float vector.
  INSERTPS,

  /// Insert the lower 8 bits of a 32-bit value into a vector,
  /// corresponds to X86::PINSRB.
  PINSRB,

  /// Insert the lower 16 bits of a 32-bit value into a vector,
  /// corresponds to X86::PINSRW.
  PINSRW,

  /// Shuffle 16 8-bit values within a vector.
  PSHUFB,

  /// Compute Sum of Absolute Differences.
  PSADBW,
  /// Compute Double Block Packed Sum-Absolute-Differences.
  DBPSADBW,

  /// Bitwise Logical AND NOT of Packed FP values.
  ANDNP,

  /// Blend where the selector is an immediate.
  BLENDI,

  /// Dynamic (non-constant condition) vector blend where only the sign bits
  /// of the condition elements are used. This is used to enforce that the
  /// condition mask is not valid for generic VSELECT optimizations. This
  /// is also used to implement the intrinsics.
  /// Operands are in VSELECT order: MASK, TRUE, FALSE
  BLENDV,

  /// Combined add and sub on an FP vector.
  ADDSUB,
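
  // Editorial sketch of the BLENDV form documented above (variable names are
  // placeholders, not APIs from this file): a lowering would build roughly
  //   DAG.getNode(X86ISD::BLENDV, DL, VT, Mask, TrueOp, FalseOp)
  // and only the sign bit of each Mask element participates in the select.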

  // FP vector ops with rounding mode.
  FADD_RND,
  FADDS,
  FADDS_RND,
  FSUB_RND,
  FSUBS,
  FSUBS_RND,
  FMUL_RND,
  FMULS,
  FMULS_RND,
  FDIV_RND,
  FDIVS,
  FDIVS_RND,
  FMAX_SAE,
  FMAXS_SAE,
  FMIN_SAE,
  FMINS_SAE,
  FSQRT_RND,
  FSQRTS,
  FSQRTS_RND,

  // FP vector get exponent.
  FGETEXP,
  FGETEXP_SAE,
  FGETEXPS,
  FGETEXPS_SAE,
  // Extract Normalized Mantissas.
  VGETMANT,
  VGETMANT_SAE,
  VGETMANTS,
  VGETMANTS_SAE,
  // FP Scale.
  SCALEF,
  SCALEF_RND,
  SCALEFS,
  SCALEFS_RND,

  /// Integer horizontal add/sub.
  HADD,
  HSUB,

  /// Floating point horizontal add/sub.
  FHADD,
  FHSUB,

  // Detect Conflicts Within a Vector.
  CONFLICT,

  /// Floating point max and min.
  FMAX,
  FMIN,

  /// Commutative FMIN and FMAX.
  FMAXC,
  FMINC,

  /// Scalar intrinsic floating point max and min.
  FMAXS,
  FMINS,

  /// Floating point reciprocal-sqrt and reciprocal approximation.
  /// Note that these typically require refinement
  /// in order to obtain suitable precision.
  FRSQRT,
  FRCP,

  // AVX-512 reciprocal approximations with a little more precision.
  RSQRT14,
  RSQRT14S,
  RCP14,
  RCP14S,

  // Thread Local Storage.
  TLSADDR,

  // Thread Local Storage. A call to get the start address
  // of the TLS block for the current module.
  TLSBASEADDR,

  // Thread Local Storage. A call to an OS-provided thunk at the address
  // from an earlier relocation.
  TLSCALL,

  // Exception Handling helpers.
  EH_RETURN,

  // SjLj exception handling setjmp.
  EH_SJLJ_SETJMP,

  // SjLj exception handling longjmp.
  EH_SJLJ_LONGJMP,

  // SjLj exception handling dispatch.
  EH_SJLJ_SETUP_DISPATCH,

  /// Tail call return. See X86TargetLowering::LowerCall for
  /// the list of operands.
  TC_RETURN,

  // Vector move to low scalar and zero higher vector elements.
  VZEXT_MOVL,

  // Vector integer truncate.
  VTRUNC,
  // Vector integer truncate with unsigned/signed saturation.
  VTRUNCUS,
  VTRUNCS,

  // Masked version of the above. Used when less than a 128-bit result is
  // produced since the mask only applies to the lower elements and can't
  // be represented by a select.
  // SRC, PASSTHRU, MASK
  VMTRUNC,
  VMTRUNCUS,
  VMTRUNCS,

  // Vector FP extend.
  VFPEXT,
  VFPEXT_SAE,
  VFPEXTS,
  VFPEXTS_SAE,

  // Vector FP round.
  VFPROUND,
  VFPROUND_RND,
  VFPROUNDS,
  VFPROUNDS_RND,

  // Masked version of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  VMFPROUND,

  // 128-bit vector logical left / right shift.
  VSHLDQ,
  VSRLDQ,

  // Vector shift elements.
  VSHL,
  VSRL,
  VSRA,

  // Vector variable shift.
  VSHLV,
  VSRLV,
  VSRAV,

  // Vector shift elements by immediate.
  VSHLI,
  VSRLI,
  VSRAI,

  // Shifts of mask registers.
  KSHIFTL,
  KSHIFTR,

  // Bit rotate by immediate.
  VROTLI,
  VROTRI,

  // Vector packed double/float comparison.
  CMPP,

  // Vector integer comparisons.
  PCMPEQ,
  PCMPGT,

  // v8i16 Horizontal minimum and position.
  PHMINPOS,

  MULTISHIFT,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  CMPM,
  // Vector mask comparison generating mask bits for FP values.
  CMPMM,
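
  // Editorial sketch for the mask-compare nodes above (assumed operand
  // order; the lowering code in X86ISelLowering.cpp is authoritative):
  //   DAG.getNode(X86ISD::CMPM, DL, MaskVT /*vXi1*/, LHS, RHS,
  //               DAG.getTargetConstant(Imm, DL, MVT::i8))
  // where Imm selects the comparison predicate.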

  // Vector mask comparison with SAE for FP values.
  CMPMM_SAE,

  // Arithmetic operations with FLAGS results.
  ADD,
  SUB,
  ADC,
  SBB,
  SMUL,
  UMUL,
  OR,
  XOR,
  AND,

  // Bit field extract.
  BEXTR,
  BEXTRI,

  // Zero High Bits Starting with Specified Bit Position.
  BZHI,

  // Parallel extract and deposit.
  PDEP,
  PEXT,

  // X86-specific multiply by immediate.
  MUL_IMM,

  // Vector sign bit extraction.
  MOVMSK,

  // Vector bitwise comparisons.
  PTEST,

  // Vector packed fp sign bitwise comparisons.
  TESTP,

  // OR/AND test for masks.
  KORTEST,
  KTEST,

  // ADD for masks.
  KADD,

  // Several flavors of instructions with vector shuffle behaviors.
  // Saturated signed/unsigned packing.
  PACKSS,
  PACKUS,
  // Intra-lane alignr.
  PALIGNR,
  // AVX512 inter-lane alignr.
  VALIGN,
  PSHUFD,
  PSHUFHW,
  PSHUFLW,
  SHUFP,
  // VBMI2 Concat & Shift.
  VSHLD,
  VSHRD,
  VSHLDV,
  VSHRDV,
  // Shuffle Packed Values at 128-bit granularity.
  SHUF128,
  MOVDDUP,
  MOVSHDUP,
  MOVSLDUP,
  MOVLHPS,
  MOVHLPS,
  MOVSD,
  MOVSS,
  MOVSH,
  UNPCKL,
  UNPCKH,
  VPERMILPV,
  VPERMILPI,
  VPERMI,
  VPERM2X128,

  // Variable Permute (VPERM).
  // Res = VPERMV MaskV, V0
  VPERMV,

  // 3-op Variable Permute (VPERMT2).
  // Res = VPERMV3 V0, MaskV, V1
  VPERMV3,

  // Bitwise ternary logic.
  VPTERNLOG,
  // Fix Up Special Packed Float32/64 values.
  VFIXUPIMM,
  VFIXUPIMM_SAE,
  VFIXUPIMMS,
  VFIXUPIMMS_SAE,
  // Range Restriction Calculation For Packed Pairs of Float32/64 values.
  VRANGE,
  VRANGE_SAE,
  VRANGES,
  VRANGES_SAE,
  // Reduce - Perform Reduction Transformation on scalar/packed FP.
  VREDUCE,
  VREDUCE_SAE,
  VREDUCES,
  VREDUCES_SAE,
  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  VRNDSCALE,
  VRNDSCALE_SAE,
  VRNDSCALES,
  VRNDSCALES_SAE,
  // Tests the types of packed FP values.
  VFPCLASS,
  // Tests the types of scalar FP values.
  VFPCLASSS,

  // Broadcast (splat) scalar or element 0 of a vector. If the operand is
  // a vector, this node may change the vector length as part of the splat.
  VBROADCAST,
  // Broadcast mask to vector.
  VBROADCASTM,

  /// SSE4A Extraction and Insertion.
  EXTRQI,
  INSERTQI,

  // XOP arithmetic/logical shifts.
  VPSHA,
  VPSHL,
  // XOP signed/unsigned integer comparisons.
  VPCOM,
  VPCOMU,
  // XOP packed permute bytes.
  VPPERM,
  // XOP two source permutation.
  VPERMIL2,

  // Vector multiply packed unsigned doubleword integers.
  PMULUDQ,
  // Vector multiply packed signed doubleword integers.
  PMULDQ,
  // Vector Multiply Packed Unsigned Integers with Round and Scale.
  MULHRS,

  // Multiply and Add Packed Integers.
  VPMADDUBSW,
  VPMADDWD,

  // AVX512IFMA multiply and add.
  // NOTE: These are different from the instruction and perform
  // op0 x op1 + op2.
  VPMADD52L,
  VPMADD52H,

  // VNNI
  VPDPBUSD,
  VPDPBUSDS,
  VPDPWSSD,
  VPDPWSSDS,

  // FMA nodes.
  // We use the target independent ISD::FMA for the non-inverted case.
  FNMADD,
  FMSUB,
  FNMSUB,
  FMADDSUB,
  FMSUBADD,
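
  // Editorial note: read the FMA node names above as which inputs get
  // negated, conceptually
  //   ISD::FMA(a, b, c)       =  (a * b) + c
  //   X86ISD::FNMADD(a, b, c) = -(a * b) + c
  //   X86ISD::FMSUB(a, b, c)  =  (a * b) - c
  //   X86ISD::FNMSUB(a, b, c) = -(a * b) - c
  // FMADDSUB/FMSUBADD alternate add and sub across vector lanes.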

  // FMA with rounding mode.
  FMADD_RND,
  FNMADD_RND,
  FMSUB_RND,
  FNMSUB_RND,
  FMADDSUB_RND,
  FMSUBADD_RND,

  // AVX512-FP16 complex addition and multiplication.
  VFMADDC,
  VFMADDC_RND,
  VFCMADDC,
  VFCMADDC_RND,

  VFMULC,
  VFMULC_RND,
  VFCMULC,
  VFCMULC_RND,

  VFMADDCSH,
  VFMADDCSH_RND,
  VFCMADDCSH,
  VFCMADDCSH_RND,

  VFMULCSH,
  VFMULCSH_RND,
  VFCMULCSH,
  VFCMULCSH_RND,

  VPDPBSUD,
  VPDPBSUDS,
  VPDPBUUD,
  VPDPBUUDS,
  VPDPBSSD,
  VPDPBSSDS,

  // Compress and expand.
  COMPRESS,
  EXPAND,

  // Bits shuffle.
  VPSHUFBITQMB,

  // Convert Unsigned/Signed Integer to Floating-Point Value with rounding
  // mode.
  SINT_TO_FP_RND,
  UINT_TO_FP_RND,
  SCALAR_SINT_TO_FP,
  SCALAR_UINT_TO_FP,
  SCALAR_SINT_TO_FP_RND,
  SCALAR_UINT_TO_FP_RND,

  // Vector float/double to signed/unsigned integer.
  CVTP2SI,
  CVTP2UI,
  CVTP2SI_RND,
  CVTP2UI_RND,
  // Scalar float/double to signed/unsigned integer.
  CVTS2SI,
  CVTS2UI,
  CVTS2SI_RND,
  CVTS2UI_RND,

  // Vector float/double to signed/unsigned integer with truncation.
  CVTTP2SI,
  CVTTP2UI,
  CVTTP2SI_SAE,
  CVTTP2UI_SAE,
  // Scalar float/double to signed/unsigned integer with truncation.
  CVTTS2SI,
  CVTTS2UI,
  CVTTS2SI_SAE,
  CVTTS2UI_SAE,

  // Vector signed/unsigned integer to float/double.
  CVTSI2P,
  CVTUI2P,

  // Masked versions of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  MCVTP2SI,
  MCVTP2UI,
  MCVTTP2SI,
  MCVTTP2UI,
  MCVTSI2P,
  MCVTUI2P,

  // Vector float to bfloat16.
  // Convert two packed single data to one packed BF16 data.
  CVTNE2PS2BF16,
  // Convert packed single data to packed BF16 data.
  CVTNEPS2BF16,
  // Masked version of above.
  // SRC, PASSTHRU, MASK
  MCVTNEPS2BF16,

  // Dot product of BF16 pairs to be accumulated into
  // packed single precision.
  DPBF16PS,

  // A stack checking function call. On Windows it's a _chkstk call.
  DYN_ALLOCA,

  // For allocating variable amounts of stack space when using
  // segmented stacks. Checks if the current stacklet has enough space, and
  // falls back to heap allocation if not.
  SEG_ALLOCA,

  // For allocating stack space when using stack clash protector.
  // Allocation is performed by block, and each block is probed.
  PROBED_ALLOCA,

  // Memory barriers.
  MFENCE,

  // Get a random integer and indicate whether it is valid in CF.
  RDRAND,

  // Get a NIST SP800-90B & C compliant random integer and
  // indicate whether it is valid in CF.
  RDSEED,

  // Protection keys
  // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
  // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
  // value for ECX.
  RDPKRU,
  WRPKRU,

  // SSE42 string comparisons.
  // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
  // will emit one or two instructions based on which results are used. If
  // flags and index/mask are both used, this allows us to use a single
  // instruction since we won't have to pick an opcode for flags. Instead we
  // can rely on the DAG to CSE everything and decide at isel.
  PCMPISTR,
  PCMPESTR,

  // Test if in transactional execution.
  XTEST,

  // ERI instructions.
  RSQRT28,
  RSQRT28_SAE,
  RSQRT28S,
  RSQRT28S_SAE,
  RCP28,
  RCP28_SAE,
  RCP28S,
  RCP28S_SAE,
  EXP2,
  EXP2_SAE,

  // Conversions between float and half-float.
  CVTPS2PH,
  CVTPS2PH_SAE,
  CVTPH2PS,
  CVTPH2PS_SAE,

  // Masked version of above.
  // SRC, RND, PASSTHRU, MASK
  MCVTPS2PH,
  MCVTPS2PH_SAE,

  // Galois Field Arithmetic Instructions
  GF2P8AFFINEINVQB,
  GF2P8AFFINEQB,
  GF2P8MULB,

  // LWP insert record.
  LWPINS,

  // User level wait
  UMWAIT,
  TPAUSE,

  // Enqueue Stores Instructions
  ENQCMD,
  ENQCMDS,

  // For avx512-vp2intersect
  VP2INTERSECT,

  // User level interrupts - testui
  TESTUI,

  // Perform an FP80 add after changing precision control in FPCW.
  FP80_ADD,

  /// X86 strict FP compare instructions.
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPS,

  // Vector packed double/float comparison.
  STRICT_CMPP,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  STRICT_CMPM,

  // Vector float/double to signed/unsigned integer with truncation.
  STRICT_CVTTP2SI,
  STRICT_CVTTP2UI,

  // Vector FP extend.
  STRICT_VFPEXT,

  // Vector FP round.
  STRICT_VFPROUND,

  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  STRICT_VRNDSCALE,

  // Vector signed/unsigned integer to float/double.
  STRICT_CVTSI2P,
  STRICT_CVTUI2P,

  // Strict FMA nodes.
  STRICT_FNMADD,
  STRICT_FMSUB,
  STRICT_FNMSUB,

  // Conversions between float and half-float.
  STRICT_CVTPS2PH,
  STRICT_CVTPH2PS,

  // Perform an FP80 add after changing precision control in FPCW.
  STRICT_FP80_ADD,

  // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
  // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.

  // Compare and swap.
  LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LCMPXCHG8_DAG,
  LCMPXCHG16_DAG,
  LCMPXCHG16_SAVE_RBX_DAG,

  /// LOCK-prefixed arithmetic read-modify-write instructions.
  /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
  LADD,
  LSUB,
  LOR,
  LXOR,
  LAND,
  LBTS,
  LBTC,
  LBTR,
  LBTS_RM,
  LBTC_RM,
  LBTR_RM,

  /// RAO arithmetic instructions.
  /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
  AADD,
  AOR,
  AXOR,
  AAND,

  // Load, scalar_to_vector, and zero extend.
  VZEXT_LOAD,

  // extract_vector_elt, store.
  VEXTRACT_STORE,

  // scalar broadcast from memory.
  VBROADCAST_LOAD,

  // subvector broadcast from memory.
  SUBV_BROADCAST_LOAD,

  // Store FP control word into i16 memory.
  FNSTCW16m,

  // Load FP control word from i16 memory.
  FLDCW16m,

  // Store x87 FPU environment into memory.
  FNSTENVm,

  // Load x87 FPU environment from memory.
  FLDENVm,
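
  // Editorial note: nodes from FIRST_TARGET_MEMORY_OPCODE onwards carry a
  // MachineMemOperand. As an assumed usage sketch, such nodes are created
  // with SelectionDAG::getMemIntrinsicNode rather than plain getNode, e.g.
  //   DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, VTs, Ops,
  //                           MemVT, MMO);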

  /// This instruction implements FP_TO_SINT with the
  /// integer destination in memory and a FP reg source. This corresponds
  /// to the X86::FIST*m instructions and the rounding mode change stuff. It
  /// has two inputs (token chain and address) and two outputs (int value
  /// and token chain). Memory VT specifies the type to store to.
  FP_TO_INT_IN_MEM,

  /// This instruction implements SINT_TO_FP with the
  /// integer source in memory and FP reg result. This corresponds to the
  /// X86::FILD*m instructions. It has two inputs (token chain and address)
  /// and two outputs (FP value and token chain). The integer source type is
  /// specified by the memory VT.
  FILD,

  /// This instruction implements a fp->int store from FP stack
  /// slots. This corresponds to the fist instruction. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FIST,

  /// This instruction implements an extending load to FP stack slots.
  /// This corresponds to the X86::FLD32m / X86::FLD64m instructions. It
  /// takes a chain operand and a ptr to load from. The memory VT specifies
  /// the type to load from.
  FLD,

  /// This instruction implements a truncating store from FP stack
  /// slots. This corresponds to the X86::FST32m / X86::FST64m instructions.
  /// It takes a chain operand, value to store, address, and glue. The
  /// memory VT specifies the type to store as.
  FST,

  /// These instructions grab the address of the next argument
  /// from a va_list. (reads and modifies the va_list in memory)
  VAARG_64,
  VAARG_X32,

  // Vector truncating store with unsigned/signed saturation.
  VTRUNCSTOREUS,
  VTRUNCSTORES,
  // Vector truncating masked store with unsigned/signed saturation.
  VMTRUNCSTOREUS,
  VMTRUNCSTORES,

  // X86 specific gather and scatter.
  MGATHER,
  MSCATTER,

  // Key locker nodes that produce flags.
  AESENC128KL,
  AESDEC128KL,
  AESENC256KL,
  AESDEC256KL,
  AESENCWIDE128KL,
  AESDECWIDE128KL,
  AESENCWIDE256KL,
  AESDECWIDE256KL,

  /// Compare and Add if Condition is Met. Compare value in operand 2 with
  /// value in memory of operand 1. If condition of operand 4 is met, add
  /// value operand 3 to m32 and write new value in operand 1. Operand 2 is
  /// always updated with the original value from operand 1.
  CMPCCXADD,

  // Save xmm argument registers to the stack, according to %al. An operator
  // is needed so that this can be expanded with control flow.
  VASTART_SAVE_XMM_REGS,

  // WARNING: Do not add anything at the end unless you want the node to
  // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
  // opcodes will be treated as target memory ops!
};
} // end namespace X86ISD

namespace X86 {
/// Current rounding mode is represented in bits 11:10 of FPSR. These
/// values are same as corresponding constants for rounding mode used
/// in glibc.
enum RoundingMode {
  rmToNearest = 0,        // FE_TONEAREST
  rmDownward = 1 << 10,   // FE_DOWNWARD
  rmUpward = 2 << 10,     // FE_UPWARD
  rmTowardZero = 3 << 10, // FE_TOWARDZERO
  rmMask = 3 << 10        // Bit mask selecting rounding mode
};
}

/// Define some predicates that are used for node matching.
namespace X86 {
/// Returns true if Elt is a constant zero or floating point constant +0.0.
bool isZeroNode(SDValue Elt);

/// Returns true if the given offset can fit into the displacement field of
/// the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                  bool hasSymbolicDisplacement);
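
// Editorial usage sketch for the helpers above (caller-side pseudocode with
// placeholder variables, nothing declared in this file):
//   if (X86::isZeroNode(N->getOperand(1)) &&
//       X86::isOffsetSuitableForCodeModel(Offset, M,
//                                         /*hasSymbolicDisplacement=*/true))
//     ... fold the operation ...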

/// Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool isCalleePop(CallingConv::ID CallingConv,
                 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

/// If Op is a constant whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatVal.
/// If we have undef bits that don't cover an entire element, we treat these
/// as zero if AllowPartialUndefs is set, else we fail and return false.
bool isConstantSplat(SDValue Op, APInt &SplatVal,
                     bool AllowPartialUndefs = true);

/// Check if Op is a load operation that could be folded into some other x86
/// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                 bool AssumeSingleUse = false);

/// Check if Op is a load operation that could be folded into a vector splat
/// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                     const X86Subtarget &Subtarget,
                                     bool AssumeSingleUse = false);

/// Check if Op is a value that could be used to fold a store into some
/// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
bool mayFoldIntoStore(SDValue Op);

/// Check if Op is an operation that could be folded into a zero extend x86
/// instruction.
bool mayFoldIntoZeroExtend(SDValue Op);
} // end namespace X86

//===--------------------------------------------------------------------===//
//  X86 Implementation of the TargetLowering interface
class X86TargetLowering final : public TargetLowering {
public:
  explicit X86TargetLowering(const X86TargetMachine &TM,
                             const X86Subtarget &STI);

  unsigned getJumpTableEncoding() const override;
  bool useSoftFloat() const override;

  void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                             ArgListTy &Args) const override;

  MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
    return MVT::i8;
  }

  const MCExpr *
  LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                            const MachineBasicBlock *MBB, unsigned uid,
                            MCContext &Ctx) const override;

  /// Returns relocation base for the given PIC jumptable.
  SDValue getPICJumpTableRelocBase(SDValue Table,
                                   SelectionDAG &DAG) const override;
  const MCExpr *
  getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                               unsigned JTI, MCContext &Ctx) const override;

  /// Return the desired alignment for ByVal aggregate
  /// function arguments in the caller parameter area. For X86, aggregates
  /// that contain SSE vectors are placed at 16-byte boundaries while the
  /// rest are at 4-byte boundaries.
  uint64_t getByValTypeAlignment(Type *Ty,
                                 const DataLayout &DL) const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;
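
  // Editorial note (behaviour implemented in X86ISelLowering.cpp, summarized
  // here as an assumption): for large enough inline memcpy/memset expansions
  // on SSE2-capable targets, getOptimalMemOpType can return a vector type
  // such as MVT::v4i32 so 16-byte stores are used, and isSafeMemOpType below
  // rejects types that cannot be used safely for such expansions.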

  /// Returns true if it's safe to use load / store of the
  /// specified type to expand memcpy / memset inline. This is mostly true
  /// for all types except for some special cases. For example, on X86
  /// targets without SSE2, f64 load / store are done with fldl / fstpl which
  /// also do type conversion. Note the specified type doesn't have to be
  /// legal as the hook is used before type legalization.
  bool isSafeMemOpType(MVT VT) const override;

  bool isMemoryAccessFast(EVT VT, Align Alignment) const;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type. Returns whether it is "fast" in the last argument.
  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast) const override;

  /// This function returns true if the memory access is aligned or if the
  /// target allows this specific unaligned memory access. If the access is
  /// allowed, the optional final parameter returns a relative speed of the
  /// access (as defined by the target).
  bool allowsMemoryAccess(
      LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
      Align Alignment,
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;

  bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
                          const MachineMemOperand &MMO,
                          unsigned *Fast) const {
    return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
                              MMO.getAlign(), MMO.getFlags(), Fast);
  }

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  /// Replace the results of a node with an illegal result
  /// type with new values built out of custom code.
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  bool preferABDSToABSWithNSW(EVT VT) const override;

  bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
                                 EVT ExtVT) const override;

  bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
                                         EVT VT) const override;

  /// Return true if the target has native support for
  /// the specified value type and it is 'desirable' to use the type for the
  /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
  /// instruction encodings are longer and some i16 instructions are slow.
  bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

  /// Return true if the target has native support for the
  /// specified value type and it is 'desirable' to use the type. e.g. On x86
  /// i16 is legal, but undesirable since i16 instruction encodings are longer
  /// and some i16 instructions are slow.
  bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

  /// Return the preferred fold type: Abs if this is a vector, AddAnd if it's
  /// an integer, None otherwise.
  TargetLowering::AndOrSETCCFoldKind
  isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
                                     const SDNode *SETCC0,
                                     const SDNode *SETCC1) const override;

  /// Return the newly negated expression if the cost is not expensive and
  /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
  /// do the negation.
  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                               bool LegalOperations, bool ForCodeSize,
                               NegatibleCost &Cost,
                               unsigned Depth) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  /// This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  /// Do not merge vector stores after legalization because that may conflict
  /// with x86-specific store splitting optimizations.
  bool mergeStoresAfterLegalization(EVT MemVT) const override {
    return !MemVT.isVector();
  }

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override;

  bool isCheapToSpeculateCttz(Type *Ty) const override;

  bool isCheapToSpeculateCtlz(Type *Ty) const override;

  bool isCtlzFast() const override;

  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // increase one store instruction. There is potentially a more
    // significant benefit because it avoids the float->int domain switch
    // for the input value. So it is more likely a win.
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;
    // If the pair only contains int values, we will save two bitwise
    // instructions and increase one store instruction (costing one more
    // store buffer). Since the benefit is less clear, we leave such pairs
    // out until we get a test case to prove it is a win.
    return false;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue Y) const override;

  bool hasAndNot(SDValue Y) const override;

  bool hasBitTest(SDValue X, SDValue Y) const override;

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  unsigned preferedOpcodeForCmpEqPiecesOfOperand(
      EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
      const APInt &ShiftOrRotateAmt,
      const std::optional<APInt> &AndMask) const override;

  bool preferScalarizeSplat(SDNode *N) const override;

  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

  bool
  shouldTransformSignedTruncationCheck(EVT XVT,
                                       unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, what MOVSX supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldSplatInsEltVarIndex(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
    // Converting to sat variants holds little benefit on X86 as we will just
    // need to saturate the value back using fp arithmetic.
    return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
  }

  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }

  /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
  MVT hasFastEqualityCompare(unsigned NumBits) const override;

  /// Return the value type to use for ISD::SETCC.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  /// Determine which of the bits specified in Mask are known to be either
  /// zero or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  /// Determine the number of bits in the operation that are sign bits.
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                               const APInt &DemandedElts,
                                               APInt &KnownUndef,
                                               APInt &KnownZero,
                                               TargetLoweringOpt &TLO,
                                               unsigned Depth) const override;

  bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                  const APInt &DemandedElts,
                                                  unsigned MaskIndex,
                                                  TargetLoweringOpt &TLO,
                                                  unsigned Depth) const;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &DemandedBits,
                                         const APInt &DemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
      SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
      SelectionDAG &DAG, unsigned Depth) const override;

  bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
      bool PoisonOnly, unsigned Depth) const override;

  bool canCreateUndefOrPoisonForTargetNode(
      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
      bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;

  bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                 APInt &UndefElts, const SelectionDAG &DAG,
                                 unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override {
    // Peek through bitcasts/extracts/inserts to see if we have a broadcast
    // vector from memory.
    while (Op.getOpcode() == ISD::BITCAST ||
           Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
           (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
            Op.getOperand(0).isUndef()))
      Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);

    return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
           TargetLowering::isTargetCanonicalConstantNode(Op);
  }

  const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

  SDValue unwrapAddress(SDValue N) const override;

  SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

  bool ExpandInlineAsm(CallInst *CI) const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &Info,
                                 const char *Constraint) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  /// Lower the specified operand into the Ops vector. If it is invalid, don't
  /// add anything to Ops. If hasMemory is true it means one of the asm
  /// constraints of the inline asm instruction being processed is 'm'.
  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "v")
      return InlineAsm::ConstraintCode::v;
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle lowering of flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  /// Given a physical register constraint
  /// (e.g. {edx}), return the register number and the register class for the
  /// register. This should only be used for C_Register constraints. On
  /// error, this returns a register number of 0.
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  /// Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                             Type *Ty, unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return true if the specified immediate is a legal
  /// icmp immediate, that is the target has icmp instructions which can
  /// compare a register against the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalICmpImmediate(int64_t Imm) const override;

  /// Return true if the specified immediate is a legal
  /// add immediate, that is the target has add instructions which can
  /// add a register and the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const override;

  bool isLegalStoreImmediate(int64_t Imm) const override;

  /// This is used to enable splatted operand transforms for vector shifts
  /// and vector funnel shifts.
  bool isVectorShiftByScalarCheap(Type *Ty) const override;

  /// Add x86-specific opcodes to the default list.
  bool isBinOp(unsigned Opcode) const override;
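
  // Editorial note on isLegalAddressingMode above: x86 addressing modes are
  // of the general form
  //   Base + Index * Scale + Disp   (Scale in {1, 2, 4, 8}, Disp a signed
  //   32-bit immediate, plus an optional global base)
  // The precise rules, e.g. for RIP-relative addressing, live in the
  // implementation.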

  /// Returns true if the opcode is a commutative binary operation.
  bool isCommutativeBinOp(unsigned Opcode) const override;

  /// Return true if it's free to truncate a value of
  /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
  /// register EAX to i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

  /// Return true if any actual instruction that defines a
  /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
  /// register. This does not necessarily include registers defined in
  /// unknown ways, such as incoming arguments, or copies from unknown
  /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
  /// does not necessarily apply to truncate instructions. e.g. on x86-64,
  /// all instructions that define 32-bit values implicitly zero-extend the
  /// result out to 64 bits.
  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
  bool shouldConvertPhiType(Type *From, Type *To) const override;

  /// Return true if folding a vector load into ExtVal (a sign, zero, or any
  /// extend node) is profitable.
  bool isVectorLoadExtDesirable(SDValue) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this
  /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;

  /// Return true if it's profitable to narrow operations of type SrcVT to
  /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
  /// from i32 to i16.
  bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Given an intrinsic, checks if on the target the intrinsic will need to
  /// map to a MemIntrinsicNode (touches memory). If this is the case, it
  /// returns true and stores the intrinsic information into the IntrinsicInfo
  /// that was passed to the function.
  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  /// Returns true if the target can instruction select the
  /// specified FP immediate natively. If false, the legalizer will
  /// materialize the FP immediate as a load from a constant pool.
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Targets can use this to indicate that they only support *some*
  /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
  /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
  /// be legal.
  bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
  /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
  /// constant pool entry.
  bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

  /// Returns true if lowering to a jump table is allowed.
  bool areJTsAllowed(const Function *Fn) const override;

  MVT getPreferredSwitchConditionType(LLVMContext &Context,
                                      EVT ConditionVT) const override;

  /// If true, then instruction selection should
  /// seek to shrink the FP constant of the specified type to a smaller type
  /// in order to save space and/or reduce runtime.
  bool ShouldShrinkFPConstant(EVT VT) const override;

  /// Return true if we believe it is correct and profitable to reduce the
  /// load node to a smaller type.
  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  /// Return true if the specified scalar FP type is computed in an SSE
  /// register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

  bool convertSelectOfConstantsToMath(EVT VT) const override;

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  /// Scalar ops always have equal or better analysis/performance/power than
  /// the vector equivalent, so this always makes sense if the scalar op is
  /// supported.
  bool shouldScalarizeBinop(SDValue) const override;

  /// Extract of a scalar FP value from index 0 of a vector is free.
  bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
    EVT EltVT = VT.getScalarType();
    return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
  }

  /// Overflow nodes should get combined/lowered to optimal instructions
  /// (they should allow eliminating explicit compares by getting flags from
  /// math ops).
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override;

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // If we can replace more than 2 scalar stores, there will be a reduction
    // in instructions even after we add a vector constant load.
    return IsZero || NumElem > 2;
  }

  bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                               const SelectionDAG &DAG,
                               const MachineMemOperand &MMO) const override;

  /// Intel processors have a unified instruction and data cache.
  const char *getClearCacheBuiltinName() const override {
    return nullptr; // nothing to do, move along.
  }

  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  bool needsFixedCatchObjects() const override;

  /// This method returns a target specific FastISel object,
  /// or null if the target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  bool useLoadStackGuardNode() const override;
  bool useStackGuardXorFP() const override;
  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;
  SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                              const SDLoc &DL) const override;

  /// Return true if the target stores SafeStack pointer at a fixed offset in
  /// some non-standard address space, and populates the address space and
  /// offset as appropriate.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
                                        SDValue Chain, SDValue Pointer,
                                        MachinePointerInfo PtrInfo,
                                        Align Alignment,
                                        SelectionDAG &DAG) const;

  /// Customize the preferred legalization strategy for certain types.
  LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

  bool softPromoteHalfType() const override { return true; }

  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;

  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(
      LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
      unsigned &NumIntermediates, MVT &RegisterVT) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool supportSwiftError() const override;

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  bool hasStackProbeSymbol(const MachineFunction &MF) const override;
  bool hasInlineStackProbe(const MachineFunction &MF) const override;
  StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;

  unsigned getStackProbeSize(const MachineFunction &MF) const;

  bool hasVectorBlend() const override { return true; }

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
                               unsigned OpNo) const override;

  /// Lower interleaved load(s) into target specific
  /// instructions/intrinsics.
  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
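
  // Editorial note: lowerInterleavedLoad/lowerInterleavedStore are invoked
  // by the generic InterleavedAccess pass, which passes the candidate
  // load/store, its shufflevector users, the extracted indices and the
  // interleave factor (up to getMaxSupportedInterleaveFactor()); returning
  // false leaves the original IR untouched.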

  /// Lower interleaved store(s) into target specific
  /// instructions/intrinsics.
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
                                 int JTI, SelectionDAG &DAG) const override;

  Align getPrefLoopAlignment(MachineLoop *ML) const override;

  EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
    if (VT == MVT::f80)
      return EVT::getIntegerVT(Context, 96);
    return TargetLoweringBase::getTypeToTransformTo(Context, VT);
  }

protected:
  std::pair<const TargetRegisterClass *, uint8_t>
  findRepresentativeClass(const TargetRegisterInfo *TRI,
                          MVT VT) const override;

private:
  /// Keep a reference to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget &Subtarget;

  /// A list of legal FP immediates.
  std::vector<APFloat> LegalFPImmediates;

  /// Indicate that this x86 target can instruction
  /// select the specified FP immediate natively.
  void addLegalFPImmediate(const APFloat& Imm) {
    LegalFPImmediates.push_back(Imm);
  }

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &dl, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals,
                          uint32_t *RegMask) const;
  SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                           const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA, MachineFrameInfo &MFI,
                           unsigned i) const;
  SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA,
                           ISD::ArgFlagsTy Flags, bool isByval) const;

  // Call lowering helpers.

  /// Check whether the call is eligible for tail call optimization. Targets
  /// that want to do tail call optimization should implement this function.
  bool IsEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
      bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
  SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                  SDValue Chain, bool IsTailCall,
                                  bool Is64Bit, int FPDiff,
                                  const SDLoc &dl) const;

  unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                       SelectionDAG &DAG) const;

  unsigned getAddressSpace() const;

  SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                          SDValue &Chain) const;
  SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

  unsigned getGlobalWrapperKind(const GlobalValue *GV,
                                const unsigned char OpFlags) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

  /// Creates target global address or external symbol nodes for calls or
  /// other uses.
  SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                bool ForCall) const;

  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                  SDValue &Chain) const;
  SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;

  SDValue
  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       const SDLoc &dl, SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
                      SelectionDAG &DAG) const override;

  // Split CSR is only supported for the CXX_FAST_TLS calling convention on
  // functions that are marked nounwind.
  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

  EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                          ISD::NodeType ExtendKind) const override;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
  void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
  void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

  LoadInst *
  lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

  bool needsCmpXchgNb(Type *MemType) const;

  void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                              MachineBasicBlock *DispatchBB, int FI) const;

  // Utility function to emit the low-level va_arg code for X86-64.
  MachineBasicBlock *
  EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
  MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                               MachineInstr &MI2,
                                               MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                       MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                             MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                        MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                        MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                              MachineBasicBlock *BB) const;

  MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const;

  void emitSetJmpShadowStackFix(MachineInstr &MI,
                                MachineBasicBlock *MBB) const;

  MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;

  MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                               MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const;

  /// Emit flags for the given setcc condition and operands. Also returns the
  /// corresponding X86 condition code constant in X86CC.
  SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SDValue &X86CC) const;

  bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                           SDValue IntPow2) const override;

  /// Check if replacement of SQRT with RSQRT should be disabled.
  bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

  /// Use rsqrt* to speed up sqrt calculations.
  SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                          int &RefinementSteps, bool &UseOneConstNR,
                          bool Reciprocal) const override;

  /// Use rcp* to speed up fdiv calculations.
  SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                           int &RefinementSteps) const override;

  /// Reassociate floating point divisions into multiply by reciprocal.
  unsigned combineRepeatedFPDivisors() const override;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;

  SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
                  SDValue V2) const;
};

namespace X86 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace X86

// X86 specific Gather/Scatter nodes.
// The class has the same order of operands as MaskedGatherScatterSDNode for
// convenience.
class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
public:
  // This class is intended as a utility and should never be directly created.
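  // The accessors below follow the MaskedGatherScatterSDNode operand order:
  //   0: chain, 1: pass-through (gather) or stored value (scatter),
  //   2: mask, 3: base pointer, 4: index, 5: scale.
  // Illustrative use, where Node is assumed to be an SDNode* obtained from
  // DAG traversal:
  //   if (auto *GS = dyn_cast<X86MaskedGatherScatterSDNode>(Node)) {
  //     SDValue Base = GS->getBasePtr(); // operand 3
  //     SDValue Index = GS->getIndex();  // operand 4
  //     SDValue Scale = GS->getScale();  // operand 5
  //   }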
  X86MaskedGatherScatterSDNode() = delete;
  ~X86MaskedGatherScatterSDNode() = delete;

  const SDValue &getBasePtr() const { return getOperand(3); }
  const SDValue &getIndex() const { return getOperand(4); }
  const SDValue &getMask() const { return getOperand(2); }
  const SDValue &getScale() const { return getOperand(5); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MGATHER ||
           N->getOpcode() == X86ISD::MSCATTER;
  }
};

class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
public:
  const SDValue &getPassThru() const { return getOperand(1); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MGATHER;
  }
};

class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
public:
  const SDValue &getValue() const { return getOperand(1); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MSCATTER;
  }
};

/// Generate unpacklo/unpackhi shuffle mask.
void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                             bool Unary);

/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
/// imposed by AVX and specific to the unary pattern. Example:
/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H