1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that X86 uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H 15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H 16 17 #include "llvm/CodeGen/MachineFunction.h" 18 #include "llvm/CodeGen/TargetLowering.h" 19 20 namespace llvm { 21 class X86Subtarget; 22 class X86TargetMachine; 23 24 namespace X86ISD { 25 // X86 Specific DAG Nodes 26 enum NodeType : unsigned { 27 // Start the numbering where the builtin ops leave off. 28 FIRST_NUMBER = ISD::BUILTIN_OP_END, 29 30 /// Bit scan forward. 31 BSF, 32 /// Bit scan reverse. 33 BSR, 34 35 /// X86 funnel/double shift i16 instructions. These correspond to 36 /// X86::SHLDW and X86::SHRDW instructions which have different amount 37 /// modulo rules than generic funnel shifts. 38 /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. 39 FSHL, 40 FSHR, 41 42 /// Bitwise logical AND of floating point values. This corresponds 43 /// to X86::ANDPS or X86::ANDPD. 44 FAND, 45 46 /// Bitwise logical OR of floating point values. This corresponds 47 /// to X86::ORPS or X86::ORPD. 48 FOR, 49 50 /// Bitwise logical XOR of floating point values. This corresponds 51 /// to X86::XORPS or X86::XORPD. 52 FXOR, 53 54 /// Bitwise logical ANDNOT of floating point values. This 55 /// corresponds to X86::ANDNPS or X86::ANDNPD. 56 FANDN, 57 58 /// These operations represent an abstract X86 call 59 /// instruction, which includes a bunch of information. In particular, the 60 /// operands of this node are: 61 /// 62 /// #0 - The incoming token chain 63 /// #1 - The callee 64 /// #2 - The number of arg bytes the caller pushes on the stack. 65 /// #3 - The number of arg bytes the callee pops off the stack. 66 /// #4 - The value to pass in AL/AX/EAX (optional) 67 /// #5 - The value to pass in DL/DX/EDX (optional) 68 /// 69 /// The result values of these nodes are: 70 /// 71 /// #0 - The outgoing token chain 72 /// #1 - The first register result value (optional) 73 /// #2 - The second register result value (optional) 74 /// 75 CALL, 76 77 /// Same as call except it adds the NoTrack prefix. 78 NT_CALL, 79 80 // Pseudo for an Objective-C call that gets emitted together with a special 81 // marker instruction. 82 CALL_RVMARKER, 83 84 /// X86 compare and logical compare instructions. 85 CMP, 86 FCMP, 87 COMI, 88 UCOMI, 89 90 /// X86 bit-test instructions. 91 BT, 92 93 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS 94 /// operand, usually produced by a CMP instruction. 95 SETCC, 96 97 /// X86 Select 98 SELECTS, 99 100 // Same as SETCC except it's materialized with a sbb and the value is all 101 // ones or all zeros. 102 SETCC_CARRY, // R = carry_bit ? ~0 : 0 103 104 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. 105 /// Operands are two FP values to compare; result is a mask of 106 /// 0s or 1s. Generally DTRT for C/C++ with NaNs. 107 FSETCC, 108 109 /// X86 FP SETCC, similar to above, but with output as an i1 mask, 110 /// and a version with SAE.
111 FSETCCM, 112 FSETCCM_SAE, 113 114 /// X86 conditional moves. Operand 0 and operand 1 are the two values 115 /// to select from. Operand 2 is the condition code, and operand 3 is the 116 /// flag operand produced by a CMP or TEST instruction. 117 CMOV, 118 119 /// X86 conditional branches. Operand 0 is the chain operand, operand 1 120 /// is the block to branch if condition is true, operand 2 is the 121 /// condition code, and operand 3 is the flag operand produced by a CMP 122 /// or TEST instruction. 123 BRCOND, 124 125 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and 126 /// operand 1 is the target address. 127 NT_BRIND, 128 129 /// Return with a flag operand. Operand 0 is the chain operand, operand 130 /// 1 is the number of bytes of stack to pop. 131 RET_FLAG, 132 133 /// Return from interrupt. Operand 0 is the number of bytes to pop. 134 IRET, 135 136 /// Repeat fill, corresponds to X86::REP_STOSx. 137 REP_STOS, 138 139 /// Repeat move, corresponds to X86::REP_MOVSx. 140 REP_MOVS, 141 142 /// On Darwin, this node represents the result of the popl 143 /// at function entry, used for PIC code. 144 GlobalBaseReg, 145 146 /// A wrapper node for TargetConstantPool, TargetJumpTable, 147 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, 148 /// MCSymbol and TargetBlockAddress. 149 Wrapper, 150 151 /// Special wrapper used under X86-64 PIC mode for RIP 152 /// relative displacements. 153 WrapperRIP, 154 155 /// Copies a 64-bit value from an MMX vector to the low word 156 /// of an XMM vector, with the high word zero filled. 157 MOVQ2DQ, 158 159 /// Copies a 64-bit value from the low word of an XMM vector 160 /// to an MMX vector. 161 MOVDQ2Q, 162 163 /// Copies a 32-bit value from the low word of a MMX 164 /// vector to a GPR. 165 MMX_MOVD2W, 166 167 /// Copies a GPR into the low 32-bit word of a MMX vector 168 /// and zero out the high word. 169 MMX_MOVW2D, 170 171 /// Extract an 8-bit value from a vector and zero extend it to 172 /// i32, corresponds to X86::PEXTRB. 173 PEXTRB, 174 175 /// Extract a 16-bit value from a vector and zero extend it to 176 /// i32, corresponds to X86::PEXTRW. 177 PEXTRW, 178 179 /// Insert any element of a 4 x float vector into any element 180 /// of a destination 4 x floatvector. 181 INSERTPS, 182 183 /// Insert the lower 8-bits of a 32-bit value to a vector, 184 /// corresponds to X86::PINSRB. 185 PINSRB, 186 187 /// Insert the lower 16-bits of a 32-bit value to a vector, 188 /// corresponds to X86::PINSRW. 189 PINSRW, 190 191 /// Shuffle 16 8-bit values within a vector. 192 PSHUFB, 193 194 /// Compute Sum of Absolute Differences. 195 PSADBW, 196 /// Compute Double Block Packed Sum-Absolute-Differences 197 DBPSADBW, 198 199 /// Bitwise Logical AND NOT of Packed FP values. 200 ANDNP, 201 202 /// Blend where the selector is an immediate. 203 BLENDI, 204 205 /// Dynamic (non-constant condition) vector blend where only the sign bits 206 /// of the condition elements are used. This is used to enforce that the 207 /// condition mask is not valid for generic VSELECT optimizations. This 208 /// is also used to implement the intrinsics. 209 /// Operands are in VSELECT order: MASK, TRUE, FALSE 210 BLENDV, 211 212 /// Combined add and sub on an FP vector. 213 ADDSUB, 214 215 // FP vector ops with rounding mode. 
216 FADD_RND, 217 FADDS, 218 FADDS_RND, 219 FSUB_RND, 220 FSUBS, 221 FSUBS_RND, 222 FMUL_RND, 223 FMULS, 224 FMULS_RND, 225 FDIV_RND, 226 FDIVS, 227 FDIVS_RND, 228 FMAX_SAE, 229 FMAXS_SAE, 230 FMIN_SAE, 231 FMINS_SAE, 232 FSQRT_RND, 233 FSQRTS, 234 FSQRTS_RND, 235 236 // FP vector get exponent. 237 FGETEXP, 238 FGETEXP_SAE, 239 FGETEXPS, 240 FGETEXPS_SAE, 241 // Extract Normalized Mantissas. 242 VGETMANT, 243 VGETMANT_SAE, 244 VGETMANTS, 245 VGETMANTS_SAE, 246 // FP Scale. 247 SCALEF, 248 SCALEF_RND, 249 SCALEFS, 250 SCALEFS_RND, 251 252 /// Integer horizontal add/sub. 253 HADD, 254 HSUB, 255 256 /// Floating point horizontal add/sub. 257 FHADD, 258 FHSUB, 259 260 // Detect Conflicts Within a Vector 261 CONFLICT, 262 263 /// Floating point max and min. 264 FMAX, 265 FMIN, 266 267 /// Commutative FMIN and FMAX. 268 FMAXC, 269 FMINC, 270 271 /// Scalar intrinsic floating point max and min. 272 FMAXS, 273 FMINS, 274 275 /// Floating point reciprocal-sqrt and reciprocal approximation. 276 /// Note that these typically require refinement 277 /// in order to obtain suitable precision. 278 FRSQRT, 279 FRCP, 280 281 // AVX-512 reciprocal approximations with a little more precision. 282 RSQRT14, 283 RSQRT14S, 284 RCP14, 285 RCP14S, 286 287 // Thread Local Storage. 288 TLSADDR, 289 290 // Thread Local Storage. A call to get the start address 291 // of the TLS block for the current module. 292 TLSBASEADDR, 293 294 // Thread Local Storage. When calling to an OS provided 295 // thunk at the address from an earlier relocation. 296 TLSCALL, 297 298 // Exception Handling helpers. 299 EH_RETURN, 300 301 // SjLj exception handling setjmp. 302 EH_SJLJ_SETJMP, 303 304 // SjLj exception handling longjmp. 305 EH_SJLJ_LONGJMP, 306 307 // SjLj exception handling dispatch. 308 EH_SJLJ_SETUP_DISPATCH, 309 310 /// Tail call return. See X86TargetLowering::LowerCall for 311 /// the list of operands. 312 TC_RETURN, 313 314 // Vector move to low scalar and zero higher vector elements. 315 VZEXT_MOVL, 316 317 // Vector integer truncate. 318 VTRUNC, 319 // Vector integer truncate with unsigned/signed saturation. 320 VTRUNCUS, 321 VTRUNCS, 322 323 // Masked version of the above. Used when less than a 128-bit result is 324 // produced since the mask only applies to the lower elements and can't 325 // be represented by a select. 326 // SRC, PASSTHRU, MASK 327 VMTRUNC, 328 VMTRUNCUS, 329 VMTRUNCS, 330 331 // Vector FP extend. 332 VFPEXT, 333 VFPEXT_SAE, 334 VFPEXTS, 335 VFPEXTS_SAE, 336 337 // Vector FP round. 338 VFPROUND, 339 VFPROUND_RND, 340 VFPROUNDS, 341 VFPROUNDS_RND, 342 343 // Masked version of above. Used for v2f64->v4f32. 344 // SRC, PASSTHRU, MASK 345 VMFPROUND, 346 347 // 128-bit vector logical left / right shift 348 VSHLDQ, 349 VSRLDQ, 350 351 // Vector shift elements 352 VSHL, 353 VSRL, 354 VSRA, 355 356 // Vector variable shift 357 VSHLV, 358 VSRLV, 359 VSRAV, 360 361 // Vector shift elements by immediate 362 VSHLI, 363 VSRLI, 364 VSRAI, 365 366 // Shifts of mask registers. 367 KSHIFTL, 368 KSHIFTR, 369 370 // Bit rotate by immediate 371 VROTLI, 372 VROTRI, 373 374 // Vector packed double/float comparison. 375 CMPP, 376 377 // Vector integer comparisons. 378 PCMPEQ, 379 PCMPGT, 380 381 // v8i16 Horizontal minimum and position. 382 PHMINPOS, 383 384 MULTISHIFT, 385 386 /// Vector comparison generating mask bits for fp and 387 /// integer signed and unsigned data types. 388 CMPM, 389 // Vector mask comparison generating mask bits for FP values. 390 CMPMM, 391 // Vector mask comparison with SAE for FP values. 
392 CMPMM_SAE, 393 394 // Arithmetic operations with FLAGS results. 395 ADD, 396 SUB, 397 ADC, 398 SBB, 399 SMUL, 400 UMUL, 401 OR, 402 XOR, 403 AND, 404 405 // Bit field extract. 406 BEXTR, 407 BEXTRI, 408 409 // Zero High Bits Starting with Specified Bit Position. 410 BZHI, 411 412 // Parallel extract and deposit. 413 PDEP, 414 PEXT, 415 416 // X86-specific multiply by immediate. 417 MUL_IMM, 418 419 // Vector sign bit extraction. 420 MOVMSK, 421 422 // Vector bitwise comparisons. 423 PTEST, 424 425 // Vector packed fp sign bitwise comparisons. 426 TESTP, 427 428 // OR/AND test for masks. 429 KORTEST, 430 KTEST, 431 432 // ADD for masks. 433 KADD, 434 435 // Several flavors of instructions with vector shuffle behaviors. 436 // Saturated signed/unsigned packing. 437 PACKSS, 438 PACKUS, 439 // Intra-lane alignr. 440 PALIGNR, 441 // AVX512 inter-lane alignr. 442 VALIGN, 443 PSHUFD, 444 PSHUFHW, 445 PSHUFLW, 446 SHUFP, 447 // VBMI2 Concat & Shift. 448 VSHLD, 449 VSHRD, 450 VSHLDV, 451 VSHRDV, 452 // Shuffle Packed Values at 128-bit granularity. 453 SHUF128, 454 MOVDDUP, 455 MOVSHDUP, 456 MOVSLDUP, 457 MOVLHPS, 458 MOVHLPS, 459 MOVSD, 460 MOVSS, 461 MOVSH, 462 UNPCKL, 463 UNPCKH, 464 VPERMILPV, 465 VPERMILPI, 466 VPERMI, 467 VPERM2X128, 468 469 // Variable Permute (VPERM). 470 // Res = VPERMV MaskV, V0 471 VPERMV, 472 473 // 3-op Variable Permute (VPERMT2). 474 // Res = VPERMV3 V0, MaskV, V1 475 VPERMV3, 476 477 // Bitwise ternary logic. 478 VPTERNLOG, 479 // Fix Up Special Packed Float32/64 values. 480 VFIXUPIMM, 481 VFIXUPIMM_SAE, 482 VFIXUPIMMS, 483 VFIXUPIMMS_SAE, 484 // Range Restriction Calculation For Packed Pairs of Float32/64 values. 485 VRANGE, 486 VRANGE_SAE, 487 VRANGES, 488 VRANGES_SAE, 489 // Reduce - Perform Reduction Transformation on scalar/packed FP. 490 VREDUCE, 491 VREDUCE_SAE, 492 VREDUCES, 493 VREDUCES_SAE, 494 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. 495 // Also used by the legacy (V)ROUND intrinsics where we mask out the 496 // scaling part of the immediate. 497 VRNDSCALE, 498 VRNDSCALE_SAE, 499 VRNDSCALES, 500 VRNDSCALES_SAE, 501 // Tests Types Of FP Values for packed types. 502 VFPCLASS, 503 // Tests Types Of FP Values for scalar types. 504 VFPCLASSS, 505 506 // Broadcast (splat) scalar or element 0 of a vector. If the operand is 507 // a vector, this node may change the vector length as part of the splat. 508 VBROADCAST, 509 // Broadcast mask to vector. 510 VBROADCASTM, 511 512 /// SSE4A Extraction and Insertion. 513 EXTRQI, 514 INSERTQI, 515 516 // XOP arithmetic/logical shifts. 517 VPSHA, 518 VPSHL, 519 // XOP signed/unsigned integer comparisons. 520 VPCOM, 521 VPCOMU, 522 // XOP packed permute bytes. 523 VPPERM, 524 // XOP two source permutation. 525 VPERMIL2, 526 527 // Vector multiply packed unsigned doubleword integers. 528 PMULUDQ, 529 // Vector multiply packed signed doubleword integers. 530 PMULDQ, 531 // Vector Multiply Packed Unsigned Integers with Round and Scale. 532 MULHRS, 533 534 // Multiply and Add Packed Integers. 535 VPMADDUBSW, 536 VPMADDWD, 537 538 // AVX512IFMA multiply and add. 539 // NOTE: These are different from the instruction and perform 540 // op0 x op1 + op2. 541 VPMADD52L, 542 VPMADD52H, 543 544 // VNNI 545 VPDPBUSD, 546 VPDPBUSDS, 547 VPDPWSSD, 548 VPDPWSSDS, 549 550 // FMA nodes. 551 // We use the target independent ISD::FMA for the non-inverted case. 552 FNMADD, 553 FMSUB, 554 FNMSUB, 555 FMADDSUB, 556 FMSUBADD, 557 558 // FMA with rounding mode.
559 FMADD_RND, 560 FNMADD_RND, 561 FMSUB_RND, 562 FNMSUB_RND, 563 FMADDSUB_RND, 564 FMSUBADD_RND, 565 566 // AVX512-FP16 complex addition and multiplication. 567 VFMADDC, 568 VFMADDC_RND, 569 VFCMADDC, 570 VFCMADDC_RND, 571 572 VFMULC, 573 VFMULC_RND, 574 VFCMULC, 575 VFCMULC_RND, 576 577 VFMADDCSH, 578 VFMADDCSH_RND, 579 VFCMADDCSH, 580 VFCMADDCSH_RND, 581 582 VFMULCSH, 583 VFMULCSH_RND, 584 VFCMULCSH, 585 VFCMULCSH_RND, 586 587 // Compress and expand. 588 COMPRESS, 589 EXPAND, 590 591 // Bits shuffle 592 VPSHUFBITQMB, 593 594 // Convert Unsigned/Signed Integer to Floating-Point Value with rounding mode. 595 SINT_TO_FP_RND, 596 UINT_TO_FP_RND, 597 SCALAR_SINT_TO_FP, 598 SCALAR_UINT_TO_FP, 599 SCALAR_SINT_TO_FP_RND, 600 SCALAR_UINT_TO_FP_RND, 601 602 // Vector float/double to signed/unsigned integer. 603 CVTP2SI, 604 CVTP2UI, 605 CVTP2SI_RND, 606 CVTP2UI_RND, 607 // Scalar float/double to signed/unsigned integer. 608 CVTS2SI, 609 CVTS2UI, 610 CVTS2SI_RND, 611 CVTS2UI_RND, 612 613 // Vector float/double to signed/unsigned integer with truncation. 614 CVTTP2SI, 615 CVTTP2UI, 616 CVTTP2SI_SAE, 617 CVTTP2UI_SAE, 618 // Scalar float/double to signed/unsigned integer with truncation. 619 CVTTS2SI, 620 CVTTS2UI, 621 CVTTS2SI_SAE, 622 CVTTS2UI_SAE, 623 624 // Vector signed/unsigned integer to float/double. 625 CVTSI2P, 626 CVTUI2P, 627 628 // Masked versions of above. Used for v2f64->v4f32. 629 // SRC, PASSTHRU, MASK 630 MCVTP2SI, 631 MCVTP2UI, 632 MCVTTP2SI, 633 MCVTTP2UI, 634 MCVTSI2P, 635 MCVTUI2P, 636 637 // Vector float to bfloat16. 638 // Convert TWO packed single data to one packed BF16 data 639 CVTNE2PS2BF16, 640 // Convert packed single data to packed BF16 data 641 CVTNEPS2BF16, 642 // Masked version of above. 643 // SRC, PASSTHRU, MASK 644 MCVTNEPS2BF16, 645 646 // Dot product of BF16 pairs accumulated into 647 // packed single precision. 648 DPBF16PS, 649 650 // A stack checking function call. On Windows it's the _chkstk call. 651 DYN_ALLOCA, 652 653 // For allocating variable amounts of stack space when using 654 // segmented stacks. Checks if the current stacklet has enough space, and 655 // falls back to heap allocation if not. 656 SEG_ALLOCA, 657 658 // For allocating stack space when using stack clash protector. 659 // Allocation is performed by block, and each block is probed. 660 PROBED_ALLOCA, 661 662 // Memory barriers. 663 MEMBARRIER, 664 MFENCE, 665 666 // Get a random integer and indicate whether it is valid in CF. 667 RDRAND, 668 669 // Get a NIST SP800-90B & C compliant random integer and 670 // indicate whether it is valid in CF. 671 RDSEED, 672 673 // Protection keys 674 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. 675 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is 676 // value for ECX. 677 RDPKRU, 678 WRPKRU, 679 680 // SSE42 string comparisons. 681 // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG 682 // will emit one or two instructions based on which results are used. If 683 // both the flags and the index/mask are used, this allows us to use a 684 // single instruction since we won't have to pick an opcode for flags. 685 // Instead we can rely on the DAG to CSE everything and decide at isel. 686 PCMPISTR, 687 PCMPESTR, 688 689 // Test if in transactional execution. 690 XTEST, 691 692 // ERI instructions. 693 RSQRT28, 694 RSQRT28_SAE, 695 RSQRT28S, 696 RSQRT28S_SAE, 697 RCP28, 698 RCP28_SAE, 699 RCP28S, 700 RCP28S_SAE, 701 EXP2, 702 EXP2_SAE, 703 704 // Conversions between float and half-float.
705 CVTPS2PH, 706 CVTPH2PS, 707 CVTPH2PS_SAE, 708 709 // Masked version of above. 710 // SRC, RND, PASSTHRU, MASK 711 MCVTPS2PH, 712 713 // Galois Field Arithmetic Instructions 714 GF2P8AFFINEINVQB, 715 GF2P8AFFINEQB, 716 GF2P8MULB, 717 718 // LWP insert record. 719 LWPINS, 720 721 // User level wait 722 UMWAIT, 723 TPAUSE, 724 725 // Enqueue Stores Instructions 726 ENQCMD, 727 ENQCMDS, 728 729 // For avx512-vp2intersect 730 VP2INTERSECT, 731 732 // User level interrupts - testui 733 TESTUI, 734 735 /// X86 strict FP compare instructions. 736 STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, 737 STRICT_FCMPS, 738 739 // Vector packed double/float comparison. 740 STRICT_CMPP, 741 742 /// Vector comparison generating mask bits for fp and 743 /// integer signed and unsigned data types. 744 STRICT_CMPM, 745 746 // Vector float/double to signed/unsigned integer with truncation. 747 STRICT_CVTTP2SI, 748 STRICT_CVTTP2UI, 749 750 // Vector FP extend. 751 STRICT_VFPEXT, 752 753 // Vector FP round. 754 STRICT_VFPROUND, 755 756 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. 757 // Also used by the legacy (V)ROUND intrinsics where we mask out the 758 // scaling part of the immediate. 759 STRICT_VRNDSCALE, 760 761 // Vector signed/unsigned integer to float/double. 762 STRICT_CVTSI2P, 763 STRICT_CVTUI2P, 764 765 // Strict FMA nodes. 766 STRICT_FNMADD, 767 STRICT_FMSUB, 768 STRICT_FNMSUB, 769 770 // Conversions between float and half-float. 771 STRICT_CVTPS2PH, 772 STRICT_CVTPH2PS, 773 774 // WARNING: Only add nodes here if they are stric FP nodes. Non-memory and 775 // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE. 776 777 // Compare and swap. 778 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, 779 LCMPXCHG8_DAG, 780 LCMPXCHG16_DAG, 781 LCMPXCHG16_SAVE_RBX_DAG, 782 783 /// LOCK-prefixed arithmetic read-modify-write instructions. 784 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) 785 LADD, 786 LSUB, 787 LOR, 788 LXOR, 789 LAND, 790 LBTS, 791 LBTC, 792 LBTR, 793 794 // Load, scalar_to_vector, and zero extend. 795 VZEXT_LOAD, 796 797 // extract_vector_elt, store. 798 VEXTRACT_STORE, 799 800 // scalar broadcast from memory. 801 VBROADCAST_LOAD, 802 803 // subvector broadcast from memory. 804 SUBV_BROADCAST_LOAD, 805 806 // Store FP control word into i16 memory. 807 FNSTCW16m, 808 809 // Load FP control word from i16 memory. 810 FLDCW16m, 811 812 /// This instruction implements FP_TO_SINT with the 813 /// integer destination in memory and a FP reg source. This corresponds 814 /// to the X86::FIST*m instructions and the rounding mode change stuff. It 815 /// has two inputs (token chain and address) and two outputs (int value 816 /// and token chain). Memory VT specifies the type to store to. 817 FP_TO_INT_IN_MEM, 818 819 /// This instruction implements SINT_TO_FP with the 820 /// integer source in memory and FP reg result. This corresponds to the 821 /// X86::FILD*m instructions. It has two inputs (token chain and address) 822 /// and two outputs (FP value and token chain). The integer source type is 823 /// specified by the memory VT. 824 FILD, 825 826 /// This instruction implements a fp->int store from FP stack 827 /// slots. This corresponds to the fist instruction. It takes a 828 /// chain operand, value to store, address, and glue. The memory VT 829 /// specifies the type to store as. 830 FIST, 831 832 /// This instruction implements an extending load to FP stack slots. 833 /// This corresponds to the X86::FLD32m / X86::FLD64m. 
It takes a chain 834 /// operand and a pointer to load from. The memory VT specifies the type to 835 /// load from. 836 FLD, 837 838 /// This instruction implements a truncating store from FP stack 839 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a 840 /// chain operand, value to store, address, and glue. The memory VT 841 /// specifies the type to store as. 842 FST, 843 844 /// These instructions grab the address of the next argument 845 /// from a va_list. (reads and modifies the va_list in memory) 846 VAARG_64, 847 VAARG_X32, 848 849 // Vector truncating store with unsigned/signed saturation 850 VTRUNCSTOREUS, 851 VTRUNCSTORES, 852 // Vector truncating masked store with unsigned/signed saturation 853 VMTRUNCSTOREUS, 854 VMTRUNCSTORES, 855 856 // X86 specific gather and scatter 857 MGATHER, 858 MSCATTER, 859 860 // Key locker nodes that produce flags. 861 AESENC128KL, 862 AESDEC128KL, 863 AESENC256KL, 864 AESDEC256KL, 865 AESENCWIDE128KL, 866 AESDECWIDE128KL, 867 AESENCWIDE256KL, 868 AESDECWIDE256KL, 869 870 // Save xmm argument registers to the stack, according to %al. An operator 871 // is needed so that this can be expanded with control flow. 872 VASTART_SAVE_XMM_REGS, 873 874 // WARNING: Do not add anything at the end unless you want the node to 875 // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all 876 // opcodes will be treated as target memory ops! 877 }; 878 } // end namespace X86ISD 879 880 namespace X86 { 881 /// Current rounding mode is represented in bits 11:10 of the FP control 882 /// word (FPCW). These values are the same as the corresponding constants 883 /// for rounding mode used in glibc. 884 enum RoundingMode { 885 rmToNearest = 0, // FE_TONEAREST 886 rmDownward = 1 << 10, // FE_DOWNWARD 887 rmUpward = 2 << 10, // FE_UPWARD 888 rmTowardZero = 3 << 10, // FE_TOWARDZERO 889 rmMask = 3 << 10 // Bit mask selecting rounding mode 890 }; 891 } 892 893 /// Define some predicates that are used for node matching. 894 namespace X86 { 895 /// Returns true if Elt is a constant zero or floating point constant +0.0. 896 bool isZeroNode(SDValue Elt); 897 898 /// Returns true if the given offset can 899 /// fit into the displacement field of the instruction. 900 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, 901 bool hasSymbolicDisplacement); 902 903 /// Determines whether the callee is required to pop its 904 /// own arguments. Callee pop is necessary to support tail calls. 905 bool isCalleePop(CallingConv::ID CallingConv, 906 bool is64Bit, bool IsVarArg, bool GuaranteeTCO); 907 908 /// If Op is a constant whose elements are all the same constant or 909 /// undefined, return true and return the constant value in \p SplatVal. 910 /// If we have undef bits that don't cover an entire element, we treat these 911 /// as zero if AllowPartialUndefs is set, else we fail and return false. 912 bool isConstantSplat(SDValue Op, APInt &SplatVal, 913 bool AllowPartialUndefs = true); 914 915 /// Check if Op is a load operation that could be folded into some other x86 916 /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0. 917 bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget, 918 bool AssumeSingleUse = false); 919 920 /// Check if Op is a load operation that could be folded into a vector splat 921 /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
922 bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT, 923 const X86Subtarget &Subtarget, 924 bool AssumeSingleUse = false); 925 926 /// Check if Op is a value that could be used to fold a store into some 927 /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi). 928 bool mayFoldIntoStore(SDValue Op); 929 930 /// Check if Op is an operation that could be folded into a zero extend x86 931 /// instruction. 932 bool mayFoldIntoZeroExtend(SDValue Op); 933 } // end namespace X86 934 935 //===--------------------------------------------------------------------===// 936 // X86 Implementation of the TargetLowering interface 937 class X86TargetLowering final : public TargetLowering { 938 public: 939 explicit X86TargetLowering(const X86TargetMachine &TM, 940 const X86Subtarget &STI); 941 942 unsigned getJumpTableEncoding() const override; 943 bool useSoftFloat() const override; 944 945 void markLibCallAttributes(MachineFunction *MF, unsigned CC, 946 ArgListTy &Args) const override; 947 948 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override { 949 return MVT::i8; 950 } 951 952 const MCExpr * 953 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, 954 const MachineBasicBlock *MBB, unsigned uid, 955 MCContext &Ctx) const override; 956 957 /// Returns relocation base for the given PIC jumptable. 958 SDValue getPICJumpTableRelocBase(SDValue Table, 959 SelectionDAG &DAG) const override; 960 const MCExpr * 961 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 962 unsigned JTI, MCContext &Ctx) const override; 963 964 /// Return the desired alignment for ByVal aggregate 965 /// function arguments in the caller parameter area. For X86, aggregates 966 /// that contain SSE vectors are placed at 16-byte boundaries while the rest 967 /// are at 4-byte boundaries. 968 uint64_t getByValTypeAlignment(Type *Ty, 969 const DataLayout &DL) const override; 970 971 EVT getOptimalMemOpType(const MemOp &Op, 972 const AttributeList &FuncAttributes) const override; 973 974 /// Returns true if it's safe to use load / store of the 975 /// specified type to expand memcpy / memset inline. This is mostly true 976 /// for all types except for some special cases. For example, on X86 977 /// targets without SSE2 f64 load / store are done with fldl / fstpl which 978 /// also does type conversion. Note the specified type doesn't have to be 979 /// legal as the hook is used before type legalization. 980 bool isSafeMemOpType(MVT VT) const override; 981 982 /// Returns true if the target allows unaligned memory accesses of the 983 /// specified type. Returns whether it is "fast" in the last argument. 984 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, 985 MachineMemOperand::Flags Flags, 986 bool *Fast) const override; 987 988 /// Provide custom lowering hooks for some operations. 989 /// 990 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 991 992 /// Replace the results of a node with an illegal result 993 /// type with new values built out of custom code. 994 /// 995 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, 996 SelectionDAG &DAG) const override; 997 998 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 999 1000 /// Return true if the target has native support for 1001 /// the specified value type and it is 'desirable' to use the type for the 1002 /// given node type. e.g.
On x86 i16 is legal, but undesirable since i16 1003 /// instruction encodings are longer and some i16 instructions are slow. 1004 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; 1005 1006 /// Return true if the target has native support for the 1007 /// specified value type and it is 'desirable' to use the type. e.g. On x86 1008 /// i16 is legal, but undesirable since i16 instruction encodings are longer 1009 /// and some i16 instructions are slow. 1010 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; 1011 1012 /// Return the newly negated expression if the cost is not expensive and 1013 /// set the cost in \p Cost to indicate whether it is cheaper or neutral to 1014 /// do the negation. 1015 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, 1016 bool LegalOperations, bool ForCodeSize, 1017 NegatibleCost &Cost, 1018 unsigned Depth) const override; 1019 1020 MachineBasicBlock * 1021 EmitInstrWithCustomInserter(MachineInstr &MI, 1022 MachineBasicBlock *MBB) const override; 1023 1024 /// This method returns the name of a target specific DAG node. 1025 const char *getTargetNodeName(unsigned Opcode) const override; 1026 1027 /// Do not merge vector stores after legalization because that may conflict 1028 /// with x86-specific store splitting optimizations. 1029 bool mergeStoresAfterLegalization(EVT MemVT) const override { 1030 return !MemVT.isVector(); 1031 } 1032 1033 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, 1034 const MachineFunction &MF) const override; 1035 1036 bool isCheapToSpeculateCttz() const override; 1037 1038 bool isCheapToSpeculateCtlz() const override; 1039 1040 bool isCtlzFast() const override; 1041 1042 bool hasBitPreservingFPLogic(EVT VT) const override; 1043 1044 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { 1045 // If the pair to store is a mixture of float and int values, we will 1046 // save two bitwise instructions and one float-to-int instruction at the 1047 // cost of one extra store instruction. There is potentially a more 1048 // significant benefit because it avoids the float->int domain switch 1049 // for the input value. So it is more likely a win. 1050 if ((LTy.isFloatingPoint() && HTy.isInteger()) || 1051 (LTy.isInteger() && HTy.isFloatingPoint())) 1052 return true; 1053 // If the pair only contains int values, we will save two bitwise 1054 // instructions at the cost of one extra store instruction (costing one 1055 // more store buffer). Since the benefit is less clear, we leave such 1056 // pairs out until we have a test case proving it is a win. 1057 return false; 1058 } 1059 1060 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; 1061 1062 bool hasAndNotCompare(SDValue Y) const override; 1063 1064 bool hasAndNot(SDValue Y) const override; 1065 1066 bool hasBitTest(SDValue X, SDValue Y) const override; 1067 1068 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( 1069 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, 1070 unsigned OldShiftOpcode, unsigned NewShiftOpcode, 1071 SelectionDAG &DAG) const override; 1072 1073 bool shouldFoldConstantShiftPairToMask(const SDNode *N, 1074 CombineLevel Level) const override; 1075 1076 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override; 1077 1078 bool 1079 shouldTransformSignedTruncationCheck(EVT XVT, 1080 unsigned KeptBits) const override { 1081 // For vectors, we don't have a preference.
1082 if (XVT.isVector()) 1083 return false; 1084 1085 auto VTIsOk = [](EVT VT) -> bool { 1086 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || 1087 VT == MVT::i64; 1088 }; 1089 1090 // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports. 1091 // XVT will be larger than KeptBitsVT. 1092 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits); 1093 return VTIsOk(XVT) && VTIsOk(KeptBitsVT); 1094 } 1095 1096 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; 1097 1098 bool shouldSplatInsEltVarIndex(EVT VT) const override; 1099 1100 bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override { 1101 // Converting to sat variants holds little benefit on X86 as we will just 1102 // need to saturate the value back using fp arithmatic. 1103 return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT); 1104 } 1105 1106 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { 1107 return VT.isScalarInteger(); 1108 } 1109 1110 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST. 1111 MVT hasFastEqualityCompare(unsigned NumBits) const override; 1112 1113 /// Return the value type to use for ISD::SETCC. 1114 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, 1115 EVT VT) const override; 1116 1117 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, 1118 const APInt &DemandedElts, 1119 TargetLoweringOpt &TLO) const override; 1120 1121 /// Determine which of the bits specified in Mask are known to be either 1122 /// zero or one and return them in the KnownZero/KnownOne bitsets. 1123 void computeKnownBitsForTargetNode(const SDValue Op, 1124 KnownBits &Known, 1125 const APInt &DemandedElts, 1126 const SelectionDAG &DAG, 1127 unsigned Depth = 0) const override; 1128 1129 /// Determine the number of bits in the operation that are sign bits. 1130 unsigned ComputeNumSignBitsForTargetNode(SDValue Op, 1131 const APInt &DemandedElts, 1132 const SelectionDAG &DAG, 1133 unsigned Depth) const override; 1134 1135 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, 1136 const APInt &DemandedElts, 1137 APInt &KnownUndef, 1138 APInt &KnownZero, 1139 TargetLoweringOpt &TLO, 1140 unsigned Depth) const override; 1141 1142 bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op, 1143 const APInt &DemandedElts, 1144 unsigned MaskIndex, 1145 TargetLoweringOpt &TLO, 1146 unsigned Depth) const; 1147 1148 bool SimplifyDemandedBitsForTargetNode(SDValue Op, 1149 const APInt &DemandedBits, 1150 const APInt &DemandedElts, 1151 KnownBits &Known, 1152 TargetLoweringOpt &TLO, 1153 unsigned Depth) const override; 1154 1155 SDValue SimplifyMultipleUseDemandedBitsForTargetNode( 1156 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 1157 SelectionDAG &DAG, unsigned Depth) const override; 1158 1159 bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, 1160 APInt &UndefElts, 1161 unsigned Depth) const override; 1162 1163 bool isTargetCanonicalConstantNode(SDValue Op) const override { 1164 // Peek through bitcasts/extracts/inserts to see if we have a broadcast 1165 // vector from memory. 1166 while (Op.getOpcode() == ISD::BITCAST || 1167 Op.getOpcode() == ISD::EXTRACT_SUBVECTOR || 1168 (Op.getOpcode() == ISD::INSERT_SUBVECTOR && 1169 Op.getOperand(0).isUndef())) 1170 Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 
1 : 0); 1171 1172 return Op.getOpcode() == X86ISD::VBROADCAST_LOAD || 1173 TargetLowering::isTargetCanonicalConstantNode(Op); 1174 } 1175 1176 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; 1177 1178 SDValue unwrapAddress(SDValue N) const override; 1179 1180 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; 1181 1182 bool ExpandInlineAsm(CallInst *CI) const override; 1183 1184 ConstraintType getConstraintType(StringRef Constraint) const override; 1185 1186 /// Examine constraint string and operand type and determine a weight value. 1187 /// The operand object must already have been set up with the operand type. 1188 ConstraintWeight 1189 getSingleConstraintMatchWeight(AsmOperandInfo &info, 1190 const char *constraint) const override; 1191 1192 const char *LowerXConstraint(EVT ConstraintVT) const override; 1193 1194 /// Lower the specified operand into the Ops vector. If it is invalid, don't 1195 /// add anything to Ops. If hasMemory is true it means one of the asm 1196 /// constraint of the inline asm instruction being processed is 'm'. 1197 void LowerAsmOperandForConstraint(SDValue Op, 1198 std::string &Constraint, 1199 std::vector<SDValue> &Ops, 1200 SelectionDAG &DAG) const override; 1201 1202 unsigned 1203 getInlineAsmMemConstraint(StringRef ConstraintCode) const override { 1204 if (ConstraintCode == "v") 1205 return InlineAsm::Constraint_v; 1206 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 1207 } 1208 1209 /// Handle Lowering flag assembly outputs. 1210 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, 1211 const SDLoc &DL, 1212 const AsmOperandInfo &Constraint, 1213 SelectionDAG &DAG) const override; 1214 1215 /// Given a physical register constraint 1216 /// (e.g. {edx}), return the register number and the register class for the 1217 /// register. This should only be used for C_Register constraints. On 1218 /// error, this returns a register number of 0. 1219 std::pair<unsigned, const TargetRegisterClass *> 1220 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 1221 StringRef Constraint, MVT VT) const override; 1222 1223 /// Return true if the addressing mode represented 1224 /// by AM is legal for this target, for a load/store of the specified type. 1225 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, 1226 Type *Ty, unsigned AS, 1227 Instruction *I = nullptr) const override; 1228 1229 /// Return true if the specified immediate is legal 1230 /// icmp immediate, that is the target has icmp instructions which can 1231 /// compare a register against the immediate without having to materialize 1232 /// the immediate into a register. 1233 bool isLegalICmpImmediate(int64_t Imm) const override; 1234 1235 /// Return true if the specified immediate is legal 1236 /// add immediate, that is the target has add instructions which can 1237 /// add a register and the immediate without having to materialize 1238 /// the immediate into a register. 1239 bool isLegalAddImmediate(int64_t Imm) const override; 1240 1241 bool isLegalStoreImmediate(int64_t Imm) const override; 1242 1243 /// Return the cost of the scaling factor used in the addressing 1244 /// mode represented by AM for this target, for a load/store 1245 /// of the specified type. 1246 /// If the AM is supported, the return value must be >= 0. 1247 /// If the AM is not supported, it returns a negative value. 
1248 InstructionCost getScalingFactorCost(const DataLayout &DL, 1249 const AddrMode &AM, Type *Ty, 1250 unsigned AS) const override; 1251 1252 /// This is used to enable splatted operand transforms for vector shifts 1253 /// and vector funnel shifts. 1254 bool isVectorShiftByScalarCheap(Type *Ty) const override; 1255 1256 /// Add x86-specific opcodes to the default list. 1257 bool isBinOp(unsigned Opcode) const override; 1258 1259 /// Returns true if the opcode is a commutative binary operation. 1260 bool isCommutativeBinOp(unsigned Opcode) const override; 1261 1262 /// Return true if it's free to truncate a value of 1263 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in 1264 /// register EAX to i16 by referencing its sub-register AX. 1265 bool isTruncateFree(Type *Ty1, Type *Ty2) const override; 1266 bool isTruncateFree(EVT VT1, EVT VT2) const override; 1267 1268 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; 1269 1270 /// Return true if any actual instruction that defines a 1271 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result 1272 /// register. This does not necessarily include registers defined in 1273 /// unknown ways, such as incoming arguments, or copies from unknown 1274 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this 1275 /// does not necessarily apply to truncate instructions. e.g. on x86-64, 1276 /// all instructions that define 32-bit values implicit zero-extend the 1277 /// result out to 64 bits. 1278 bool isZExtFree(Type *Ty1, Type *Ty2) const override; 1279 bool isZExtFree(EVT VT1, EVT VT2) const override; 1280 bool isZExtFree(SDValue Val, EVT VT2) const override; 1281 1282 bool shouldSinkOperands(Instruction *I, 1283 SmallVectorImpl<Use *> &Ops) const override; 1284 bool shouldConvertPhiType(Type *From, Type *To) const override; 1285 1286 /// Return true if folding a vector load into ExtVal (a sign, zero, or any 1287 /// extend node) is profitable. 1288 bool isVectorLoadExtDesirable(SDValue) const override; 1289 1290 /// Return true if an FMA operation is faster than a pair of fmul and fadd 1291 /// instructions. fmuladd intrinsics will be expanded to FMAs when this 1292 /// method returns true, otherwise fmuladd is expanded to fmul + fadd. 1293 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 1294 EVT VT) const override; 1295 1296 /// Return true if it's profitable to narrow 1297 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow 1298 /// from i32 to i8 but not from i32 to i16. 1299 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; 1300 1301 bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, 1302 EVT VT) const override; 1303 1304 /// Given an intrinsic, checks if on the target the intrinsic will need to map 1305 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns 1306 /// true and stores the intrinsic information into the IntrinsicInfo that was 1307 /// passed to the function. 1308 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, 1309 MachineFunction &MF, 1310 unsigned Intrinsic) const override; 1311 1312 /// Returns true if the target can instruction select the 1313 /// specified FP immediate natively. If false, the legalizer will 1314 /// materialize the FP immediate as a load from a constant pool. 
1315 bool isFPImmLegal(const APFloat &Imm, EVT VT, 1316 bool ForCodeSize) const override; 1317 1318 /// Targets can use this to indicate that they only support *some* 1319 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a 1320 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to 1321 /// be legal. 1322 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override; 1323 1324 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there 1325 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a 1326 /// constant pool entry. 1327 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override; 1328 1329 /// Returns true if lowering to a jump table is allowed. 1330 bool areJTsAllowed(const Function *Fn) const override; 1331 1332 MVT getPreferredSwitchConditionType(LLVMContext &Context, 1333 EVT ConditionVT) const override; 1334 1335 /// If true, then instruction selection should 1336 /// seek to shrink the FP constant of the specified type to a smaller type 1337 /// in order to save space and / or reduce runtime. 1338 bool ShouldShrinkFPConstant(EVT VT) const override; 1339 1340 /// Return true if we believe it is correct and profitable to reduce the 1341 /// load node to a smaller type. 1342 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, 1343 EVT NewVT) const override; 1344 1345 /// Return true if the specified scalar FP type is computed in an SSE 1346 /// register, not on the X87 floating point stack. 1347 bool isScalarFPTypeInSSEReg(EVT VT) const; 1348 1349 /// Returns true if it is beneficial to convert a load of a constant 1350 /// to just the constant itself. 1351 bool shouldConvertConstantLoadToIntImm(const APInt &Imm, 1352 Type *Ty) const override; 1353 1354 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override; 1355 1356 bool convertSelectOfConstantsToMath(EVT VT) const override; 1357 1358 bool decomposeMulByConstant(LLVMContext &Context, EVT VT, 1359 SDValue C) const override; 1360 1361 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type 1362 /// with this index. 1363 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, 1364 unsigned Index) const override; 1365 1366 /// Scalar ops always have equal or better analysis/performance/power than 1367 /// the vector equivalent, so this always makes sense if the scalar op is 1368 /// supported. 1369 bool shouldScalarizeBinop(SDValue) const override; 1370 1371 /// Extract of a scalar FP value from index 0 of a vector is free. 1372 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override { 1373 EVT EltVT = VT.getScalarType(); 1374 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0; 1375 } 1376 1377 /// Overflow nodes should get combined/lowered to optimal instructions 1378 /// (they should allow eliminating explicit compares by getting flags from 1379 /// math ops). 1380 bool shouldFormOverflowOp(unsigned Opcode, EVT VT, 1381 bool MathUsed) const override; 1382 1383 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, 1384 unsigned AddrSpace) const override { 1385 // If we can replace more than 2 scalar stores, there will be a reduction 1386 // in instructions even after we add a vector constant load. 
1387 return NumElem > 2; 1388 } 1389 1390 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, 1391 const SelectionDAG &DAG, 1392 const MachineMemOperand &MMO) const override; 1393 1394 /// Intel processors have a unified instruction and data cache 1395 const char * getClearCacheBuiltinName() const override { 1396 return nullptr; // nothing to do, move along. 1397 } 1398 1399 Register getRegisterByName(const char* RegName, LLT VT, 1400 const MachineFunction &MF) const override; 1401 1402 /// If a physical register, this returns the register that receives the 1403 /// exception address on entry to an EH pad. 1404 Register 1405 getExceptionPointerRegister(const Constant *PersonalityFn) const override; 1406 1407 /// If a physical register, this returns the register that receives the 1408 /// exception typeid on entry to a landing pad. 1409 Register 1410 getExceptionSelectorRegister(const Constant *PersonalityFn) const override; 1411 1412 bool needsFixedCatchObjects() const override; 1413 1414 /// This method returns a target specific FastISel object, 1415 /// or null if the target does not support "fast" ISel. 1416 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 1417 const TargetLibraryInfo *libInfo) const override; 1418 1419 /// If the target has a standard location for the stack protector cookie, 1420 /// returns the address of that location. Otherwise, returns nullptr. 1421 Value *getIRStackGuard(IRBuilderBase &IRB) const override; 1422 1423 bool useLoadStackGuardNode() const override; 1424 bool useStackGuardXorFP() const override; 1425 void insertSSPDeclarations(Module &M) const override; 1426 Value *getSDagStackGuard(const Module &M) const override; 1427 Function *getSSPStackGuardCheck(const Module &M) const override; 1428 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, 1429 const SDLoc &DL) const override; 1430 1431 1432 /// Return true if the target stores SafeStack pointer at a fixed offset in 1433 /// some non-standard address space, and populates the address space and 1434 /// offset as appropriate. 1435 Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override; 1436 1437 std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL, 1438 SDValue Chain, SDValue Pointer, 1439 MachinePointerInfo PtrInfo, 1440 Align Alignment, 1441 SelectionDAG &DAG) const; 1442 1443 /// Customize the preferred legalization strategy for certain types. 
1444 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; 1445 1446 bool softPromoteHalfType() const override { return true; } 1447 1448 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, 1449 EVT VT) const override; 1450 1451 unsigned getNumRegistersForCallingConv(LLVMContext &Context, 1452 CallingConv::ID CC, 1453 EVT VT) const override; 1454 1455 unsigned getVectorTypeBreakdownForCallingConv( 1456 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, 1457 unsigned &NumIntermediates, MVT &RegisterVT) const override; 1458 1459 bool isIntDivCheap(EVT VT, AttributeList Attr) const override; 1460 1461 bool supportSwiftError() const override; 1462 1463 bool hasStackProbeSymbol(MachineFunction &MF) const override; 1464 bool hasInlineStackProbe(MachineFunction &MF) const override; 1465 StringRef getStackProbeSymbolName(MachineFunction &MF) const override; 1466 1467 unsigned getStackProbeSize(MachineFunction &MF) const; 1468 1469 bool hasVectorBlend() const override { return true; } 1470 1471 unsigned getMaxSupportedInterleaveFactor() const override { return 4; } 1472 1473 /// Lower interleaved load(s) into target specific 1474 /// instructions/intrinsics. 1475 bool lowerInterleavedLoad(LoadInst *LI, 1476 ArrayRef<ShuffleVectorInst *> Shuffles, 1477 ArrayRef<unsigned> Indices, 1478 unsigned Factor) const override; 1479 1480 /// Lower interleaved store(s) into target specific 1481 /// instructions/intrinsics. 1482 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, 1483 unsigned Factor) const override; 1484 1485 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value, 1486 SDValue Addr, SelectionDAG &DAG) 1487 const override; 1488 1489 Align getPrefLoopAlignment(MachineLoop *ML) const override; 1490 1491 protected: 1492 std::pair<const TargetRegisterClass *, uint8_t> 1493 findRepresentativeClass(const TargetRegisterInfo *TRI, 1494 MVT VT) const override; 1495 1496 private: 1497 /// Keep a reference to the X86Subtarget around so that we can 1498 /// make the right decision when generating code for different targets. 1499 const X86Subtarget &Subtarget; 1500 1501 /// A list of legal FP immediates. 1502 std::vector<APFloat> LegalFPImmediates; 1503 1504 /// Indicate that this x86 target can instruction 1505 /// select the specified FP immediate natively. 1506 void addLegalFPImmediate(const APFloat& Imm) { 1507 LegalFPImmediates.push_back(Imm); 1508 } 1509 1510 SDValue LowerCallResult(SDValue Chain, SDValue InFlag, 1511 CallingConv::ID CallConv, bool isVarArg, 1512 const SmallVectorImpl<ISD::InputArg> &Ins, 1513 const SDLoc &dl, SelectionDAG &DAG, 1514 SmallVectorImpl<SDValue> &InVals, 1515 uint32_t *RegMask) const; 1516 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, 1517 const SmallVectorImpl<ISD::InputArg> &ArgInfo, 1518 const SDLoc &dl, SelectionDAG &DAG, 1519 const CCValAssign &VA, MachineFrameInfo &MFI, 1520 unsigned i) const; 1521 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, 1522 const SDLoc &dl, SelectionDAG &DAG, 1523 const CCValAssign &VA, 1524 ISD::ArgFlagsTy Flags, bool isByval) const; 1525 1526 // Call lowering helpers. 1527 1528 /// Check whether the call is eligible for tail call optimization. Targets 1529 /// that want to do tail call optimization should implement this function. 
1530 bool IsEligibleForTailCallOptimization( 1531 SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet, 1532 bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, 1533 const SmallVectorImpl<SDValue> &OutVals, 1534 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const; 1535 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, 1536 SDValue Chain, bool IsTailCall, 1537 bool Is64Bit, int FPDiff, 1538 const SDLoc &dl) const; 1539 1540 unsigned GetAlignedArgumentStackSize(unsigned StackSize, 1541 SelectionDAG &DAG) const; 1542 1543 unsigned getAddressSpace() const; 1544 1545 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, 1546 SDValue &Chain) const; 1547 SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const; 1548 1549 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 1550 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; 1551 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 1552 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 1553 1554 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr, 1555 const unsigned char OpFlags = 0) const; 1556 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; 1557 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; 1558 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 1559 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 1560 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; 1561 1562 /// Creates target global address or external symbol nodes for calls or 1563 /// other uses. 1564 SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, 1565 bool ForCall) const; 1566 1567 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 1568 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 1569 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; 1570 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; 1571 SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; 1572 SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const; 1573 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; 1574 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; 1575 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; 1576 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; 1577 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; 1578 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 1579 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; 1580 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; 1581 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 1582 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 1583 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; 1584 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; 1585 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; 1586 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; 1587 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; 1588 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; 1589 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; 1590 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; 1591 SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; 1592 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; 1593 
SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG, 1594 SDValue &Chain) const; 1595 SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const; 1596 SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const; 1597 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 1598 SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const; 1599 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; 1600 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; 1601 SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const; 1602 1603 SDValue 1604 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 1605 const SmallVectorImpl<ISD::InputArg> &Ins, 1606 const SDLoc &dl, SelectionDAG &DAG, 1607 SmallVectorImpl<SDValue> &InVals) const override; 1608 SDValue LowerCall(CallLoweringInfo &CLI, 1609 SmallVectorImpl<SDValue> &InVals) const override; 1610 1611 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 1612 const SmallVectorImpl<ISD::OutputArg> &Outs, 1613 const SmallVectorImpl<SDValue> &OutVals, 1614 const SDLoc &dl, SelectionDAG &DAG) const override; 1615 1616 bool supportSplitCSR(MachineFunction *MF) const override { 1617 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && 1618 MF->getFunction().hasFnAttribute(Attribute::NoUnwind); 1619 } 1620 void initializeSplitCSR(MachineBasicBlock *Entry) const override; 1621 void insertCopiesSplitCSR( 1622 MachineBasicBlock *Entry, 1623 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; 1624 1625 bool 1626 splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, 1627 SDValue *Parts, unsigned NumParts, MVT PartVT, 1628 Optional<CallingConv::ID> CC) const override; 1629 1630 SDValue 1631 joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, 1632 const SDValue *Parts, unsigned NumParts, 1633 MVT PartVT, EVT ValueVT, 1634 Optional<CallingConv::ID> CC) const override; 1635 1636 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; 1637 1638 bool mayBeEmittedAsTailCall(const CallInst *CI) const override; 1639 1640 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, 1641 ISD::NodeType ExtendKind) const override; 1642 1643 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, 1644 bool isVarArg, 1645 const SmallVectorImpl<ISD::OutputArg> &Outs, 1646 LLVMContext &Context) const override; 1647 1648 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; 1649 1650 TargetLoweringBase::AtomicExpansionKind 1651 shouldExpandAtomicLoadInIR(LoadInst *LI) const override; 1652 TargetLoweringBase::AtomicExpansionKind 1653 shouldExpandAtomicStoreInIR(StoreInst *SI) const override; 1654 TargetLoweringBase::AtomicExpansionKind 1655 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; 1656 TargetLoweringBase::AtomicExpansionKind 1657 shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const; 1658 void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; 1659 1660 LoadInst * 1661 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; 1662 1663 bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override; 1664 bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override; 1665 1666 bool needsCmpXchgNb(Type *MemType) const; 1667 1668 template<typename T> bool isSoftFP16(T VT) const; 1669 1670 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, 1671 MachineBasicBlock *DispatchBB, int FI) const; 1672 1673 // Utility 
function to emit the low-level va_arg code for X86-64. 1674 MachineBasicBlock * 1675 EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; 1676 1677 /// Utility function to emit the xmm reg save portion of va_start. 1678 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1, 1679 MachineInstr &MI2, 1680 MachineBasicBlock *BB) const; 1681 1682 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I, 1683 MachineBasicBlock *BB) const; 1684 1685 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, 1686 MachineBasicBlock *BB) const; 1687 1688 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI, 1689 MachineBasicBlock *BB) const; 1690 1691 MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI, 1692 MachineBasicBlock *BB) const; 1693 1694 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI, 1695 MachineBasicBlock *BB) const; 1696 1697 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, 1698 MachineBasicBlock *BB) const; 1699 1700 MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI, 1701 MachineBasicBlock *BB) const; 1702 1703 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, 1704 MachineBasicBlock *MBB) const; 1705 1706 void emitSetJmpShadowStackFix(MachineInstr &MI, 1707 MachineBasicBlock *MBB) const; 1708 1709 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, 1710 MachineBasicBlock *MBB) const; 1711 1712 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI, 1713 MachineBasicBlock *MBB) const; 1714 1715 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI, 1716 MachineBasicBlock *MBB) const; 1717 1718 /// Emit flags for the given setcc condition and operands. Also returns the 1719 /// corresponding X86 condition code constant in X86CC. 1720 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC, 1721 const SDLoc &dl, SelectionDAG &DAG, 1722 SDValue &X86CC) const; 1723 1724 /// Check if replacement of SQRT with RSQRT should be disabled. 1725 bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override; 1726 1727 /// Use rsqrt* to speed up sqrt calculations. 1728 SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, 1729 int &RefinementSteps, bool &UseOneConstNR, 1730 bool Reciprocal) const override; 1731 1732 /// Use rcp* to speed up fdiv calculations. 1733 SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, 1734 int &RefinementSteps) const override; 1735 1736 /// Reassociate floating point divisions into multiply by reciprocal. 1737 unsigned combineRepeatedFPDivisors() const override; 1738 1739 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, 1740 SmallVectorImpl<SDNode *> &Created) const override; 1741 }; 1742 1743 namespace X86 { 1744 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 1745 const TargetLibraryInfo *libInfo); 1746 } // end namespace X86 1747 1748 // X86 specific Gather/Scatter nodes. 1749 // The class has the same order of operands as MaskedGatherScatterSDNode for 1750 // convenience. 1751 class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode { 1752 public: 1753 // This is a intended as a utility and should never be directly created. 
1754 X86MaskedGatherScatterSDNode() = delete; 1755 ~X86MaskedGatherScatterSDNode() = delete; 1756 1757 const SDValue &getBasePtr() const { return getOperand(3); } 1758 const SDValue &getIndex() const { return getOperand(4); } 1759 const SDValue &getMask() const { return getOperand(2); } 1760 const SDValue &getScale() const { return getOperand(5); } 1761 1762 static bool classof(const SDNode *N) { 1763 return N->getOpcode() == X86ISD::MGATHER || 1764 N->getOpcode() == X86ISD::MSCATTER; 1765 } 1766 }; 1767 1768 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode { 1769 public: 1770 const SDValue &getPassThru() const { return getOperand(1); } 1771 1772 static bool classof(const SDNode *N) { 1773 return N->getOpcode() == X86ISD::MGATHER; 1774 } 1775 }; 1776 1777 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode { 1778 public: 1779 const SDValue &getValue() const { return getOperand(1); } 1780 1781 static bool classof(const SDNode *N) { 1782 return N->getOpcode() == X86ISD::MSCATTER; 1783 } 1784 }; 1785 1786 /// Generate unpacklo/unpackhi shuffle mask. 1787 void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo, 1788 bool Unary); 1789 1790 /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation 1791 /// imposed by AVX and specific to the unary pattern. Example: 1792 /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3> 1793 /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7> 1794 void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo); 1795 1796 } // end namespace llvm 1797 1798 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H 1799
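// Illustrative note (editorial addition, not part of the upstream interface): the
// masks produced by createUnpackShuffleMask above follow the PUNPCK* interleave
// pattern. Assuming a hypothetical caller doing, for example:
//   SmallVector<int, 8> Mask;
//   createUnpackShuffleMask(MVT::v4i32, Mask, /*Lo=*/true, /*Unary=*/false);
// the expected results per (Lo, Unary) combination for v4i32 would be:
//   Lo, binary --> <0, 4, 1, 5>   (PUNPCKLDQ: interleave low halves of both ops)
//   Hi, binary --> <2, 6, 3, 7>   (PUNPCKHDQ: interleave high halves of both ops)
//   Lo, unary  --> <0, 0, 1, 1>   (both inputs are the first operand)
//   Hi, unary  --> <2, 2, 3, 3>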