//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
class X86Subtarget;
class X86TargetMachine;

namespace X86ISD {
// X86 Specific DAG Nodes
enum NodeType : unsigned {
  // Start the numbering where the builtin ops leave off.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  /// Bit scan forward.
  BSF,
  /// Bit scan reverse.
  BSR,

  /// X86 funnel/double shift i16 instructions. These correspond to
  /// X86::SHLDW and X86::SHRDW instructions which have different amt
  /// modulo rules to generic funnel shifts.
  /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
  FSHL,
  FSHR,

  /// Bitwise logical AND of floating point values. This corresponds
  /// to X86::ANDPS or X86::ANDPD.
  FAND,

  /// Bitwise logical OR of floating point values. This corresponds
  /// to X86::ORPS or X86::ORPD.
  FOR,

  /// Bitwise logical XOR of floating point values. This corresponds
  /// to X86::XORPS or X86::XORPD.
  FXOR,

  /// Bitwise logical ANDNOT of floating point values. This
  /// corresponds to X86::ANDNPS or X86::ANDNPD.
  FANDN,

  /// These operations represent an abstract X86 call
  /// instruction, which includes a bunch of information. In particular the
  /// operands of these nodes are:
  ///
  ///     #0 - The incoming token chain
  ///     #1 - The callee
  ///     #2 - The number of arg bytes the caller pushes on the stack.
  ///     #3 - The number of arg bytes the callee pops off the stack.
  ///     #4 - The value to pass in AL/AX/EAX (optional)
  ///     #5 - The value to pass in DL/DX/EDX (optional)
  ///
  /// The result values of these nodes are:
  ///
  ///     #0 - The outgoing token chain
  ///     #1 - The first register result value (optional)
  ///     #2 - The second register result value (optional)
  ///
  CALL,

  /// Same as call except it adds the NoTrack prefix.
  NT_CALL,

  /// X86 compare and logical compare instructions.
  CMP,
  FCMP,
  COMI,
  UCOMI,

  /// X86 bit-test instructions.
  BT,

  /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  /// operand, usually produced by a CMP instruction.
  SETCC,

  /// X86 Select
  SELECTS,

  // Same as SETCC except it's materialized with an sbb and the value is all
  // ones or all zeros.
  SETCC_CARRY, // R = carry_bit ? ~0 : 0

  /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  /// Operands are two FP values to compare; result is a mask of
  /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
  FSETCC,

  /// X86 FP SETCC, similar to above, but with output as an i1 mask and
  /// a version with SAE.
  FSETCCM,
  FSETCCM_SAE,

  /// X86 conditional moves. Operand 0 and operand 1 are the two values
  /// to select from. Operand 2 is the condition code, and operand 3 is the
  /// flag operand produced by a CMP or TEST instruction.
  CMOV,

  /// X86 conditional branches. Operand 0 is the chain operand, operand 1
  /// is the block to branch to if the condition is true, operand 2 is the
  /// condition code, and operand 3 is the flag operand produced by a CMP
  /// or TEST instruction.
  BRCOND,

  /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
  /// operand 1 is the target address.
  NT_BRIND,

  /// Return with a flag operand. Operand 0 is the chain operand, operand
  /// 1 is the number of bytes of stack to pop.
  RET_FLAG,

  /// Return from interrupt. Operand 0 is the number of bytes to pop.
  IRET,

  /// Repeat fill, corresponds to X86::REP_STOSx.
  REP_STOS,

  /// Repeat move, corresponds to X86::REP_MOVSx.
  REP_MOVS,

  /// On Darwin, this node represents the result of the popl
  /// at function entry, used for PIC code.
  GlobalBaseReg,

  /// A wrapper node for TargetConstantPool, TargetJumpTable,
  /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
  /// MCSymbol and TargetBlockAddress.
  Wrapper,

  /// Special wrapper used under X86-64 PIC mode for RIP
  /// relative displacements.
  WrapperRIP,

  /// Copies a 64-bit value from an MMX vector to the low word
  /// of an XMM vector, with the high word zero filled.
  MOVQ2DQ,

  /// Copies a 64-bit value from the low word of an XMM vector
  /// to an MMX vector.
  MOVDQ2Q,

  /// Copies a 32-bit value from the low word of an MMX
  /// vector to a GPR.
  MMX_MOVD2W,

  /// Copies a GPR into the low 32-bit word of an MMX vector
  /// and zeroes out the high word.
  MMX_MOVW2D,

  /// Extract an 8-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRB.
  PEXTRB,

  /// Extract a 16-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRW.
  PEXTRW,

  /// Insert any element of a 4 x float vector into any element
  /// of a destination 4 x float vector.
  INSERTPS,

  /// Insert the lower 8-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRB.
  PINSRB,

  /// Insert the lower 16-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRW.
  PINSRW,

  /// Shuffle 16 8-bit values within a vector.
  PSHUFB,

  /// Compute Sum of Absolute Differences.
  PSADBW,
  /// Compute Double Block Packed Sum-Absolute-Differences
  DBPSADBW,

  /// Bitwise Logical AND NOT of Packed FP values.
  ANDNP,

  /// Blend where the selector is an immediate.
  BLENDI,

  /// Dynamic (non-constant condition) vector blend where only the sign bits
  /// of the condition elements are used. This is used to enforce that the
  /// condition mask is not valid for generic VSELECT optimizations. This
  /// is also used to implement the intrinsics.
  /// Operands are in VSELECT order: MASK, TRUE, FALSE
  BLENDV,

  /// Combined add and sub on an FP vector.
  ADDSUB,

  // FP vector ops with rounding mode.
  FADD_RND,
  FADDS,
  FADDS_RND,
  FSUB_RND,
  FSUBS,
  FSUBS_RND,
  FMUL_RND,
  FMULS,
  FMULS_RND,
  FDIV_RND,
  FDIVS,
  FDIVS_RND,
  FMAX_SAE,
  FMAXS_SAE,
  FMIN_SAE,
  FMINS_SAE,
  FSQRT_RND,
  FSQRTS,
  FSQRTS_RND,

  // FP vector get exponent.
  FGETEXP,
  FGETEXP_SAE,
  FGETEXPS,
  FGETEXPS_SAE,
  // Extract Normalized Mantissas.
  VGETMANT,
  VGETMANT_SAE,
  VGETMANTS,
  VGETMANTS_SAE,
  // FP Scale.
  SCALEF,
  SCALEF_RND,
  SCALEFS,
  SCALEFS_RND,

  // Unsigned Integer average.
  AVG,

  /// Integer horizontal add/sub.
  HADD,
  HSUB,

  /// Floating point horizontal add/sub.
  FHADD,
  FHSUB,

  // Detect Conflicts Within a Vector
  CONFLICT,

  /// Floating point max and min.
  FMAX,
  FMIN,

  /// Commutative FMIN and FMAX.
  FMAXC,
  FMINC,

  /// Scalar intrinsic floating point max and min.
  FMAXS,
  FMINS,

  /// Floating point reciprocal-sqrt and reciprocal approximation.
  /// Note that these typically require refinement
  /// in order to obtain suitable precision.
  FRSQRT,
  FRCP,

  // AVX-512 reciprocal approximations with a little more precision.
  RSQRT14,
  RSQRT14S,
  RCP14,
  RCP14S,

  // Thread Local Storage.
  TLSADDR,

  // Thread Local Storage. A call to get the start address
  // of the TLS block for the current module.
  TLSBASEADDR,

  // Thread Local Storage. A call to an OS-provided thunk at the address
  // from an earlier relocation.
  TLSCALL,

  // Exception Handling helpers.
  EH_RETURN,

  // SjLj exception handling setjmp.
  EH_SJLJ_SETJMP,

  // SjLj exception handling longjmp.
  EH_SJLJ_LONGJMP,

  // SjLj exception handling dispatch.
  EH_SJLJ_SETUP_DISPATCH,

  /// Tail call return. See X86TargetLowering::LowerCall for
  /// the list of operands.
  TC_RETURN,

  // Vector move to low scalar and zero higher vector elements.
  VZEXT_MOVL,

  // Vector integer truncate.
  VTRUNC,
  // Vector integer truncate with unsigned/signed saturation.
  VTRUNCUS,
  VTRUNCS,

  // Masked version of the above. Used when less than a 128-bit result is
  // produced since the mask only applies to the lower elements and can't
  // be represented by a select.
  // SRC, PASSTHRU, MASK
  VMTRUNC,
  VMTRUNCUS,
  VMTRUNCS,

  // Vector FP extend.
  VFPEXT,
  VFPEXT_SAE,
  VFPEXTS,
  VFPEXTS_SAE,

  // Vector FP round.
  VFPROUND,
  VFPROUND_RND,
  VFPROUNDS,
  VFPROUNDS_RND,

  // Masked version of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  VMFPROUND,

  // 128-bit vector logical left / right shift
  VSHLDQ,
  VSRLDQ,

  // Vector shift elements
  VSHL,
  VSRL,
  VSRA,

  // Vector variable shift
  VSHLV,
  VSRLV,
  VSRAV,

  // Vector shift elements by immediate
  VSHLI,
  VSRLI,
  VSRAI,

  // Shifts of mask registers.
  KSHIFTL,
  KSHIFTR,

  // Bit rotate by immediate
  VROTLI,
  VROTRI,

  // Vector packed double/float comparison.
  CMPP,

  // Vector integer comparisons.
  PCMPEQ,
  PCMPGT,

  // v8i16 Horizontal minimum and position.
  PHMINPOS,

  MULTISHIFT,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  CMPM,
  // Vector mask comparison generating mask bits for FP values.
  CMPMM,
  // Vector mask comparison with SAE for FP values.
  CMPMM_SAE,

  // Arithmetic operations with FLAGS results.
  ADD,
  SUB,
  ADC,
  SBB,
  SMUL,
  UMUL,
  OR,
  XOR,
  AND,

  // Bit field extract.
  BEXTR,
  BEXTRI,

  // Zero High Bits Starting with Specified Bit Position.
  BZHI,

  // Parallel extract and deposit.
  PDEP,
  PEXT,

  // X86-specific multiply by immediate.
  MUL_IMM,

  // Vector sign bit extraction.
  MOVMSK,

  // Vector bitwise comparisons.
  PTEST,

  // Vector packed fp sign bitwise comparisons.
  TESTP,

  // OR/AND test for masks.
  KORTEST,
  KTEST,

  // ADD for masks.
  KADD,

  // Several flavors of instructions with vector shuffle behaviors.
  // Saturated signed/unsigned packing.
  PACKSS,
  PACKUS,
  // Intra-lane alignr.
  PALIGNR,
  // AVX512 inter-lane alignr.
  VALIGN,
  PSHUFD,
  PSHUFHW,
  PSHUFLW,
  SHUFP,
  // VBMI2 Concat & Shift.
  VSHLD,
  VSHRD,
  VSHLDV,
  VSHRDV,
  // Shuffle Packed Values at 128-bit granularity.
  SHUF128,
  MOVDDUP,
  MOVSHDUP,
  MOVSLDUP,
  MOVLHPS,
  MOVHLPS,
  MOVSD,
  MOVSS,
  UNPCKL,
  UNPCKH,
  VPERMILPV,
  VPERMILPI,
  VPERMI,
  VPERM2X128,

  // Variable Permute (VPERM).
  // Res = VPERMV MaskV, V0
  VPERMV,

  // 3-op Variable Permute (VPERMT2).
  // Res = VPERMV3 V0, MaskV, V1
  VPERMV3,

  // Bitwise ternary logic.
  VPTERNLOG,
  // Fix Up Special Packed Float32/64 values.
  VFIXUPIMM,
  VFIXUPIMM_SAE,
  VFIXUPIMMS,
  VFIXUPIMMS_SAE,
  // Range Restriction Calculation For Packed Pairs of Float32/64 values.
  VRANGE,
  VRANGE_SAE,
  VRANGES,
  VRANGES_SAE,
  // Reduce - Perform Reduction Transformation on scalar/packed FP.
  VREDUCE,
  VREDUCE_SAE,
  VREDUCES,
  VREDUCES_SAE,
  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  VRNDSCALE,
  VRNDSCALE_SAE,
  VRNDSCALES,
  VRNDSCALES_SAE,
  // Tests types of FP values, for packed types.
  VFPCLASS,
  // Tests types of FP values, for scalar types.
  VFPCLASSS,

  // Broadcast (splat) scalar or element 0 of a vector. If the operand is
  // a vector, this node may change the vector length as part of the splat.
  VBROADCAST,
  // Broadcast mask to vector.
  VBROADCASTM,

  /// SSE4A Extraction and Insertion.
  EXTRQI,
  INSERTQI,

  // XOP arithmetic/logical shifts.
  VPSHA,
  VPSHL,
  // XOP signed/unsigned integer comparisons.
  VPCOM,
  VPCOMU,
  // XOP packed permute bytes.
  VPPERM,
  // XOP two source permutation.
  VPERMIL2,

  // Vector multiply packed unsigned doubleword integers.
  PMULUDQ,
  // Vector multiply packed signed doubleword integers.
  PMULDQ,
  // Vector Multiply Packed Unsigned Integers with Round and Scale.
  MULHRS,

  // Multiply and Add Packed Integers.
  VPMADDUBSW,
  VPMADDWD,

  // AVX512IFMA multiply and add.
  // NOTE: These are different from the instruction and perform
  // op0 x op1 + op2.
  VPMADD52L,
  VPMADD52H,

  // VNNI
  VPDPBUSD,
  VPDPBUSDS,
  VPDPWSSD,
  VPDPWSSDS,

  // FMA nodes.
  // We use the target independent ISD::FMA for the non-inverted case.
  FNMADD,
  FMSUB,
  FNMSUB,
  FMADDSUB,
  FMSUBADD,

  // FMA with rounding mode.
  FMADD_RND,
  FNMADD_RND,
  FMSUB_RND,
  FNMSUB_RND,
  FMADDSUB_RND,
  FMSUBADD_RND,

  // Compress and expand.
  COMPRESS,
  EXPAND,

  // Bits shuffle
  VPSHUFBITQMB,

  // Convert Unsigned/Signed Integer to Floating-Point Value with rounding
  // mode.
  SINT_TO_FP_RND,
  UINT_TO_FP_RND,
  SCALAR_SINT_TO_FP,
  SCALAR_UINT_TO_FP,
  SCALAR_SINT_TO_FP_RND,
  SCALAR_UINT_TO_FP_RND,

  // Vector float/double to signed/unsigned integer.
  CVTP2SI,
  CVTP2UI,
  CVTP2SI_RND,
  CVTP2UI_RND,
  // Scalar float/double to signed/unsigned integer.
  CVTS2SI,
  CVTS2UI,
  CVTS2SI_RND,
  CVTS2UI_RND,

  // Vector float/double to signed/unsigned integer with truncation.
  CVTTP2SI,
  CVTTP2UI,
  CVTTP2SI_SAE,
  CVTTP2UI_SAE,
  // Scalar float/double to signed/unsigned integer with truncation.
  CVTTS2SI,
  CVTTS2UI,
  CVTTS2SI_SAE,
  CVTTS2UI_SAE,

  // Vector signed/unsigned integer to float/double.
  CVTSI2P,
  CVTUI2P,

  // Masked versions of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  MCVTP2SI,
  MCVTP2UI,
  MCVTTP2SI,
  MCVTTP2UI,
  MCVTSI2P,
  MCVTUI2P,

  // Vector float to bfloat16.
  // Convert TWO packed single data to one packed BF16 data
  CVTNE2PS2BF16,
  // Convert packed single data to packed BF16 data
  CVTNEPS2BF16,
  // Masked version of above.
  // SRC, PASSTHRU, MASK
  MCVTNEPS2BF16,

  // Dot product of BF16 pairs accumulated into
  // packed single precision.
  DPBF16PS,

  // Save xmm argument registers to the stack, according to %al. An operator
  // is needed so that this can be expanded with control flow.
  VASTART_SAVE_XMM_REGS,

  // Windows's _chkstk call to do stack probing.
  WIN_ALLOCA,

  // For allocating variable amounts of stack space when using
  // segmented stacks. Checks if the current stacklet has enough space, and
  // falls back to heap allocation if not.
  SEG_ALLOCA,

  // For allocating stack space when using stack clash protector.
  // Allocation is performed by block, and each block is probed.
  PROBED_ALLOCA,

  // Memory barriers.
  MEMBARRIER,
  MFENCE,

  // Get a random integer and indicate whether it is valid in CF.
  RDRAND,

  // Get a NIST SP800-90B & C compliant random integer and
  // indicate whether it is valid in CF.
  RDSEED,

  // Protection keys
  // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
  // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
  // value for ECX.
  RDPKRU,
  WRPKRU,

  // SSE42 string comparisons.
  // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
  // will emit one or two instructions based on which results are used. If
  // both the flags and the index/mask are used, this allows us to use a
  // single instruction since we won't have to pick an opcode for flags.
  // Instead we can rely on the DAG to CSE everything and decide at isel.
  PCMPISTR,
  PCMPESTR,

  // Test if in transactional execution.
  XTEST,

  // ERI instructions.
  RSQRT28,
  RSQRT28_SAE,
  RSQRT28S,
  RSQRT28S_SAE,
  RCP28,
  RCP28_SAE,
  RCP28S,
  RCP28S_SAE,
  EXP2,
  EXP2_SAE,

  // Conversions between float and half-float.
  CVTPS2PH,
  CVTPH2PS,
  CVTPH2PS_SAE,

  // Masked version of above.
  // SRC, RND, PASSTHRU, MASK
  MCVTPS2PH,

  // Galois Field Arithmetic Instructions
  GF2P8AFFINEINVQB,
  GF2P8AFFINEQB,
  GF2P8MULB,

  // LWP insert record.
  LWPINS,

  // User level wait
  UMWAIT,
  TPAUSE,

  // Enqueue Stores Instructions
  ENQCMD,
  ENQCMDS,

  // For avx512-vp2intersect
  VP2INTERSECT,

  // User level interrupts - testui
  TESTUI,

  /// X86 strict FP compare instructions.
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPS,

  // Vector packed double/float comparison.
  STRICT_CMPP,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  STRICT_CMPM,

  // Vector float/double to signed/unsigned integer with truncation.
  STRICT_CVTTP2SI,
  STRICT_CVTTP2UI,

  // Vector FP extend.
  STRICT_VFPEXT,

  // Vector FP round.
  STRICT_VFPROUND,

  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  STRICT_VRNDSCALE,

  // Vector signed/unsigned integer to float/double.
  STRICT_CVTSI2P,
  STRICT_CVTUI2P,

  // Strict FMA nodes.
  STRICT_FNMADD,
  STRICT_FMSUB,
  STRICT_FNMSUB,

  // Conversions between float and half-float.
  STRICT_CVTPS2PH,
  STRICT_CVTPH2PS,

  // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
  // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.

  // Compare and swap.
  LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LCMPXCHG8_DAG,
  LCMPXCHG16_DAG,
  LCMPXCHG16_SAVE_RBX_DAG,

  /// LOCK-prefixed arithmetic read-modify-write instructions.
  /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
  LADD,
  LSUB,
  LOR,
  LXOR,
  LAND,

  // Load, scalar_to_vector, and zero extend.
  VZEXT_LOAD,

  // extract_vector_elt, store.
  VEXTRACT_STORE,

  // scalar broadcast from memory.
  VBROADCAST_LOAD,

  // subvector broadcast from memory.
  SUBV_BROADCAST_LOAD,

  // Store FP control word into i16 memory.
  FNSTCW16m,

  /// This instruction implements FP_TO_SINT with the
  /// integer destination in memory and an FP reg source. This corresponds
  /// to the X86::FIST*m instructions and the rounding mode change stuff. It
  /// has two inputs (token chain and address) and two outputs (int value
  /// and token chain). Memory VT specifies the type to store to.
  FP_TO_INT_IN_MEM,

  /// This instruction implements SINT_TO_FP with the
  /// integer source in memory and FP reg result. This corresponds to the
  /// X86::FILD*m instructions. It has two inputs (token chain and address)
  /// and two outputs (FP value and token chain). The integer source type is
  /// specified by the memory VT.
  FILD,

  /// This instruction implements a fp->int store from FP stack
  /// slots. This corresponds to the fist instruction. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FIST,

  /// This instruction implements an extending load to FP stack slots.
  /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
  /// operand, and ptr to load from. The memory VT specifies the type to
  /// load from.
  FLD,

  /// This instruction implements a truncating store from FP stack
  /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FST,

  /// These instructions grab the address of the next argument
  /// from a va_list. (reads and modifies the va_list in memory)
  VAARG_64,
  VAARG_X32,

  // Vector truncating store with unsigned/signed saturation
  VTRUNCSTOREUS,
  VTRUNCSTORES,
  // Vector truncating masked store with unsigned/signed saturation
  VMTRUNCSTOREUS,
  VMTRUNCSTORES,

  // X86 specific gather and scatter
  MGATHER,
  MSCATTER,

  // Key locker nodes that produce flags.
  AESENC128KL,
  AESDEC128KL,
  AESENC256KL,
  AESDEC256KL,
  AESENCWIDE128KL,
  AESDECWIDE128KL,
  AESENCWIDE256KL,
  AESDECWIDE256KL,

  // WARNING: Do not add anything at the end unless you want the node to
  // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
  // opcodes will be treated as target memory ops!
};
} // end namespace X86ISD
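
// Example (illustrative sketch; `DAG`, `DL` and an EFLAGS-producing value
// `EFLAGS` are assumed to be in scope): target nodes from the enum above are
// built with SelectionDAG::getNode using the operand order documented on each
// entry, e.g. for X86ISD::SETCC the condition code comes first and the EFLAGS
// operand second:
//
//   SDValue CC = DAG.getTargetConstant(X86::COND_E, DL, MVT::i8);
//   SDValue SetCC = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, CC, EFLAGS);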

/// Define some predicates that are used for node matching.
namespace X86 {
/// Returns true if Elt is a constant zero or floating point constant +0.0.
bool isZeroNode(SDValue Elt);

/// Returns true if the given offset fits into the displacement field of the
/// instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                  bool hasSymbolicDisplacement);

/// Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool isCalleePop(CallingConv::ID CallingConv,
                 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

/// If Op is a constant whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatVal.
/// If we have undef bits that don't cover an entire element, we treat these
/// as zero if AllowPartialUndefs is set, else we fail and return false.
bool isConstantSplat(SDValue Op, APInt &SplatVal,
                     bool AllowPartialUndefs = true);
} // end namespace X86
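
// Example (illustrative sketch; `N` is an assumed SDValue in scope): these
// predicates are typically queried from lowering code and DAG combines, e.g.
// to detect an all-ones splat operand:
//
//   APInt SplatVal;
//   if (X86::isConstantSplat(N, SplatVal) && SplatVal.isAllOnesValue()) {
//     // N splats the same all-ones constant into every element.
//   }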

//===--------------------------------------------------------------------===//
//  X86 Implementation of the TargetLowering interface
class X86TargetLowering final : public TargetLowering {
public:
  explicit X86TargetLowering(const X86TargetMachine &TM,
                             const X86Subtarget &STI);

  unsigned getJumpTableEncoding() const override;
  bool useSoftFloat() const override;

  void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                             ArgListTy &Args) const override;

  MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
    return MVT::i8;
  }

  const MCExpr *
  LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                            const MachineBasicBlock *MBB, unsigned uid,
                            MCContext &Ctx) const override;

  /// Returns relocation base for the given PIC jumptable.
  SDValue getPICJumpTableRelocBase(SDValue Table,
                                   SelectionDAG &DAG) const override;
  const MCExpr *
  getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                               unsigned JTI, MCContext &Ctx) const override;

  /// Return the desired alignment for ByVal aggregate
  /// function arguments in the caller parameter area. For X86, aggregates
  /// that contain SSE vectors are placed at 16-byte boundaries while the
  /// rest are at 4-byte boundaries.
  unsigned getByValTypeAlignment(Type *Ty,
                                 const DataLayout &DL) const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  /// Returns true if it's safe to use load / store of the
  /// specified type to expand memcpy / memset inline. This is mostly true
  /// for all types except for some special cases. For example, on X86
  /// targets without SSE2 f64 load / store are done with fldl / fstpl which
  /// also do type conversion. Note the specified type doesn't have to be
  /// legal as the hook is used before type legalization.
  bool isSafeMemOpType(MVT VT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type. Returns whether it is "fast" in the last argument.
  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast) const override;

  /// Provide custom lowering hooks for some operations.
  ///
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  /// Replace the results of node with an illegal result
  /// type with new values built out of custom code.
  ///
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// Return true if the target has native support for
  /// the specified value type and it is 'desirable' to use the type for the
  /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
  /// instruction encodings are longer and some i16 instructions are slow.
  bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

  /// Return true if the target has native support for the
  /// specified value type and it is 'desirable' to use the type. e.g. On x86
  /// i16 is legal, but undesirable since i16 instruction encodings are longer
  /// and some i16 instructions are slow.
  bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

  /// Return the newly negated expression if the cost is not expensive and
  /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
  /// do the negation.
  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                               bool LegalOperations, bool ForCodeSize,
                               NegatibleCost &Cost,
                               unsigned Depth) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  /// This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  /// Do not merge vector stores after legalization because that may conflict
  /// with x86-specific store splitting optimizations.
  bool mergeStoresAfterLegalization(EVT MemVT) const override {
    return !MemVT.isVector();
  }

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const SelectionDAG &DAG) const override;

  bool isCheapToSpeculateCttz() const override;

  bool isCheapToSpeculateCtlz() const override;

  bool isCtlzFast() const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
  }

  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // add one store instruction. There is potentially a more significant
    // benefit because it avoids the float->int domain switch for the input
    // value. So it is more likely a win.
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;
    // If the pair only contains int values, we will save two bitwise
    // instructions and add one store instruction (costing one more store
    // buffer). Since the benefit is less clear, we leave such pairs out
    // until we have a test case proving it is a win.
    return false;
  }
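
  // Example (illustrative sketch in LLVM IR; names are hypothetical): the
  // kind of mixed float/int pair the comment above refers to. Merging the
  // pair into a single i64 store needs a float->int bitcast plus shift/or,
  // whereas two narrow stores (one float, one i32) avoid the domain switch:
  //
  //   %lo  = bitcast float %f to i32
  //   %loz = zext i32 %lo to i64
  //   %hiz = zext i32 %hi to i64
  //   %his = shl i64 %hiz, 32
  //   %v   = or i64 %loz, %his
  //   store i64 %v, i64* %p        ; bits-merge form of { float %f, i32 %hi }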

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue Y) const override;

  bool hasAndNot(SDValue Y) const override;

  bool hasBitTest(SDValue X, SDValue Y) const override;

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

  bool
  shouldTransformSignedTruncationCheck(EVT XVT,
                                       unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, what MOVSX supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldSplatInsEltVarIndex(EVT VT) const override;

  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }

  /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
  MVT hasFastEqualityCompare(unsigned NumBits) const override;
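
  // Example (illustrative sketch): with this hook, a 16-byte equality test
  // (e.g. from an expanded memcmp(a, b, 16) == 0) can be emitted roughly as
  // the following SSE2 sequence instead of a chain of scalar compares:
  //
  //   movdqu   (%rdi), %xmm0
  //   movdqu   (%rsi), %xmm1
  //   pcmpeqb  %xmm1, %xmm0
  //   pmovmskb %xmm0, %eax
  //   cmpl     $0xffff, %eax       # equal iff every byte compared equal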

  /// Return the value type to use for ISD::SETCC.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  /// Determine which of the bits specified in Mask are known to be either
  /// zero or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  /// Determine the number of bits in the operation that are sign bits.
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                               const APInt &DemandedElts,
                                               APInt &KnownUndef,
                                               APInt &KnownZero,
                                               TargetLoweringOpt &TLO,
                                               unsigned Depth) const override;

  bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                  const APInt &DemandedElts,
                                                  unsigned MaskIndex,
                                                  TargetLoweringOpt &TLO,
                                                  unsigned Depth) const;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &DemandedBits,
                                         const APInt &DemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
      SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
      SelectionDAG &DAG, unsigned Depth) const override;

  const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

  SDValue unwrapAddress(SDValue N) const override;

  SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

  bool ExpandInlineAsm(CallInst *CI) const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  /// Lower the specified operand into the Ops vector. If it is invalid, don't
  /// add anything to Ops. If hasMemory is true it means one of the asm
  /// constraints of the inline asm instruction being processed is 'm'.
  void LowerAsmOperandForConstraint(SDValue Op,
                                    std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "o")
      return InlineAsm::Constraint_o;
    else if (ConstraintCode == "v")
      return InlineAsm::Constraint_v;
    else if (ConstraintCode == "X")
      return InlineAsm::Constraint_X;
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle lowering of flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  /// Given a physical register constraint
  /// (e.g. {edx}), return the register number and the register class for the
  /// register. This should only be used for C_Register constraints. On
  /// error, this returns a register number of 0.
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;
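
  // Example (illustrative sketch; `TLI` and `TRI` are assumed to be in
  // scope): resolving an explicit physical-register constraint. For "{edx}"
  // with an i32 operand the expected result is X86::EDX together with a
  // 32-bit GPR register class:
  //
  //   auto RCPair = TLI.getRegForInlineAsmConstraint(TRI, "{edx}", MVT::i32);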

  /// Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                             Type *Ty, unsigned AS,
                             Instruction *I = nullptr) const override;
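
  // Example (illustrative sketch; `TLI`, `DL` and `Int32Ty` are assumed to be
  // in scope): x86 addressing modes can encode [base + scale*index + disp],
  // which corresponds to the following query:
  //
  //   TargetLowering::AddrMode AM;
  //   AM.HasBaseReg = true;  // base register present
  //   AM.Scale = 4;          // scaled index register
  //   AM.BaseOffs = 20;      // constant displacement
  //   bool Legal = TLI.isLegalAddressingMode(DL, AM, Int32Ty, /*AS=*/0);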

  /// Return true if the specified immediate is a legal
  /// icmp immediate, that is the target has icmp instructions which can
  /// compare a register against the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalICmpImmediate(int64_t Imm) const override;

  /// Return true if the specified immediate is a legal
  /// add immediate, that is the target has add instructions which can
  /// add a register and the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const override;

  bool isLegalStoreImmediate(int64_t Imm) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                           unsigned AS) const override;

  /// This is used to enable splatted operand transforms for vector shifts
  /// and vector funnel shifts.
  bool isVectorShiftByScalarCheap(Type *Ty) const override;

  /// Add x86-specific opcodes to the default list.
  bool isBinOp(unsigned Opcode) const override;

  /// Returns true if the opcode is a commutative binary operation.
  bool isCommutativeBinOp(unsigned Opcode) const override;

  /// Return true if it's free to truncate a value of
  /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
  /// register EAX to i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

  /// Return true if any actual instruction that defines a
  /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
  /// register. This does not necessarily include registers defined in
  /// unknown ways, such as incoming arguments, or copies from unknown
  /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
  /// does not necessarily apply to truncate instructions. e.g. on x86-64,
  /// all instructions that define 32-bit values implicitly zero-extend the
  /// result out to 64 bits.
  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
  bool shouldConvertPhiType(Type *From, Type *To) const override;

  /// Return true if folding a vector load into ExtVal (a sign, zero, or any
  /// extend node) is profitable.
  bool isVectorLoadExtDesirable(SDValue) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this
  /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;

  /// Return true if it's profitable to narrow
  /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
  /// from i32 to i8 but not from i32 to i16.
  bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

  /// Given an intrinsic, checks if on the target the intrinsic will need to
  /// map to a MemIntrinsicNode (touches memory). If this is the case, it
  /// returns true and stores the intrinsic information into the IntrinsicInfo
  /// that was passed to the function.
  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  /// Returns true if the target can instruction select the
  /// specified FP immediate natively. If false, the legalizer will
  /// materialize the FP immediate as a load from a constant pool.
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Targets can use this to indicate that they only support *some*
  /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
  /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
  /// be legal.
  bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
  /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
  /// constant pool entry.
  bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

  /// Returns true if lowering to a jump table is allowed.
  bool areJTsAllowed(const Function *Fn) const override;

  /// If true, then instruction selection should
  /// seek to shrink the FP constant of the specified type to a smaller type
  /// in order to save space and / or reduce runtime.
  bool ShouldShrinkFPConstant(EVT VT) const override {
    // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
    // expensive than a straight movsd. On the other hand, it's important to
    // shrink long double fp constant since fldt is very slow.
    return !X86ScalarSSEf64 || VT == MVT::f80;
  }

  /// Return true if we believe it is correct and profitable to reduce the
  /// load node to a smaller type.
  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  /// Return true if the specified scalar FP type is computed in an SSE
  /// register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
  }

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

  bool convertSelectOfConstantsToMath(EVT VT) const override;

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  /// Scalar ops always have equal or better analysis/performance/power than
  /// the vector equivalent, so this always makes sense if the scalar op is
  /// supported.
  bool shouldScalarizeBinop(SDValue) const override;

  /// Extract of a scalar FP value from index 0 of a vector is free.
  bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
    EVT EltVT = VT.getScalarType();
    return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
  }

  /// Overflow nodes should get combined/lowered to optimal instructions
  /// (they should allow eliminating explicit compares by getting flags from
  /// math ops).
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override;
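
  // Example (illustrative sketch in LLVM IR): the kind of pattern this
  // enables. The i1 overflow result is expected to come from EFLAGS (e.g.
  // SETO/JO after the ADD) rather than from a separate compare:
  //
  //   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  //   %sum = extractvalue { i32, i1 } %res, 0
  //   %ovf = extractvalue { i32, i1 } %res, 1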

  bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // If we can replace more than 2 scalar stores, there will be a reduction
    // in instructions even after we add a vector constant load.
    return NumElem > 2;
  }

  bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                               const SelectionDAG &DAG,
                               const MachineMemOperand &MMO) const override;

  /// Intel processors have a unified instruction and data cache
  const char *getClearCacheBuiltinName() const override {
    return nullptr; // nothing to do, move along.
  }

  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  virtual bool needsFixedCatchObjects() const override;

  /// This method returns a target specific FastISel object,
  /// or null if the target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilder<> &IRB) const override;

  bool useLoadStackGuardNode() const override;
  bool useStackGuardXorFP() const override;
  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;
  SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                              const SDLoc &DL) const override;

  /// Return true if the target stores SafeStack pointer at a fixed offset in
  /// some non-standard address space, and populates the address space and
  /// offset as appropriate.
  Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

  std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
                                        SDValue Chain, SDValue Pointer,
                                        MachinePointerInfo PtrInfo,
                                        Align Alignment,
                                        SelectionDAG &DAG) const;

  /// Customize the preferred legalization strategy for certain types.
  LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

  bool softPromoteHalfType() const override { return true; }

  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;

  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(
      LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
      unsigned &NumIntermediates, MVT &RegisterVT) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool supportSwiftError() const override;

  bool hasStackProbeSymbol(MachineFunction &MF) const override;
  bool hasInlineStackProbe(MachineFunction &MF) const override;
  StringRef getStackProbeSymbolName(MachineFunction &MF) const override;

  unsigned getStackProbeSize(MachineFunction &MF) const;

  bool hasVectorBlend() const override { return true; }

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  /// Lower interleaved load(s) into target specific
  /// instructions/intrinsics.
  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;

  /// Lower interleaved store(s) into target specific
  /// instructions/intrinsics.
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;
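
  // Example (illustrative sketch in LLVM IR): the interleaved-access pass
  // hands this hook patterns such as the following stride-2 case, where one
  // wide load feeds de-interleaving shuffles:
  //
  //   %wide = load <8 x i32>, <8 x i32>* %ptr
  //   %even = shufflevector <8 x i32> %wide, <8 x i32> undef,
  //                         <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  //   %odd  = shufflevector <8 x i32> %wide, <8 x i32> undef,
  //                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>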

  SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                 SDValue Addr, SelectionDAG &DAG)
                                 const override;

  Align getPrefLoopAlignment(MachineLoop *ML) const override;

protected:
  std::pair<const TargetRegisterClass *, uint8_t>
  findRepresentativeClass(const TargetRegisterInfo *TRI,
                          MVT VT) const override;

private:
  /// Keep a reference to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget &Subtarget;

  /// Select between SSE or x87 floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf32;
  bool X86ScalarSSEf64;

  /// A list of legal FP immediates.
  std::vector<APFloat> LegalFPImmediates;

  /// Indicate that this x86 target can instruction
  /// select the specified FP immediate natively.
  void addLegalFPImmediate(const APFloat &Imm) {
    LegalFPImmediates.push_back(Imm);
  }

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &dl, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals,
                          uint32_t *RegMask) const;
  SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                           const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA, MachineFrameInfo &MFI,
                           unsigned i) const;
  SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA,
                           ISD::ArgFlagsTy Flags, bool isByval) const;

  // Call lowering helpers.

  /// Check whether the call is eligible for tail call optimization. Targets
  /// that want to do tail call optimization should implement this function.
  bool IsEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
  SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                  SDValue Chain, bool IsTailCall,
                                  bool Is64Bit, int FPDiff,
                                  const SDLoc &dl) const;

  unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                       SelectionDAG &DAG) const;

  unsigned getAddressSpace(void) const;

  SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                          SDValue &Chain) const;
  SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

  unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                const unsigned char OpFlags = 0) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

  /// Creates target global address or external symbol nodes for calls or
  /// other uses.
  SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                bool ForCall) const;

  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;

  SDValue
  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       const SDLoc &dl, SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals,
                      const SDLoc &dl, SelectionDAG &DAG) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

  EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                          ISD::NodeType ExtendKind) const override;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  LoadInst *
  lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

  bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
  bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;

  bool needsCmpXchgNb(Type *MemType) const;

  void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                              MachineBasicBlock *DispatchBB, int FI) const;

  // Utility function to emit the low-level va_arg code for X86-64.
  MachineBasicBlock *
  EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

  /// Utility function to emit the xmm reg save portion of va_start.
  MachineBasicBlock *
  EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                           MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                               MachineInstr &MI2,
                                               MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                       MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                             MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                        MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                        MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                              MachineBasicBlock *BB) const;

  MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const;

  void emitSetJmpShadowStackFix(MachineInstr &MI,
                                MachineBasicBlock *MBB) const;

  MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;

  MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                               MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const;

  /// Emit flags for the given setcc condition and operands. Also returns the
  /// corresponding X86 condition code constant in X86CC.
  SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SDValue &X86CC) const;

  /// Check if replacement of SQRT with RSQRT should be disabled.
  bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

  /// Use rsqrt* to speed up sqrt calculations.
  SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                          int &RefinementSteps, bool &UseOneConstNR,
                          bool Reciprocal) const override;

  /// Use rcp* to speed up fdiv calculations.
  SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                           int &RefinementSteps) const override;

  /// Reassociate floating point divisions into multiply by reciprocal.
  unsigned combineRepeatedFPDivisors() const override;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
};

namespace X86 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace X86

// X86 specific Gather/Scatter nodes.
// The class has the same order of operands as MaskedGatherScatterSDNode for
// convenience.
class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
public:
  // This is intended as a utility and should never be directly created.
  X86MaskedGatherScatterSDNode() = delete;
  ~X86MaskedGatherScatterSDNode() = delete;

  const SDValue &getBasePtr() const { return getOperand(3); }
  const SDValue &getIndex() const { return getOperand(4); }
  const SDValue &getMask() const { return getOperand(2); }
  const SDValue &getScale() const { return getOperand(5); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MGATHER ||
           N->getOpcode() == X86ISD::MSCATTER;
  }
};

class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
public:
  const SDValue &getPassThru() const { return getOperand(1); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MGATHER;
  }
};

class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
public:
  const SDValue &getValue() const { return getOperand(1); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MSCATTER;
  }
};
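
// Example (illustrative sketch; `N` is an assumed SDNode* in scope): the
// accessors above are typically reached through a dyn_cast once the opcode
// is known, e.g. inside a DAG combine:
//
//   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N)) {
//     SDValue Mask = Gather->getMask();
//     SDValue Index = Gather->getIndex();
//     SDValue PassThru = Gather->getPassThru();
//     // ...
//   }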

/// Generate unpacklo/unpackhi shuffle mask.
void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                             bool Unary);

/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
/// imposed by AVX and specific to the unary pattern. Example:
/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H