//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
    enum NodeType : unsigned {
      // Start the numbering where the builtin ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// Bit scan forward.
      BSF,
      /// Bit scan reverse.
      BSR,

      /// Double shift instructions. These correspond to
      /// X86::SHLDxx and X86::SHRDxx instructions.
      SHLD,
      SHRD,

      /// Bitwise logical AND of floating point values. This corresponds
      /// to X86::ANDPS or X86::ANDPD.
      FAND,

      /// Bitwise logical OR of floating point values. This corresponds
      /// to X86::ORPS or X86::ORPD.
      FOR,

      /// Bitwise logical XOR of floating point values. This corresponds
      /// to X86::XORPS or X86::XORPD.
      FXOR,

      /// Bitwise logical ANDNOT of floating point values. This
      /// corresponds to X86::ANDNPS or X86::ANDNPD.
      FANDN,

      /// These operations represent an abstract X86 call
      /// instruction, which includes a bunch of information. In particular the
      /// operands of these nodes are:
      ///
      ///   #0 - The incoming token chain
      ///   #1 - The callee
      ///   #2 - The number of arg bytes the caller pushes on the stack.
      ///   #3 - The number of arg bytes the callee pops off the stack.
      ///   #4 - The value to pass in AL/AX/EAX (optional)
      ///   #5 - The value to pass in DL/DX/EDX (optional)
      ///
      /// The result values of these nodes are:
      ///
      ///   #0 - The outgoing token chain
      ///   #1 - The first register result value (optional)
      ///   #2 - The second register result value (optional)
      ///
      CALL,

      /// Same as call except it adds the NoTrack prefix.
      NT_CALL,

      /// X86 compare and logical compare instructions.
      CMP, COMI, UCOMI,

      /// X86 bit-test instructions.
      BT,

      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
      /// operand, usually produced by a CMP instruction.
      SETCC,

      /// X86 Select
      SELECTS,

      // Same as SETCC except it's materialized with a sbb and the value is all
      // ones or all zeros.
      SETCC_CARRY, // R = carry_bit ? ~0 : 0

      /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
      /// Operands are two FP values to compare; result is a mask of
      /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
      FSETCC,

      /// X86 FP SETCC, similar to above, but with output as an i1 mask,
      /// and a version with SAE.
      FSETCCM, FSETCCM_SAE,

      /// X86 conditional moves. Operand 0 and operand 1 are the two values
      /// to select from. Operand 2 is the condition code, and operand 3 is the
      /// flag operand produced by a CMP or TEST instruction.
      CMOV,

      /// X86 conditional branches. Operand 0 is the chain operand, operand 1
      /// is the block to branch if condition is true, operand 2 is the
      /// condition code, and operand 3 is the flag operand produced by a CMP
      /// or TEST instruction.
      BRCOND,

      /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
      /// operand 1 is the target address.
      NT_BRIND,

      /// Return with a flag operand. Operand 0 is the chain operand, operand
      /// 1 is the number of bytes of stack to pop.
      RET_FLAG,

      /// Return from interrupt. Operand 0 is the number of bytes to pop.
      IRET,

      /// Repeat fill, corresponds to X86::REP_STOSx.
      REP_STOS,

      /// Repeat move, corresponds to X86::REP_MOVSx.
      REP_MOVS,

      /// On Darwin, this node represents the result of the popl
      /// at function entry, used for PIC code.
      GlobalBaseReg,

      /// A wrapper node for TargetConstantPool, TargetJumpTable,
      /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
      /// MCSymbol and TargetBlockAddress.
      Wrapper,

      /// Special wrapper used under X86-64 PIC mode for RIP
      /// relative displacements.
      WrapperRIP,

      /// Copies a 64-bit value from the low word of an XMM vector
      /// to an MMX vector.
      MOVDQ2Q,

      /// Copies a 32-bit value from the low word of an MMX
      /// vector to a GPR.
      MMX_MOVD2W,

      /// Copies a GPR into the low 32-bit word of an MMX vector
      /// and zeroes out the high word.
      MMX_MOVW2D,

      /// Extract an 8-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRB.
      PEXTRB,

      /// Extract a 16-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRW.
      PEXTRW,

      /// Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
      INSERTPS,

      /// Insert the lower 8-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRB.
      PINSRB,

      /// Insert the lower 16-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRW.
      PINSRW,

      /// Shuffle 16 8-bit values within a vector.
      PSHUFB,

      /// Compute Sum of Absolute Differences.
      PSADBW,
      /// Compute Double Block Packed Sum-Absolute-Differences
      DBPSADBW,

      /// Bitwise Logical AND NOT of Packed FP values.
      ANDNP,

      /// Blend where the selector is an immediate.
      BLENDI,

      /// Dynamic (non-constant condition) vector blend where only the sign bits
      /// of the condition elements are used. This is used to enforce that the
      /// condition mask is not valid for generic VSELECT optimizations. This
      /// is also used to implement the intrinsics.
      /// Operands are in VSELECT order: MASK, TRUE, FALSE
      BLENDV,

      /// Combined add and sub on an FP vector.
      ADDSUB,

      // FP vector ops with rounding mode.
      FADD_RND, FADDS, FADDS_RND,
      FSUB_RND, FSUBS, FSUBS_RND,
      FMUL_RND, FMULS, FMULS_RND,
      FDIV_RND, FDIVS, FDIVS_RND,
      FMAX_SAE, FMAXS_SAE,
      FMIN_SAE, FMINS_SAE,
      FSQRT_RND, FSQRTS, FSQRTS_RND,

      // FP vector get exponent.
      FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
      // Extract Normalized Mantissas.
      VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
      // FP Scale.
      SCALEF, SCALEF_RND,
      SCALEFS, SCALEFS_RND,

      // Unsigned Integer average.
      AVG,

      /// Integer horizontal add/sub.
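      /// (These map to the SSSE3/AVX (V)PHADD/(V)PHSUB family of instructions.)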
      HADD,
      HSUB,

      /// Floating point horizontal add/sub.
      FHADD,
      FHSUB,

      // Detect Conflicts Within a Vector
      CONFLICT,

      /// Floating point max and min.
      FMAX, FMIN,

      /// Commutative FMIN and FMAX.
      FMAXC, FMINC,

      /// Scalar intrinsic floating point max and min.
      FMAXS, FMINS,

      /// Floating point reciprocal-sqrt and reciprocal approximation.
      /// Note that these typically require refinement
      /// in order to obtain suitable precision.
      FRSQRT, FRCP,

      // AVX-512 reciprocal approximations with a little more precision.
      RSQRT14, RSQRT14S, RCP14, RCP14S,

      // Thread Local Storage.
      TLSADDR,

      // Thread Local Storage. A call to get the start address
      // of the TLS block for the current module.
      TLSBASEADDR,

      // Thread Local Storage. When calling to an OS provided
      // thunk at the address from an earlier relocation.
      TLSCALL,

      // Exception Handling helpers.
      EH_RETURN,

      // SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      // SjLj exception handling dispatch.
      EH_SJLJ_SETUP_DISPATCH,

      /// Tail call return. See X86TargetLowering::LowerCall for
      /// the list of operands.
      TC_RETURN,

      // Vector move to low scalar and zero higher vector elements.
      VZEXT_MOVL,

      // Vector integer truncate.
      VTRUNC,
      // Vector integer truncate with unsigned/signed saturation.
      VTRUNCUS, VTRUNCS,

      // Masked version of the above. Used when less than a 128-bit result is
      // produced since the mask only applies to the lower elements and can't
      // be represented by a select.
      // SRC, PASSTHRU, MASK
      VMTRUNC, VMTRUNCUS, VMTRUNCS,

      // Vector FP extend.
      VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,

      // Vector FP round.
      VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,

      // Masked version of above. Used for v2f64->v4f32.
      // SRC, PASSTHRU, MASK
      VMFPROUND,

      // 128-bit vector logical left / right shift
      VSHLDQ, VSRLDQ,

      // Vector shift elements
      VSHL, VSRL, VSRA,

      // Vector variable shift
      VSHLV, VSRLV, VSRAV,

      // Vector shift elements by immediate
      VSHLI, VSRLI, VSRAI,

      // Shifts of mask registers.
      KSHIFTL, KSHIFTR,

      // Bit rotate by immediate
      VROTLI, VROTRI,

      // Vector packed double/float comparison.
      CMPP,

      // Vector integer comparisons.
      PCMPEQ, PCMPGT,

      // v8i16 Horizontal minimum and position.
      PHMINPOS,

      MULTISHIFT,

      /// Vector comparison generating mask bits for fp and
      /// integer signed and unsigned data types.
      CMPM,
      // Vector comparison with SAE for FP values
      CMPM_SAE,

      // Arithmetic operations with FLAGS results.
      ADD, SUB, ADC, SBB, SMUL, UMUL,
      OR, XOR, AND,

      // Bit field extract.
      BEXTR,

      // Zero High Bits Starting with Specified Bit Position.
      BZHI,

      // X86-specific multiply by immediate.
      MUL_IMM,

      // Vector sign bit extraction.
      MOVMSK,

      // Vector bitwise comparisons.
      PTEST,

      // Vector packed fp sign bitwise comparisons.
      TESTP,

      // OR/AND test for masks.
      KORTEST,
      KTEST,

      // ADD for masks.
      KADD,

      // Several flavors of instructions with vector shuffle behaviors.
      // Saturated signed/unsigned packing.
      PACKSS,
      PACKUS,
      // Intra-lane alignr.
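      // (PALIGNR concatenates and byte-shifts within each 128-bit lane.)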
      PALIGNR,
      // AVX512 inter-lane alignr.
      VALIGN,
      PSHUFD,
      PSHUFHW,
      PSHUFLW,
      SHUFP,
      // VBMI2 Concat & Shift.
      VSHLD,
      VSHRD,
      VSHLDV,
      VSHRDV,
      // Shuffle Packed Values at 128-bit granularity.
      SHUF128,
      MOVDDUP,
      MOVSHDUP,
      MOVSLDUP,
      MOVLHPS,
      MOVHLPS,
      MOVSD,
      MOVSS,
      UNPCKL,
      UNPCKH,
      VPERMILPV,
      VPERMILPI,
      VPERMI,
      VPERM2X128,

      // Variable Permute (VPERM).
      // Res = VPERMV MaskV, V0
      VPERMV,

      // 3-op Variable Permute (VPERMT2).
      // Res = VPERMV3 V0, MaskV, V1
      VPERMV3,

      // Bitwise ternary logic.
      VPTERNLOG,
      // Fix Up Special Packed Float32/64 values.
      VFIXUPIMM, VFIXUPIMM_SAE,
      VFIXUPIMMS, VFIXUPIMMS_SAE,
      // Range Restriction Calculation For Packed Pairs of Float32/64 values.
      VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
      // Reduce - Perform Reduction Transformation on scalar/packed FP.
      VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
      // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
      // Also used by the legacy (V)ROUND intrinsics where we mask out the
      // scaling part of the immediate.
      VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
      // Tests types of FP values, for packed types.
      VFPCLASS,
      // Tests types of FP values, for scalar types.
      VFPCLASSS,

      // Broadcast (splat) scalar or element 0 of a vector. If the operand is
      // a vector, this node may change the vector length as part of the splat.
      VBROADCAST,
      // Broadcast mask to vector.
      VBROADCASTM,
      // Broadcast subvector to vector.
      SUBV_BROADCAST,

      /// SSE4A Extraction and Insertion.
      EXTRQI, INSERTQI,

      // XOP arithmetic/logical shifts.
      VPSHA, VPSHL,
      // XOP signed/unsigned integer comparisons.
      VPCOM, VPCOMU,
      // XOP packed permute bytes.
      VPPERM,
      // XOP two source permutation.
      VPERMIL2,

      // Vector multiply packed unsigned doubleword integers.
      PMULUDQ,
      // Vector multiply packed signed doubleword integers.
      PMULDQ,
      // Vector Multiply Packed Unsigned Integers with Round and Scale.
      MULHRS,

      // Multiply and Add Packed Integers.
      VPMADDUBSW, VPMADDWD,

      // AVX512IFMA multiply and add.
      // NOTE: These are different than the instruction and perform
      // op0 x op1 + op2.
      VPMADD52L, VPMADD52H,

      // VNNI
      VPDPBUSD,
      VPDPBUSDS,
      VPDPWSSD,
      VPDPWSSDS,

      // FMA nodes.
      // We use the target independent ISD::FMA for the non-inverted case.
      FNMADD,
      FMSUB,
      FNMSUB,
      FMADDSUB,
      FMSUBADD,

      // FMA with rounding mode.
      FMADD_RND,
      FNMADD_RND,
      FMSUB_RND,
      FNMSUB_RND,
      FMADDSUB_RND,
      FMSUBADD_RND,

      // Compress and expand.
      COMPRESS,
      EXPAND,

      // Bits shuffle
      VPSHUFBITQMB,

      // Convert Unsigned/Signed Integer to Floating-Point Value with rounding mode.
      SINT_TO_FP_RND, UINT_TO_FP_RND,
      SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
      SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,

      // Vector float/double to signed/unsigned integer.
      CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
      // Scalar float/double to signed/unsigned integer.
      CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,

      // Vector float/double to signed/unsigned integer with truncation.
      CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
      // Scalar float/double to signed/unsigned integer with truncation.
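      // (e.g. CVTTSS2SI/CVTTSD2SI, plus the AVX-512 unsigned forms.)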
      CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,

      // Vector signed/unsigned integer to float/double.
      CVTSI2P, CVTUI2P,

      // Masked versions of above. Used for v2f64->v4f32.
      // SRC, PASSTHRU, MASK
      MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
      MCVTSI2P, MCVTUI2P,

      // Vector float to bfloat16.
      // Convert TWO packed single data to one packed BF16 data
      CVTNE2PS2BF16,
      // Convert packed single data to packed BF16 data
      CVTNEPS2BF16,
      // Masked version of above.
      // SRC, PASSTHRU, MASK
      MCVTNEPS2BF16,

      // Dot product of BF16 pairs accumulated into
      // packed single precision.
      DPBF16PS,

      // Save xmm argument registers to the stack, according to %al. An operator
      // is needed so that this can be expanded with control flow.
      VASTART_SAVE_XMM_REGS,

      // Windows's _chkstk call to do stack probing.
      WIN_ALLOCA,

      // For allocating variable amounts of stack space when using
      // segmented stacks. Check if the current stacklet has enough space, and
      // falls back to heap allocation if not.
      SEG_ALLOCA,

      // Memory barriers.
      MEMBARRIER,
      MFENCE,

      // Store FP status word into i16 register.
      FNSTSW16r,

      // Store contents of %ah into %eflags.
      SAHF,

      // Get a random integer and indicate whether it is valid in CF.
      RDRAND,

      // Get a NIST SP800-90B & C compliant random integer and
      // indicate whether it is valid in CF.
      RDSEED,

      // Protection keys
      // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
      // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
      // value for ECX.
      RDPKRU, WRPKRU,

      // SSE42 string comparisons.
      // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
      // will emit one or two instructions based on which results are used. If
      // both flags and index/mask are used, this allows us to use a single
      // instruction since we won't have to pick an opcode for flags. Instead we
      // can rely on the DAG to CSE everything and decide at isel.
      PCMPISTR,
      PCMPESTR,

      // Test if in transactional execution.
      XTEST,

      // ERI instructions.
      RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
      RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,

      // Conversions between float and half-float.
      CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,

      // Masked version of above.
      // SRC, RND, PASSTHRU, MASK
      MCVTPS2PH,

      // Galois Field Arithmetic Instructions
      GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,

      // LWP insert record.
      LWPINS,

      // User level wait
      UMWAIT, TPAUSE,

      // Enqueue Stores Instructions
      ENQCMD, ENQCMDS,

      // For avx512-vp2intersect
      VP2INTERSECT,

      // Compare and swap.
      LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
      LCMPXCHG8_DAG,
      LCMPXCHG16_DAG,
      LCMPXCHG8_SAVE_EBX_DAG,
      LCMPXCHG16_SAVE_RBX_DAG,

      /// LOCK-prefixed arithmetic read-modify-write instructions.
      /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
      LADD, LSUB, LOR, LXOR, LAND,

      // Load, scalar_to_vector, and zero extend.
      VZEXT_LOAD,

      // extract_vector_elt, store.
      VEXTRACT_STORE,

      // Store FP control word into i16 memory.
      FNSTCW16m,

      /// This instruction implements FP_TO_SINT with the
      /// integer destination in memory and a FP reg source. This corresponds
      /// to the X86::FIST*m instructions and the rounding mode change stuff.
      /// It has two inputs (token chain and address) and two outputs (int value
      /// and token chain). Memory VT specifies the type to store to.
      FP_TO_INT_IN_MEM,

      /// This instruction implements SINT_TO_FP with the
      /// integer source in memory and FP reg result. This corresponds to the
      /// X86::FILD*m instructions. It has two inputs (token chain and address)
      /// and two outputs (FP value and token chain). (FILD_FLAG also produces a
      /// flag.) The integer source type is specified by the memory VT.
      FILD,
      FILD_FLAG,

      /// This instruction implements a fp->int store from FP stack
      /// slots. This corresponds to the fist instruction. It takes a
      /// chain operand, value to store, address, and glue. The memory VT
      /// specifies the type to store as.
      FIST,

      /// This instruction implements an extending load to FP stack slots.
      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
      /// operand, and ptr to load from. The memory VT specifies the type to
      /// load from.
      FLD,

      /// This instruction implements a truncating store from FP stack
      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
      /// chain operand, value to store, address, and glue. The memory VT
      /// specifies the type to store as.
      FST,

      /// This instruction grabs the address of the next argument
      /// from a va_list. (reads and modifies the va_list in memory)
      VAARG_64,

      // Vector truncating store with unsigned/signed saturation
      VTRUNCSTOREUS, VTRUNCSTORES,
      // Vector truncating masked store with unsigned/signed saturation
      VMTRUNCSTOREUS, VMTRUNCSTORES,

      // X86 specific gather and scatter
      MGATHER, MSCATTER,

      // WARNING: Do not add anything in the end unless you want the node to
      // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
      // opcodes will be treated as target memory ops!
    };
  } // end namespace X86ISD

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can
    /// fit into the displacement field of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement = true);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

  } // end namespace X86

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the rest
    /// are at 4-byte boundaries.
    unsigned getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    /// Returns the target specific optimal type for load
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. If DstAlign is zero, that means the destination alignment can
    /// satisfy any constraint. Similarly if SrcAlign is zero it means there
    /// isn't a need to check it against the alignment requirement, probably
    /// because the source does not need to be loaded. If 'IsMemset' is true,
    /// that means it's expanding a memset. If 'ZeroMemset' is true, that
    /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
    /// source is constant so it does not need to be loaded.
    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                            bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                            const AttributeList &FuncAttributes) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
    /// also does type conversion. Note the specified type doesn't have to be
    /// legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                        MachineMemOperand::Flags Flags,
                                        bool *Fast) const override;

    /// Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Places new result values for the node in Results (their number
    /// and types must exactly match those of the original return values of
    /// the node), or leaves Results empty, which indicates that the node is not
    /// to be custom lowered after all.
    void LowerOperationWrapper(SDNode *N,
                               SmallVectorImpl<SDValue> &Results,
                               SelectionDAG &DAG) const override;

    /// Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    // Return true if it is profitable to combine a BUILD_VECTOR with a
    // stride-pattern to a shuffle and a truncate.
    // Example of such a combine:
    // v4i32 build_vector((extract_elt V, 1),
    //                    (extract_elt V, 3),
    //                    (extract_elt V, 5),
    //                    (extract_elt V, 7))
    //  -->
    // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
    // v4i64)
    bool isDesirableToCombineBuildVectorToShuffleTruncate(
        ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
    /// and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    /// Do not merge vector stores after legalization because that may conflict
    /// with x86-specific store splitting optimizations.
    bool mergeStoresAfterLegalization(EVT MemVT) const override {
      return !MemVT.isVector();
    }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const SelectionDAG &DAG) const override;

    bool isCheapToSpeculateCttz() const override;

    bool isCheapToSpeculateCtlz() const override;

    bool isCtlzFast() const override;

    bool hasBitPreservingFPLogic(EVT VT) const override {
      return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
    }

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // increase one store instruction. There is potentially a more
      // significant benefit because it avoids the float->int domain switch
      // for the input value. So it is more likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and increase one store instruction (costing one more
      // store buffer). Since the benefit is less clear, we leave such pairs
      // out until we have a test case proving it is a win.
      return false;
    }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

    bool
    shouldTransformSignedTruncationCheck(EVT XVT,
                                         unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
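      // (Returning false keeps the original signed truncation check.)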
      if (XVT.isVector())
        return false;

      auto VTIsOk = [](EVT VT) -> bool {
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
               VT == MVT::i64;
      };

      // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
      // XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
    }

    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

    bool shouldSplatInsEltVarIndex(EVT VT) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 APInt &KnownUndef,
                                                 APInt &KnownZero,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth) const override;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedBits,
                                           const APInt &DemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

    SDValue unwrapAddress(SDValue N) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
    getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                   const char *constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops. If hasMemory is true it means one of the asm
    /// constraints of the inline asm instruction being processed is 'm'.
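    /// (For x86 this is also where immediate-range constraints such as 'I',
    /// 'K' and 'N' are checked before being turned into target constants.)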
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "i")
        return InlineAsm::Constraint_i;
      else if (ConstraintCode == "o")
        return InlineAsm::Constraint_o;
      else if (ConstraintCode == "v")
        return InlineAsm::Constraint_v;
      else if (ConstraintCode == "X")
        return InlineAsm::Constraint_X;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Handle Lowering flag assembly outputs.
    SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
                                        const AsmOperandInfo &Constraint,
                                        SelectionDAG &DAG) const override;

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register. This should only be used for C_Register constraints. On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    bool isLegalStoreImmediate(int64_t Imm) const override;

    /// Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS) const override;

    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Add x86-specific opcodes to the default list.
    bool isBinOp(unsigned Opcode) const override;

    /// Returns true if the opcode is a commutative binary operation.
    bool isCommutativeBinOp(unsigned Opcode) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
    /// register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

    /// Given an intrinsic, checks if on the target the intrinsic will need to
    /// map to a MemIntrinsicNode (touches memory). If this is the case, it
    /// returns true and stores the intrinsic information into the IntrinsicInfo
    /// that was passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
    /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
    /// constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override {
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
      // expensive than a straight movsd. On the other hand, it's important to
      // shrink long double fp constants since fldt is very slow.
      return !X86ScalarSSEf64 || VT == MVT::f80;
    }

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
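    /// Example: (i16 (trunc (i32 (load x)))) -> i16 load x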
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
    }

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override;

    bool decomposeMulByConstant(EVT VT, SDValue C) const override;

    bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
                                  bool IsSigned) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    /// Scalar ops always have equal or better analysis/performance/power than
    /// the vector equivalent, so this always makes sense if the scalar op is
    /// supported.
    bool shouldScalarizeBinop(SDValue) const override;

    /// Extract of a scalar FP value from index 0 of a vector is free.
    bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
      EVT EltVT = VT.getScalarType();
      return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
    }

    /// Overflow nodes should get combined/lowered to optimal instructions
    /// (they should allow eliminating explicit compares by getting flags from
    /// math ops).
    bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;

    bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                      unsigned AddrSpace) const override {
      // If we can replace more than 2 scalar stores, there will be a reduction
      // in instructions even after we add a vector constant load.
      return NumElem > 2;
    }

    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                                 const SelectionDAG &DAG,
                                 const MachineMemOperand &MMO) const override;

    /// Intel processors have a unified instruction and data cache.
    const char *getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }

    unsigned getRegisterByName(const char *RegName, EVT VT,
                               SelectionDAG &DAG) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    unsigned
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    unsigned
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    bool needsFixedCatchObjects() const override;

    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location.
    /// Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilder<> &IRB) const override;

    bool useLoadStackGuardNode() const override;
    bool useStackGuardXorFP() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Function *getSSPStackGuardCheck(const Module &M) const override;
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                const SDLoc &DL) const override;

    /// Return true if the target stores SafeStack pointer at a fixed offset in
    /// some non-standard address space, and populates the address space and
    /// offset as appropriate.
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

    SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                      SelectionDAG &DAG) const;

    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;

    /// Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

    MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                      EVT VT) const override;

    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                           CallingConv::ID CC,
                                           EVT VT) const override;

    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    bool supportSwiftError() const override;

    StringRef getStackProbeSymbolName(MachineFunction &MF) const override;

    bool hasVectorBlend() const override { return true; }

    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

    /// Lower interleaved load(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;

    /// Lower interleaved store(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                   SDValue Addr, SelectionDAG &DAG)
                                   const override;

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget &Subtarget;

    /// Select between SSE or x87 floating point ops.
    /// When SSE is available, use it for f32 operations.
    /// When SSE2 is available, use it for f64 operations.
    bool X86ScalarSSEf32;
    bool X86ScalarSSEf64;

    /// A list of legal FP immediates.
    std::vector<APFloat> LegalFPImmediates;

    /// Indicate that this x86 target can instruction
    /// select the specified FP immediate natively.
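    /// (Typically called during target setup for immediates such as +0.0 and
    /// +1.0, which x87 can materialize with fldz/fld1.)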
    void addLegalFPImmediate(const APFloat &Imm) {
      LegalFPImmediates.push_back(Imm);
    }

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(SDValue Callee,
                                           CallingConv::ID CalleeCC,
                                           bool isVarArg,
                                           bool isCalleeStructRet,
                                           bool isCallerStructRet,
                                           Type *RetTy,
                                           const SmallVectorImpl<ISD::OutputArg> &Outs,
                                           const SmallVectorImpl<SDValue> &OutVals,
                                           const SmallVectorImpl<ISD::InputArg> &Ins,
                                           SelectionDAG &DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace(void) const;

    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                  const unsigned char OpFlags = 0) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                  bool ForCall) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
    bool
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *
    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                             MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                              ISD::CondCode CC, const SDLoc &dl,
                              SelectionDAG &DAG,
                              SDValue &X86CC) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
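    /// (The hardware RCPSS/RCPPS approximation provides roughly 12 bits of
    /// precision and is refined with Newton-Raphson steps as requested.)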
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // Base class for all X86 non-masked store operations.
  class X86StoreSDNode : public MemSDNode {
  public:
    X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                   SDVTList VTs, EVT MemVT,
                   MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES ||
             N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // Base class for all X86 masked store operations.
  // The class has the same order of operands as MaskedStoreSDNode for
  // convenience.
  class X86MaskedStoreSDNode : public MemSDNode {
  public:
    X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                         MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }
    const SDValue &getMask() const { return getOperand(3); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
             N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 Truncating Store with Signed saturation.
  class TruncSStoreSDNode : public X86StoreSDNode {
  public:
    TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
                      SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES;
    }
  };

  // X86 Truncating Store with Unsigned saturation.
  class TruncUSStoreSDNode : public X86StoreSDNode {
  public:
    TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // X86 Truncating Masked Store with Signed saturation.
  class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncSStoreSDNode(unsigned Order,
                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                            MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES;
    }
  };

  // X86 Truncating Masked Store with Unsigned saturation.
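  // Operands are (Chain, Value, BasePtr, Mask), matching X86MaskedStoreSDNode.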
  class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncUSStoreSDNode(unsigned Order,
                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                             MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemSDNode {
  public:
    X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
                                 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                                 MachineMemOperand *MMO)
      : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getScale() const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                          EVT MemVT, MachineMemOperand *MMO)
      : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
                                     MMO) {}

    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                           EVT MemVT, MachineMemOperand *MMO)
      : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
                                     MMO) {}

    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
  template <typename T = int>
  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
                               bool Unary) {
    assert(Mask.empty() && "Expected an empty shuffle mask vector");
    int NumElts = VT.getVectorNumElements();
    int NumEltsInLane = 128 / VT.getScalarSizeInBits();
    for (int i = 0; i < NumElts; ++i) {
      unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
      int Pos = (i % NumEltsInLane) / 2 + LaneStart;
      Pos += (Unary ? 0 : NumElts * (i % 2));
      Pos += (Lo ? 0 : NumEltsInLane / 2);
      Mask.push_back(Pos);
    }
  }

  /// Helper function to scale a shuffle or target shuffle mask, replacing each
  /// mask index with the scaled sequential indices for an equivalent narrowed
  /// mask. This is the reverse process to canWidenShuffleElements, but can
  /// always succeed.
  template <typename T>
  void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
                        SmallVectorImpl<T> &ScaledMask) {
    assert(0 < Scale && "Unexpected scaling factor");
    size_t NumElts = Mask.size();
    ScaledMask.assign(NumElts * Scale, -1);

    for (int i = 0; i != (int)NumElts; ++i) {
      int M = Mask[i];

      // Repeat sentinel values in every mask element.
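      // (e.g. with Scale == 2, each sentinel index expands to two copies of
      // itself.)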
      if (M < 0) {
        for (int s = 0; s != Scale; ++s)
          ScaledMask[(Scale * i) + s] = M;
        continue;
      }

      // Scale mask element and increment across each mask element.
      for (int s = 0; s != Scale; ++s)
        ScaledMask[(Scale * i) + s] = (Scale * M) + s;
    }
  }
} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H