1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that X86 uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H 15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H 16 17 #include "llvm/CodeGen/TargetLowering.h" 18 19 namespace llvm { 20 class X86Subtarget; 21 class X86TargetMachine; 22 23 namespace X86ISD { 24 // X86 Specific DAG Nodes 25 enum NodeType : unsigned { 26 // Start the numbering where the builtin ops leave off. 27 FIRST_NUMBER = ISD::BUILTIN_OP_END, 28 29 /// Bit scan forward. 30 BSF, 31 /// Bit scan reverse. 32 BSR, 33 34 /// X86 funnel/double shift i16 instructions. These correspond to 35 /// X86::SHLDW and X86::SHRDW instructions which have different amt 36 /// modulo rules to generic funnel shifts. 37 /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. 38 FSHL, 39 FSHR, 40 41 /// Bitwise logical AND of floating point values. This corresponds 42 /// to X86::ANDPS or X86::ANDPD. 43 FAND, 44 45 /// Bitwise logical OR of floating point values. This corresponds 46 /// to X86::ORPS or X86::ORPD. 47 FOR, 48 49 /// Bitwise logical XOR of floating point values. This corresponds 50 /// to X86::XORPS or X86::XORPD. 51 FXOR, 52 53 /// Bitwise logical ANDNOT of floating point values. This 54 /// corresponds to X86::ANDNPS or X86::ANDNPD. 55 FANDN, 56 57 /// These operations represent an abstract X86 call 58 /// instruction, which includes a bunch of information. In particular the 59 /// operands of these node are: 60 /// 61 /// #0 - The incoming token chain 62 /// #1 - The callee 63 /// #2 - The number of arg bytes the caller pushes on the stack. 64 /// #3 - The number of arg bytes the callee pops off the stack. 65 /// #4 - The value to pass in AL/AX/EAX (optional) 66 /// #5 - The value to pass in DL/DX/EDX (optional) 67 /// 68 /// The result values of these nodes are: 69 /// 70 /// #0 - The outgoing token chain 71 /// #1 - The first register result value (optional) 72 /// #2 - The second register result value (optional) 73 /// 74 CALL, 75 76 /// Same as call except it adds the NoTrack prefix. 77 NT_CALL, 78 79 /// X86 compare and logical compare instructions. 80 CMP, 81 FCMP, 82 COMI, 83 UCOMI, 84 85 /// X86 bit-test instructions. 86 BT, 87 88 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS 89 /// operand, usually produced by a CMP instruction. 90 SETCC, 91 92 /// X86 Select 93 SELECTS, 94 95 // Same as SETCC except it's materialized with a sbb and the value is all 96 // one's or all zero's. 97 SETCC_CARRY, // R = carry_bit ? ~0 : 0 98 99 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. 100 /// Operands are two FP values to compare; result is a mask of 101 /// 0s or 1s. Generally DTRT for C/C++ with NaNs. 102 FSETCC, 103 104 /// X86 FP SETCC, similar to above, but with output as an i1 mask and 105 /// and a version with SAE. 106 FSETCCM, 107 FSETCCM_SAE, 108 109 /// X86 conditional moves. Operand 0 and operand 1 are the two values 110 /// to select from. 
Operand 2 is the condition code, and operand 3 is the 111 /// flag operand produced by a CMP or TEST instruction. 112 CMOV, 113 114 /// X86 conditional branches. Operand 0 is the chain operand, operand 1 115 /// is the block to branch if condition is true, operand 2 is the 116 /// condition code, and operand 3 is the flag operand produced by a CMP 117 /// or TEST instruction. 118 BRCOND, 119 120 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and 121 /// operand 1 is the target address. 122 NT_BRIND, 123 124 /// Return with a flag operand. Operand 0 is the chain operand, operand 125 /// 1 is the number of bytes of stack to pop. 126 RET_FLAG, 127 128 /// Return from interrupt. Operand 0 is the number of bytes to pop. 129 IRET, 130 131 /// Repeat fill, corresponds to X86::REP_STOSx. 132 REP_STOS, 133 134 /// Repeat move, corresponds to X86::REP_MOVSx. 135 REP_MOVS, 136 137 /// On Darwin, this node represents the result of the popl 138 /// at function entry, used for PIC code. 139 GlobalBaseReg, 140 141 /// A wrapper node for TargetConstantPool, TargetJumpTable, 142 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, 143 /// MCSymbol and TargetBlockAddress. 144 Wrapper, 145 146 /// Special wrapper used under X86-64 PIC mode for RIP 147 /// relative displacements. 148 WrapperRIP, 149 150 /// Copies a 64-bit value from an MMX vector to the low word 151 /// of an XMM vector, with the high word zero filled. 152 MOVQ2DQ, 153 154 /// Copies a 64-bit value from the low word of an XMM vector 155 /// to an MMX vector. 156 MOVDQ2Q, 157 158 /// Copies a 32-bit value from the low word of a MMX 159 /// vector to a GPR. 160 MMX_MOVD2W, 161 162 /// Copies a GPR into the low 32-bit word of a MMX vector 163 /// and zero out the high word. 164 MMX_MOVW2D, 165 166 /// Extract an 8-bit value from a vector and zero extend it to 167 /// i32, corresponds to X86::PEXTRB. 168 PEXTRB, 169 170 /// Extract a 16-bit value from a vector and zero extend it to 171 /// i32, corresponds to X86::PEXTRW. 172 PEXTRW, 173 174 /// Insert any element of a 4 x float vector into any element 175 /// of a destination 4 x floatvector. 176 INSERTPS, 177 178 /// Insert the lower 8-bits of a 32-bit value to a vector, 179 /// corresponds to X86::PINSRB. 180 PINSRB, 181 182 /// Insert the lower 16-bits of a 32-bit value to a vector, 183 /// corresponds to X86::PINSRW. 184 PINSRW, 185 186 /// Shuffle 16 8-bit values within a vector. 187 PSHUFB, 188 189 /// Compute Sum of Absolute Differences. 190 PSADBW, 191 /// Compute Double Block Packed Sum-Absolute-Differences 192 DBPSADBW, 193 194 /// Bitwise Logical AND NOT of Packed FP values. 195 ANDNP, 196 197 /// Blend where the selector is an immediate. 198 BLENDI, 199 200 /// Dynamic (non-constant condition) vector blend where only the sign bits 201 /// of the condition elements are used. This is used to enforce that the 202 /// condition mask is not valid for generic VSELECT optimizations. This 203 /// is also used to implement the intrinsics. 204 /// Operands are in VSELECT order: MASK, TRUE, FALSE 205 BLENDV, 206 207 /// Combined add and sub on an FP vector. 208 ADDSUB, 209 210 // FP vector ops with rounding mode. 211 FADD_RND, 212 FADDS, 213 FADDS_RND, 214 FSUB_RND, 215 FSUBS, 216 FSUBS_RND, 217 FMUL_RND, 218 FMULS, 219 FMULS_RND, 220 FDIV_RND, 221 FDIVS, 222 FDIVS_RND, 223 FMAX_SAE, 224 FMAXS_SAE, 225 FMIN_SAE, 226 FMINS_SAE, 227 FSQRT_RND, 228 FSQRTS, 229 FSQRTS_RND, 230 231 // FP vector get exponent. 
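// (Naming note for the nodes below, stated here as an editorial aid: a _RND suffix denotes a variant taking an explicit rounding-mode operand, _SAE a suppress-all-exceptions variant, and a trailing S a scalar form operating on the low element.)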
232 FGETEXP, 233 FGETEXP_SAE, 234 FGETEXPS, 235 FGETEXPS_SAE, 236 // Extract Normalized Mantissas. 237 VGETMANT, 238 VGETMANT_SAE, 239 VGETMANTS, 240 VGETMANTS_SAE, 241 // FP Scale. 242 SCALEF, 243 SCALEF_RND, 244 SCALEFS, 245 SCALEFS_RND, 246 247 // Unsigned Integer average. 248 AVG, 249 250 /// Integer horizontal add/sub. 251 HADD, 252 HSUB, 253 254 /// Floating point horizontal add/sub. 255 FHADD, 256 FHSUB, 257 258 // Detect Conflicts Within a Vector 259 CONFLICT, 260 261 /// Floating point max and min. 262 FMAX, 263 FMIN, 264 265 /// Commutative FMIN and FMAX. 266 FMAXC, 267 FMINC, 268 269 /// Scalar intrinsic floating point max and min. 270 FMAXS, 271 FMINS, 272 273 /// Floating point reciprocal-sqrt and reciprocal approximation. 274 /// Note that these typically require refinement 275 /// in order to obtain suitable precision. 276 FRSQRT, 277 FRCP, 278 279 // AVX-512 reciprocal approximations with a little more precision. 280 RSQRT14, 281 RSQRT14S, 282 RCP14, 283 RCP14S, 284 285 // Thread Local Storage. 286 TLSADDR, 287 288 // Thread Local Storage. A call to get the start address 289 // of the TLS block for the current module. 290 TLSBASEADDR, 291 292 // Thread Local Storage. When calling to an OS provided 293 // thunk at the address from an earlier relocation. 294 TLSCALL, 295 296 // Exception Handling helpers. 297 EH_RETURN, 298 299 // SjLj exception handling setjmp. 300 EH_SJLJ_SETJMP, 301 302 // SjLj exception handling longjmp. 303 EH_SJLJ_LONGJMP, 304 305 // SjLj exception handling dispatch. 306 EH_SJLJ_SETUP_DISPATCH, 307 308 /// Tail call return. See X86TargetLowering::LowerCall for 309 /// the list of operands. 310 TC_RETURN, 311 312 // Vector move to low scalar and zero higher vector elements. 313 VZEXT_MOVL, 314 315 // Vector integer truncate. 316 VTRUNC, 317 // Vector integer truncate with unsigned/signed saturation. 318 VTRUNCUS, 319 VTRUNCS, 320 321 // Masked version of the above. Used when less than a 128-bit result is 322 // produced since the mask only applies to the lower elements and can't 323 // be represented by a select. 324 // SRC, PASSTHRU, MASK 325 VMTRUNC, 326 VMTRUNCUS, 327 VMTRUNCS, 328 329 // Vector FP extend. 330 VFPEXT, 331 VFPEXT_SAE, 332 VFPEXTS, 333 VFPEXTS_SAE, 334 335 // Vector FP round. 336 VFPROUND, 337 VFPROUND_RND, 338 VFPROUNDS, 339 VFPROUNDS_RND, 340 341 // Masked version of above. Used for v2f64->v4f32. 342 // SRC, PASSTHRU, MASK 343 VMFPROUND, 344 345 // 128-bit vector logical left / right shift 346 VSHLDQ, 347 VSRLDQ, 348 349 // Vector shift elements 350 VSHL, 351 VSRL, 352 VSRA, 353 354 // Vector variable shift 355 VSHLV, 356 VSRLV, 357 VSRAV, 358 359 // Vector shift elements by immediate 360 VSHLI, 361 VSRLI, 362 VSRAI, 363 364 // Shifts of mask registers. 365 KSHIFTL, 366 KSHIFTR, 367 368 // Bit rotate by immediate 369 VROTLI, 370 VROTRI, 371 372 // Vector packed double/float comparison. 373 CMPP, 374 375 // Vector integer comparisons. 376 PCMPEQ, 377 PCMPGT, 378 379 // v8i16 Horizontal minimum and position. 380 PHMINPOS, 381 382 MULTISHIFT, 383 384 /// Vector comparison generating mask bits for fp and 385 /// integer signed and unsigned data types. 386 CMPM, 387 // Vector comparison with SAE for FP values 388 CMPM_SAE, 389 390 // Arithmetic operations with FLAGS results. 391 ADD, 392 SUB, 393 ADC, 394 SBB, 395 SMUL, 396 UMUL, 397 OR, 398 XOR, 399 AND, 400 401 // Bit field extract. 402 BEXTR, 403 404 // Zero High Bits Starting with Specified Bit Position. 405 BZHI, 406 407 // Parallel extract and deposit. 
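// (These correspond to the BMI2 PDEP/PEXT instructions.)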
408 PDEP, 409 PEXT, 410 411 // X86-specific multiply by immediate. 412 MUL_IMM, 413 414 // Vector sign bit extraction. 415 MOVMSK, 416 417 // Vector bitwise comparisons. 418 PTEST, 419 420 // Vector packed fp sign bitwise comparisons. 421 TESTP, 422 423 // OR/AND test for masks. 424 KORTEST, 425 KTEST, 426 427 // ADD for masks. 428 KADD, 429 430 // Several flavors of instructions with vector shuffle behaviors. 431 // Saturated signed/unsigned packing. 432 PACKSS, 433 PACKUS, 434 // Intra-lane alignr. 435 PALIGNR, 436 // AVX512 inter-lane alignr. 437 VALIGN, 438 PSHUFD, 439 PSHUFHW, 440 PSHUFLW, 441 SHUFP, 442 // VBMI2 Concat & Shift. 443 VSHLD, 444 VSHRD, 445 VSHLDV, 446 VSHRDV, 447 // Shuffle Packed Values at 128-bit granularity. 448 SHUF128, 449 MOVDDUP, 450 MOVSHDUP, 451 MOVSLDUP, 452 MOVLHPS, 453 MOVHLPS, 454 MOVSD, 455 MOVSS, 456 UNPCKL, 457 UNPCKH, 458 VPERMILPV, 459 VPERMILPI, 460 VPERMI, 461 VPERM2X128, 462 463 // Variable Permute (VPERM). 464 // Res = VPERMV MaskV, V0 465 VPERMV, 466 467 // 3-op Variable Permute (VPERMT2). 468 // Res = VPERMV3 V0, MaskV, V1 469 VPERMV3, 470 471 // Bitwise ternary logic. 472 VPTERNLOG, 473 // Fix Up Special Packed Float32/64 values. 474 VFIXUPIMM, 475 VFIXUPIMM_SAE, 476 VFIXUPIMMS, 477 VFIXUPIMMS_SAE, 478 // Range Restriction Calculation For Packed Pairs of Float32/64 values. 479 VRANGE, 480 VRANGE_SAE, 481 VRANGES, 482 VRANGES_SAE, 483 // Reduce - Perform Reduction Transformation on scalar/packed FP. 484 VREDUCE, 485 VREDUCE_SAE, 486 VREDUCES, 487 VREDUCES_SAE, 488 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. 489 // Also used by the legacy (V)ROUND intrinsics where we mask out the 490 // scaling part of the immediate. 491 VRNDSCALE, 492 VRNDSCALE_SAE, 493 VRNDSCALES, 494 VRNDSCALES_SAE, 495 // Tests the types of packed FP values. 496 VFPCLASS, 497 // Tests the types of scalar FP values. 498 VFPCLASSS, 499 500 // Broadcast (splat) scalar or element 0 of a vector. If the operand is 501 // a vector, this node may change the vector length as part of the splat. 502 VBROADCAST, 503 // Broadcast mask to vector. 504 VBROADCASTM, 505 // Broadcast subvector to vector. 506 SUBV_BROADCAST, 507 508 /// SSE4A Extraction and Insertion. 509 EXTRQI, 510 INSERTQI, 511 512 // XOP arithmetic/logical shifts. 513 VPSHA, 514 VPSHL, 515 // XOP signed/unsigned integer comparisons. 516 VPCOM, 517 VPCOMU, 518 // XOP packed permute bytes. 519 VPPERM, 520 // XOP two source permutation. 521 VPERMIL2, 522 523 // Vector multiply packed unsigned doubleword integers. 524 PMULUDQ, 525 // Vector multiply packed signed doubleword integers. 526 PMULDQ, 527 // Vector Multiply Packed Signed Integers with Round and Scale. 528 MULHRS, 529 530 // Multiply and Add Packed Integers. 531 VPMADDUBSW, 532 VPMADDWD, 533 534 // AVX512IFMA multiply and add. 535 // NOTE: These are different from the instruction and perform 536 // op0 x op1 + op2. 537 VPMADD52L, 538 VPMADD52H, 539 540 // VNNI 541 VPDPBUSD, 542 VPDPBUSDS, 543 VPDPWSSD, 544 VPDPWSSDS, 545 546 // FMA nodes. 547 // We use the target independent ISD::FMA for the non-inverted case. 548 FNMADD, 549 FMSUB, 550 FNMSUB, 551 FMADDSUB, 552 FMSUBADD, 553 554 // FMA with rounding mode. 555 FMADD_RND, 556 FNMADD_RND, 557 FMSUB_RND, 558 FNMSUB_RND, 559 FMADDSUB_RND, 560 FMSUBADD_RND, 561 562 // Compress and expand. 563 COMPRESS, 564 EXPAND, 565 566 // Bit shuffle. 567 VPSHUFBITQMB, 568 569 // Convert signed/unsigned integer to floating-point value with rounding mode.
570 SINT_TO_FP_RND, 571 UINT_TO_FP_RND, 572 SCALAR_SINT_TO_FP, 573 SCALAR_UINT_TO_FP, 574 SCALAR_SINT_TO_FP_RND, 575 SCALAR_UINT_TO_FP_RND, 576 577 // Vector float/double to signed/unsigned integer. 578 CVTP2SI, 579 CVTP2UI, 580 CVTP2SI_RND, 581 CVTP2UI_RND, 582 // Scalar float/double to signed/unsigned integer. 583 CVTS2SI, 584 CVTS2UI, 585 CVTS2SI_RND, 586 CVTS2UI_RND, 587 588 // Vector float/double to signed/unsigned integer with truncation. 589 CVTTP2SI, 590 CVTTP2UI, 591 CVTTP2SI_SAE, 592 CVTTP2UI_SAE, 593 // Scalar float/double to signed/unsigned integer with truncation. 594 CVTTS2SI, 595 CVTTS2UI, 596 CVTTS2SI_SAE, 597 CVTTS2UI_SAE, 598 599 // Vector signed/unsigned integer to float/double. 600 CVTSI2P, 601 CVTUI2P, 602 603 // Masked versions of above. Used for v2f64->v4f32. 604 // SRC, PASSTHRU, MASK 605 MCVTP2SI, 606 MCVTP2UI, 607 MCVTTP2SI, 608 MCVTTP2UI, 609 MCVTSI2P, 610 MCVTUI2P, 611 612 // Vector float to bfloat16. 613 // Convert TWO packed single data to one packed BF16 data 614 CVTNE2PS2BF16, 615 // Convert packed single data to packed BF16 data 616 CVTNEPS2BF16, 617 // Masked version of above. 618 // SRC, PASSTHRU, MASK 619 MCVTNEPS2BF16, 620 621 // Dot product of BF16 pairs, accumulated into 622 // packed single precision. 623 DPBF16PS, 624 625 // Save xmm argument registers to the stack, according to %al. An operator 626 // is needed so that this can be expanded with control flow. 627 VASTART_SAVE_XMM_REGS, 628 629 // Windows's _chkstk call to do stack probing. 630 WIN_ALLOCA, 631 632 // For allocating variable amounts of stack space when using 633 // segmented stacks. Checks if the current stacklet has enough space, and 634 // falls back to heap allocation if not. 635 SEG_ALLOCA, 636 637 // For allocating stack space when using stack clash protector. 638 // Allocation is performed by block, and each block is probed. 639 PROBED_ALLOCA, 640 641 // Memory barriers. 642 MEMBARRIER, 643 MFENCE, 644 645 // Get a random integer and indicate whether it is valid in CF. 646 RDRAND, 647 648 // Get a NIST SP800-90B & C compliant random integer and 649 // indicate whether it is valid in CF. 650 RDSEED, 651 652 // Protection keys 653 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. 654 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is 655 // value for ECX. 656 RDPKRU, 657 WRPKRU, 658 659 // SSE42 string comparisons. 660 // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG 661 // will emit one or two instructions based on which results are used. If 662 // both flags and index/mask are used, this allows us to use a single 663 // instruction since we won't have to pick an opcode for flags. Instead we 664 // can rely on the DAG to CSE everything and decide at isel. 665 PCMPISTR, 666 PCMPESTR, 667 668 // Test if in transactional execution. 669 XTEST, 670 671 // ERI instructions. 672 RSQRT28, 673 RSQRT28_SAE, 674 RSQRT28S, 675 RSQRT28S_SAE, 676 RCP28, 677 RCP28_SAE, 678 RCP28S, 679 RCP28S_SAE, 680 EXP2, 681 EXP2_SAE, 682 683 // Conversions between float and half-float. 684 CVTPS2PH, 685 CVTPH2PS, 686 CVTPH2PS_SAE, 687 688 // Masked version of above. 689 // SRC, RND, PASSTHRU, MASK 690 MCVTPS2PH, 691 692 // Galois Field Arithmetic Instructions 693 GF2P8AFFINEINVQB, 694 GF2P8AFFINEQB, 695 GF2P8MULB, 696 697 // LWP insert record.
698 LWPINS, 699 700 // User level wait 701 UMWAIT, 702 TPAUSE, 703 704 // Enqueue Stores Instructions 705 ENQCMD, 706 ENQCMDS, 707 708 // For avx512-vp2intersect 709 VP2INTERSECT, 710 711 /// X86 strict FP compare instructions. 712 STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, 713 STRICT_FCMPS, 714 715 // Vector packed double/float comparison. 716 STRICT_CMPP, 717 718 /// Vector comparison generating mask bits for fp and 719 /// integer signed and unsigned data types. 720 STRICT_CMPM, 721 722 // Vector float/double to signed/unsigned integer with truncation. 723 STRICT_CVTTP2SI, 724 STRICT_CVTTP2UI, 725 726 // Vector FP extend. 727 STRICT_VFPEXT, 728 729 // Vector FP round. 730 STRICT_VFPROUND, 731 732 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. 733 // Also used by the legacy (V)ROUND intrinsics where we mask out the 734 // scaling part of the immediate. 735 STRICT_VRNDSCALE, 736 737 // Vector signed/unsigned integer to float/double. 738 STRICT_CVTSI2P, 739 STRICT_CVTUI2P, 740 741 // Strict FMA nodes. 742 STRICT_FNMADD, 743 STRICT_FMSUB, 744 STRICT_FNMSUB, 745 746 // Conversions between float and half-float. 747 STRICT_CVTPS2PH, 748 STRICT_CVTPH2PS, 749 750 // Compare and swap. 751 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, 752 LCMPXCHG8_DAG, 753 LCMPXCHG16_DAG, 754 LCMPXCHG8_SAVE_EBX_DAG, 755 LCMPXCHG16_SAVE_RBX_DAG, 756 757 /// LOCK-prefixed arithmetic read-modify-write instructions. 758 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) 759 LADD, 760 LSUB, 761 LOR, 762 LXOR, 763 LAND, 764 765 // Load, scalar_to_vector, and zero extend. 766 VZEXT_LOAD, 767 768 // extract_vector_elt, store. 769 VEXTRACT_STORE, 770 771 // scalar broadcast from memory 772 VBROADCAST_LOAD, 773 774 // Store FP control word into i16 memory. 775 FNSTCW16m, 776 777 /// This instruction implements FP_TO_SINT with the 778 /// integer destination in memory and an FP reg source. This corresponds 779 /// to the X86::FIST*m instructions and the rounding mode change stuff. It 780 /// has two inputs (token chain and address) and two outputs (int value 781 /// and token chain). Memory VT specifies the type to store to. 782 FP_TO_INT_IN_MEM, 783 784 /// This instruction implements SINT_TO_FP with the 785 /// integer source in memory and FP reg result. This corresponds to the 786 /// X86::FILD*m instructions. It has two inputs (token chain and address) 787 /// and two outputs (FP value and token chain). The integer source type is 788 /// specified by the memory VT. 789 FILD, 790 791 /// This instruction implements a fp->int store from FP stack 792 /// slots. This corresponds to the FIST instruction. It takes a 793 /// chain operand, value to store, address, and glue. The memory VT 794 /// specifies the type to store as. 795 FIST, 796 797 /// This instruction implements an extending load to FP stack slots. 798 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain 799 /// operand, and ptr to load from. The memory VT specifies the type to 800 /// load from. 801 FLD, 802 803 /// This instruction implements a truncating store from FP stack 804 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a 805 /// chain operand, value to store, address, and glue. The memory VT 806 /// specifies the type to store as. 807 FST, 808 809 /// This instruction grabs the address of the next argument 810 /// from a va_list.
(reads and modifies the va_list in memory) 811 VAARG_64, 812 813 // Vector truncating store with unsigned/signed saturation 814 VTRUNCSTOREUS, 815 VTRUNCSTORES, 816 // Vector truncating masked store with unsigned/signed saturation 817 VMTRUNCSTOREUS, 818 VMTRUNCSTORES, 819 820 // X86 specific gather and scatter 821 MGATHER, 822 MSCATTER, 823 824 // WARNING: Do not add anything at the end unless you want the node to 825 // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all 826 // opcodes will be treated as target memory ops! 827 }; 828 } // end namespace X86ISD 829 830 /// Define some predicates that are used for node matching. 831 namespace X86 { 832 /// Returns true if Elt is a constant zero or floating point constant +0.0. 833 bool isZeroNode(SDValue Elt); 834 835 /// Returns true if the given offset can 836 /// fit into the displacement field of the instruction. 837 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, 838 bool hasSymbolicDisplacement = true); 839 840 /// Determines whether the callee is required to pop its 841 /// own arguments. Callee pop is necessary to support tail calls. 842 bool isCalleePop(CallingConv::ID CallingConv, 843 bool is64Bit, bool IsVarArg, bool GuaranteeTCO); 844 845 /// If Op is a constant whose elements are all the same constant or 846 /// undefined, return true and return the constant value in \p SplatVal. 847 /// If we have undef bits that don't cover an entire element, we treat these 848 /// as zero if AllowPartialUndefs is set, else we fail and return false. 849 bool isConstantSplat(SDValue Op, APInt &SplatVal, 850 bool AllowPartialUndefs = true); 851 } // end namespace X86 852 853 //===--------------------------------------------------------------------===// 854 // X86 Implementation of the TargetLowering interface 855 class X86TargetLowering final : public TargetLowering { 856 public: 857 explicit X86TargetLowering(const X86TargetMachine &TM, 858 const X86Subtarget &STI); 859 860 unsigned getJumpTableEncoding() const override; 861 bool useSoftFloat() const override; 862 863 void markLibCallAttributes(MachineFunction *MF, unsigned CC, 864 ArgListTy &Args) const override; 865 866 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override { 867 return MVT::i8; 868 } 869 870 const MCExpr * 871 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, 872 const MachineBasicBlock *MBB, unsigned uid, 873 MCContext &Ctx) const override; 874 875 /// Returns relocation base for the given PIC jumptable. 876 SDValue getPICJumpTableRelocBase(SDValue Table, 877 SelectionDAG &DAG) const override; 878 const MCExpr * 879 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 880 unsigned JTI, MCContext &Ctx) const override; 881 882 /// Return the desired alignment for ByVal aggregate 883 /// function arguments in the caller parameter area. For X86, aggregates 884 /// that contain SSE vectors are placed at 16-byte boundaries while the 885 /// rest are at 4-byte boundaries. 886 unsigned getByValTypeAlignment(Type *Ty, 887 const DataLayout &DL) const override; 888 889 EVT getOptimalMemOpType(const MemOp &Op, 890 const AttributeList &FuncAttributes) const override; 891 892 /// Returns true if it's safe to use load / store of the 893 /// specified type to expand memcpy / memset inline. This is mostly true 894 /// for all types except for some special cases. For example, on X86 895 /// targets without SSE2 f64 load / store are done with fldl / fstpl which 896 /// also does type conversion.
Note the specified type doesn't have to be 897 /// legal as the hook is used before type legalization. 898 bool isSafeMemOpType(MVT VT) const override; 899 900 /// Returns true if the target allows unaligned memory accesses of the 901 /// specified type. Returns whether it is "fast" in the last argument. 902 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, 903 MachineMemOperand::Flags Flags, 904 bool *Fast) const override; 905 906 /// Provide custom lowering hooks for some operations. 907 /// 908 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 909 910 /// Places new result values for the node in Results (their number 911 /// and types must exactly match those of the original return values of 912 /// the node), or leaves Results empty, which indicates that the node is not 913 /// to be custom lowered after all. 914 void LowerOperationWrapper(SDNode *N, 915 SmallVectorImpl<SDValue> &Results, 916 SelectionDAG &DAG) const override; 917 918 /// Replace the results of node with an illegal result 919 /// type with new values built out of custom code. 920 /// 921 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, 922 SelectionDAG &DAG) const override; 923 924 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 925 926 /// Return true if the target has native support for 927 /// the specified value type and it is 'desirable' to use the type for the 928 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 929 /// instruction encodings are longer and some i16 instructions are slow. 930 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; 931 932 /// Return true if the target has native support for the 933 /// specified value type and it is 'desirable' to use the type. e.g. On x86 934 /// i16 is legal, but undesirable since i16 instruction encodings are longer 935 /// and some i16 instructions are slow. 936 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; 937 938 /// Return the newly negated expression if the cost is not expensive and 939 /// set the cost in \p Cost to indicate that if it is cheaper or neutral to 940 /// do the negation. 941 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, 942 bool LegalOperations, bool ForCodeSize, 943 NegatibleCost &Cost, 944 unsigned Depth) const override; 945 946 MachineBasicBlock * 947 EmitInstrWithCustomInserter(MachineInstr &MI, 948 MachineBasicBlock *MBB) const override; 949 950 /// This method returns the name of a target specific DAG node. 951 const char *getTargetNodeName(unsigned Opcode) const override; 952 953 /// Do not merge vector stores after legalization because that may conflict 954 /// with x86-specific store splitting optimizations. 955 bool mergeStoresAfterLegalization(EVT MemVT) const override { 956 return !MemVT.isVector(); 957 } 958 959 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, 960 const SelectionDAG &DAG) const override; 961 962 bool isCheapToSpeculateCttz() const override; 963 964 bool isCheapToSpeculateCtlz() const override; 965 966 bool isCtlzFast() const override; 967 968 bool hasBitPreservingFPLogic(EVT VT) const override { 969 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector(); 970 } 971 972 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { 973 // If the pair to store is a mixture of float and int values, we will 974 // save two bitwise instructions and one float-to-int instruction and 975 // increase one store instruction. 
There is potentially a more 976 // significant benefit because it avoids the float->int domain switch 977 // for the input value, so it is more likely a win. 978 if ((LTy.isFloatingPoint() && HTy.isInteger()) || 979 (LTy.isInteger() && HTy.isFloatingPoint())) 980 return true; 981 // If the pair only contains int values, we will save two bitwise 982 // instructions and increase one store instruction (costing one more 983 // store buffer). Since the benefit is less clear, we leave such 984 // pairs out until we get a testcase to prove it is a win. 985 return false; 986 } 987 988 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; 989 990 bool hasAndNotCompare(SDValue Y) const override; 991 992 bool hasAndNot(SDValue Y) const override; 993 994 bool hasBitTest(SDValue X, SDValue Y) const override; 995 996 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( 997 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, 998 unsigned OldShiftOpcode, unsigned NewShiftOpcode, 999 SelectionDAG &DAG) const override; 1000 1001 bool shouldFoldConstantShiftPairToMask(const SDNode *N, 1002 CombineLevel Level) const override; 1003 1004 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override; 1005 1006 bool 1007 shouldTransformSignedTruncationCheck(EVT XVT, 1008 unsigned KeptBits) const override { 1009 // For vectors, we don't have a preference. 1010 if (XVT.isVector()) 1011 return false; 1012 1013 auto VTIsOk = [](EVT VT) -> bool { 1014 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || 1015 VT == MVT::i64; 1016 }; 1017 1018 // We are ok with KeptBitsVT being byte/word/dword, which is what MOVS 1019 // supports. XVT will be larger than KeptBitsVT. 1020 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits); 1021 return VTIsOk(XVT) && VTIsOk(KeptBitsVT); 1022 } 1023 1024 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; 1025 1026 bool shouldSplatInsEltVarIndex(EVT VT) const override; 1027 1028 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { 1029 return VT.isScalarInteger(); 1030 } 1031 1032 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST. 1033 MVT hasFastEqualityCompare(unsigned NumBits) const override; 1034 1035 /// Return the value type to use for ISD::SETCC. 1036 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, 1037 EVT VT) const override; 1038 1039 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, 1040 const APInt &DemandedElts, 1041 TargetLoweringOpt &TLO) const override; 1042 1043 /// Determine which of the bits specified in Mask are known to be either 1044 /// zero or one and return them in the KnownZero/KnownOne bitsets. 1045 void computeKnownBitsForTargetNode(const SDValue Op, 1046 KnownBits &Known, 1047 const APInt &DemandedElts, 1048 const SelectionDAG &DAG, 1049 unsigned Depth = 0) const override; 1050 1051 /// Determine the number of bits in the operation that are sign bits.
1052 unsigned ComputeNumSignBitsForTargetNode(SDValue Op, 1053 const APInt &DemandedElts, 1054 const SelectionDAG &DAG, 1055 unsigned Depth) const override; 1056 1057 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, 1058 const APInt &DemandedElts, 1059 APInt &KnownUndef, 1060 APInt &KnownZero, 1061 TargetLoweringOpt &TLO, 1062 unsigned Depth) const override; 1063 1064 bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op, 1065 const APInt &DemandedElts, 1066 unsigned MaskIndex, 1067 TargetLoweringOpt &TLO, 1068 unsigned Depth) const; 1069 1070 bool SimplifyDemandedBitsForTargetNode(SDValue Op, 1071 const APInt &DemandedBits, 1072 const APInt &DemandedElts, 1073 KnownBits &Known, 1074 TargetLoweringOpt &TLO, 1075 unsigned Depth) const override; 1076 1077 SDValue SimplifyMultipleUseDemandedBitsForTargetNode( 1078 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 1079 SelectionDAG &DAG, unsigned Depth) const override; 1080 1081 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; 1082 1083 SDValue unwrapAddress(SDValue N) const override; 1084 1085 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; 1086 1087 bool ExpandInlineAsm(CallInst *CI) const override; 1088 1089 ConstraintType getConstraintType(StringRef Constraint) const override; 1090 1091 /// Examine constraint string and operand type and determine a weight value. 1092 /// The operand object must already have been set up with the operand type. 1093 ConstraintWeight 1094 getSingleConstraintMatchWeight(AsmOperandInfo &info, 1095 const char *constraint) const override; 1096 1097 const char *LowerXConstraint(EVT ConstraintVT) const override; 1098 1099 /// Lower the specified operand into the Ops vector. If it is invalid, don't 1100 /// add anything to Ops. If hasMemory is true it means one of the asm 1101 /// constraint of the inline asm instruction being processed is 'm'. 1102 void LowerAsmOperandForConstraint(SDValue Op, 1103 std::string &Constraint, 1104 std::vector<SDValue> &Ops, 1105 SelectionDAG &DAG) const override; 1106 1107 unsigned 1108 getInlineAsmMemConstraint(StringRef ConstraintCode) const override { 1109 if (ConstraintCode == "o") 1110 return InlineAsm::Constraint_o; 1111 else if (ConstraintCode == "v") 1112 return InlineAsm::Constraint_v; 1113 else if (ConstraintCode == "X") 1114 return InlineAsm::Constraint_X; 1115 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 1116 } 1117 1118 /// Handle Lowering flag assembly outputs. 1119 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL, 1120 const AsmOperandInfo &Constraint, 1121 SelectionDAG &DAG) const override; 1122 1123 /// Given a physical register constraint 1124 /// (e.g. {edx}), return the register number and the register class for the 1125 /// register. This should only be used for C_Register constraints. On 1126 /// error, this returns a register number of 0. 1127 std::pair<unsigned, const TargetRegisterClass *> 1128 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 1129 StringRef Constraint, MVT VT) const override; 1130 1131 /// Return true if the addressing mode represented 1132 /// by AM is legal for this target, for a load/store of the specified type. 
1133 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, 1134 Type *Ty, unsigned AS, 1135 Instruction *I = nullptr) const override; 1136 1137 /// Return true if the specified immediate is legal 1138 /// icmp immediate, that is the target has icmp instructions which can 1139 /// compare a register against the immediate without having to materialize 1140 /// the immediate into a register. 1141 bool isLegalICmpImmediate(int64_t Imm) const override; 1142 1143 /// Return true if the specified immediate is legal 1144 /// add immediate, that is the target has add instructions which can 1145 /// add a register and the immediate without having to materialize 1146 /// the immediate into a register. 1147 bool isLegalAddImmediate(int64_t Imm) const override; 1148 1149 bool isLegalStoreImmediate(int64_t Imm) const override; 1150 1151 /// Return the cost of the scaling factor used in the addressing 1152 /// mode represented by AM for this target, for a load/store 1153 /// of the specified type. 1154 /// If the AM is supported, the return value must be >= 0. 1155 /// If the AM is not supported, it returns a negative value. 1156 int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, 1157 unsigned AS) const override; 1158 1159 /// This is used to enable splatted operand transforms for vector shifts 1160 /// and vector funnel shifts. 1161 bool isVectorShiftByScalarCheap(Type *Ty) const override; 1162 1163 /// Add x86-specific opcodes to the default list. 1164 bool isBinOp(unsigned Opcode) const override; 1165 1166 /// Returns true if the opcode is a commutative binary operation. 1167 bool isCommutativeBinOp(unsigned Opcode) const override; 1168 1169 /// Return true if it's free to truncate a value of 1170 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in 1171 /// register EAX to i16 by referencing its sub-register AX. 1172 bool isTruncateFree(Type *Ty1, Type *Ty2) const override; 1173 bool isTruncateFree(EVT VT1, EVT VT2) const override; 1174 1175 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; 1176 1177 /// Return true if any actual instruction that defines a 1178 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result 1179 /// register. This does not necessarily include registers defined in 1180 /// unknown ways, such as incoming arguments, or copies from unknown 1181 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this 1182 /// does not necessarily apply to truncate instructions. e.g. on x86-64, 1183 /// all instructions that define 32-bit values implicit zero-extend the 1184 /// result out to 64 bits. 1185 bool isZExtFree(Type *Ty1, Type *Ty2) const override; 1186 bool isZExtFree(EVT VT1, EVT VT2) const override; 1187 bool isZExtFree(SDValue Val, EVT VT2) const override; 1188 1189 bool shouldSinkOperands(Instruction *I, 1190 SmallVectorImpl<Use *> &Ops) const override; 1191 bool shouldConvertPhiType(Type *From, Type *To) const override; 1192 1193 /// Return true if folding a vector load into ExtVal (a sign, zero, or any 1194 /// extend node) is profitable. 1195 bool isVectorLoadExtDesirable(SDValue) const override; 1196 1197 /// Return true if an FMA operation is faster than a pair of fmul and fadd 1198 /// instructions. fmuladd intrinsics will be expanded to FMAs when this 1199 /// method returns true, otherwise fmuladd is expanded to fmul + fadd. 
1200 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 1201 EVT VT) const override; 1202 1203 /// Return true if it's profitable to narrow 1204 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow 1205 /// from i32 to i8 but not from i32 to i16. 1206 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; 1207 1208 /// Given an intrinsic, checks if on the target the intrinsic will need to map 1209 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns 1210 /// true and stores the intrinsic information into the IntrinsicInfo that was 1211 /// passed to the function. 1212 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, 1213 MachineFunction &MF, 1214 unsigned Intrinsic) const override; 1215 1216 /// Returns true if the target can instruction select the 1217 /// specified FP immediate natively. If false, the legalizer will 1218 /// materialize the FP immediate as a load from a constant pool. 1219 bool isFPImmLegal(const APFloat &Imm, EVT VT, 1220 bool ForCodeSize) const override; 1221 1222 /// Targets can use this to indicate that they only support *some* 1223 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a 1224 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to 1225 /// be legal. 1226 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override; 1227 1228 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there 1229 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a 1230 /// constant pool entry. 1231 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override; 1232 1233 /// Returns true if lowering to a jump table is allowed. 1234 bool areJTsAllowed(const Function *Fn) const override; 1235 1236 /// If true, then instruction selection should 1237 /// seek to shrink the FP constant of the specified type to a smaller type 1238 /// in order to save space and / or reduce runtime. 1239 bool ShouldShrinkFPConstant(EVT VT) const override { 1240 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more 1241 // expensive than a straight movsd. On the other hand, it's important to 1242 // shrink long double fp constant since fldt is very slow. 1243 return !X86ScalarSSEf64 || VT == MVT::f80; 1244 } 1245 1246 /// Return true if we believe it is correct and profitable to reduce the 1247 /// load node to a smaller type. 1248 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, 1249 EVT NewVT) const override; 1250 1251 /// Return true if the specified scalar FP type is computed in an SSE 1252 /// register, not on the X87 floating point stack. 1253 bool isScalarFPTypeInSSEReg(EVT VT) const { 1254 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 1255 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 1256 } 1257 1258 /// Returns true if it is beneficial to convert a load of a constant 1259 /// to just the constant itself. 1260 bool shouldConvertConstantLoadToIntImm(const APInt &Imm, 1261 Type *Ty) const override; 1262 1263 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override; 1264 1265 bool convertSelectOfConstantsToMath(EVT VT) const override; 1266 1267 bool decomposeMulByConstant(LLVMContext &Context, EVT VT, 1268 SDValue C) const override; 1269 1270 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type 1271 /// with this index. 
1272 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, 1273 unsigned Index) const override; 1274 1275 /// Scalar ops always have equal or better analysis/performance/power than 1276 /// the vector equivalent, so this always makes sense if the scalar op is 1277 /// supported. 1278 bool shouldScalarizeBinop(SDValue) const override; 1279 1280 /// Extract of a scalar FP value from index 0 of a vector is free. 1281 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override { 1282 EVT EltVT = VT.getScalarType(); 1283 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0; 1284 } 1285 1286 /// Overflow nodes should get combined/lowered to optimal instructions 1287 /// (they should allow eliminating explicit compares by getting flags from 1288 /// math ops). 1289 bool shouldFormOverflowOp(unsigned Opcode, EVT VT, 1290 bool MathUsed) const override; 1291 1292 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, 1293 unsigned AddrSpace) const override { 1294 // If we can replace more than 2 scalar stores, there will be a reduction 1295 // in instructions even after we add a vector constant load. 1296 return NumElem > 2; 1297 } 1298 1299 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, 1300 const SelectionDAG &DAG, 1301 const MachineMemOperand &MMO) const override; 1302 1303 /// Intel processors have a unified instruction and data cache 1304 const char * getClearCacheBuiltinName() const override { 1305 return nullptr; // nothing to do, move along. 1306 } 1307 1308 Register getRegisterByName(const char* RegName, LLT VT, 1309 const MachineFunction &MF) const override; 1310 1311 /// If a physical register, this returns the register that receives the 1312 /// exception address on entry to an EH pad. 1313 Register 1314 getExceptionPointerRegister(const Constant *PersonalityFn) const override; 1315 1316 /// If a physical register, this returns the register that receives the 1317 /// exception typeid on entry to a landing pad. 1318 Register 1319 getExceptionSelectorRegister(const Constant *PersonalityFn) const override; 1320 1321 virtual bool needsFixedCatchObjects() const override; 1322 1323 /// This method returns a target specific FastISel object, 1324 /// or null if the target does not support "fast" ISel. 1325 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 1326 const TargetLibraryInfo *libInfo) const override; 1327 1328 /// If the target has a standard location for the stack protector cookie, 1329 /// returns the address of that location. Otherwise, returns nullptr. 1330 Value *getIRStackGuard(IRBuilder<> &IRB) const override; 1331 1332 bool useLoadStackGuardNode() const override; 1333 bool useStackGuardXorFP() const override; 1334 void insertSSPDeclarations(Module &M) const override; 1335 Value *getSDagStackGuard(const Module &M) const override; 1336 Function *getSSPStackGuardCheck(const Module &M) const override; 1337 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, 1338 const SDLoc &DL) const override; 1339 1340 1341 /// Return true if the target stores SafeStack pointer at a fixed offset in 1342 /// some non-standard address space, and populates the address space and 1343 /// offset as appropriate. 
1344 Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override; 1345 1346 std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL, 1347 SDValue Chain, SDValue Pointer, 1348 MachinePointerInfo PtrInfo, 1349 Align Alignment, 1350 SelectionDAG &DAG) const; 1351 1352 bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; 1353 1354 /// Customize the preferred legalization strategy for certain types. 1355 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; 1356 1357 bool softPromoteHalfType() const override { return true; } 1358 1359 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, 1360 EVT VT) const override; 1361 1362 unsigned getNumRegistersForCallingConv(LLVMContext &Context, 1363 CallingConv::ID CC, 1364 EVT VT) const override; 1365 1366 unsigned getVectorTypeBreakdownForCallingConv( 1367 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, 1368 unsigned &NumIntermediates, MVT &RegisterVT) const override; 1369 1370 bool isIntDivCheap(EVT VT, AttributeList Attr) const override; 1371 1372 bool supportSwiftError() const override; 1373 1374 bool hasStackProbeSymbol(MachineFunction &MF) const override; 1375 bool hasInlineStackProbe(MachineFunction &MF) const override; 1376 StringRef getStackProbeSymbolName(MachineFunction &MF) const override; 1377 1378 unsigned getStackProbeSize(MachineFunction &MF) const; 1379 1380 bool hasVectorBlend() const override { return true; } 1381 1382 unsigned getMaxSupportedInterleaveFactor() const override { return 4; } 1383 1384 /// Lower interleaved load(s) into target specific 1385 /// instructions/intrinsics. 1386 bool lowerInterleavedLoad(LoadInst *LI, 1387 ArrayRef<ShuffleVectorInst *> Shuffles, 1388 ArrayRef<unsigned> Indices, 1389 unsigned Factor) const override; 1390 1391 /// Lower interleaved store(s) into target specific 1392 /// instructions/intrinsics. 1393 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, 1394 unsigned Factor) const override; 1395 1396 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value, 1397 SDValue Addr, SelectionDAG &DAG) 1398 const override; 1399 1400 protected: 1401 std::pair<const TargetRegisterClass *, uint8_t> 1402 findRepresentativeClass(const TargetRegisterInfo *TRI, 1403 MVT VT) const override; 1404 1405 private: 1406 /// Keep a reference to the X86Subtarget around so that we can 1407 /// make the right decision when generating code for different targets. 1408 const X86Subtarget &Subtarget; 1409 1410 /// Select between SSE or x87 floating point ops. 1411 /// When SSE is available, use it for f32 operations. 1412 /// When SSE2 is available, use it for f64 operations. 1413 bool X86ScalarSSEf32; 1414 bool X86ScalarSSEf64; 1415 1416 /// A list of legal FP immediates. 1417 std::vector<APFloat> LegalFPImmediates; 1418 1419 /// Indicate that this x86 target can instruction 1420 /// select the specified FP immediate natively. 
1421 void addLegalFPImmediate(const APFloat& Imm) { 1422 LegalFPImmediates.push_back(Imm); 1423 } 1424 1425 SDValue LowerCallResult(SDValue Chain, SDValue InFlag, 1426 CallingConv::ID CallConv, bool isVarArg, 1427 const SmallVectorImpl<ISD::InputArg> &Ins, 1428 const SDLoc &dl, SelectionDAG &DAG, 1429 SmallVectorImpl<SDValue> &InVals, 1430 uint32_t *RegMask) const; 1431 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, 1432 const SmallVectorImpl<ISD::InputArg> &ArgInfo, 1433 const SDLoc &dl, SelectionDAG &DAG, 1434 const CCValAssign &VA, MachineFrameInfo &MFI, 1435 unsigned i) const; 1436 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, 1437 const SDLoc &dl, SelectionDAG &DAG, 1438 const CCValAssign &VA, 1439 ISD::ArgFlagsTy Flags, bool isByval) const; 1440 1441 // Call lowering helpers. 1442 1443 /// Check whether the call is eligible for tail call optimization. Targets 1444 /// that want to do tail call optimization should implement this function. 1445 bool IsEligibleForTailCallOptimization(SDValue Callee, 1446 CallingConv::ID CalleeCC, 1447 bool isVarArg, 1448 bool isCalleeStructRet, 1449 bool isCallerStructRet, 1450 Type *RetTy, 1451 const SmallVectorImpl<ISD::OutputArg> &Outs, 1452 const SmallVectorImpl<SDValue> &OutVals, 1453 const SmallVectorImpl<ISD::InputArg> &Ins, 1454 SelectionDAG& DAG) const; 1455 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, 1456 SDValue Chain, bool IsTailCall, 1457 bool Is64Bit, int FPDiff, 1458 const SDLoc &dl) const; 1459 1460 unsigned GetAlignedArgumentStackSize(unsigned StackSize, 1461 SelectionDAG &DAG) const; 1462 1463 unsigned getAddressSpace(void) const; 1464 1465 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, 1466 SDValue &Chain) const; 1467 SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const; 1468 1469 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 1470 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; 1471 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 1472 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 1473 1474 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr, 1475 const unsigned char OpFlags = 0) const; 1476 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; 1477 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; 1478 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 1479 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 1480 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; 1481 1482 /// Creates target global address or external symbol nodes for calls or 1483 /// other uses. 
1484 SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, 1485 bool ForCall) const; 1486 1487 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 1488 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 1489 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; 1490 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; 1491 SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const; 1492 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; 1493 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; 1494 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; 1495 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; 1496 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; 1497 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 1498 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; 1499 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; 1500 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 1501 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 1502 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; 1503 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; 1504 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; 1505 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; 1506 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; 1507 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; 1508 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; 1509 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; 1510 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; 1511 SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const; 1512 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 1513 SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const; 1514 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; 1515 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; 1516 1517 SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG, 1518 RTLIB::Libcall Call) const; 1519 1520 SDValue 1521 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 1522 const SmallVectorImpl<ISD::InputArg> &Ins, 1523 const SDLoc &dl, SelectionDAG &DAG, 1524 SmallVectorImpl<SDValue> &InVals) const override; 1525 SDValue LowerCall(CallLoweringInfo &CLI, 1526 SmallVectorImpl<SDValue> &InVals) const override; 1527 1528 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 1529 const SmallVectorImpl<ISD::OutputArg> &Outs, 1530 const SmallVectorImpl<SDValue> &OutVals, 1531 const SDLoc &dl, SelectionDAG &DAG) const override; 1532 1533 bool supportSplitCSR(MachineFunction *MF) const override { 1534 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && 1535 MF->getFunction().hasFnAttribute(Attribute::NoUnwind); 1536 } 1537 void initializeSplitCSR(MachineBasicBlock *Entry) const override; 1538 void insertCopiesSplitCSR( 1539 MachineBasicBlock *Entry, 1540 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; 1541 1542 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; 1543 1544 bool mayBeEmittedAsTailCall(const CallInst *CI) const override; 1545 1546 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, 1547 ISD::NodeType ExtendKind) const override; 1548 1549 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, 1550 bool isVarArg, 1551 const 
SmallVectorImpl<ISD::OutputArg> &Outs, 1552 LLVMContext &Context) const override; 1553 1554 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; 1555 1556 TargetLoweringBase::AtomicExpansionKind 1557 shouldExpandAtomicLoadInIR(LoadInst *LI) const override; 1558 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; 1559 TargetLoweringBase::AtomicExpansionKind 1560 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; 1561 1562 LoadInst * 1563 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; 1564 1565 bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override; 1566 bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override; 1567 1568 bool needsCmpXchgNb(Type *MemType) const; 1569 1570 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, 1571 MachineBasicBlock *DispatchBB, int FI) const; 1572 1573 // Utility function to emit the low-level va_arg code for X86-64. 1574 MachineBasicBlock * 1575 EmitVAARG64WithCustomInserter(MachineInstr &MI, 1576 MachineBasicBlock *MBB) const; 1577 1578 /// Utility function to emit the xmm reg save portion of va_start. 1579 MachineBasicBlock * 1580 EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr, 1581 MachineBasicBlock *BB) const; 1582 1583 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1, 1584 MachineInstr &MI2, 1585 MachineBasicBlock *BB) const; 1586 1587 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I, 1588 MachineBasicBlock *BB) const; 1589 1590 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, 1591 MachineBasicBlock *BB) const; 1592 1593 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI, 1594 MachineBasicBlock *BB) const; 1595 1596 MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI, 1597 MachineBasicBlock *BB) const; 1598 1599 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI, 1600 MachineBasicBlock *BB) const; 1601 1602 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, 1603 MachineBasicBlock *BB) const; 1604 1605 MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI, 1606 MachineBasicBlock *BB) const; 1607 1608 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, 1609 MachineBasicBlock *MBB) const; 1610 1611 void emitSetJmpShadowStackFix(MachineInstr &MI, 1612 MachineBasicBlock *MBB) const; 1613 1614 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, 1615 MachineBasicBlock *MBB) const; 1616 1617 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI, 1618 MachineBasicBlock *MBB) const; 1619 1620 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI, 1621 MachineBasicBlock *MBB) const; 1622 1623 /// Emit flags for the given setcc condition and operands. Also returns the 1624 /// corresponding X86 condition code constant in X86CC. 1625 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC, 1626 const SDLoc &dl, SelectionDAG &DAG, 1627 SDValue &X86CC) const; 1628 1629 /// Check if replacement of SQRT with RSQRT should be disabled. 1630 bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override; 1631 1632 /// Use rsqrt* to speed up sqrt calculations. 1633 SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, 1634 int &RefinementSteps, bool &UseOneConstNR, 1635 bool Reciprocal) const override; 1636 1637 /// Use rcp* to speed up fdiv calculations. 1638 SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, 1639 int &RefinementSteps) const override; 1640 1641 /// Reassociate floating point divisions into multiply by reciprocal. 
1642 unsigned combineRepeatedFPDivisors() const override; 1643 1644 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, 1645 SmallVectorImpl<SDNode *> &Created) const override; 1646 }; 1647 1648 namespace X86 { 1649 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 1650 const TargetLibraryInfo *libInfo); 1651 } // end namespace X86 1652 1653 // X86 specific Gather/Scatter nodes. 1654 // The class has the same order of operands as MaskedGatherScatterSDNode for 1655 // convenience. 1656 class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode { 1657 public: 1658 // This is intended as a utility and should never be directly created. 1659 X86MaskedGatherScatterSDNode() = delete; 1660 ~X86MaskedGatherScatterSDNode() = delete; 1661 1662 const SDValue &getBasePtr() const { return getOperand(3); } 1663 const SDValue &getIndex() const { return getOperand(4); } 1664 const SDValue &getMask() const { return getOperand(2); } 1665 const SDValue &getScale() const { return getOperand(5); } 1666 1667 static bool classof(const SDNode *N) { 1668 return N->getOpcode() == X86ISD::MGATHER || 1669 N->getOpcode() == X86ISD::MSCATTER; 1670 } 1671 }; 1672 1673 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode { 1674 public: 1675 const SDValue &getPassThru() const { return getOperand(1); } 1676 1677 static bool classof(const SDNode *N) { 1678 return N->getOpcode() == X86ISD::MGATHER; 1679 } 1680 }; 1681 1682 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode { 1683 public: 1684 const SDValue &getValue() const { return getOperand(1); } 1685 1686 static bool classof(const SDNode *N) { 1687 return N->getOpcode() == X86ISD::MSCATTER; 1688 } 1689 }; 1690 1691 /// Generate unpacklo/unpackhi shuffle mask. 1692 void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo, 1693 bool Unary); 1694 1695 /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation 1696 /// imposed by AVX and specific to the unary pattern. Example: 1697 /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3> 1698 /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7> 1699 void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo); 1700 1701 } // end namespace llvm 1702 1703 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H 1704