//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
  // X86 Specific DAG Nodes
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    /// Bit scan forward.
    BSF,
    /// Bit scan reverse.
    BSR,

    /// X86 funnel/double shift i16 instructions. These correspond to
    /// X86::SHLDW and X86::SHRDW instructions which have different amt
    /// modulo rules to generic funnel shifts.
    /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
    FSHL,
    FSHR,

    /// Bitwise logical AND of floating point values. This corresponds
    /// to X86::ANDPS or X86::ANDPD.
    FAND,

    /// Bitwise logical OR of floating point values. This corresponds
    /// to X86::ORPS or X86::ORPD.
    FOR,

    /// Bitwise logical XOR of floating point values. This corresponds
    /// to X86::XORPS or X86::XORPD.
    FXOR,

    /// Bitwise logical ANDNOT of floating point values. This
    /// corresponds to X86::ANDNPS or X86::ANDNPD.
    FANDN,

    /// These operations represent an abstract X86 call
    /// instruction, which includes a bunch of information. In particular the
    /// operands of these nodes are:
    ///
    ///     #0 - The incoming token chain
    ///     #1 - The callee
    ///     #2 - The number of arg bytes the caller pushes on the stack.
    ///     #3 - The number of arg bytes the callee pops off the stack.
    ///     #4 - The value to pass in AL/AX/EAX (optional)
    ///     #5 - The value to pass in DL/DX/EDX (optional)
    ///
    /// The result values of these nodes are:
    ///
    ///     #0 - The outgoing token chain
    ///     #1 - The first register result value (optional)
    ///     #2 - The second register result value (optional)
    ///
    CALL,

    /// Same as call except it adds the NoTrack prefix.
    NT_CALL,

    // Pseudo for an ObjC call that gets emitted together with a special
    // marker instruction.
    CALL_RVMARKER,

    /// X86 compare and logical compare instructions.
    CMP,
    FCMP,
    COMI,
    UCOMI,

    /// X86 bit-test instructions.
    BT,

    /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
    /// operand, usually produced by a CMP instruction.
    SETCC,

    /// X86 Select
    SELECTS,

    // Same as SETCC except it's materialized with a sbb and the value is all
    // ones or all zeros.
    SETCC_CARRY, // R = carry_bit ? ~0 : 0

    /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
    /// Operands are two FP values to compare; result is a mask of
    /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
    FSETCC,

    /// X86 FP SETCC, similar to above, but with output as an i1 mask and
    /// a version with SAE.
    FSETCCM,
    FSETCCM_SAE,

    /// X86 conditional moves. Operand 0 and operand 1 are the two values
    /// to select from. Operand 2 is the condition code, and operand 3 is the
    /// flag operand produced by a CMP or TEST instruction.
    CMOV,

    /// X86 conditional branches. Operand 0 is the chain operand, operand 1
    /// is the block to branch if condition is true, operand 2 is the
    /// condition code, and operand 3 is the flag operand produced by a CMP
    /// or TEST instruction.
    BRCOND,

    /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
    /// operand 1 is the target address.
    NT_BRIND,

    /// Return with a flag operand. Operand 0 is the chain operand, operand
    /// 1 is the number of bytes of stack to pop.
    RET_FLAG,

    /// Return from interrupt. Operand 0 is the number of bytes to pop.
    IRET,

    /// Repeat fill, corresponds to X86::REP_STOSx.
    REP_STOS,

    /// Repeat move, corresponds to X86::REP_MOVSx.
    REP_MOVS,

    /// On Darwin, this node represents the result of the popl
    /// at function entry, used for PIC code.
    GlobalBaseReg,

    /// A wrapper node for TargetConstantPool, TargetJumpTable,
    /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
    /// MCSymbol and TargetBlockAddress.
    Wrapper,

    /// Special wrapper used under X86-64 PIC mode for RIP
    /// relative displacements.
    WrapperRIP,

    /// Copies a 64-bit value from an MMX vector to the low word
    /// of an XMM vector, with the high word zero filled.
    MOVQ2DQ,

    /// Copies a 64-bit value from the low word of an XMM vector
    /// to an MMX vector.
    MOVDQ2Q,

    /// Copies a 32-bit value from the low word of a MMX
    /// vector to a GPR.
    MMX_MOVD2W,

    /// Copies a GPR into the low 32-bit word of a MMX vector
    /// and zero out the high word.
    MMX_MOVW2D,

    /// Extract an 8-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRB.
    PEXTRB,

    /// Extract a 16-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRW.
    PEXTRW,

    /// Insert any element of a 4 x float vector into any element
    /// of a destination 4 x float vector.
    INSERTPS,

    /// Insert the lower 8-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRB.
    PINSRB,

    /// Insert the lower 16-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRW.
    PINSRW,

    /// Shuffle 16 8-bit values within a vector.
    PSHUFB,

    /// Compute Sum of Absolute Differences.
    PSADBW,
    /// Compute Double Block Packed Sum-Absolute-Differences
    DBPSADBW,

    /// Bitwise Logical AND NOT of Packed FP values.
    ANDNP,

    /// Blend where the selector is an immediate.
    BLENDI,

    /// Dynamic (non-constant condition) vector blend where only the sign bits
    /// of the condition elements are used. This is used to enforce that the
    /// condition mask is not valid for generic VSELECT optimizations. This
    /// is also used to implement the intrinsics.
    /// Operands are in VSELECT order: MASK, TRUE, FALSE
    BLENDV,

    /// Combined add and sub on an FP vector.
    ADDSUB,

    // FP vector ops with rounding mode.
    FADD_RND,
    FADDS,
    FADDS_RND,
    FSUB_RND,
    FSUBS,
    FSUBS_RND,
    FMUL_RND,
    FMULS,
    FMULS_RND,
    FDIV_RND,
    FDIVS,
    FDIVS_RND,
    FMAX_SAE,
    FMAXS_SAE,
    FMIN_SAE,
    FMINS_SAE,
    FSQRT_RND,
    FSQRTS,
    FSQRTS_RND,

    // FP vector get exponent.
    FGETEXP,
    FGETEXP_SAE,
    FGETEXPS,
    FGETEXPS_SAE,
    // Extract Normalized Mantissas.
    VGETMANT,
    VGETMANT_SAE,
    VGETMANTS,
    VGETMANTS_SAE,
    // FP Scale.
    SCALEF,
    SCALEF_RND,
    SCALEFS,
    SCALEFS_RND,

    // Unsigned Integer average.
    AVG,

    /// Integer horizontal add/sub.
    HADD,
    HSUB,

    /// Floating point horizontal add/sub.
    FHADD,
    FHSUB,

    // Detect Conflicts Within a Vector
    CONFLICT,

    /// Floating point max and min.
    FMAX,
    FMIN,

    /// Commutative FMIN and FMAX.
    FMAXC,
    FMINC,

    /// Scalar intrinsic floating point max and min.
    FMAXS,
    FMINS,

    /// Floating point reciprocal-sqrt and reciprocal approximation.
    /// Note that these typically require refinement
    /// in order to obtain suitable precision.
    FRSQRT,
    FRCP,

    // AVX-512 reciprocal approximations with a little more precision.
    RSQRT14,
    RSQRT14S,
    RCP14,
    RCP14S,

    // Thread Local Storage.
    TLSADDR,

    // Thread Local Storage. A call to get the start address
    // of the TLS block for the current module.
    TLSBASEADDR,

    // Thread Local Storage. When calling to an OS provided
    // thunk at the address from an earlier relocation.
    TLSCALL,

    // Exception Handling helpers.
    EH_RETURN,

    // SjLj exception handling setjmp.
    EH_SJLJ_SETJMP,

    // SjLj exception handling longjmp.
    EH_SJLJ_LONGJMP,

    // SjLj exception handling dispatch.
    EH_SJLJ_SETUP_DISPATCH,

    /// Tail call return. See X86TargetLowering::LowerCall for
    /// the list of operands.
    TC_RETURN,

    // Vector move to low scalar and zero higher vector elements.
    VZEXT_MOVL,

    // Vector integer truncate.
    VTRUNC,
    // Vector integer truncate with unsigned/signed saturation.
    VTRUNCUS,
    VTRUNCS,

    // Masked version of the above. Used when less than a 128-bit result is
    // produced since the mask only applies to the lower elements and can't
    // be represented by a select.
    // SRC, PASSTHRU, MASK
    VMTRUNC,
    VMTRUNCUS,
    VMTRUNCS,

    // Vector FP extend.
    VFPEXT,
    VFPEXT_SAE,
    VFPEXTS,
    VFPEXTS_SAE,

    // Vector FP round.
    VFPROUND,
    VFPROUND_RND,
    VFPROUNDS,
    VFPROUNDS_RND,

    // Masked version of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    VMFPROUND,

    // 128-bit vector logical left / right shift
    VSHLDQ,
    VSRLDQ,

    // Vector shift elements
    VSHL,
    VSRL,
    VSRA,

    // Vector variable shift
    VSHLV,
    VSRLV,
    VSRAV,

    // Vector shift elements by immediate
    VSHLI,
    VSRLI,
    VSRAI,

    // Shifts of mask registers.
    KSHIFTL,
    KSHIFTR,

    // Bit rotate by immediate
    VROTLI,
    VROTRI,

    // Vector packed double/float comparison.
    CMPP,

    // Vector integer comparisons.
    PCMPEQ,
    PCMPGT,

    // v8i16 Horizontal minimum and position.
    PHMINPOS,

    MULTISHIFT,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    CMPM,
    // Vector mask comparison generating mask bits for FP values.
    CMPMM,
    // Vector mask comparison with SAE for FP values.
    CMPMM_SAE,

    // Arithmetic operations with FLAGS results.
    ADD,
    SUB,
    ADC,
    SBB,
    SMUL,
    UMUL,
    OR,
    XOR,
    AND,

    // Bit field extract.
    BEXTR,
    BEXTRI,

    // Zero High Bits Starting with Specified Bit Position.
    BZHI,

    // Parallel extract and deposit.
    PDEP,
    PEXT,

    // X86-specific multiply by immediate.
    MUL_IMM,

    // Vector sign bit extraction.
    MOVMSK,

    // Vector bitwise comparisons.
    PTEST,

    // Vector packed fp sign bitwise comparisons.
    TESTP,

    // OR/AND test for masks.
    KORTEST,
    KTEST,

    // ADD for masks.
    KADD,

    // Several flavors of instructions with vector shuffle behaviors.
    // Saturated signed/unsigned packing.
    PACKSS,
    PACKUS,
    // Intra-lane alignr.
    PALIGNR,
    // AVX512 inter-lane alignr.
    VALIGN,
    PSHUFD,
    PSHUFHW,
    PSHUFLW,
    SHUFP,
    // VBMI2 Concat & Shift.
    VSHLD,
    VSHRD,
    VSHLDV,
    VSHRDV,
    // Shuffle Packed Values at 128-bit granularity.
    SHUF128,
    MOVDDUP,
    MOVSHDUP,
    MOVSLDUP,
    MOVLHPS,
    MOVHLPS,
    MOVSD,
    MOVSS,
    UNPCKL,
    UNPCKH,
    VPERMILPV,
    VPERMILPI,
    VPERMI,
    VPERM2X128,

    // Variable Permute (VPERM).
    // Res = VPERMV MaskV, V0
    VPERMV,

    // 3-op Variable Permute (VPERMT2).
    // Res = VPERMV3 V0, MaskV, V1
    VPERMV3,

    // Bitwise ternary logic.
    VPTERNLOG,
    // Fix Up Special Packed Float32/64 values.
    VFIXUPIMM,
    VFIXUPIMM_SAE,
    VFIXUPIMMS,
    VFIXUPIMMS_SAE,
    // Range Restriction Calculation For Packed Pairs of Float32/64 values.
    VRANGE,
    VRANGE_SAE,
    VRANGES,
    VRANGES_SAE,
    // Reduce - Perform Reduction Transformation on scalar/packed FP.
    VREDUCE,
    VREDUCE_SAE,
    VREDUCES,
    VREDUCES_SAE,
    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    VRNDSCALE,
    VRNDSCALE_SAE,
    VRNDSCALES,
    VRNDSCALES_SAE,
    // Tests types of FP values, for packed types.
    VFPCLASS,
    // Tests types of FP values, for scalar types.
    VFPCLASSS,

    // Broadcast (splat) scalar or element 0 of a vector. If the operand is
    // a vector, this node may change the vector length as part of the splat.
    VBROADCAST,
    // Broadcast mask to vector.
    VBROADCASTM,

    /// SSE4A Extraction and Insertion.
    EXTRQI,
    INSERTQI,

    // XOP arithmetic/logical shifts.
    VPSHA,
    VPSHL,
    // XOP signed/unsigned integer comparisons.
    VPCOM,
    VPCOMU,
    // XOP packed permute bytes.
    VPPERM,
    // XOP two source permutation.
    VPERMIL2,

    // Vector multiply packed unsigned doubleword integers.
    PMULUDQ,
    // Vector multiply packed signed doubleword integers.
    PMULDQ,
    // Vector Multiply Packed Unsigned Integers with Round and Scale.
    MULHRS,

    // Multiply and Add Packed Integers.
    VPMADDUBSW,
    VPMADDWD,

    // AVX512IFMA multiply and add.
    // NOTE: These are different from the instruction and perform
    // op0 x op1 + op2.
    VPMADD52L,
    VPMADD52H,

    // VNNI
    VPDPBUSD,
    VPDPBUSDS,
    VPDPWSSD,
    VPDPWSSDS,

    // FMA nodes.
    // We use the target independent ISD::FMA for the non-inverted case.
    FNMADD,
    FMSUB,
    FNMSUB,
    FMADDSUB,
    FMSUBADD,

    // FMA with rounding mode.
    FMADD_RND,
    FNMADD_RND,
    FMSUB_RND,
    FNMSUB_RND,
    FMADDSUB_RND,
    FMSUBADD_RND,

    // Compress and expand.
    COMPRESS,
    EXPAND,

    // Bits shuffle
    VPSHUFBITQMB,

    // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
    SINT_TO_FP_RND,
    UINT_TO_FP_RND,
    SCALAR_SINT_TO_FP,
    SCALAR_UINT_TO_FP,
    SCALAR_SINT_TO_FP_RND,
    SCALAR_UINT_TO_FP_RND,

    // Vector float/double to signed/unsigned integer.
    CVTP2SI,
    CVTP2UI,
    CVTP2SI_RND,
    CVTP2UI_RND,
    // Scalar float/double to signed/unsigned integer.
    CVTS2SI,
    CVTS2UI,
    CVTS2SI_RND,
    CVTS2UI_RND,

    // Vector float/double to signed/unsigned integer with truncation.
    CVTTP2SI,
    CVTTP2UI,
    CVTTP2SI_SAE,
    CVTTP2UI_SAE,
    // Scalar float/double to signed/unsigned integer with truncation.
    CVTTS2SI,
    CVTTS2UI,
    CVTTS2SI_SAE,
    CVTTS2UI_SAE,

    // Vector signed/unsigned integer to float/double.
    CVTSI2P,
    CVTUI2P,

    // Masked versions of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    MCVTP2SI,
    MCVTP2UI,
    MCVTTP2SI,
    MCVTTP2UI,
    MCVTSI2P,
    MCVTUI2P,

    // Vector float to bfloat16.
    // Convert TWO packed single data to one packed BF16 data
    CVTNE2PS2BF16,
    // Convert packed single data to packed BF16 data
    CVTNEPS2BF16,
    // Masked version of above.
    // SRC, PASSTHRU, MASK
    MCVTNEPS2BF16,

    // Dot product of BF16 pairs accumulated into
    // packed single precision.
    DPBF16PS,

    // Save xmm argument registers to the stack, according to %al. An operator
    // is needed so that this can be expanded with control flow.
    VASTART_SAVE_XMM_REGS,

    // Windows's _chkstk call to do stack probing.
    WIN_ALLOCA,

    // For allocating variable amounts of stack space when using
    // segmented stacks. Check if the current stacklet has enough space, and
    // fall back to heap allocation if not.
    SEG_ALLOCA,

    // For allocating stack space when using stack clash protector.
    // Allocation is performed by block, and each block is probed.
    PROBED_ALLOCA,

    // Memory barriers.
    MEMBARRIER,
    MFENCE,

    // Get a random integer and indicate whether it is valid in CF.
    RDRAND,

    // Get a NIST SP800-90B & C compliant random integer and
    // indicate whether it is valid in CF.
    RDSEED,

    // Protection keys
    // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
    // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
    // value for ECX.
    RDPKRU,
    WRPKRU,

    // SSE42 string comparisons.
    // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
    // will emit one or two instructions based on which results are used. If
    // both flags and index/mask are used, this allows us to use a single
    // instruction since we won't have to pick an opcode for flags. Instead we
    // can rely on the DAG to CSE everything and decide at isel.
    PCMPISTR,
    PCMPESTR,

    // Test if in transactional execution.
    XTEST,

    // ERI instructions.
    RSQRT28,
    RSQRT28_SAE,
    RSQRT28S,
    RSQRT28S_SAE,
    RCP28,
    RCP28_SAE,
    RCP28S,
    RCP28S_SAE,
    EXP2,
    EXP2_SAE,

    // Conversions between float and half-float.
    CVTPS2PH,
    CVTPH2PS,
    CVTPH2PS_SAE,

    // Masked version of above.
    // SRC, RND, PASSTHRU, MASK
    MCVTPS2PH,

    // Galois Field Arithmetic Instructions
    GF2P8AFFINEINVQB,
    GF2P8AFFINEQB,
    GF2P8MULB,

    // LWP insert record.
    LWPINS,

    // User level wait
    UMWAIT,
    TPAUSE,

    // Enqueue Stores Instructions
    ENQCMD,
    ENQCMDS,

    // For avx512-vp2intersect
    VP2INTERSECT,

    // User level interrupts - testui
    TESTUI,

    /// X86 strict FP compare instructions.
    STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
    STRICT_FCMPS,

    // Vector packed double/float comparison.
    STRICT_CMPP,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    STRICT_CMPM,

    // Vector float/double to signed/unsigned integer with truncation.
    STRICT_CVTTP2SI,
    STRICT_CVTTP2UI,

    // Vector FP extend.
    STRICT_VFPEXT,

    // Vector FP round.
    STRICT_VFPROUND,

    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    STRICT_VRNDSCALE,

    // Vector signed/unsigned integer to float/double.
    STRICT_CVTSI2P,
    STRICT_CVTUI2P,

    // Strict FMA nodes.
    STRICT_FNMADD,
    STRICT_FMSUB,
    STRICT_FNMSUB,

    // Conversions between float and half-float.
    STRICT_CVTPS2PH,
    STRICT_CVTPH2PS,

    // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
    // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.

    // Compare and swap.
    LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
    LCMPXCHG8_DAG,
    LCMPXCHG16_DAG,
    LCMPXCHG16_SAVE_RBX_DAG,

    /// LOCK-prefixed arithmetic read-modify-write instructions.
    /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
    LADD,
    LSUB,
    LOR,
    LXOR,
    LAND,

    // Load, scalar_to_vector, and zero extend.
    VZEXT_LOAD,

    // extract_vector_elt, store.
    VEXTRACT_STORE,

    // scalar broadcast from memory.
    VBROADCAST_LOAD,

    // subvector broadcast from memory.
    SUBV_BROADCAST_LOAD,

    // Store FP control word into i16 memory.
    FNSTCW16m,

    // Load FP control word from i16 memory.
    FLDCW16m,

    /// This instruction implements FP_TO_SINT with the
    /// integer destination in memory and a FP reg source. This corresponds
    /// to the X86::FIST*m instructions and the rounding mode change stuff. It
    /// has two inputs (token chain and address) and two outputs (int value
    /// and token chain). Memory VT specifies the type to store to.
    FP_TO_INT_IN_MEM,

    /// This instruction implements SINT_TO_FP with the
    /// integer source in memory and FP reg result. This corresponds to the
    /// X86::FILD*m instructions. It has two inputs (token chain and address)
    /// and two outputs (FP value and token chain). The integer source type is
    /// specified by the memory VT.
    FILD,

    /// This instruction implements a fp->int store from FP stack
    /// slots. This corresponds to the fist instruction. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FIST,

    /// This instruction implements an extending load to FP stack slots.
    /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
    /// operand, and ptr to load from. The memory VT specifies the type to
    /// load from.
    FLD,

    /// This instruction implements a truncating store from FP stack
    /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FST,

    /// These instructions grab the address of the next argument
    /// from a va_list. (reads and modifies the va_list in memory)
    VAARG_64,
    VAARG_X32,

    // Vector truncating store with unsigned/signed saturation
    VTRUNCSTOREUS,
    VTRUNCSTORES,
    // Vector truncating masked store with unsigned/signed saturation
    VMTRUNCSTOREUS,
    VMTRUNCSTORES,

    // X86 specific gather and scatter
    MGATHER,
    MSCATTER,

    // Key locker nodes that produce flags.
    AESENC128KL,
    AESDEC128KL,
    AESENC256KL,
    AESDEC256KL,
    AESENCWIDE128KL,
    AESDECWIDE128KL,
    AESENCWIDE256KL,
    AESDECWIDE256KL,

    // WARNING: Do not add anything at the end unless you want the node to
    // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
    // opcodes will be treated as target memory ops!
  };
  } // end namespace X86ISD

  namespace X86 {
  /// Current rounding mode is represented in bits 11:10 of FPSR. These
  /// values are the same as the corresponding constants for rounding mode
  /// used in glibc.
  enum RoundingMode {
    rmToNearest = 0,        // FE_TONEAREST
    rmDownward = 1 << 10,   // FE_DOWNWARD
    rmUpward = 2 << 10,     // FE_UPWARD
    rmTowardZero = 3 << 10, // FE_TOWARDZERO
    rmMask = 3 << 10        // Bit mask selecting rounding mode
  };
  } // end namespace X86
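
  // Illustrative sketch (not part of this interface; CW is a hypothetical
  // control-word value read by the lowering code): the rounding field can be
  // isolated and tested with the constants above, e.g.
  //   unsigned RC = CW & X86::rmMask;
  //   bool RoundsDown = (RC == X86::rmDownward);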

  /// Define some predicates that are used for node matching.
  namespace X86 {
  /// Returns true if Elt is a constant zero or floating point constant +0.0.
  bool isZeroNode(SDValue Elt);

  /// Returns true if the given offset can fit into the displacement field of
  /// the instruction.
  bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                    bool hasSymbolicDisplacement);

  /// Determines whether the callee is required to pop its
  /// own arguments. Callee pop is necessary to support tail calls.
  bool isCalleePop(CallingConv::ID CallingConv,
                   bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

  /// If Op is a constant whose elements are all the same constant or
  /// undefined, return true and return the constant value in \p SplatVal.
  /// If we have undef bits that don't cover an entire element, we treat these
  /// as zero if AllowPartialUndefs is set, else we fail and return false.
  bool isConstantSplat(SDValue Op, APInt &SplatVal,
                       bool AllowPartialUndefs = true);
  } // end namespace X86
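
  // Illustrative sketch (hypothetical call site, not part of this interface):
  // a DAG combine might use X86::isConstantSplat to match a uniform constant
  // operand, e.g.
  //   APInt SplatVal;
  //   if (X86::isConstantSplat(Op, SplatVal) && SplatVal.isAllOnesValue())
  //     ...; // Op behaves as an all-ones vector here.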

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    unsigned getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    EVT getOptimalMemOpType(const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
    /// also does type conversion. Note the specified type doesn't have to be
    /// legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                        MachineMemOperand::Flags Flags,
                                        bool *Fast) const override;

    /// Provide custom lowering hooks for some operations.
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are
    /// longer and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    /// Return the newly negated expression if the cost is not expensive and
    /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
    /// do the negation.
    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                 bool LegalOperations, bool ForCodeSize,
                                 NegatibleCost &Cost,
                                 unsigned Depth) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    /// Do not merge vector stores after legalization because that may conflict
    /// with x86-specific store splitting optimizations.
    bool mergeStoresAfterLegalization(EVT MemVT) const override {
      return !MemVT.isVector();
    }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const SelectionDAG &DAG) const override;

    bool isCheapToSpeculateCttz() const override;

    bool isCheapToSpeculateCtlz() const override;

    bool isCtlzFast() const override;

    bool hasBitPreservingFPLogic(EVT VT) const override {
      return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
    }

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // increase one store instruction. There is potentially a more
      // significant benefit because it avoids the float->int domain switch
      // for the input value. So it is more likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and increase one store instruction (costing one more
      // store buffer). Since the benefit here is less clear, we leave such
      // pairs out until we have a test case proving it is a win.
      return false;
    }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool hasBitTest(SDValue X, SDValue Y) const override;

    bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const override;

    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

    bool
    shouldTransformSignedTruncationCheck(EVT XVT,
                                         unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
      if (XVT.isVector())
        return false;

      auto VTIsOk = [](EVT VT) -> bool {
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
               VT == MVT::i64;
      };

      // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
      // XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
    }

    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

    bool shouldSplatInsEltVarIndex(EVT VT) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                      const APInt &DemandedElts,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 APInt &KnownUndef,
                                                 APInt &KnownZero,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                    const APInt &DemandedElts,
                                                    unsigned MaskIndex,
                                                    TargetLoweringOpt &TLO,
                                                    unsigned Depth) const;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedBits,
                                           const APInt &DemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
        SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
        SelectionDAG &DAG, unsigned Depth) const override;

    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

    SDValue unwrapAddress(SDValue N) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
    getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                   const char *constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid,
    /// don't add anything to Ops. If hasMemory is true it means one of the
    /// asm constraints of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "v")
        return InlineAsm::Constraint_v;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Handle Lowering flag assembly outputs.
    SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                        const SDLoc &DL,
                                        const AsmOperandInfo &Constraint,
                                        SelectionDAG &DAG) const override;

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register. This should only be used for C_Register constraints. On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    bool isLegalStoreImmediate(int64_t Imm) const override;

    /// Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    InstructionCost getScalingFactorCost(const DataLayout &DL,
                                         const AddrMode &AM, Type *Ty,
                                         unsigned AS) const override;

    /// This is used to enable splatted operand transforms for vector shifts
    /// and vector funnel shifts.
    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Add x86-specific opcodes to the default list.
    bool isBinOp(unsigned Opcode) const override;

    /// Returns true if the opcode is a commutative binary operation.
    bool isCommutativeBinOp(unsigned Opcode) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the
    /// result register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    bool shouldSinkOperands(Instruction *I,
                            SmallVectorImpl<Use *> &Ops) const override;
    bool shouldConvertPhiType(Type *From, Type *To) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

    /// Given an intrinsic, checks if on the target the intrinsic will need to
    /// map to a MemIntrinsicNode (touches memory). If this is the case, it
    /// returns true and stores the intrinsic information into the
    /// IntrinsicInfo that was passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if
    /// there is a suitable VECTOR_SHUFFLE that can be used to replace a VAND
    /// with a constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override {
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
      // expensive than a straight movsd. On the other hand, it's important to
      // shrink long double fp constant since fldt is very slow.
      return !X86ScalarSSEf64 || VT == MVT::f80;
    }

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
    }

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override;

    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                SDValue C) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    /// Scalar ops always have equal or better analysis/performance/power than
    /// the vector equivalent, so this always makes sense if the scalar op is
    /// supported.
    bool shouldScalarizeBinop(SDValue) const override;

    /// Extract of a scalar FP value from index 0 of a vector is free.
    bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
      EVT EltVT = VT.getScalarType();
      return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
    }

    /// Overflow nodes should get combined/lowered to optimal instructions
    /// (they should allow eliminating explicit compares by getting flags from
    /// math ops).
    bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                              bool MathUsed) const override;

    bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                      unsigned AddrSpace) const override {
      // If we can replace more than 2 scalar stores, there will be a reduction
      // in instructions even after we add a vector constant load.
      return NumElem > 2;
    }

    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                                 const SelectionDAG &DAG,
                                 const MachineMemOperand &MMO) const override;

    /// Intel processors have a unified instruction and data cache.
    const char *getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }

    Register getRegisterByName(const char *RegName, LLT VT,
                               const MachineFunction &MF) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    Register
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    Register
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    virtual bool needsFixedCatchObjects() const override;

    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location. Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilderBase &IRB) const override;

    bool useLoadStackGuardNode() const override;
    bool useStackGuardXorFP() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Function *getSSPStackGuardCheck(const Module &M) const override;
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                const SDLoc &DL) const override;

    /// Return true if the target stores SafeStack pointer at a fixed offset in
    /// some non-standard address space, and populates the address space and
    /// offset as appropriate.
    Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

    std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
                                          SDValue Chain, SDValue Pointer,
                                          MachinePointerInfo PtrInfo,
                                          Align Alignment,
                                          SelectionDAG &DAG) const;

    /// Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

    bool softPromoteHalfType() const override { return true; }

    MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                      EVT VT) const override;

    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                           CallingConv::ID CC,
                                           EVT VT) const override;

    unsigned getVectorTypeBreakdownForCallingConv(
        LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
        unsigned &NumIntermediates, MVT &RegisterVT) const override;

    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    bool supportSwiftError() const override;

    bool hasStackProbeSymbol(MachineFunction &MF) const override;
    bool hasInlineStackProbe(MachineFunction &MF) const override;
    StringRef getStackProbeSymbolName(MachineFunction &MF) const override;

    unsigned getStackProbeSize(MachineFunction &MF) const;

    bool hasVectorBlend() const override { return true; }

    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

    /// Lower interleaved load(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;

    /// Lower interleaved store(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                   SDValue Addr,
                                   SelectionDAG &DAG) const override;

    Align getPrefLoopAlignment(MachineLoop *ML) const override;

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget &Subtarget;

    /// Select between SSE or x87 floating point ops.
    /// When SSE is available, use it for f32 operations.
    /// When SSE2 is available, use it for f64 operations.
    bool X86ScalarSSEf32;
    bool X86ScalarSSEf64;

    /// A list of legal FP immediates.
    std::vector<APFloat> LegalFPImmediates;

    /// Indicate that this x86 target can instruction
    /// select the specified FP immediate natively.
    void addLegalFPImmediate(const APFloat &Imm) {
      LegalFPImmediates.push_back(Imm);
    }

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags, bool isByval) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(
        SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
        bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
        const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<SDValue> &OutVals,
        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace() const;

    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                            SDValue &Chain) const;
    SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                  const unsigned char OpFlags = 0) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                  bool ForCall) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
    bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility and should never be directly created.
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getScale() const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };
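
  // Illustrative sketch (hypothetical combine code, not part of this header):
  // these classes participate in SelectionDAG's usual RTTI, so target code can
  // match them via their classof methods, e.g.
  //   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N)) {
  //     SDValue Mask = Gather->getMask();
  //     SDValue PassThru = Gather->getPassThru();
  //     // ... fold using the gather's operands ...
  //   }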

  /// Generate unpacklo/unpackhi shuffle mask.
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H