//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
class X86Subtarget;
class X86TargetMachine;

namespace X86ISD {
// X86 Specific DAG Nodes
enum NodeType : unsigned {
  // Start the numbering where the builtin ops leave off.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  /// Bit scan forward.
  BSF,
  /// Bit scan reverse.
  BSR,

  /// X86 funnel/double shift i16 instructions. These correspond to
  /// X86::SHLDW and X86::SHRDW instructions, which have different
  /// amount-modulo rules than generic funnel shifts.
  /// NOTE: The operand order matches ISD::FSHL/FSHR, not SHLD/SHRD.
  FSHL,
  FSHR,

  /// Bitwise logical AND of floating point values. This corresponds
  /// to X86::ANDPS or X86::ANDPD.
  FAND,

  /// Bitwise logical OR of floating point values. This corresponds
  /// to X86::ORPS or X86::ORPD.
  FOR,

  /// Bitwise logical XOR of floating point values. This corresponds
  /// to X86::XORPS or X86::XORPD.
  FXOR,

  /// Bitwise logical ANDNOT of floating point values. This
  /// corresponds to X86::ANDNPS or X86::ANDNPD.
  FANDN,

  /// These operations represent an abstract X86 call
  /// instruction, which includes a bunch of information. In particular the
  /// operands of these nodes are:
  ///
  ///     #0 - The incoming token chain
  ///     #1 - The callee
  ///     #2 - The number of arg bytes the caller pushes on the stack.
  ///     #3 - The number of arg bytes the callee pops off the stack.
  ///     #4 - The value to pass in AL/AX/EAX (optional)
  ///     #5 - The value to pass in DL/DX/EDX (optional)
  ///
  /// The result values of these nodes are:
  ///
  ///     #0 - The outgoing token chain
  ///     #1 - The first register result value (optional)
  ///     #2 - The second register result value (optional)
  ///
  CALL,

  /// Same as CALL except it adds the NoTrack prefix.
  NT_CALL,

  // Pseudo for an Objective-C call that gets emitted together with a special
  // marker instruction.
  CALL_RVMARKER,

  /// The same as ISD::CopyFromReg except that this node makes it explicit
  /// that it may lower to an x87 FPU stack pop. Optimizations should be more
  /// cautious when handling this node than a normal CopyFromReg to avoid
  /// removing a required FPU stack pop. A key requirement is that
  /// optimizations should not optimize any users of a chain that contains a
  /// POP_FROM_X87_REG to use a chain from a point earlier than the
  /// POP_FROM_X87_REG (which may remove a required FPU stack pop).
  POP_FROM_X87_REG,

  // Pseudo for a call to an imported function to ensure the correct machine
  // instruction is emitted for Import Call Optimization.
  IMP_CALL,
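  // Illustration only (a hedged sketch, not an interface defined by this
  // header): given the operand list documented on CALL above, a lowering
  // routine might build such a node roughly as
  //   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  //   SDValue Ops[] = {Chain, Callee, ...};
  //   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
  // where Chain, Callee and dl come from the surrounding lowering code.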
  /// X86 compare and logical compare instructions.
  CMP,
  FCMP,
  COMI,
  UCOMI,

  // X86 compare with Intrinsics similar to COMI.
  COMX,
  UCOMX,

  /// X86 bit-test instructions.
  BT,

  /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  /// operand, usually produced by a CMP instruction.
  SETCC,

  /// X86 Select
  SELECTS,

  // Same as SETCC except it's materialized with an sbb and the value is all
  // ones or all zeros.
  SETCC_CARRY, // R = carry_bit ? ~0 : 0

  /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  /// Operands are two FP values to compare; result is a mask of
  /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
  FSETCC,

  /// X86 FP SETCC, similar to above, but with output as an i1 mask, and
  /// a version with SAE.
  FSETCCM,
  FSETCCM_SAE,

  /// X86 conditional moves. Operand 0 and operand 1 are the two values
  /// to select from. Operand 2 is the condition code, and operand 3 is the
  /// flag operand produced by a CMP or TEST instruction.
  CMOV,

  /// X86 conditional branches. Operand 0 is the chain operand, operand 1
  /// is the block to branch to if the condition is true, operand 2 is the
  /// condition code, and operand 3 is the flag operand produced by a CMP
  /// or TEST instruction.
  BRCOND,

  /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
  /// operand 1 is the target address.
  NT_BRIND,

  /// Return with a glue operand. Operand 0 is the chain operand, operand
  /// 1 is the number of bytes of stack to pop.
  RET_GLUE,

  /// Return from interrupt. Operand 0 is the number of bytes to pop.
  IRET,

  /// Repeat fill, corresponds to X86::REP_STOSx.
  REP_STOS,

  /// Repeat move, corresponds to X86::REP_MOVSx.
  REP_MOVS,

  /// On Darwin, this node represents the result of the popl
  /// at function entry, used for PIC code.
  GlobalBaseReg,

  /// A wrapper node for TargetConstantPool, TargetJumpTable,
  /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
  /// MCSymbol and TargetBlockAddress.
  Wrapper,

  /// Special wrapper used under X86-64 PIC mode for RIP
  /// relative displacements.
  WrapperRIP,

  /// Copies a 64-bit value from an MMX vector to the low word
  /// of an XMM vector, with the high word zero filled.
  MOVQ2DQ,

  /// Copies a 64-bit value from the low word of an XMM vector
  /// to an MMX vector.
  MOVDQ2Q,

  /// Copies a 32-bit value from the low word of an MMX
  /// vector to a GPR.
  MMX_MOVD2W,

  /// Copies a GPR into the low 32-bit word of an MMX vector
  /// and zeroes out the high word.
  MMX_MOVW2D,

  /// Extract an 8-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRB.
  PEXTRB,

  /// Extract a 16-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRW.
  PEXTRW,

  /// Insert any element of a 4 x float vector into any element
  /// of a destination 4 x float vector.
  INSERTPS,

  /// Insert the lower 8 bits of a 32-bit value into a vector,
  /// corresponds to X86::PINSRB.
  PINSRB,

  /// Insert the lower 16 bits of a 32-bit value into a vector,
  /// corresponds to X86::PINSRW.
  PINSRW,

  /// Shuffle 16 8-bit values within a vector.
  PSHUFB,
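  // Illustration only (a hedged sketch, not an interface defined by this
  // header): following the operand order documented on CMOV above, a
  // combine might create a conditional move roughly as
  //   SDValue CC = DAG.getTargetConstant(X86::COND_E, dl, MVT::i8);
  //   SDValue Cmov = DAG.getNode(X86ISD::CMOV, dl, VT,
  //                              FalseVal, TrueVal, CC, EFLAGS);
  // where FalseVal, TrueVal and EFLAGS are assumptions supplied by the
  // surrounding code.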
  /// Compute Sum of Absolute Differences.
  PSADBW,
  /// Compute Double Block Packed Sum-Absolute-Differences.
  DBPSADBW,

  /// Bitwise Logical AND NOT of Packed FP values.
  ANDNP,

  /// Blend where the selector is an immediate.
  BLENDI,

  /// Dynamic (non-constant condition) vector blend where only the sign bits
  /// of the condition elements are used. This is used to enforce that the
  /// condition mask is not valid for generic VSELECT optimizations. This
  /// is also used to implement the intrinsics.
  /// Operands are in VSELECT order: MASK, TRUE, FALSE
  BLENDV,

  /// Combined add and sub on an FP vector.
  ADDSUB,

  // FP vector ops with rounding mode.
  FADD_RND,
  FADDS,
  FADDS_RND,
  FSUB_RND,
  FSUBS,
  FSUBS_RND,
  FMUL_RND,
  FMULS,
  FMULS_RND,
  FDIV_RND,
  FDIVS,
  FDIVS_RND,
  FMAX_SAE,
  FMAXS_SAE,
  FMIN_SAE,
  FMINS_SAE,
  FSQRT_RND,
  FSQRTS,
  FSQRTS_RND,

  // FP vector get exponent.
  FGETEXP,
  FGETEXP_SAE,
  FGETEXPS,
  FGETEXPS_SAE,
  // Extract Normalized Mantissas.
  VGETMANT,
  VGETMANT_SAE,
  VGETMANTS,
  VGETMANTS_SAE,
  // FP Scale.
  SCALEF,
  SCALEF_RND,
  SCALEFS,
  SCALEFS_RND,

  /// Integer horizontal add/sub.
  HADD,
  HSUB,

  /// Floating point horizontal add/sub.
  FHADD,
  FHSUB,

  // Detect Conflicts Within a Vector.
  CONFLICT,

  /// Floating point max and min.
  FMAX,
  FMIN,

  /// Commutative FMIN and FMAX.
  FMAXC,
  FMINC,

  /// Scalar intrinsic floating point max and min.
  FMAXS,
  FMINS,

  /// Floating point reciprocal-sqrt and reciprocal approximation.
  /// Note that these typically require refinement
  /// in order to obtain suitable precision.
  FRSQRT,
  FRCP,

  // AVX-512 reciprocal approximations with a little more precision.
  RSQRT14,
  RSQRT14S,
  RCP14,
  RCP14S,

  // Thread Local Storage.
  TLSADDR,

  // Thread Local Storage. A call to get the start address
  // of the TLS block for the current module.
  TLSBASEADDR,

  // Thread Local Storage. A call to an OS-provided thunk at the address
  // from an earlier relocation.
  TLSCALL,

  // Thread Local Storage. A descriptor containing a pointer to
  // code and to an argument to get the TLS offset for the symbol.
  TLSDESC,

  // Exception Handling helpers.
  EH_RETURN,

  // SjLj exception handling setjmp.
  EH_SJLJ_SETJMP,

  // SjLj exception handling longjmp.
  EH_SJLJ_LONGJMP,

  // SjLj exception handling dispatch.
  EH_SJLJ_SETUP_DISPATCH,

  /// Tail call return. See X86TargetLowering::LowerCall for
  /// the list of operands.
  TC_RETURN,

  // Vector move to low scalar and zero higher vector elements.
  VZEXT_MOVL,

  // Vector integer truncate.
  VTRUNC,
  // Vector integer truncate with unsigned/signed saturation.
  VTRUNCUS,
  VTRUNCS,

  // Masked version of the above. Used when less than a 128-bit result is
  // produced since the mask only applies to the lower elements and can't
  // be represented by a select.
  // SRC, PASSTHRU, MASK
  VMTRUNC,
  VMTRUNCUS,
  VMTRUNCS,

  // Vector FP extend.
  VFPEXT,
  VFPEXT_SAE,
  VFPEXTS,
  VFPEXTS_SAE,

  // Vector FP round.
  VFPROUND,
  // Convert two packed single-precision vectors to one packed vector.
  VFPROUND2,
  VFPROUND2_RND,
  VFPROUND_RND,
  VFPROUNDS,
  VFPROUNDS_RND,

  // Masked version of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  VMFPROUND,

  // 128-bit vector logical left / right shift.
  VSHLDQ,
  VSRLDQ,

  // Vector shift elements.
  VSHL,
  VSRL,
  VSRA,

  // Vector variable shift.
  VSHLV,
  VSRLV,
  VSRAV,

  // Vector shift elements by immediate.
  VSHLI,
  VSRLI,
  VSRAI,

  // Shifts of mask registers.
  KSHIFTL,
  KSHIFTR,

  // Bit rotate by immediate.
  VROTLI,
  VROTRI,

  // Vector packed double/float comparison.
  CMPP,

  // Vector integer comparisons.
  PCMPEQ,
  PCMPGT,

  // v8i16 Horizontal minimum and position.
  PHMINPOS,

  MULTISHIFT,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  CMPM,
  // Vector mask comparison generating mask bits for FP values.
  CMPMM,
  // Vector mask comparison with SAE for FP values.
  CMPMM_SAE,

  // Arithmetic operations with FLAGS results.
  ADD,
  SUB,
  ADC,
  SBB,
  SMUL,
  UMUL,
  OR,
  XOR,
  AND,

  // Bit field extract.
  BEXTR,
  BEXTRI,

  // Zero High Bits Starting with Specified Bit Position.
  BZHI,

  // Parallel extract and deposit.
  PDEP,
  PEXT,

  // X86-specific multiply by immediate.
  MUL_IMM,

  // Vector sign bit extraction.
  MOVMSK,

  // Vector bitwise comparisons.
  PTEST,

  // Vector packed fp sign bitwise comparisons.
  TESTP,

  // OR/AND test for masks.
  KORTEST,
  KTEST,

  // ADD for masks.
  KADD,

  // Several flavors of instructions with vector shuffle behaviors.
  // Saturated signed/unsigned packing.
  PACKSS,
  PACKUS,
  // Intra-lane alignr.
  PALIGNR,
  // AVX512 inter-lane alignr.
  VALIGN,
  PSHUFD,
  PSHUFHW,
  PSHUFLW,
  SHUFP,
  // VBMI2 Concat & Shift.
  VSHLD,
  VSHRD,
  VSHLDV,
  VSHRDV,
  // Shuffle Packed Values at 128-bit granularity.
  SHUF128,
  MOVDDUP,
  MOVSHDUP,
  MOVSLDUP,
  MOVLHPS,
  MOVHLPS,
  MOVSD,
  MOVSS,
  MOVSH,
  UNPCKL,
  UNPCKH,
  VPERMILPV,
  VPERMILPI,
  VPERMI,
  VPERM2X128,

  // Variable Permute (VPERM).
  // Res = VPERMV MaskV, V0
  VPERMV,

  // 3-op Variable Permute (VPERMT2).
  // Res = VPERMV3 V0, MaskV, V1
  VPERMV3,

  // Bitwise ternary logic.
  VPTERNLOG,
  // Fix Up Special Packed Float32/64 values.
  VFIXUPIMM,
  VFIXUPIMM_SAE,
  VFIXUPIMMS,
  VFIXUPIMMS_SAE,
  // Range Restriction Calculation For Packed Pairs of Float32/64 values.
  VRANGE,
  VRANGE_SAE,
  VRANGES,
  VRANGES_SAE,
  // Reduce - Perform Reduction Transformation on scalar/packed FP.
  VREDUCE,
  VREDUCE_SAE,
  VREDUCES,
  VREDUCES_SAE,
  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  VRNDSCALE,
  VRNDSCALE_SAE,
  VRNDSCALES,
  VRNDSCALES_SAE,
  // Tests types of packed FP values.
  VFPCLASS,
  // Tests types of scalar FP values.
  VFPCLASSS,

  // Broadcast (splat) scalar or element 0 of a vector. If the operand is
  // a vector, this node may change the vector length as part of the splat.
  VBROADCAST,
  // Broadcast mask to vector.
  VBROADCASTM,

  /// SSE4A Extraction and Insertion.
  EXTRQI,
  INSERTQI,

  // XOP arithmetic/logical shifts.
  VPSHA,
  VPSHL,
  // XOP signed/unsigned integer comparisons.
  VPCOM,
  VPCOMU,
  // XOP packed permute bytes.
  VPPERM,
  // XOP two source permutation.
  VPERMIL2,

  // Vector multiply packed unsigned doubleword integers.
  PMULUDQ,
  // Vector multiply packed signed doubleword integers.
  PMULDQ,
  // Vector multiply packed word integers with round and scale (PMULHRSW).
  MULHRS,

  // Multiply and Add Packed Integers.
  VPMADDUBSW,
  VPMADDWD,

  // AVX512IFMA multiply and add.
  // NOTE: These are different from the instruction and perform
  // op0 x op1 + op2.
  VPMADD52L,
  VPMADD52H,

  // VNNI
  VPDPBUSD,
  VPDPBUSDS,
  VPDPWSSD,
  VPDPWSSDS,

  // FMA nodes.
  // We use the target independent ISD::FMA for the non-inverted case.
  FNMADD,
  FMSUB,
  FNMSUB,
  FMADDSUB,
  FMSUBADD,

  // FMA with rounding mode.
  FMADD_RND,
  FNMADD_RND,
  FMSUB_RND,
  FNMSUB_RND,
  FMADDSUB_RND,
  FMSUBADD_RND,

  // AVX512-FP16 complex addition and multiplication.
  VFMADDC,
  VFMADDC_RND,
  VFCMADDC,
  VFCMADDC_RND,

  VFMULC,
  VFMULC_RND,
  VFCMULC,
  VFCMULC_RND,

  VFMADDCSH,
  VFMADDCSH_RND,
  VFCMADDCSH,
  VFCMADDCSH_RND,

  VFMULCSH,
  VFMULCSH_RND,
  VFCMULCSH,
  VFCMULCSH_RND,

  VPDPBSUD,
  VPDPBSUDS,
  VPDPBUUD,
  VPDPBUUDS,
  VPDPBSSD,
  VPDPBSSDS,

  VPDPWSUD,
  VPDPWSUDS,
  VPDPWUSD,
  VPDPWUSDS,
  VPDPWUUD,
  VPDPWUUDS,

  VMINMAX,
  VMINMAX_SAE,
  VMINMAXS,
  VMINMAXS_SAE,

  CVTP2IBS,
  CVTP2IUBS,
  CVTP2IBS_RND,
  CVTP2IUBS_RND,
  CVTTP2IBS,
  CVTTP2IUBS,
  CVTTP2IBS_SAE,
  CVTTP2IUBS_SAE,

  MPSADBW,

  VCVT2PH2BF8,
  VCVT2PH2BF8S,
  VCVT2PH2HF8,
  VCVT2PH2HF8S,
  VCVTBIASPH2BF8,
  VCVTBIASPH2BF8S,
  VCVTBIASPH2HF8,
  VCVTBIASPH2HF8S,
  VCVTPH2BF8,
  VCVTPH2BF8S,
  VCVTPH2HF8,
  VCVTPH2HF8S,
  VMCVTBIASPH2BF8,
  VMCVTBIASPH2BF8S,
  VMCVTBIASPH2HF8,
  VMCVTBIASPH2HF8S,
  VMCVTPH2BF8,
  VMCVTPH2BF8S,
  VMCVTPH2HF8,
  VMCVTPH2HF8S,
  VCVTHF82PH,

  // Compress and expand.
  COMPRESS,
  EXPAND,

  // Bits shuffle.
  VPSHUFBITQMB,

  // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
  SINT_TO_FP_RND,
  UINT_TO_FP_RND,
  SCALAR_SINT_TO_FP,
  SCALAR_UINT_TO_FP,
  SCALAR_SINT_TO_FP_RND,
  SCALAR_UINT_TO_FP_RND,

  // Vector float/double to signed/unsigned integer.
  CVTP2SI,
  CVTP2UI,
  CVTP2SI_RND,
  CVTP2UI_RND,
  // Scalar float/double to signed/unsigned integer.
  CVTS2SI,
  CVTS2UI,
  CVTS2SI_RND,
  CVTS2UI_RND,

  // Vector float/double to signed/unsigned integer with truncation.
  CVTTP2SI,
  CVTTP2UI,
  CVTTP2SI_SAE,
  CVTTP2UI_SAE,

  // Saturation enabled Vector float/double to signed/unsigned
  // integer with truncation.
  CVTTP2SIS,
  CVTTP2UIS,
  CVTTP2SIS_SAE,
  CVTTP2UIS_SAE,
  // Masked versions of above. Used for v2f64 to v4i32.
  // SRC, PASSTHRU, MASK
  MCVTTP2SIS,
  MCVTTP2UIS,

  // Scalar float/double to signed/unsigned integer with truncation.
  CVTTS2SI,
  CVTTS2UI,
  CVTTS2SI_SAE,
  CVTTS2UI_SAE,

  // Vector signed/unsigned integer to float/double.
  CVTSI2P,
  CVTUI2P,

  // Scalar float/double to signed/unsigned integer with saturation.
  CVTTS2SIS,
  CVTTS2UIS,
  CVTTS2SIS_SAE,
  CVTTS2UIS_SAE,

  // Masked versions of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  MCVTP2SI,
  MCVTP2UI,
  MCVTTP2SI,
  MCVTTP2UI,
  MCVTSI2P,
  MCVTUI2P,

  // Custom handling for FP_TO_xINT_SAT.
  FP_TO_SINT_SAT,
  FP_TO_UINT_SAT,

  // Vector float to bfloat16.
  // Convert packed single data to packed BF16 data.
  CVTNEPS2BF16,
  // Masked version of above.
  // SRC, PASSTHRU, MASK
  MCVTNEPS2BF16,

  // Dot product of BF16/FP16 pairs, accumulated into
  // packed single precision.
  DPBF16PS,
  DPFP16PS,

  // A stack checking function call. On Windows it's the _chkstk call.
  DYN_ALLOCA,

  // For allocating variable amounts of stack space when using
  // segmented stacks. Checks if the current stacklet has enough space, and
  // falls back to heap allocation if not.
  SEG_ALLOCA,

  // For allocating stack space when using stack clash protector.
  // Allocation is performed by block, and each block is probed.
  PROBED_ALLOCA,

  // Memory barriers.
  MFENCE,

  // Get a random integer and indicate whether it is valid in CF.
  RDRAND,

  // Get a NIST SP800-90B & C compliant random integer and
  // indicate whether it is valid in CF.
  RDSEED,

  // Protection keys
  // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
  // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
  // value for ECX.
  RDPKRU,
  WRPKRU,

  // SSE42 string comparisons.
  // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
  // will emit one or two instructions based on which results are used. If
  // both flags and index/mask are used, this allows us to use a single
  // instruction since we won't have to pick an opcode for flags. Instead we
  // can rely on the DAG to CSE everything and decide at isel.
  PCMPISTR,
  PCMPESTR,

  // Test if in transactional execution.
  XTEST,

  // Conversions between float and half-float.
  CVTPS2PH,
  CVTPS2PH_SAE,
  CVTPH2PS,
  CVTPH2PS_SAE,

  // Masked version of above.
  // SRC, RND, PASSTHRU, MASK
  MCVTPS2PH,
  MCVTPS2PH_SAE,

  // Galois Field Arithmetic Instructions.
  GF2P8AFFINEINVQB,
  GF2P8AFFINEQB,
  GF2P8MULB,

  // LWP insert record.
  LWPINS,

  // User level wait.
  UMWAIT,
  TPAUSE,

  // Enqueue Stores Instructions.
  ENQCMD,
  ENQCMDS,

  // For avx512-vp2intersect.
  VP2INTERSECT,

  // User level interrupts - testui.
  TESTUI,

  // Perform an FP80 add after changing precision control in FPCW.
  FP80_ADD,

  // Conditional compare instructions.
  CCMP,
  CTEST,

  /// X86 strict FP compare instructions.
  FIRST_STRICTFP_OPCODE,
  STRICT_FCMP = FIRST_STRICTFP_OPCODE,
  STRICT_FCMPS,

  // Vector packed double/float comparison.
  STRICT_CMPP,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  STRICT_CMPM,

  // Vector float/double to signed/unsigned integer with truncation.
  STRICT_CVTTP2SI,
  STRICT_CVTTP2UI,

  // Vector FP extend.
  STRICT_VFPEXT,

  // Vector FP round.
  STRICT_VFPROUND,

  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  STRICT_VRNDSCALE,

  // Vector signed/unsigned integer to float/double.
  STRICT_CVTSI2P,
  STRICT_CVTUI2P,

  // Strict FMA nodes.
  STRICT_FNMADD,
  STRICT_FMSUB,
  STRICT_FNMSUB,

  // Conversions between float and half-float.
  STRICT_CVTPS2PH,
  STRICT_CVTPH2PS,

  // Perform an FP80 add after changing precision control in FPCW.
  STRICT_FP80_ADD,

  /// Floating point max and min.
  STRICT_FMAX,
  STRICT_FMIN,
  LAST_STRICTFP_OPCODE = STRICT_FMIN,

  // Compare and swap.
  FIRST_MEMORY_OPCODE,
  LCMPXCHG_DAG = FIRST_MEMORY_OPCODE,
  LCMPXCHG8_DAG,
  LCMPXCHG16_DAG,
  LCMPXCHG16_SAVE_RBX_DAG,

  /// LOCK-prefixed arithmetic read-modify-write instructions.
  /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
  LADD,
  LSUB,
  LOR,
  LXOR,
  LAND,
  LBTS,
  LBTC,
  LBTR,
  LBTS_RM,
  LBTC_RM,
  LBTR_RM,

  /// RAO arithmetic instructions.
  /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
  AADD,
  AOR,
  AXOR,
  AAND,

  // Load, scalar_to_vector, and zero extend.
  VZEXT_LOAD,

  // extract_vector_elt, store.
  VEXTRACT_STORE,

  // Scalar broadcast from memory.
  VBROADCAST_LOAD,

  // Subvector broadcast from memory.
  SUBV_BROADCAST_LOAD,

  // Store FP control word into i16 memory.
  FNSTCW16m,

  // Load FP control word from i16 memory.
  FLDCW16m,

  // Store x87 FPU environment into memory.
  FNSTENVm,

  // Load x87 FPU environment from memory.
  FLDENVm,

  /// This instruction implements FP_TO_SINT with the
  /// integer destination in memory and a FP reg source. This corresponds
  /// to the X86::FIST*m instructions and the rounding mode change stuff. It
  /// has two inputs (token chain and address) and two outputs (int value
  /// and token chain). Memory VT specifies the type to store to.
  FP_TO_INT_IN_MEM,

  /// This instruction implements SINT_TO_FP with the
  /// integer source in memory and FP reg result. This corresponds to the
  /// X86::FILD*m instructions. It has two inputs (token chain and address)
  /// and two outputs (FP value and token chain). The integer source type is
  /// specified by the memory VT.
  FILD,

  /// This instruction implements a fp->int store from FP stack
  /// slots. This corresponds to the fist instruction. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FIST,

  /// This instruction implements an extending load to FP stack slots.
  /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
  /// operand and a pointer to load from. The memory VT specifies the type
  /// to load from.
  FLD,

  /// This instruction implements a truncating store from FP stack
  /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FST,

  /// These instructions grab the address of the next argument
  /// from a va_list. (reads and modifies the va_list in memory)
  VAARG_64,
  VAARG_X32,

  // Vector truncating store with unsigned/signed saturation.
  VTRUNCSTOREUS,
  VTRUNCSTORES,
  // Vector truncating masked store with unsigned/signed saturation.
  VMTRUNCSTOREUS,
  VMTRUNCSTORES,

  // X86 specific gather and scatter.
  MGATHER,
  MSCATTER,
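  // Illustration only (an assumption, not a helper defined in this header):
  // the FIRST_/LAST_STRICTFP_OPCODE and FIRST_/LAST_MEMORY_OPCODE markers
  // exist so clients can classify opcodes with simple range checks, e.g.
  //   bool isX86StrictFPOpcode(unsigned Opc) {
  //     return Opc >= X86ISD::FIRST_STRICTFP_OPCODE &&
  //            Opc <= X86ISD::LAST_STRICTFP_OPCODE;
  //   }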
  // Key locker nodes that produce flags.
  AESENC128KL,
  AESDEC128KL,
  AESENC256KL,
  AESDEC256KL,
  AESENCWIDE128KL,
  AESDECWIDE128KL,
  AESENCWIDE256KL,
  AESDECWIDE256KL,

  /// Compare and Add if Condition is Met. Compares the value in operand 2
  /// with the value in memory at operand 1. If the condition in operand 4
  /// is met, adds the value of operand 3 to m32 and writes the new value
  /// back to the memory at operand 1. Operand 2 is always updated with the
  /// original value loaded from operand 1.
  CMPCCXADD,

  // Save xmm argument registers to the stack, according to %al. An operator
  // is needed so that this can be expanded with control flow.
  VASTART_SAVE_XMM_REGS,

  // Conditional load/store instructions.
  CLOAD,
  CSTORE,
  LAST_MEMORY_OPCODE = CSTORE,
};
} // end namespace X86ISD

namespace X86 {
/// The current rounding mode is represented in bits 11:10 of the x87 FP
/// control word (FPCW). These values are the same as the corresponding
/// constants for rounding modes used in glibc.
enum RoundingMode {
  rmToNearest = 0,        // FE_TONEAREST
  rmDownward = 1 << 10,   // FE_DOWNWARD
  rmUpward = 2 << 10,     // FE_UPWARD
  rmTowardZero = 3 << 10, // FE_TOWARDZERO
  rmMask = 3 << 10        // Bit mask selecting rounding mode
};
}

/// Define some predicates that are used for node matching.
namespace X86 {
/// Returns true if Elt is a constant zero or floating point constant +0.0.
bool isZeroNode(SDValue Elt);

/// Returns true if the given offset fits into the displacement field of
/// the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                  bool hasSymbolicDisplacement);

/// Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool isCalleePop(CallingConv::ID CallingConv,
                 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

/// If Op is a constant whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatVal.
/// If we have undef bits that don't cover an entire element, we treat these
/// as zero if AllowPartialUndefs is set, else we fail and return false.
bool isConstantSplat(SDValue Op, APInt &SplatVal,
                     bool AllowPartialUndefs = true);

/// Check if Op is a load operation that could be folded into some other x86
/// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                 bool AssumeSingleUse = false);

/// Check if Op is a load operation that could be folded into a vector splat
/// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                     const X86Subtarget &Subtarget,
                                     bool AssumeSingleUse = false);

/// Check if Op is a value that could be used to fold a store into some
/// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
bool mayFoldIntoStore(SDValue Op);

/// Check if Op is an operation that could be folded into a zero extend x86
/// instruction.
bool mayFoldIntoZeroExtend(SDValue Op);
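// Illustrative use of isConstantSplat above (a hedged sketch; Op and the
// surrounding combine are assumptions, not code from this header):
//   APInt SplatVal;
//   if (X86::isConstantSplat(Op, SplatVal) && SplatVal.isSignMask()) {
//     // Op splats the per-element sign-bit mask; fold accordingly.
//   }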
/// True if the target supports the extended frame for async Swift
/// functions.
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
                                        const MachineFunction &MF);
} // end namespace X86

//===--------------------------------------------------------------------===//
//  X86 Implementation of the TargetLowering interface
class X86TargetLowering final : public TargetLowering {
public:
  explicit X86TargetLowering(const X86TargetMachine &TM,
                             const X86Subtarget &STI);

  unsigned getJumpTableEncoding() const override;
  bool useSoftFloat() const override;

  void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                             ArgListTy &Args) const override;

  MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
    return MVT::i8;
  }

  const MCExpr *
  LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                            const MachineBasicBlock *MBB, unsigned uid,
                            MCContext &Ctx) const override;

  /// Returns relocation base for the given PIC jumptable.
  SDValue getPICJumpTableRelocBase(SDValue Table,
                                   SelectionDAG &DAG) const override;
  const MCExpr *
  getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                               unsigned JTI, MCContext &Ctx) const override;

  /// Return the desired alignment for ByVal aggregate
  /// function arguments in the caller parameter area. For X86, aggregates
  /// that contain SSE vectors are placed at 16-byte boundaries while the
  /// rest are at 4-byte boundaries.
  Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;

  EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  /// Returns true if it's safe to use load / store of the
  /// specified type to expand memcpy / memset inline. This is mostly true
  /// for all types except for some special cases. For example, on X86
  /// targets without SSE2, f64 load / store are done with fldl / fstpl,
  /// which also perform a type conversion. Note the specified type doesn't
  /// have to be legal as the hook is used before type legalization.
  bool isSafeMemOpType(MVT VT) const override;

  bool isMemoryAccessFast(EVT VT, Align Alignment) const;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type. Returns whether it is "fast" in the last argument.
  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast) const override;
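  // Illustrative query (a hedged sketch; TLI and the chosen VT are
  // assumptions, not part of this header): check whether an unaligned
  // v8f32 access in address space 0 is allowed and considered fast:
  //   unsigned Fast = 0;
  //   if (TLI.allowsMisalignedMemoryAccesses(MVT::v8f32, /*AS=*/0, Align(1),
  //                                          MachineMemOperand::MONone,
  //                                          &Fast) &&
  //       Fast) {
  //     // Emit a single unaligned vector load/store.
  //   }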
  /// This function returns true if the memory access is aligned or if the
  /// target allows this specific unaligned memory access. If the access is
  /// allowed, the optional final parameter returns a relative speed of the
  /// access (as defined by the target).
  bool allowsMemoryAccess(
      LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
      Align Alignment,
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;

  bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
                          const MachineMemOperand &MMO,
                          unsigned *Fast) const {
    return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
                              MMO.getAlign(), MMO.getFlags(), Fast);
  }

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  /// Replace the results of a node with an illegal result
  /// type with new values built out of custom code.
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  bool preferABDSToABSWithNSW(EVT VT) const override;

  bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
                                 EVT ExtVT) const override;

  bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
                                         EVT VT) const override;

  /// Return true if the target has native support for
  /// the specified value type and it is 'desirable' to use the type for the
  /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
  /// instruction encodings are longer and some i16 instructions are slow.
  bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

  /// Return true if the target has native support for the
  /// specified value type and it is 'desirable' to use the type. e.g. On x86
  /// i16 is legal, but undesirable since i16 instruction encodings are longer
  /// and some i16 instructions are slow.
  bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

  /// Return the preferred fold type: Abs if this is a vector, AddAnd if
  /// it's an integer, None otherwise.
  TargetLowering::AndOrSETCCFoldKind
  isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
                                     const SDNode *SETCC0,
                                     const SDNode *SETCC1) const override;

  /// Return the newly negated expression if the cost is not expensive and
  /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
  /// do the negation.
  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                               bool LegalOperations, bool ForCodeSize,
                               NegatibleCost &Cost,
                               unsigned Depth) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  /// This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  /// Do not merge vector stores after legalization because that may conflict
  /// with x86-specific store splitting optimizations.
  bool mergeStoresAfterLegalization(EVT MemVT) const override {
    return !MemVT.isVector();
  }

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override;

  bool isCheapToSpeculateCttz(Type *Ty) const override;

  bool isCheapToSpeculateCtlz(Type *Ty) const override;

  bool isCtlzFast() const override;

  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // increase one store instruction. There is potentially a more
    // significant benefit because it avoids the float->int domain switch
    // for the input value, so it is more likely a win.
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;
    // If the pair only contains int values, we will save two bitwise
    // instructions and increase one store instruction (costing one more
    // store buffer). Since the benefit is less clear, we leave such pairs
    // out until we have a test case proving it is a win.
    return false;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue Y) const override;

  bool hasAndNot(SDValue Y) const override;

  bool hasBitTest(SDValue X, SDValue Y) const override;

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  unsigned preferedOpcodeForCmpEqPiecesOfOperand(
      EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
      const APInt &ShiftOrRotateAmt,
      const std::optional<APInt> &AndMask) const override;

  bool preferScalarizeSplat(SDNode *N) const override;

  CondMergingParams
  getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
                                const Value *Rhs) const override;

  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

  bool
  shouldTransformSignedTruncationCheck(EVT XVT,
                                       unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are OK with KeptBitsVT being byte/word/dword, what MOVS supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldSplatInsEltVarIndex(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
    // Converting to sat variants holds little benefit on X86 as we will just
    // need to saturate the value back using fp arithmetic.
    return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
  }

  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }

  /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
  MVT hasFastEqualityCompare(unsigned NumBits) const override;

  /// Return the value type to use for ISD::SETCC.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  /// Determine which of the bits specified in Mask are known to be either
  /// zero or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  /// Determine the number of bits in the operation that are sign bits.
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                               const APInt &DemandedElts,
                                               APInt &KnownUndef,
                                               APInt &KnownZero,
                                               TargetLoweringOpt &TLO,
                                               unsigned Depth) const override;

  bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                  const APInt &DemandedElts,
                                                  unsigned MaskIndex,
                                                  TargetLoweringOpt &TLO,
                                                  unsigned Depth) const;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &DemandedBits,
                                         const APInt &DemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
      SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
      SelectionDAG &DAG, unsigned Depth) const override;

  bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
      bool PoisonOnly, unsigned Depth) const override;

  bool canCreateUndefOrPoisonForTargetNode(
      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
      bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;

  bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                 APInt &UndefElts, const SelectionDAG &DAG,
                                 unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override {
    // Peek through bitcasts/extracts/inserts to see if we have a vector
    // load/broadcast from memory.
    while (Op.getOpcode() == ISD::BITCAST ||
           Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
           (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
            Op.getOperand(0).isUndef()))
      Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);

    return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
           Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD ||
           (Op.getOpcode() == ISD::LOAD &&
            getTargetConstantFromLoad(cast<LoadSDNode>(Op))) ||
           TargetLowering::isTargetCanonicalConstantNode(Op);
  }

  bool isTargetCanonicalSelect(SDNode *N) const override;

  const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

  SDValue unwrapAddress(SDValue N) const override;

  SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

  bool ExpandInlineAsm(CallInst *CI) const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &Info,
                                 const char *Constraint) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  /// Lower the specified operand into the Ops vector. If it is invalid, don't
  /// add anything to Ops. If hasMemory is true it means one of the asm
  /// constraints of the inline asm instruction being processed is 'm'.
  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "v")
      return InlineAsm::ConstraintCode::v;
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle Lowering flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  /// Given a physical register constraint
  /// (e.g. {edx}), return the register number and the register class for the
  /// register. This should only be used for C_Register constraints. On
  /// error, this returns a register number of 0.
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  /// Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                             Type *Ty, unsigned AS,
                             Instruction *I = nullptr) const override;

  bool addressingModeSupportsTLS(const GlobalValue &GV) const override;

  /// Return true if the specified immediate is a legal
  /// icmp immediate, that is the target has icmp instructions which can
  /// compare a register against the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalICmpImmediate(int64_t Imm) const override;

  /// Return true if the specified immediate is a legal
  /// add immediate, that is the target has add instructions which can
  /// add a register and the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const override;

  bool isLegalStoreImmediate(int64_t Imm) const override;

  /// Add x86-specific opcodes to the default list.
  bool isBinOp(unsigned Opcode) const override;

  /// Returns true if the opcode is a commutative binary operation.
  bool isCommutativeBinOp(unsigned Opcode) const override;

  /// Return true if it's free to truncate a value of
  /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
  /// register EAX to i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

  /// Return true if any actual instruction that defines a
  /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
  /// register. This does not necessarily include registers defined in
  /// unknown ways, such as incoming arguments, or copies from unknown
  /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
  /// does not necessarily apply to truncate instructions. e.g. on x86-64,
  /// all instructions that define 32-bit values implicitly zero-extend the
  /// result out to 64 bits.
  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldConvertPhiType(Type *From, Type *To) const override;

  /// Return true if folding a vector load into ExtVal (a sign, zero, or any
  /// extend node) is profitable.
  bool isVectorLoadExtDesirable(SDValue) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this
  /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;

  /// Return true if it's profitable to narrow operations of type SrcVT to
  /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
  /// from i32 to i16.
  bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
                                            unsigned SelectOpcode, SDValue X,
                                            SDValue Y) const override;

  /// Given an intrinsic, checks if on the target the intrinsic will need to
  /// map to a MemIntrinsicNode (touches memory). If this is the case, it
  /// returns true and stores the intrinsic information into the
  /// IntrinsicInfo that was passed to the function.
  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  /// Returns true if the target can instruction select the
  /// specified FP immediate natively. If false, the legalizer will
  /// materialize the FP immediate as a load from a constant pool.
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Targets can use this to indicate that they only support *some*
  /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
  /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
  /// be legal.
  bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
  /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
  /// constant pool entry.
  bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

  /// Returns true if lowering to a jump table is allowed.
  bool areJTsAllowed(const Function *Fn) const override;

  MVT getPreferredSwitchConditionType(LLVMContext &Context,
                                      EVT ConditionVT) const override;

  /// If true, then instruction selection should
  /// seek to shrink the FP constant of the specified type to a smaller type
  /// in order to save space and / or reduce runtime.
  bool ShouldShrinkFPConstant(EVT VT) const override;

  /// Return true if we believe it is correct and profitable to reduce the
  /// load node to a smaller type.
  bool
  shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
                        std::optional<unsigned> ByteOffset) const override;

  /// Return true if the specified scalar FP type is computed in an SSE
  /// register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

  bool convertSelectOfConstantsToMath(EVT VT) const override;

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  /// Scalar ops always have equal or better analysis/performance/power than
  /// the vector equivalent, so this always makes sense if the scalar op is
  /// supported.
  bool shouldScalarizeBinop(SDValue) const override;

  /// Extract of a scalar FP value from index 0 of a vector is free.
  bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
    EVT EltVT = VT.getScalarType();
    return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
  }

  /// Overflow nodes should get combined/lowered to optimal instructions
  /// (they should allow eliminating explicit compares by getting flags from
  /// math ops).
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override;

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // If we can replace more than 2 scalar stores, there will be a reduction
    // in instructions even after we add a vector constant load.
    return IsZero || NumElem > 2;
  }

  bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                               const SelectionDAG &DAG,
                               const MachineMemOperand &MMO) const override;

  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  bool needsFixedCatchObjects() const override;

  /// This method returns a target specific FastISel object,
  /// or null if the target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  bool useLoadStackGuardNode(const Module &M) const override;
  bool useStackGuardXorFP() const override;
  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;
  SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                              const SDLoc &DL) const override;

  /// Return the SafeStack pointer location, which the target stores at a
  /// fixed offset in some non-standard address space.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
                                        SDValue Chain, SDValue Pointer,
                                        MachinePointerInfo PtrInfo,
                                        Align Alignment,
                                        SelectionDAG &DAG) const;

  /// Customize the preferred legalization strategy for certain types.
  LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

  bool softPromoteHalfType() const override { return true; }

  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;

  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(
      LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
      unsigned &NumIntermediates, MVT &RegisterVT) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool supportSwiftError() const override;

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  bool hasStackProbeSymbol(const MachineFunction &MF) const override;
  bool hasInlineStackProbe(const MachineFunction &MF) const override;
  StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;

  unsigned getStackProbeSize(const MachineFunction &MF) const;

  bool hasVectorBlend() const override { return true; }

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
                               unsigned OpNo) const override;

  SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                          MachineMemOperand *MMO, SDValue &NewLoad,
                          SDValue Ptr, SDValue PassThru,
                          SDValue Mask) const override;
  SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                           MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
                           SDValue Mask) const override;

  /// Lower interleaved load(s) into target specific
  /// instructions/intrinsics.
  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;

  /// Lower interleaved store(s) into target specific
  /// instructions/intrinsics.
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
                                 int JTI, SelectionDAG &DAG) const override;

  Align getPrefLoopAlignment(MachineLoop *ML) const override;

  EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
    if (VT == MVT::f80)
      return EVT::getIntegerVT(Context, 96);
    return TargetLoweringBase::getTypeToTransformTo(Context, VT);
  }

protected:
  std::pair<const TargetRegisterClass *, uint8_t>
  findRepresentativeClass(const TargetRegisterInfo *TRI,
                          MVT VT) const override;

private:
  /// Keep a reference to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget &Subtarget;

  /// A list of legal FP immediates.
  std::vector<APFloat> LegalFPImmediates;

  /// Indicate that this x86 target can instruction
  /// select the specified FP immediate natively.
  void addLegalFPImmediate(const APFloat& Imm) {
    LegalFPImmediates.push_back(Imm);
  }

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &dl, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals,
                          uint32_t *RegMask) const;
  SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                           const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA, MachineFrameInfo &MFI,
                           unsigned i) const;
  SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA,
                           ISD::ArgFlagsTy Flags, bool isByval) const;

  // Call lowering helpers.

  /// Check whether the call is eligible for tail call optimization. Targets
  /// that want to do tail call optimization should implement this function.
  bool IsEligibleForTailCallOptimization(
      TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
      SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;
  SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                  SDValue Chain, bool IsTailCall,
                                  bool Is64Bit, int FPDiff,
                                  const SDLoc &dl) const;

  unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                       SelectionDAG &DAG) const;

  unsigned getAddressSpace() const;

  SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                          SDValue &Chain) const;
  SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

  unsigned getGlobalWrapperKind(const GlobalValue *GV,
                                const unsigned char OpFlags) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

  /// Creates target global address or external symbol nodes for calls or
  /// other uses.
  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &dl, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals,
                          uint32_t *RegMask) const;
  SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                           const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA, MachineFrameInfo &MFI,
                           unsigned i) const;
  SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA,
                           ISD::ArgFlagsTy Flags, bool isByval) const;

  // Call lowering helpers.

  /// Check whether the call is eligible for tail call optimization. Targets
  /// that want to do tail call optimization should implement this function.
  bool IsEligibleForTailCallOptimization(
      TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
      SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;
  SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                  SDValue Chain, bool IsTailCall,
                                  bool Is64Bit, int FPDiff,
                                  const SDLoc &dl) const;

  unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                       SelectionDAG &DAG) const;

  unsigned getAddressSpace() const;

  SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                          SDValue &Chain) const;
  SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

  unsigned getGlobalWrapperKind(const GlobalValue *GV,
                                const unsigned char OpFlags) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

  /// Creates target global address or external symbol nodes for calls or
  /// other uses.
  SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, bool ForCall,
                                bool *IsImpCall) const;

  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                  SDValue &Chain) const;
  SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;

  SDValue
  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       const SDLoc &dl, SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals,
                      const SDLoc &dl, SelectionDAG &DAG) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
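  // Illustrative sketch (hypothetical driver code; TLI, MF, EntryMBB and
  // ReturnBlocks are assumed names): split callee-saved-register handling is
  // requested only for nounwind functions using the cxx_fast_tls calling
  // convention, and is then driven through the two hooks declared below.
  //
  //   if (TLI.supportSplitCSR(&MF)) {
  //     TLI.initializeSplitCSR(EntryMBB);               // CSRs -> vregs
  //     TLI.insertCopiesSplitCSR(EntryMBB, ReturnBlocks); // vregs -> CSRs
  //   }
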
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

  EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                          ISD::NodeType ExtendKind) const override;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context, const Type *RetTy) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
  void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
  void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

  LoadInst *
  lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

  bool needsCmpXchgNb(Type *MemType) const;
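  // Illustrative sketch (hypothetical driver logic, modeled on how
  // AtomicExpandPass consumes these hooks; not code from this file): an
  // atomicrmw that cannot be selected directly is either rewritten as an
  // X86-specific intrinsic or expanded generically, e.g. to a CMPXCHG8B /
  // CMPXCHG16B loop when needsCmpXchgNb() reports the type is wider than
  // the native lock-free width.
  //
  //   switch (TLI.shouldExpandAtomicRMWInIR(AI)) {
  //   case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic:
  //     TLI.emitBitTestAtomicRMWIntrinsic(AI);  // e.g. LOCK BTS/BTR/BTC
  //     break;
  //   case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic:
  //     TLI.emitCmpArithAtomicRMWIntrinsic(AI); // e.g. LOCK ADD + flags
  //     break;
  //   default:
  //     break; // legal as-is, or expanded by generic code
  //   }
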
  void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                              MachineBasicBlock *DispatchBB, int FI) const;

  // Utility function to emit the low-level va_arg code for X86-64.
  MachineBasicBlock *
  EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

  /// Utility function to lower a pair of cascaded CMOV (select) pseudo
  /// instructions.
  MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                               MachineInstr &MI2,
                                               MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                       MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                             MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                        MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                              MachineBasicBlock *BB) const;

  MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const;

  void emitSetJmpShadowStackFix(MachineInstr &MI,
                                MachineBasicBlock *MBB) const;

  MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;

  MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                               MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const;

  MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
                                            MachineBasicBlock *MBB) const;

  /// Emit flags for the given setcc condition and operands. Also returns the
  /// corresponding X86 condition code constant in X86CC.
  SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SDValue &X86CC) const;

  bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                           SDValue IntPow2) const override;

  /// Check if replacement of SQRT with RSQRT should be disabled.
  bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

  /// Use rsqrt* to speed up sqrt calculations.
  SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                          int &RefinementSteps, bool &UseOneConstNR,
                          bool Reciprocal) const override;

  /// Use rcp* to speed up fdiv calculations.
  SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                           int &RefinementSteps) const override;

  /// Reassociate repeated floating-point divisions into a multiply by the
  /// reciprocal.
  unsigned combineRepeatedFPDivisors() const override;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;

  SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
                  SDValue V2) const;
};

namespace X86 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace X86

// X86-specific Gather/Scatter nodes.
// The class has the same order of operands as MaskedGatherScatterSDNode for
// convenience.
class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
public:
  // This is intended as a utility and should never be directly created.
  X86MaskedGatherScatterSDNode() = delete;
  ~X86MaskedGatherScatterSDNode() = delete;

  const SDValue &getBasePtr() const { return getOperand(3); }
  const SDValue &getIndex() const { return getOperand(4); }
  const SDValue &getMask() const { return getOperand(2); }
  const SDValue &getScale() const { return getOperand(5); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MGATHER ||
           N->getOpcode() == X86ISD::MSCATTER;
  }
};

class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
public:
  const SDValue &getPassThru() const { return getOperand(1); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MGATHER;
  }
};

class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
public:
  const SDValue &getValue() const { return getOperand(1); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MSCATTER;
  }
};

/// Generate unpacklo/unpackhi shuffle mask.
void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                             bool Unary);

/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
/// imposed by AVX and specific to the unary pattern. Example:
/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H