//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
class X86Subtarget;
class X86TargetMachine;

namespace X86ISD {
// X86 Specific DAG Nodes.
//
// The enumerators fall into three consecutive ranges, and their order is
// significant:
//   * FIRST_NUMBER onwards is numbered from ISD::BUILTIN_OP_END.
//   * STRICT_FCMP onwards is numbered from ISD::FIRST_TARGET_STRICTFP_OPCODE.
//   * LCMPXCHG_DAG onwards is numbered from ISD::FIRST_TARGET_MEMORY_OPCODE;
//     see the WARNING at the end of the enum before appending anything.
enum NodeType : unsigned {
  // Start the numbering where the builtin ops leave off.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  /// Bit scan forward.
  BSF,
  /// Bit scan reverse.
  BSR,

  /// Double shift instructions. These correspond to
  /// X86::SHLDxx and X86::SHRDxx instructions.
  SHLD,
  SHRD,

  /// Bitwise logical AND of floating point values. This corresponds
  /// to X86::ANDPS or X86::ANDPD.
  FAND,

  /// Bitwise logical OR of floating point values. This corresponds
  /// to X86::ORPS or X86::ORPD.
  FOR,

  /// Bitwise logical XOR of floating point values. This corresponds
  /// to X86::XORPS or X86::XORPD.
  FXOR,

  /// Bitwise logical ANDNOT of floating point values. This
  /// corresponds to X86::ANDNPS or X86::ANDNPD.
  FANDN,

  /// These operations represent an abstract X86 call
  /// instruction, which includes a bunch of information. In particular the
  /// operands of these nodes are:
  ///
  ///     #0 - The incoming token chain
  ///     #1 - The callee
  ///     #2 - The number of arg bytes the caller pushes on the stack.
  ///     #3 - The number of arg bytes the callee pops off the stack.
  ///     #4 - The value to pass in AL/AX/EAX (optional)
  ///     #5 - The value to pass in DL/DX/EDX (optional)
  ///
  /// The result values of these nodes are:
  ///
  ///     #0 - The outgoing token chain
  ///     #1 - The first register result value (optional)
  ///     #2 - The second register result value (optional)
  ///
  CALL,

  /// Same as call except it adds the NoTrack prefix.
  NT_CALL,

  /// X86 compare and logical compare instructions.
  CMP, COMI, UCOMI,

  /// X86 bit-test instructions.
  BT,

  /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  /// operand, usually produced by a CMP instruction.
  SETCC,

  /// X86 Select
  SELECTS,

  // Same as SETCC except it's materialized with a sbb and the value is all
  // one's or all zero's.
  SETCC_CARRY, // R = carry_bit ? ~0 : 0

  /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  /// Operands are two FP values to compare; result is a mask of
  /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
  FSETCC,

  /// X86 FP SETCC, similar to above, but with output as an i1 mask,
  /// and a version with SAE.
  FSETCCM, FSETCCM_SAE,

  /// X86 conditional moves. Operand 0 and operand 1 are the two values
  /// to select from. Operand 2 is the condition code, and operand 3 is the
  /// flag operand produced by a CMP or TEST instruction.
  CMOV,

  /// X86 conditional branches. Operand 0 is the chain operand, operand 1
  /// is the block to branch if condition is true, operand 2 is the
  /// condition code, and operand 3 is the flag operand produced by a CMP
  /// or TEST instruction.
  BRCOND,

  /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
  /// operand 1 is the target address.
  NT_BRIND,

  /// Return with a flag operand. Operand 0 is the chain operand, operand
  /// 1 is the number of bytes of stack to pop.
  RET_FLAG,

  /// Return from interrupt. Operand 0 is the number of bytes to pop.
  IRET,

  /// Repeat fill, corresponds to X86::REP_STOSx.
  REP_STOS,

  /// Repeat move, corresponds to X86::REP_MOVSx.
  REP_MOVS,

  /// On Darwin, this node represents the result of the popl
  /// at function entry, used for PIC code.
  GlobalBaseReg,

  /// A wrapper node for TargetConstantPool, TargetJumpTable,
  /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
  /// MCSymbol and TargetBlockAddress.
  Wrapper,

  /// Special wrapper used under X86-64 PIC mode for RIP
  /// relative displacements.
  WrapperRIP,

  /// Copies a 64-bit value from an MMX vector to the low word
  /// of an XMM vector, with the high word zero filled.
  MOVQ2DQ,

  /// Copies a 64-bit value from the low word of an XMM vector
  /// to an MMX vector.
  MOVDQ2Q,

  /// Copies a 32-bit value from the low word of a MMX
  /// vector to a GPR.
  MMX_MOVD2W,

  /// Copies a GPR into the low 32-bit word of a MMX vector
  /// and zero out the high word.
  MMX_MOVW2D,

  /// Extract an 8-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRB.
  PEXTRB,

  /// Extract a 16-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRW.
  PEXTRW,

  /// Insert any element of a 4 x float vector into any element
  /// of a destination 4 x float vector.
  INSERTPS,

  /// Insert the lower 8-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRB.
  PINSRB,

  /// Insert the lower 16-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRW.
  PINSRW,

  /// Shuffle 16 8-bit values within a vector.
  PSHUFB,

  /// Compute Sum of Absolute Differences.
  PSADBW,
  /// Compute Double Block Packed Sum-Absolute-Differences
  DBPSADBW,

  /// Bitwise Logical AND NOT of Packed FP values.
  ANDNP,

  /// Blend where the selector is an immediate.
  BLENDI,

  /// Dynamic (non-constant condition) vector blend where only the sign bits
  /// of the condition elements are used. This is used to enforce that the
  /// condition mask is not valid for generic VSELECT optimizations. This
  /// is also used to implement the intrinsics.
  /// Operands are in VSELECT order: MASK, TRUE, FALSE
  BLENDV,

  /// Combined add and sub on an FP vector.
  ADDSUB,

  // FP vector ops with rounding mode.
  FADD_RND, FADDS, FADDS_RND,
  FSUB_RND, FSUBS, FSUBS_RND,
  FMUL_RND, FMULS, FMULS_RND,
  FDIV_RND, FDIVS, FDIVS_RND,
  FMAX_SAE, FMAXS_SAE,
  FMIN_SAE, FMINS_SAE,
  FSQRT_RND, FSQRTS, FSQRTS_RND,

  // FP vector get exponent.
  FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
  // Extract Normalized Mantissas.
  VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
  // FP Scale.
  SCALEF, SCALEF_RND,
  SCALEFS, SCALEFS_RND,

  // Unsigned Integer average.
  AVG,

  /// Integer horizontal add/sub.
  HADD,
  HSUB,

  /// Floating point horizontal add/sub.
  FHADD,
  FHSUB,

  // Detect Conflicts Within a Vector
  CONFLICT,

  /// Floating point max and min.
  FMAX, FMIN,

  /// Commutative FMIN and FMAX.
  FMAXC, FMINC,

  /// Scalar intrinsic floating point max and min.
  FMAXS, FMINS,

  /// Floating point reciprocal-sqrt and reciprocal approximation.
  /// Note that these typically require refinement
  /// in order to obtain suitable precision.
  FRSQRT, FRCP,

  // AVX-512 reciprocal approximations with a little more precision.
  RSQRT14, RSQRT14S, RCP14, RCP14S,

  // Thread Local Storage.
  TLSADDR,

  // Thread Local Storage. A call to get the start address
  // of the TLS block for the current module.
  TLSBASEADDR,

  // Thread Local Storage. When calling to an OS provided
  // thunk at the address from an earlier relocation.
  TLSCALL,

  // Exception Handling helpers.
  EH_RETURN,

  // SjLj exception handling setjmp.
  EH_SJLJ_SETJMP,

  // SjLj exception handling longjmp.
  EH_SJLJ_LONGJMP,

  // SjLj exception handling dispatch.
  EH_SJLJ_SETUP_DISPATCH,

  /// Tail call return. See X86TargetLowering::LowerCall for
  /// the list of operands.
  TC_RETURN,

  // Vector move to low scalar and zero higher vector elements.
  VZEXT_MOVL,

  // Vector integer truncate.
  VTRUNC,
  // Vector integer truncate with unsigned/signed saturation.
  VTRUNCUS, VTRUNCS,

  // Masked version of the above. Used when less than a 128-bit result is
  // produced since the mask only applies to the lower elements and can't
  // be represented by a select.
  // SRC, PASSTHRU, MASK
  VMTRUNC, VMTRUNCUS, VMTRUNCS,

  // Vector FP extend.
  VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,

  // Vector FP round.
  VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,

  // Masked version of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  VMFPROUND,

  // 128-bit vector logical left / right shift
  VSHLDQ, VSRLDQ,

  // Vector shift elements
  VSHL, VSRL, VSRA,

  // Vector variable shift
  VSHLV, VSRLV, VSRAV,

  // Vector shift elements by immediate
  VSHLI, VSRLI, VSRAI,

  // Shifts of mask registers.
  KSHIFTL, KSHIFTR,

  // Bit rotate by immediate
  VROTLI, VROTRI,

  // Vector packed double/float comparison.
  CMPP,

  // Vector integer comparisons.
  PCMPEQ, PCMPGT,

  // v8i16 Horizontal minimum and position.
  PHMINPOS,

  MULTISHIFT,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  CMPM,
  // Vector comparison with SAE for FP values
  CMPM_SAE,

  // Arithmetic operations with FLAGS results.
  ADD, SUB, ADC, SBB, SMUL, UMUL,
  OR, XOR, AND,

  // Bit field extract.
  BEXTR,

  // Zero High Bits Starting with Specified Bit Position.
  BZHI,

  // X86-specific multiply by immediate.
  MUL_IMM,

  // Vector sign bit extraction.
  MOVMSK,

  // Vector bitwise comparisons.
  PTEST,

  // Vector packed fp sign bitwise comparisons.
  TESTP,

  // OR/AND test for masks.
  KORTEST,
  KTEST,

  // ADD for masks.
  KADD,

  // Several flavors of instructions with vector shuffle behaviors.
  // Saturated signed/unsigned packing.
  PACKSS,
  PACKUS,
  // Intra-lane alignr.
  PALIGNR,
  // AVX512 inter-lane alignr.
  VALIGN,
  PSHUFD,
  PSHUFHW,
  PSHUFLW,
  SHUFP,
  // VBMI2 Concat & Shift.
  VSHLD,
  VSHRD,
  VSHLDV,
  VSHRDV,
  // Shuffle Packed Values at 128-bit granularity.
  SHUF128,
  MOVDDUP,
  MOVSHDUP,
  MOVSLDUP,
  MOVLHPS,
  MOVHLPS,
  MOVSD,
  MOVSS,
  UNPCKL,
  UNPCKH,
  VPERMILPV,
  VPERMILPI,
  VPERMI,
  VPERM2X128,

  // Variable Permute (VPERM).
  // Res = VPERMV MaskV, V0
  VPERMV,

  // 3-op Variable Permute (VPERMT2).
  // Res = VPERMV3 V0, MaskV, V1
  VPERMV3,

  // Bitwise ternary logic.
  VPTERNLOG,
  // Fix Up Special Packed Float32/64 values.
  VFIXUPIMM, VFIXUPIMM_SAE,
  VFIXUPIMMS, VFIXUPIMMS_SAE,
  // Range Restriction Calculation For Packed Pairs of Float32/64 values.
  VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
  // Reduce - Perform Reduction Transformation on scalar/packed FP.
  VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
  // Tests types of FP values for packed types.
  VFPCLASS,
  // Tests types of FP values for scalar types.
  VFPCLASSS,

  // Broadcast (splat) scalar or element 0 of a vector. If the operand is
  // a vector, this node may change the vector length as part of the splat.
  VBROADCAST,
  // Broadcast mask to vector.
  VBROADCASTM,
  // Broadcast subvector to vector.
  SUBV_BROADCAST,

  /// SSE4A Extraction and Insertion.
  EXTRQI, INSERTQI,

  // XOP arithmetic/logical shifts.
  VPSHA, VPSHL,
  // XOP signed/unsigned integer comparisons.
  VPCOM, VPCOMU,
  // XOP packed permute bytes.
  VPPERM,
  // XOP two source permutation.
  VPERMIL2,

  // Vector multiply packed unsigned doubleword integers.
  PMULUDQ,
  // Vector multiply packed signed doubleword integers.
  PMULDQ,
  // Vector Multiply Packed Unsigned Integers with Round and Scale.
  MULHRS,

  // Multiply and Add Packed Integers.
  VPMADDUBSW, VPMADDWD,

  // AVX512IFMA multiply and add.
  // NOTE: These are different than the instruction and perform
  // op0 x op1 + op2.
  VPMADD52L, VPMADD52H,

  // VNNI
  VPDPBUSD,
  VPDPBUSDS,
  VPDPWSSD,
  VPDPWSSDS,

  // FMA nodes.
  // We use the target independent ISD::FMA for the non-inverted case.
  FNMADD,
  FMSUB,
  FNMSUB,
  FMADDSUB,
  FMSUBADD,

  // FMA with rounding mode.
  FMADD_RND,
  FNMADD_RND,
  FMSUB_RND,
  FNMSUB_RND,
  FMADDSUB_RND,
  FMSUBADD_RND,

  // Compress and expand.
  COMPRESS,
  EXPAND,

  // Bits shuffle
  VPSHUFBITQMB,

  // Convert Signed/Unsigned Integer to Floating-Point Value with rounding
  // mode.
  SINT_TO_FP_RND, UINT_TO_FP_RND,
  SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
  SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,

  // Vector float/double to signed/unsigned integer.
  CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
  // Scalar float/double to signed/unsigned integer.
  CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,

  // Vector float/double to signed/unsigned integer with truncation.
  CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
  // Scalar float/double to signed/unsigned integer with truncation.
  CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,

  // Vector signed/unsigned integer to float/double.
  CVTSI2P, CVTUI2P,

  // Masked versions of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
  MCVTSI2P, MCVTUI2P,

  // Vector float to bfloat16.
  // Convert TWO packed single data to one packed BF16 data
  CVTNE2PS2BF16,
  // Convert packed single data to packed BF16 data
  CVTNEPS2BF16,
  // Masked version of above.
  // SRC, PASSTHRU, MASK
  MCVTNEPS2BF16,

  // Dot product of BF16 pairs to be accumulated into
  // packed single precision.
  DPBF16PS,

  // Save xmm argument registers to the stack, according to %al. An operator
  // is needed so that this can be expanded with control flow.
  VASTART_SAVE_XMM_REGS,

  // Windows's _chkstk call to do stack probing.
  WIN_ALLOCA,

  // For allocating variable amounts of stack space when using
  // segmented stacks. Check if the current stacklet has enough space, and
  // falls back to heap allocation if not.
  SEG_ALLOCA,

  // Memory barriers.
  MEMBARRIER,
  MFENCE,

  // Store FP status word into i16 register.
  FNSTSW16r,

  // Store contents of %ah into %eflags.
  SAHF,

  // Get a random integer and indicate whether it is valid in CF.
  RDRAND,

  // Get a NIST SP800-90B & C compliant random integer and
  // indicate whether it is valid in CF.
  RDSEED,

  // Protection keys
  // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
  // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
  // value for ECX.
  RDPKRU, WRPKRU,

  // SSE42 string comparisons.
  // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
  // will emit one or two instructions based on which results are used. If
  // both flags and index/mask are used, this allows us to use a single
  // instruction since we won't have to pick an opcode for flags. Instead we
  // can rely on the DAG to CSE everything and decide at isel.
  PCMPISTR,
  PCMPESTR,

  // Test if in transactional execution.
  XTEST,

  // ERI instructions.
  RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
  RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,

  // Conversions between float and half-float.
  CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,

  // Masked version of above.
  // SRC, RND, PASSTHRU, MASK
  MCVTPS2PH,

  // Galois Field Arithmetic Instructions
  GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,

  // LWP insert record.
  LWPINS,

  // User level wait
  UMWAIT, TPAUSE,

  // Enqueue Stores Instructions
  ENQCMD, ENQCMDS,

  // For avx512-vp2intersect
  VP2INTERSECT,

  /// X86 strict FP compare instructions.
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPS,

  // Vector packed double/float comparison.
  STRICT_CMPP,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  STRICT_CMPM,

  // Vector float/double to signed/unsigned integer with truncation.
  STRICT_CVTTP2SI, STRICT_CVTTP2UI,

  // Vector FP extend.
  STRICT_VFPEXT,

  // Vector FP round.
  STRICT_VFPROUND,

  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  STRICT_VRNDSCALE,

  // Vector signed/unsigned integer to float/double.
  STRICT_CVTSI2P, STRICT_CVTUI2P,

  // Compare and swap.
  LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LCMPXCHG8_DAG,
  LCMPXCHG16_DAG,
  LCMPXCHG8_SAVE_EBX_DAG,
  LCMPXCHG16_SAVE_RBX_DAG,

  /// LOCK-prefixed arithmetic read-modify-write instructions.
  /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
  LADD, LSUB, LOR, LXOR, LAND,

  // Load, scalar_to_vector, and zero extend.
  VZEXT_LOAD,

  // extract_vector_elt, store.
  VEXTRACT_STORE,

  // scalar broadcast from memory
  VBROADCAST_LOAD,

  // Store FP control word into i16 memory.
  FNSTCW16m,

  /// This instruction implements FP_TO_SINT with the
  /// integer destination in memory and a FP reg source. This corresponds
  /// to the X86::FIST*m instructions and the rounding mode change stuff. It
  /// has two inputs (token chain and address) and two outputs (int value
  /// and token chain). Memory VT specifies the type to store to.
  FP_TO_INT_IN_MEM,

  /// This instruction implements SINT_TO_FP with the
  /// integer source in memory and FP reg result. This corresponds to the
  /// X86::FILD*m instructions. It has two inputs (token chain and address)
  /// and two outputs (FP value and token chain). FILD_FLAG also produces a
  /// flag. The integer source type is specified by the memory VT.
  FILD,
  FILD_FLAG,

  /// This instruction implements a fp->int store from FP stack
  /// slots. This corresponds to the fist instruction. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FIST,

  /// This instruction implements an extending load to FP stack slots.
  /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
  /// operand, and ptr to load from. The memory VT specifies the type to
  /// load from.
  FLD,

  /// This instruction implements a truncating store from FP stack
  /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FST,

  /// This instruction grabs the address of the next argument
  /// from a va_list. (reads and modifies the va_list in memory)
  VAARG_64,

  // Vector truncating store with unsigned/signed saturation
  VTRUNCSTOREUS, VTRUNCSTORES,
  // Vector truncating masked store with unsigned/signed saturation
  VMTRUNCSTOREUS, VMTRUNCSTORES,

  // X86 specific gather and scatter
  MGATHER, MSCATTER,

  // WARNING: Do not add anything in the end unless you want the node to
  // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
  // opcodes will be thought as target memory ops!
};
} // end namespace X86ISD

/// Define some predicates that are used for node matching.
namespace X86 {
/// Returns true if Elt is a constant zero or floating point constant +0.0.
bool isZeroNode(SDValue Elt);

/// Returns true if the given offset can fit into the displacement field
/// of the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                  bool hasSymbolicDisplacement = true);

/// Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool isCalleePop(CallingConv::ID CallingConv,
                 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

/// If Op is a constant whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatVal.
bool isConstantSplat(SDValue Op, APInt &SplatVal);
} // end namespace X86

//===--------------------------------------------------------------------===//
//  X86 Implementation of the TargetLowering interface
class X86TargetLowering final : public TargetLowering {
public:
  explicit X86TargetLowering(const X86TargetMachine &TM,
                             const X86Subtarget &STI);

  unsigned getJumpTableEncoding() const override;
  bool useSoftFloat() const override;

  void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                             ArgListTy &Args) const override;

  /// Always returns MVT::i8; neither the DataLayout nor \p VT is consulted.
  MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
    return MVT::i8;
  }

  const MCExpr *
  LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                            const MachineBasicBlock *MBB, unsigned uid,
                            MCContext &Ctx) const override;

  /// Returns relocation base for the given PIC jumptable.
  SDValue getPICJumpTableRelocBase(SDValue Table,
                                   SelectionDAG &DAG) const override;
  const MCExpr *
  getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                               unsigned JTI, MCContext &Ctx) const override;

  /// Return the desired alignment for ByVal aggregate
  /// function arguments in the caller parameter area. For X86, some
  /// aggregates are placed at 16-byte boundaries while the rest are at
  /// 4-byte boundaries.
  /// NOTE(review): this comment previously read "aggregates that contains
  /// are placed at 16-byte boundaries"; the qualifying noun was lost
  /// (presumably vector members) -- confirm against the definition.
  unsigned getByValTypeAlignment(Type *Ty,
                                 const DataLayout &DL) const override;

  /// Returns the target specific optimal type for load
  /// and store operations as a result of memset, memcpy, and memmove
  /// lowering. If DstAlign is zero that means the destination
  /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
  /// means there isn't a need to check it against alignment requirement,
  /// probably because the source does not need to be loaded. If 'IsMemset' is
  /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
  /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
  /// source is constant so it does not need to be loaded.
  /// It returns EVT::Other if the type should be determined using generic
  /// target-independent logic.
  EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                          bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                          const AttributeList &FuncAttributes) const override;

  /// Returns true if it's safe to use load / store of the
  /// specified type to expand memcpy / memset inline. This is mostly true
  /// for all types except for some special cases. For example, on X86
  /// targets without SSE2 f64 load / store are done with fldl / fstpl which
  /// also does type conversion. Note the specified type doesn't have to be
  /// legal as the hook is used before type legalization.
  bool isSafeMemOpType(MVT VT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type. Returns whether it is "fast" in the last argument.
  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast) const override;

  /// Provide custom lowering hooks for some operations.
  ///
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  /// Places new result values for the node in Results (their number
  /// and types must exactly match those of the original return values of
  /// the node), or leaves Results empty, which indicates that the node is not
  /// to be custom lowered after all.
void LowerOperationWrapper(SDNode *N,
                           SmallVectorImpl<SDValue> &Results,
                           SelectionDAG &DAG) const override;

/// Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                        SelectionDAG &DAG) const override;

SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

// Return true if it is profitable to combine a BUILD_VECTOR with a
// stride-pattern to a shuffle and a truncate.
// Example of such a combine:
// v4i32 build_vector((extract_elt V, 1),
//                    (extract_elt V, 3),
//                    (extract_elt V, 5),
//                    (extract_elt V, 7))
//  -->
// v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to
// v4i64)
bool isDesirableToCombineBuildVectorToShuffleTruncate(
    ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;

/// Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
/// instruction encodings are longer and some i16 instructions are slow.
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

/// Return true if the target has native support for the
/// specified value type and it is 'desirable' to use the type. e.g. On x86
/// i16 is legal, but undesirable since i16 instruction encodings are longer
/// and some i16 instructions are slow.
/// NOTE(review): this text mirrors the isTypeDesirableForOp comment and does
/// not describe the \p PVT reference parameter; presumably it receives the
/// type to promote to -- confirm against the definition.
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

/// Return 1 if we can compute the negated form of the specified expression
/// for the same cost as the expression itself, or 2 if we can compute the
/// negated form more cheaply than the expression itself. Else return 0.
836 char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations, 837 bool ForCodeSize, unsigned Depth) const override; 838 839 /// If isNegatibleForFree returns true, return the newly negated expression. 840 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, 841 bool LegalOperations, bool ForCodeSize, 842 unsigned Depth) const override; 843 844 MachineBasicBlock * 845 EmitInstrWithCustomInserter(MachineInstr &MI, 846 MachineBasicBlock *MBB) const override; 847 848 /// This method returns the name of a target specific DAG node. 849 const char *getTargetNodeName(unsigned Opcode) const override; 850 851 /// Do not merge vector stores after legalization because that may conflict 852 /// with x86-specific store splitting optimizations. 853 bool mergeStoresAfterLegalization(EVT MemVT) const override { 854 return !MemVT.isVector(); 855 } 856 857 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, 858 const SelectionDAG &DAG) const override; 859 860 bool isCheapToSpeculateCttz() const override; 861 862 bool isCheapToSpeculateCtlz() const override; 863 864 bool isCtlzFast() const override; 865 866 bool hasBitPreservingFPLogic(EVT VT) const override { 867 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector(); 868 } 869 870 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { 871 // If the pair to store is a mixture of float and int values, we will 872 // save two bitwise instructions and one float-to-int instruction and 873 // increase one store instruction. There is potentially a more 874 // significant benefit because it avoids the float->int domain switch 875 // for input value. So It is more likely a win. 876 if ((LTy.isFloatingPoint() && HTy.isInteger()) || 877 (LTy.isInteger() && HTy.isFloatingPoint())) 878 return true; 879 // If the pair only contains int values, we will save two bitwise 880 // instructions and increase one store instruction (costing one more 881 // store buffer). 
Since the benefit is more blurred so we leave 882 // such pair out until we get testcase to prove it is a win. 883 return false; 884 } 885 886 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; 887 888 bool hasAndNotCompare(SDValue Y) const override; 889 890 bool hasAndNot(SDValue Y) const override; 891 892 bool hasBitTest(SDValue X, SDValue Y) const override; 893 894 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( 895 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, 896 unsigned OldShiftOpcode, unsigned NewShiftOpcode, 897 SelectionDAG &DAG) const override; 898 899 bool shouldFoldConstantShiftPairToMask(const SDNode *N, 900 CombineLevel Level) const override; 901 902 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override; 903 904 bool 905 shouldTransformSignedTruncationCheck(EVT XVT, 906 unsigned KeptBits) const override { 907 // For vectors, we don't have a preference.. 908 if (XVT.isVector()) 909 return false; 910 911 auto VTIsOk = [](EVT VT) -> bool { 912 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || 913 VT == MVT::i64; 914 }; 915 916 // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports. 917 // XVT will be larger than KeptBitsVT. 918 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits); 919 return VTIsOk(XVT) && VTIsOk(KeptBitsVT); 920 } 921 922 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; 923 924 bool shouldSplatInsEltVarIndex(EVT VT) const override; 925 926 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { 927 return VT.isScalarInteger(); 928 } 929 930 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST. 931 MVT hasFastEqualityCompare(unsigned NumBits) const override; 932 933 /// Return the value type to use for ISD::SETCC. 
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                       EVT VT) const override;

bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                  TargetLoweringOpt &TLO) const override;

/// Determine which bits of \p Op are known to be either zero or one and
/// return them in \p Known.
void computeKnownBitsForTargetNode(const SDValue Op,
                                   KnownBits &Known,
                                   const APInt &DemandedElts,
                                   const SelectionDAG &DAG,
                                   unsigned Depth = 0) const override;

/// Determine the number of bits in the operation that are sign bits.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                         const APInt &DemandedElts,
                                         const SelectionDAG &DAG,
                                         unsigned Depth) const override;

bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             APInt &KnownUndef,
                                             APInt &KnownZero,
                                             TargetLoweringOpt &TLO,
                                             unsigned Depth) const override;

bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                       const APInt &DemandedBits,
                                       const APInt &DemandedElts,
                                       KnownBits &Known,
                                       TargetLoweringOpt &TLO,
                                       unsigned Depth) const override;

SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const override;

const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

SDValue unwrapAddress(SDValue N) const override;

// Unlike its neighbours this member is not marked `override`.
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

bool ExpandInlineAsm(CallInst *CI) const override;

ConstraintType getConstraintType(StringRef Constraint) const override;

/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
984 ConstraintWeight 985 getSingleConstraintMatchWeight(AsmOperandInfo &info, 986 const char *constraint) const override; 987 988 const char *LowerXConstraint(EVT ConstraintVT) const override; 989 990 /// Lower the specified operand into the Ops vector. If it is invalid, don't 991 /// add anything to Ops. If hasMemory is true it means one of the asm 992 /// constraint of the inline asm instruction being processed is 'm'. 993 void LowerAsmOperandForConstraint(SDValue Op, 994 std::string &Constraint, 995 std::vector<SDValue> &Ops, 996 SelectionDAG &DAG) const override; 997 998 unsigned 999 getInlineAsmMemConstraint(StringRef ConstraintCode) const override { 1000 if (ConstraintCode == "o") 1001 return InlineAsm::Constraint_o; 1002 else if (ConstraintCode == "v") 1003 return InlineAsm::Constraint_v; 1004 else if (ConstraintCode == "X") 1005 return InlineAsm::Constraint_X; 1006 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 1007 } 1008 1009 /// Handle Lowering flag assembly outputs. 1010 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL, 1011 const AsmOperandInfo &Constraint, 1012 SelectionDAG &DAG) const override; 1013 1014 /// Given a physical register constraint 1015 /// (e.g. {edx}), return the register number and the register class for the 1016 /// register. This should only be used for C_Register constraints. On 1017 /// error, this returns a register number of 0. 1018 std::pair<unsigned, const TargetRegisterClass *> 1019 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 1020 StringRef Constraint, MVT VT) const override; 1021 1022 /// Return true if the addressing mode represented 1023 /// by AM is legal for this target, for a load/store of the specified type. 
1024 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, 1025 Type *Ty, unsigned AS, 1026 Instruction *I = nullptr) const override; 1027 1028 /// Return true if the specified immediate is legal 1029 /// icmp immediate, that is the target has icmp instructions which can 1030 /// compare a register against the immediate without having to materialize 1031 /// the immediate into a register. 1032 bool isLegalICmpImmediate(int64_t Imm) const override; 1033 1034 /// Return true if the specified immediate is legal 1035 /// add immediate, that is the target has add instructions which can 1036 /// add a register and the immediate without having to materialize 1037 /// the immediate into a register. 1038 bool isLegalAddImmediate(int64_t Imm) const override; 1039 1040 bool isLegalStoreImmediate(int64_t Imm) const override; 1041 1042 /// Return the cost of the scaling factor used in the addressing 1043 /// mode represented by AM for this target, for a load/store 1044 /// of the specified type. 1045 /// If the AM is supported, the return value must be >= 0. 1046 /// If the AM is not supported, it returns a negative value. 1047 int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, 1048 unsigned AS) const override; 1049 1050 bool isVectorShiftByScalarCheap(Type *Ty) const override; 1051 1052 /// Add x86-specific opcodes to the default list. 1053 bool isBinOp(unsigned Opcode) const override; 1054 1055 /// Returns true if the opcode is a commutative binary operation. 1056 bool isCommutativeBinOp(unsigned Opcode) const override; 1057 1058 /// Return true if it's free to truncate a value of 1059 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in 1060 /// register EAX to i16 by referencing its sub-register AX. 
1061 bool isTruncateFree(Type *Ty1, Type *Ty2) const override; 1062 bool isTruncateFree(EVT VT1, EVT VT2) const override; 1063 1064 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; 1065 1066 /// Return true if any actual instruction that defines a 1067 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result 1068 /// register. This does not necessarily include registers defined in 1069 /// unknown ways, such as incoming arguments, or copies from unknown 1070 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this 1071 /// does not necessarily apply to truncate instructions. e.g. on x86-64, 1072 /// all instructions that define 32-bit values implicit zero-extend the 1073 /// result out to 64 bits. 1074 bool isZExtFree(Type *Ty1, Type *Ty2) const override; 1075 bool isZExtFree(EVT VT1, EVT VT2) const override; 1076 bool isZExtFree(SDValue Val, EVT VT2) const override; 1077 1078 /// Return true if folding a vector load into ExtVal (a sign, zero, or any 1079 /// extend node) is profitable. 1080 bool isVectorLoadExtDesirable(SDValue) const override; 1081 1082 /// Return true if an FMA operation is faster than a pair of fmul and fadd 1083 /// instructions. fmuladd intrinsics will be expanded to FMAs when this 1084 /// method returns true, otherwise fmuladd is expanded to fmul + fadd. 1085 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 1086 EVT VT) const override; 1087 1088 /// Return true if it's profitable to narrow 1089 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow 1090 /// from i32 to i8 but not from i32 to i16. 1091 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; 1092 1093 /// Given an intrinsic, checks if on the target the intrinsic will need to map 1094 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns 1095 /// true and stores the intrinsic information into the IntrinsicInfo that was 1096 /// passed to the function. 
1097 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, 1098 MachineFunction &MF, 1099 unsigned Intrinsic) const override; 1100 1101 /// Returns true if the target can instruction select the 1102 /// specified FP immediate natively. If false, the legalizer will 1103 /// materialize the FP immediate as a load from a constant pool. 1104 bool isFPImmLegal(const APFloat &Imm, EVT VT, 1105 bool ForCodeSize) const override; 1106 1107 /// Targets can use this to indicate that they only support *some* 1108 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a 1109 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to 1110 /// be legal. 1111 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override; 1112 1113 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there 1114 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a 1115 /// constant pool entry. 1116 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override; 1117 1118 /// Returns true if lowering to a jump table is allowed. 1119 bool areJTsAllowed(const Function *Fn) const override; 1120 1121 /// If true, then instruction selection should 1122 /// seek to shrink the FP constant of the specified type to a smaller type 1123 /// in order to save space and / or reduce runtime. 1124 bool ShouldShrinkFPConstant(EVT VT) const override { 1125 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more 1126 // expensive than a straight movsd. On the other hand, it's important to 1127 // shrink long double fp constant since fldt is very slow. 1128 return !X86ScalarSSEf64 || VT == MVT::f80; 1129 } 1130 1131 /// Return true if we believe it is correct and profitable to reduce the 1132 /// load node to a smaller type. 
1133 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, 1134 EVT NewVT) const override; 1135 1136 /// Return true if the specified scalar FP type is computed in an SSE 1137 /// register, not on the X87 floating point stack. 1138 bool isScalarFPTypeInSSEReg(EVT VT) const { 1139 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 1140 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 1141 } 1142 1143 /// Returns true if it is beneficial to convert a load of a constant 1144 /// to just the constant itself. 1145 bool shouldConvertConstantLoadToIntImm(const APInt &Imm, 1146 Type *Ty) const override; 1147 1148 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override; 1149 1150 bool convertSelectOfConstantsToMath(EVT VT) const override; 1151 1152 bool decomposeMulByConstant(LLVMContext &Context, EVT VT, 1153 SDValue C) const override; 1154 1155 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type 1156 /// with this index. 1157 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, 1158 unsigned Index) const override; 1159 1160 /// Scalar ops always have equal or better analysis/performance/power than 1161 /// the vector equivalent, so this always makes sense if the scalar op is 1162 /// supported. 1163 bool shouldScalarizeBinop(SDValue) const override; 1164 1165 /// Extract of a scalar FP value from index 0 of a vector is free. 1166 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override { 1167 EVT EltVT = VT.getScalarType(); 1168 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0; 1169 } 1170 1171 /// Overflow nodes should get combined/lowered to optimal instructions 1172 /// (they should allow eliminating explicit compares by getting flags from 1173 /// math ops). 
1174 bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override; 1175 1176 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, 1177 unsigned AddrSpace) const override { 1178 // If we can replace more than 2 scalar stores, there will be a reduction 1179 // in instructions even after we add a vector constant load. 1180 return NumElem > 2; 1181 } 1182 1183 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, 1184 const SelectionDAG &DAG, 1185 const MachineMemOperand &MMO) const override; 1186 1187 /// Intel processors have a unified instruction and data cache 1188 const char * getClearCacheBuiltinName() const override { 1189 return nullptr; // nothing to do, move along. 1190 } 1191 1192 Register getRegisterByName(const char* RegName, LLT VT, 1193 const MachineFunction &MF) const override; 1194 1195 /// If a physical register, this returns the register that receives the 1196 /// exception address on entry to an EH pad. 1197 unsigned 1198 getExceptionPointerRegister(const Constant *PersonalityFn) const override; 1199 1200 /// If a physical register, this returns the register that receives the 1201 /// exception typeid on entry to a landing pad. 1202 unsigned 1203 getExceptionSelectorRegister(const Constant *PersonalityFn) const override; 1204 1205 virtual bool needsFixedCatchObjects() const override; 1206 1207 /// This method returns a target specific FastISel object, 1208 /// or null if the target does not support "fast" ISel. 1209 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 1210 const TargetLibraryInfo *libInfo) const override; 1211 1212 /// If the target has a standard location for the stack protector cookie, 1213 /// returns the address of that location. Otherwise, returns nullptr. 
1214 Value *getIRStackGuard(IRBuilder<> &IRB) const override; 1215 1216 bool useLoadStackGuardNode() const override; 1217 bool useStackGuardXorFP() const override; 1218 void insertSSPDeclarations(Module &M) const override; 1219 Value *getSDagStackGuard(const Module &M) const override; 1220 Function *getSSPStackGuardCheck(const Module &M) const override; 1221 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, 1222 const SDLoc &DL) const override; 1223 1224 1225 /// Return true if the target stores SafeStack pointer at a fixed offset in 1226 /// some non-standard address space, and populates the address space and 1227 /// offset as appropriate. 1228 Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override; 1229 1230 std::pair<SDValue, SDValue> BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, 1231 SDValue StackSlot, 1232 SelectionDAG &DAG) const; 1233 1234 bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; 1235 1236 /// Customize the preferred legalization strategy for certain types. 
1237 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; 1238 1239 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, 1240 EVT VT) const override; 1241 1242 unsigned getNumRegistersForCallingConv(LLVMContext &Context, 1243 CallingConv::ID CC, 1244 EVT VT) const override; 1245 1246 unsigned getVectorTypeBreakdownForCallingConv( 1247 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, 1248 unsigned &NumIntermediates, MVT &RegisterVT) const override; 1249 1250 bool isIntDivCheap(EVT VT, AttributeList Attr) const override; 1251 1252 bool supportSwiftError() const override; 1253 1254 StringRef getStackProbeSymbolName(MachineFunction &MF) const override; 1255 1256 unsigned getStackProbeSize(MachineFunction &MF) const; 1257 1258 bool hasVectorBlend() const override { return true; } 1259 1260 unsigned getMaxSupportedInterleaveFactor() const override { return 4; } 1261 1262 /// Lower interleaved load(s) into target specific 1263 /// instructions/intrinsics. 1264 bool lowerInterleavedLoad(LoadInst *LI, 1265 ArrayRef<ShuffleVectorInst *> Shuffles, 1266 ArrayRef<unsigned> Indices, 1267 unsigned Factor) const override; 1268 1269 /// Lower interleaved store(s) into target specific 1270 /// instructions/intrinsics. 1271 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, 1272 unsigned Factor) const override; 1273 1274 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value, 1275 SDValue Addr, SelectionDAG &DAG) 1276 const override; 1277 1278 protected: 1279 std::pair<const TargetRegisterClass *, uint8_t> 1280 findRepresentativeClass(const TargetRegisterInfo *TRI, 1281 MVT VT) const override; 1282 1283 private: 1284 /// Keep a reference to the X86Subtarget around so that we can 1285 /// make the right decision when generating code for different targets. 1286 const X86Subtarget &Subtarget; 1287 1288 /// Select between SSE or x87 floating point ops. 
1289 /// When SSE is available, use it for f32 operations. 1290 /// When SSE2 is available, use it for f64 operations. 1291 bool X86ScalarSSEf32; 1292 bool X86ScalarSSEf64; 1293 1294 /// A list of legal FP immediates. 1295 std::vector<APFloat> LegalFPImmediates; 1296 1297 /// Indicate that this x86 target can instruction 1298 /// select the specified FP immediate natively. 1299 void addLegalFPImmediate(const APFloat& Imm) { 1300 LegalFPImmediates.push_back(Imm); 1301 } 1302 1303 SDValue LowerCallResult(SDValue Chain, SDValue InFlag, 1304 CallingConv::ID CallConv, bool isVarArg, 1305 const SmallVectorImpl<ISD::InputArg> &Ins, 1306 const SDLoc &dl, SelectionDAG &DAG, 1307 SmallVectorImpl<SDValue> &InVals, 1308 uint32_t *RegMask) const; 1309 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, 1310 const SmallVectorImpl<ISD::InputArg> &ArgInfo, 1311 const SDLoc &dl, SelectionDAG &DAG, 1312 const CCValAssign &VA, MachineFrameInfo &MFI, 1313 unsigned i) const; 1314 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, 1315 const SDLoc &dl, SelectionDAG &DAG, 1316 const CCValAssign &VA, 1317 ISD::ArgFlagsTy Flags) const; 1318 1319 // Call lowering helpers. 1320 1321 /// Check whether the call is eligible for tail call optimization. Targets 1322 /// that want to do tail call optimization should implement this function. 
1323 bool IsEligibleForTailCallOptimization(SDValue Callee, 1324 CallingConv::ID CalleeCC, 1325 bool isVarArg, 1326 bool isCalleeStructRet, 1327 bool isCallerStructRet, 1328 Type *RetTy, 1329 const SmallVectorImpl<ISD::OutputArg> &Outs, 1330 const SmallVectorImpl<SDValue> &OutVals, 1331 const SmallVectorImpl<ISD::InputArg> &Ins, 1332 SelectionDAG& DAG) const; 1333 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, 1334 SDValue Chain, bool IsTailCall, 1335 bool Is64Bit, int FPDiff, 1336 const SDLoc &dl) const; 1337 1338 unsigned GetAlignedArgumentStackSize(unsigned StackSize, 1339 SelectionDAG &DAG) const; 1340 1341 unsigned getAddressSpace(void) const; 1342 1343 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned, 1344 SDValue &Chain) const; 1345 1346 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 1347 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; 1348 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 1349 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 1350 1351 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr, 1352 const unsigned char OpFlags = 0) const; 1353 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; 1354 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; 1355 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 1356 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 1357 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; 1358 1359 /// Creates target global address or external symbol nodes for calls or 1360 /// other uses. 
1361 SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, 1362 bool ForCall) const; 1363 1364 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 1365 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 1366 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; 1367 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; 1368 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; 1369 SDValue LowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const; 1370 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; 1371 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; 1372 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; 1373 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; 1374 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 1375 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; 1376 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; 1377 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 1378 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 1379 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; 1380 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; 1381 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; 1382 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; 1383 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; 1384 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; 1385 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; 1386 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; 1387 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; 1388 SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const; 1389 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 1390 SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const; 1391 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG 
&DAG) const; 1392 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; 1393 1394 SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG, 1395 RTLIB::Libcall Call) const; 1396 1397 SDValue 1398 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 1399 const SmallVectorImpl<ISD::InputArg> &Ins, 1400 const SDLoc &dl, SelectionDAG &DAG, 1401 SmallVectorImpl<SDValue> &InVals) const override; 1402 SDValue LowerCall(CallLoweringInfo &CLI, 1403 SmallVectorImpl<SDValue> &InVals) const override; 1404 1405 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 1406 const SmallVectorImpl<ISD::OutputArg> &Outs, 1407 const SmallVectorImpl<SDValue> &OutVals, 1408 const SDLoc &dl, SelectionDAG &DAG) const override; 1409 1410 bool supportSplitCSR(MachineFunction *MF) const override { 1411 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && 1412 MF->getFunction().hasFnAttribute(Attribute::NoUnwind); 1413 } 1414 void initializeSplitCSR(MachineBasicBlock *Entry) const override; 1415 void insertCopiesSplitCSR( 1416 MachineBasicBlock *Entry, 1417 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; 1418 1419 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; 1420 1421 bool mayBeEmittedAsTailCall(const CallInst *CI) const override; 1422 1423 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, 1424 ISD::NodeType ExtendKind) const override; 1425 1426 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, 1427 bool isVarArg, 1428 const SmallVectorImpl<ISD::OutputArg> &Outs, 1429 LLVMContext &Context) const override; 1430 1431 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; 1432 1433 TargetLoweringBase::AtomicExpansionKind 1434 shouldExpandAtomicLoadInIR(LoadInst *SI) const override; 1435 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; 1436 TargetLoweringBase::AtomicExpansionKind 1437 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const 
override; 1438 1439 LoadInst * 1440 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; 1441 1442 bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override; 1443 bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override; 1444 1445 bool needsCmpXchgNb(Type *MemType) const; 1446 1447 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, 1448 MachineBasicBlock *DispatchBB, int FI) const; 1449 1450 // Utility function to emit the low-level va_arg code for X86-64. 1451 MachineBasicBlock * 1452 EmitVAARG64WithCustomInserter(MachineInstr &MI, 1453 MachineBasicBlock *MBB) const; 1454 1455 /// Utility function to emit the xmm reg save portion of va_start. 1456 MachineBasicBlock * 1457 EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr, 1458 MachineBasicBlock *BB) const; 1459 1460 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1, 1461 MachineInstr &MI2, 1462 MachineBasicBlock *BB) const; 1463 1464 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I, 1465 MachineBasicBlock *BB) const; 1466 1467 MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I, 1468 MachineBasicBlock *BB) const; 1469 1470 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, 1471 MachineBasicBlock *BB) const; 1472 1473 MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI, 1474 MachineBasicBlock *BB) const; 1475 1476 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI, 1477 MachineBasicBlock *BB) const; 1478 1479 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI, 1480 MachineBasicBlock *BB) const; 1481 1482 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, 1483 MachineBasicBlock *BB) const; 1484 1485 MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI, 1486 MachineBasicBlock *BB) const; 1487 1488 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, 1489 MachineBasicBlock *MBB) const; 1490 1491 void emitSetJmpShadowStackFix(MachineInstr &MI, 1492 MachineBasicBlock *MBB) const; 1493 1494 
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, 1495 MachineBasicBlock *MBB) const; 1496 1497 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI, 1498 MachineBasicBlock *MBB) const; 1499 1500 MachineBasicBlock *emitFMA3Instr(MachineInstr &MI, 1501 MachineBasicBlock *MBB) const; 1502 1503 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI, 1504 MachineBasicBlock *MBB) const; 1505 1506 /// Convert a comparison if required by the subtarget. 1507 SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const; 1508 1509 /// Emit flags for the given setcc condition and operands. Also returns the 1510 /// corresponding X86 condition code constant in X86CC. 1511 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC, 1512 const SDLoc &dl, SelectionDAG &DAG, 1513 SDValue &X86CC, SDValue &Chain, 1514 bool IsSignaling) const; 1515 1516 /// Check if replacement of SQRT with RSQRT should be disabled. 1517 bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override; 1518 1519 /// Use rsqrt* to speed up sqrt calculations. 1520 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, 1521 int &RefinementSteps, bool &UseOneConstNR, 1522 bool Reciprocal) const override; 1523 1524 /// Use rcp* to speed up fdiv calculations. 1525 SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, 1526 int &RefinementSteps) const override; 1527 1528 /// Reassociate floating point divisions into multiply by reciprocal. 1529 unsigned combineRepeatedFPDivisors() const override; 1530 1531 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, 1532 SmallVectorImpl<SDNode *> &Created) const override; 1533 }; 1534 1535 namespace X86 { 1536 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 1537 const TargetLibraryInfo *libInfo); 1538 } // end namespace X86 1539 1540 // Base class for all X86 non-masked store operations. 
1541 class X86StoreSDNode : public MemSDNode { 1542 public: 1543 X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, 1544 SDVTList VTs, EVT MemVT, 1545 MachineMemOperand *MMO) 1546 :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {} 1547 const SDValue &getValue() const { return getOperand(1); } 1548 const SDValue &getBasePtr() const { return getOperand(2); } 1549 1550 static bool classof(const SDNode *N) { 1551 return N->getOpcode() == X86ISD::VTRUNCSTORES || 1552 N->getOpcode() == X86ISD::VTRUNCSTOREUS; 1553 } 1554 }; 1555 1556 // Base class for all X86 masked store operations. 1557 // The class has the same order of operands as MaskedStoreSDNode for 1558 // convenience. 1559 class X86MaskedStoreSDNode : public MemSDNode { 1560 public: 1561 X86MaskedStoreSDNode(unsigned Opcode, unsigned Order, 1562 const DebugLoc &dl, SDVTList VTs, EVT MemVT, 1563 MachineMemOperand *MMO) 1564 : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {} 1565 1566 const SDValue &getValue() const { return getOperand(1); } 1567 const SDValue &getBasePtr() const { return getOperand(2); } 1568 const SDValue &getMask() const { return getOperand(3); } 1569 1570 static bool classof(const SDNode *N) { 1571 return N->getOpcode() == X86ISD::VMTRUNCSTORES || 1572 N->getOpcode() == X86ISD::VMTRUNCSTOREUS; 1573 } 1574 }; 1575 1576 // X86 Truncating Store with Signed saturation. 1577 class TruncSStoreSDNode : public X86StoreSDNode { 1578 public: 1579 TruncSStoreSDNode(unsigned Order, const DebugLoc &dl, 1580 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) 1581 : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {} 1582 1583 static bool classof(const SDNode *N) { 1584 return N->getOpcode() == X86ISD::VTRUNCSTORES; 1585 } 1586 }; 1587 1588 // X86 Truncating Store with Unsigned saturation. 
1589 class TruncUSStoreSDNode : public X86StoreSDNode { 1590 public: 1591 TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl, 1592 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) 1593 : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {} 1594 1595 static bool classof(const SDNode *N) { 1596 return N->getOpcode() == X86ISD::VTRUNCSTOREUS; 1597 } 1598 }; 1599 1600 // X86 Truncating Masked Store with Signed saturation. 1601 class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode { 1602 public: 1603 MaskedTruncSStoreSDNode(unsigned Order, 1604 const DebugLoc &dl, SDVTList VTs, EVT MemVT, 1605 MachineMemOperand *MMO) 1606 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {} 1607 1608 static bool classof(const SDNode *N) { 1609 return N->getOpcode() == X86ISD::VMTRUNCSTORES; 1610 } 1611 }; 1612 1613 // X86 Truncating Masked Store with Unsigned saturation. 1614 class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode { 1615 public: 1616 MaskedTruncUSStoreSDNode(unsigned Order, 1617 const DebugLoc &dl, SDVTList VTs, EVT MemVT, 1618 MachineMemOperand *MMO) 1619 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {} 1620 1621 static bool classof(const SDNode *N) { 1622 return N->getOpcode() == X86ISD::VMTRUNCSTOREUS; 1623 } 1624 }; 1625 1626 // X86 specific Gather/Scatter nodes. 1627 // The class has the same order of operands as MaskedGatherScatterSDNode for 1628 // convenience. 
1629 class X86MaskedGatherScatterSDNode : public MemSDNode { 1630 public: 1631 X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order, 1632 const DebugLoc &dl, SDVTList VTs, EVT MemVT, 1633 MachineMemOperand *MMO) 1634 : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {} 1635 1636 const SDValue &getBasePtr() const { return getOperand(3); } 1637 const SDValue &getIndex() const { return getOperand(4); } 1638 const SDValue &getMask() const { return getOperand(2); } 1639 const SDValue &getScale() const { return getOperand(5); } 1640 1641 static bool classof(const SDNode *N) { 1642 return N->getOpcode() == X86ISD::MGATHER || 1643 N->getOpcode() == X86ISD::MSCATTER; 1644 } 1645 }; 1646 1647 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode { 1648 public: 1649 X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, 1650 EVT MemVT, MachineMemOperand *MMO) 1651 : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT, 1652 MMO) {} 1653 1654 const SDValue &getPassThru() const { return getOperand(1); } 1655 1656 static bool classof(const SDNode *N) { 1657 return N->getOpcode() == X86ISD::MGATHER; 1658 } 1659 }; 1660 1661 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode { 1662 public: 1663 X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, 1664 EVT MemVT, MachineMemOperand *MMO) 1665 : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT, 1666 MMO) {} 1667 1668 const SDValue &getValue() const { return getOperand(1); } 1669 1670 static bool classof(const SDNode *N) { 1671 return N->getOpcode() == X86ISD::MSCATTER; 1672 } 1673 }; 1674 1675 /// Generate unpacklo/unpackhi shuffle mask. 
1676 template <typename T = int> 1677 void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo, 1678 bool Unary) { 1679 assert(Mask.empty() && "Expected an empty shuffle mask vector"); 1680 int NumElts = VT.getVectorNumElements(); 1681 int NumEltsInLane = 128 / VT.getScalarSizeInBits(); 1682 for (int i = 0; i < NumElts; ++i) { 1683 unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane; 1684 int Pos = (i % NumEltsInLane) / 2 + LaneStart; 1685 Pos += (Unary ? 0 : NumElts * (i % 2)); 1686 Pos += (Lo ? 0 : NumEltsInLane / 2); 1687 Mask.push_back(Pos); 1688 } 1689 } 1690 1691 /// Helper function to scale a shuffle or target shuffle mask, replacing each 1692 /// mask index with the scaled sequential indices for an equivalent narrowed 1693 /// mask. This is the reverse process to canWidenShuffleElements, but can 1694 /// always succeed. 1695 template <typename T> 1696 void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask, 1697 SmallVectorImpl<T> &ScaledMask) { 1698 assert(0 < Scale && "Unexpected scaling factor"); 1699 size_t NumElts = Mask.size(); 1700 ScaledMask.assign(NumElts * Scale, -1); 1701 1702 for (size_t i = 0; i != NumElts; ++i) { 1703 int M = Mask[i]; 1704 1705 // Repeat sentinel values in every mask element. 1706 if (M < 0) { 1707 for (size_t s = 0; s != Scale; ++s) 1708 ScaledMask[(Scale * i) + s] = M; 1709 continue; 1710 } 1711 1712 // Scale mask element and increment across each mask element. 1713 for (size_t s = 0; s != Scale; ++s) 1714 ScaledMask[(Scale * i) + s] = (Scale * M) + s; 1715 } 1716 } 1717 } // end namespace llvm 1718 1719 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H 1720