1 //===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This implements the SelectionDAG class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/CodeGen/SelectionDAG.h" 14 #include "SDNodeDbgValue.h" 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APInt.h" 17 #include "llvm/ADT/APSInt.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/BitVector.h" 20 #include "llvm/ADT/DenseSet.h" 21 #include "llvm/ADT/FoldingSet.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallPtrSet.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Twine.h" 26 #include "llvm/Analysis/AliasAnalysis.h" 27 #include "llvm/Analysis/MemoryLocation.h" 28 #include "llvm/Analysis/TargetLibraryInfo.h" 29 #include "llvm/Analysis/ValueTracking.h" 30 #include "llvm/Analysis/VectorUtils.h" 31 #include "llvm/BinaryFormat/Dwarf.h" 32 #include "llvm/CodeGen/Analysis.h" 33 #include "llvm/CodeGen/FunctionLoweringInfo.h" 34 #include "llvm/CodeGen/ISDOpcodes.h" 35 #include "llvm/CodeGen/MachineBasicBlock.h" 36 #include "llvm/CodeGen/MachineConstantPool.h" 37 #include "llvm/CodeGen/MachineFrameInfo.h" 38 #include "llvm/CodeGen/MachineFunction.h" 39 #include "llvm/CodeGen/MachineMemOperand.h" 40 #include "llvm/CodeGen/RuntimeLibcallUtil.h" 41 #include "llvm/CodeGen/SDPatternMatch.h" 42 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" 43 #include "llvm/CodeGen/SelectionDAGNodes.h" 44 #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 45 #include "llvm/CodeGen/TargetFrameLowering.h" 46 #include "llvm/CodeGen/TargetLowering.h" 47 #include "llvm/CodeGen/TargetRegisterInfo.h" 48 #include 
"llvm/CodeGen/TargetSubtargetInfo.h" 49 #include "llvm/CodeGen/ValueTypes.h" 50 #include "llvm/CodeGenTypes/MachineValueType.h" 51 #include "llvm/IR/Constant.h" 52 #include "llvm/IR/Constants.h" 53 #include "llvm/IR/DataLayout.h" 54 #include "llvm/IR/DebugInfoMetadata.h" 55 #include "llvm/IR/DebugLoc.h" 56 #include "llvm/IR/DerivedTypes.h" 57 #include "llvm/IR/Function.h" 58 #include "llvm/IR/GlobalValue.h" 59 #include "llvm/IR/Metadata.h" 60 #include "llvm/IR/Type.h" 61 #include "llvm/Support/Casting.h" 62 #include "llvm/Support/CodeGen.h" 63 #include "llvm/Support/Compiler.h" 64 #include "llvm/Support/Debug.h" 65 #include "llvm/Support/ErrorHandling.h" 66 #include "llvm/Support/KnownBits.h" 67 #include "llvm/Support/MathExtras.h" 68 #include "llvm/Support/raw_ostream.h" 69 #include "llvm/Target/TargetMachine.h" 70 #include "llvm/Target/TargetOptions.h" 71 #include "llvm/TargetParser/Triple.h" 72 #include "llvm/Transforms/Utils/SizeOpts.h" 73 #include <algorithm> 74 #include <cassert> 75 #include <cstdint> 76 #include <cstdlib> 77 #include <limits> 78 #include <optional> 79 #include <set> 80 #include <string> 81 #include <utility> 82 #include <vector> 83 84 using namespace llvm; 85 using namespace llvm::SDPatternMatch; 86 87 /// makeVTList - Return an instance of the SDVTList struct initialized with the 88 /// specified members. 89 static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { 90 SDVTList Res = {VTs, NumVTs}; 91 return Res; 92 } 93 94 // Default null implementations of the callbacks. 
95 void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} 96 void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} 97 void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {} 98 99 void SelectionDAG::DAGNodeDeletedListener::anchor() {} 100 void SelectionDAG::DAGNodeInsertedListener::anchor() {} 101 102 #define DEBUG_TYPE "selectiondag" 103 104 static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt", 105 cl::Hidden, cl::init(true), 106 cl::desc("Gang up loads and stores generated by inlining of memcpy")); 107 108 static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max", 109 cl::desc("Number limit for gluing ld/st of memcpy."), 110 cl::Hidden, cl::init(0)); 111 112 static cl::opt<unsigned> 113 MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), 114 cl::desc("DAG combiner limit number of steps when searching DAG " 115 "for predecessor nodes")); 116 117 static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) { 118 LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G);); 119 } 120 121 unsigned SelectionDAG::getHasPredecessorMaxSteps() { return MaxSteps; } 122 123 //===----------------------------------------------------------------------===// 124 // ConstantFPSDNode Class 125 //===----------------------------------------------------------------------===// 126 127 /// isExactlyValue - We don't rely on operator== working on double values, as 128 /// it returns true for things that are clearly not equal, like -0.0 and 0.0. 129 /// As such, this method can be used to do an exact bit-for-bit comparison of 130 /// two floating point values. 131 bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { 132 return getValueAPF().bitwiseIsEqual(V); 133 } 134 135 bool ConstantFPSDNode::isValueValidForType(EVT VT, 136 const APFloat& Val) { 137 assert(VT.isFloatingPoint() && "Can only convert between FP types"); 138 139 // convert modifies in place, so make a copy. 
140 APFloat Val2 = APFloat(Val); 141 bool losesInfo; 142 (void)Val2.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven, 143 &losesInfo); 144 return !losesInfo; 145 } 146 147 //===----------------------------------------------------------------------===// 148 // ISD Namespace 149 //===----------------------------------------------------------------------===// 150 151 bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { 152 if (N->getOpcode() == ISD::SPLAT_VECTOR) { 153 if (auto OptAPInt = N->getOperand(0)->bitcastToAPInt()) { 154 unsigned EltSize = 155 N->getValueType(0).getVectorElementType().getSizeInBits(); 156 SplatVal = OptAPInt->trunc(EltSize); 157 return true; 158 } 159 } 160 161 auto *BV = dyn_cast<BuildVectorSDNode>(N); 162 if (!BV) 163 return false; 164 165 APInt SplatUndef; 166 unsigned SplatBitSize; 167 bool HasUndefs; 168 unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); 169 // Endianness does not matter here. We are checking for a splat given the 170 // element size of the vector, and if we find such a splat for little endian 171 // layout, then that should be valid also for big endian (as the full vector 172 // size is known to be a multiple of the element size). 173 const bool IsBigEndian = false; 174 return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs, 175 EltSize, IsBigEndian) && 176 EltSize == SplatBitSize; 177 } 178 179 // FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be 180 // specializations of the more general isConstantSplatVector()? 181 182 bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) { 183 // Look through a bit convert. 
184 while (N->getOpcode() == ISD::BITCAST) 185 N = N->getOperand(0).getNode(); 186 187 if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) { 188 APInt SplatVal; 189 return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes(); 190 } 191 192 if (N->getOpcode() != ISD::BUILD_VECTOR) return false; 193 194 unsigned i = 0, e = N->getNumOperands(); 195 196 // Skip over all of the undef values. 197 while (i != e && N->getOperand(i).isUndef()) 198 ++i; 199 200 // Do not accept an all-undef vector. 201 if (i == e) return false; 202 203 // Do not accept build_vectors that aren't all constants or which have non-~0 204 // elements. We have to be a bit careful here, as the type of the constant 205 // may not be the same as the type of the vector elements due to type 206 // legalization (the elements are promoted to a legal type for the target and 207 // a vector of a type may be legal when the base element type is not). 208 // We only want to check enough bits to cover the vector elements, because 209 // we care if the resultant vector is all ones, not whether the individual 210 // constants are. 211 SDValue NotZero = N->getOperand(i); 212 if (auto OptAPInt = NotZero->bitcastToAPInt()) { 213 unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); 214 if (OptAPInt->countr_one() < EltSize) 215 return false; 216 } else 217 return false; 218 219 // Okay, we have at least one ~0 value, check to see if the rest match or are 220 // undefs. Even with the above element type twiddling, this should be OK, as 221 // the same type legalization should have applied to all the elements. 222 for (++i; i != e; ++i) 223 if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef()) 224 return false; 225 return true; 226 } 227 228 bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) { 229 // Look through a bit convert. 
230 while (N->getOpcode() == ISD::BITCAST) 231 N = N->getOperand(0).getNode(); 232 233 if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) { 234 APInt SplatVal; 235 return isConstantSplatVector(N, SplatVal) && SplatVal.isZero(); 236 } 237 238 if (N->getOpcode() != ISD::BUILD_VECTOR) return false; 239 240 bool IsAllUndef = true; 241 for (const SDValue &Op : N->op_values()) { 242 if (Op.isUndef()) 243 continue; 244 IsAllUndef = false; 245 // Do not accept build_vectors that aren't all constants or which have non-0 246 // elements. We have to be a bit careful here, as the type of the constant 247 // may not be the same as the type of the vector elements due to type 248 // legalization (the elements are promoted to a legal type for the target 249 // and a vector of a type may be legal when the base element type is not). 250 // We only want to check enough bits to cover the vector elements, because 251 // we care if the resultant vector is all zeros, not whether the individual 252 // constants are. 253 if (auto OptAPInt = Op->bitcastToAPInt()) { 254 unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); 255 if (OptAPInt->countr_zero() < EltSize) 256 return false; 257 } else 258 return false; 259 } 260 261 // Do not accept an all-undef vector. 
262 if (IsAllUndef) 263 return false; 264 return true; 265 } 266 267 bool ISD::isBuildVectorAllOnes(const SDNode *N) { 268 return isConstantSplatVectorAllOnes(N, /*BuildVectorOnly*/ true); 269 } 270 271 bool ISD::isBuildVectorAllZeros(const SDNode *N) { 272 return isConstantSplatVectorAllZeros(N, /*BuildVectorOnly*/ true); 273 } 274 275 bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { 276 if (N->getOpcode() != ISD::BUILD_VECTOR) 277 return false; 278 279 for (const SDValue &Op : N->op_values()) { 280 if (Op.isUndef()) 281 continue; 282 if (!isa<ConstantSDNode>(Op)) 283 return false; 284 } 285 return true; 286 } 287 288 bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { 289 if (N->getOpcode() != ISD::BUILD_VECTOR) 290 return false; 291 292 for (const SDValue &Op : N->op_values()) { 293 if (Op.isUndef()) 294 continue; 295 if (!isa<ConstantFPSDNode>(Op)) 296 return false; 297 } 298 return true; 299 } 300 301 bool ISD::isVectorShrinkable(const SDNode *N, unsigned NewEltSize, 302 bool Signed) { 303 assert(N->getValueType(0).isVector() && "Expected a vector!"); 304 305 unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); 306 if (EltSize <= NewEltSize) 307 return false; 308 309 if (N->getOpcode() == ISD::ZERO_EXTEND) { 310 return (N->getOperand(0).getValueType().getScalarSizeInBits() <= 311 NewEltSize) && 312 !Signed; 313 } 314 if (N->getOpcode() == ISD::SIGN_EXTEND) { 315 return (N->getOperand(0).getValueType().getScalarSizeInBits() <= 316 NewEltSize) && 317 Signed; 318 } 319 if (N->getOpcode() != ISD::BUILD_VECTOR) 320 return false; 321 322 for (const SDValue &Op : N->op_values()) { 323 if (Op.isUndef()) 324 continue; 325 if (!isa<ConstantSDNode>(Op)) 326 return false; 327 328 APInt C = Op->getAsAPIntVal().trunc(EltSize); 329 if (Signed && C.trunc(NewEltSize).sext(EltSize) != C) 330 return false; 331 if (!Signed && C.trunc(NewEltSize).zext(EltSize) != C) 332 return false; 333 } 334 335 return true; 336 } 337 338 bool 
ISD::allOperandsUndef(const SDNode *N) { 339 // Return false if the node has no operands. 340 // This is "logically inconsistent" with the definition of "all" but 341 // is probably the desired behavior. 342 if (N->getNumOperands() == 0) 343 return false; 344 return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); }); 345 } 346 347 bool ISD::isFreezeUndef(const SDNode *N) { 348 return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef(); 349 } 350 351 template <typename ConstNodeType> 352 bool ISD::matchUnaryPredicateImpl(SDValue Op, 353 std::function<bool(ConstNodeType *)> Match, 354 bool AllowUndefs, bool AllowTruncation) { 355 // FIXME: Add support for scalar UNDEF cases? 356 if (auto *C = dyn_cast<ConstNodeType>(Op)) 357 return Match(C); 358 359 // FIXME: Add support for vector UNDEF cases? 360 if (ISD::BUILD_VECTOR != Op.getOpcode() && 361 ISD::SPLAT_VECTOR != Op.getOpcode()) 362 return false; 363 364 EVT SVT = Op.getValueType().getScalarType(); 365 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { 366 if (AllowUndefs && Op.getOperand(i).isUndef()) { 367 if (!Match(nullptr)) 368 return false; 369 continue; 370 } 371 372 auto *Cst = dyn_cast<ConstNodeType>(Op.getOperand(i)); 373 if (!Cst || (!AllowTruncation && Cst->getValueType(0) != SVT) || 374 !Match(Cst)) 375 return false; 376 } 377 return true; 378 } 379 // Build used template types. 380 template bool ISD::matchUnaryPredicateImpl<ConstantSDNode>( 381 SDValue, std::function<bool(ConstantSDNode *)>, bool, bool); 382 template bool ISD::matchUnaryPredicateImpl<ConstantFPSDNode>( 383 SDValue, std::function<bool(ConstantFPSDNode *)>, bool, bool); 384 385 bool ISD::matchBinaryPredicate( 386 SDValue LHS, SDValue RHS, 387 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match, 388 bool AllowUndefs, bool AllowTypeMismatch) { 389 if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType()) 390 return false; 391 392 // TODO: Add support for scalar UNDEF cases? 
393 if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) 394 if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) 395 return Match(LHSCst, RHSCst); 396 397 // TODO: Add support for vector UNDEF cases? 398 if (LHS.getOpcode() != RHS.getOpcode() || 399 (LHS.getOpcode() != ISD::BUILD_VECTOR && 400 LHS.getOpcode() != ISD::SPLAT_VECTOR)) 401 return false; 402 403 EVT SVT = LHS.getValueType().getScalarType(); 404 for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { 405 SDValue LHSOp = LHS.getOperand(i); 406 SDValue RHSOp = RHS.getOperand(i); 407 bool LHSUndef = AllowUndefs && LHSOp.isUndef(); 408 bool RHSUndef = AllowUndefs && RHSOp.isUndef(); 409 auto *LHSCst = dyn_cast<ConstantSDNode>(LHSOp); 410 auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp); 411 if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef)) 412 return false; 413 if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT || 414 LHSOp.getValueType() != RHSOp.getValueType())) 415 return false; 416 if (!Match(LHSCst, RHSCst)) 417 return false; 418 } 419 return true; 420 } 421 422 ISD::NodeType ISD::getInverseMinMaxOpcode(unsigned MinMaxOpc) { 423 switch (MinMaxOpc) { 424 default: 425 llvm_unreachable("unrecognized opcode"); 426 case ISD::UMIN: 427 return ISD::UMAX; 428 case ISD::UMAX: 429 return ISD::UMIN; 430 case ISD::SMIN: 431 return ISD::SMAX; 432 case ISD::SMAX: 433 return ISD::SMIN; 434 } 435 } 436 437 ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) { 438 switch (VecReduceOpcode) { 439 default: 440 llvm_unreachable("Expected VECREDUCE opcode"); 441 case ISD::VECREDUCE_FADD: 442 case ISD::VECREDUCE_SEQ_FADD: 443 case ISD::VP_REDUCE_FADD: 444 case ISD::VP_REDUCE_SEQ_FADD: 445 return ISD::FADD; 446 case ISD::VECREDUCE_FMUL: 447 case ISD::VECREDUCE_SEQ_FMUL: 448 case ISD::VP_REDUCE_FMUL: 449 case ISD::VP_REDUCE_SEQ_FMUL: 450 return ISD::FMUL; 451 case ISD::VECREDUCE_ADD: 452 case ISD::VP_REDUCE_ADD: 453 return ISD::ADD; 454 case ISD::VECREDUCE_MUL: 455 case ISD::VP_REDUCE_MUL: 456 return 
ISD::MUL; 457 case ISD::VECREDUCE_AND: 458 case ISD::VP_REDUCE_AND: 459 return ISD::AND; 460 case ISD::VECREDUCE_OR: 461 case ISD::VP_REDUCE_OR: 462 return ISD::OR; 463 case ISD::VECREDUCE_XOR: 464 case ISD::VP_REDUCE_XOR: 465 return ISD::XOR; 466 case ISD::VECREDUCE_SMAX: 467 case ISD::VP_REDUCE_SMAX: 468 return ISD::SMAX; 469 case ISD::VECREDUCE_SMIN: 470 case ISD::VP_REDUCE_SMIN: 471 return ISD::SMIN; 472 case ISD::VECREDUCE_UMAX: 473 case ISD::VP_REDUCE_UMAX: 474 return ISD::UMAX; 475 case ISD::VECREDUCE_UMIN: 476 case ISD::VP_REDUCE_UMIN: 477 return ISD::UMIN; 478 case ISD::VECREDUCE_FMAX: 479 case ISD::VP_REDUCE_FMAX: 480 return ISD::FMAXNUM; 481 case ISD::VECREDUCE_FMIN: 482 case ISD::VP_REDUCE_FMIN: 483 return ISD::FMINNUM; 484 case ISD::VECREDUCE_FMAXIMUM: 485 case ISD::VP_REDUCE_FMAXIMUM: 486 return ISD::FMAXIMUM; 487 case ISD::VECREDUCE_FMINIMUM: 488 case ISD::VP_REDUCE_FMINIMUM: 489 return ISD::FMINIMUM; 490 } 491 } 492 493 bool ISD::isVPOpcode(unsigned Opcode) { 494 switch (Opcode) { 495 default: 496 return false; 497 #define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) \ 498 case ISD::VPSD: \ 499 return true; 500 #include "llvm/IR/VPIntrinsics.def" 501 } 502 } 503 504 bool ISD::isVPBinaryOp(unsigned Opcode) { 505 switch (Opcode) { 506 default: 507 break; 508 #define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) 
case ISD::VPSD: 509 #define VP_PROPERTY_BINARYOP return true; 510 #define END_REGISTER_VP_SDNODE(VPSD) break; 511 #include "llvm/IR/VPIntrinsics.def" 512 } 513 return false; 514 } 515 516 bool ISD::isVPReduction(unsigned Opcode) { 517 switch (Opcode) { 518 default: 519 return false; 520 case ISD::VP_REDUCE_ADD: 521 case ISD::VP_REDUCE_MUL: 522 case ISD::VP_REDUCE_AND: 523 case ISD::VP_REDUCE_OR: 524 case ISD::VP_REDUCE_XOR: 525 case ISD::VP_REDUCE_SMAX: 526 case ISD::VP_REDUCE_SMIN: 527 case ISD::VP_REDUCE_UMAX: 528 case ISD::VP_REDUCE_UMIN: 529 case ISD::VP_REDUCE_FMAX: 530 case ISD::VP_REDUCE_FMIN: 531 case ISD::VP_REDUCE_FMAXIMUM: 532 case ISD::VP_REDUCE_FMINIMUM: 533 case ISD::VP_REDUCE_FADD: 534 case ISD::VP_REDUCE_FMUL: 535 case ISD::VP_REDUCE_SEQ_FADD: 536 case ISD::VP_REDUCE_SEQ_FMUL: 537 return true; 538 } 539 } 540 541 /// The operand position of the vector mask. 542 std::optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { 543 switch (Opcode) { 544 default: 545 return std::nullopt; 546 #define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \ 547 case ISD::VPSD: \ 548 return MASKPOS; 549 #include "llvm/IR/VPIntrinsics.def" 550 } 551 } 552 553 /// The operand position of the explicit vector length parameter. 554 std::optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) { 555 switch (Opcode) { 556 default: 557 return std::nullopt; 558 #define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \ 559 case ISD::VPSD: \ 560 return EVLPOS; 561 #include "llvm/IR/VPIntrinsics.def" 562 } 563 } 564 565 std::optional<unsigned> ISD::getBaseOpcodeForVP(unsigned VPOpcode, 566 bool hasFPExcept) { 567 // FIXME: Return strict opcodes in case of fp exceptions. 568 switch (VPOpcode) { 569 default: 570 return std::nullopt; 571 #define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) 
case ISD::VPOPC: 572 #define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) return ISD::SDOPC; 573 #define END_REGISTER_VP_SDNODE(VPOPC) break; 574 #include "llvm/IR/VPIntrinsics.def" 575 } 576 return std::nullopt; 577 } 578 579 std::optional<unsigned> ISD::getVPForBaseOpcode(unsigned Opcode) { 580 switch (Opcode) { 581 default: 582 return std::nullopt; 583 #define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) break; 584 #define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) case ISD::SDOPC: 585 #define END_REGISTER_VP_SDNODE(VPOPC) return ISD::VPOPC; 586 #include "llvm/IR/VPIntrinsics.def" 587 } 588 } 589 590 ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { 591 switch (ExtType) { 592 case ISD::EXTLOAD: 593 return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND; 594 case ISD::SEXTLOAD: 595 return ISD::SIGN_EXTEND; 596 case ISD::ZEXTLOAD: 597 return ISD::ZERO_EXTEND; 598 default: 599 break; 600 } 601 602 llvm_unreachable("Invalid LoadExtType"); 603 } 604 605 ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { 606 // To perform this operation, we just need to swap the L and G bits of the 607 // operation. 608 unsigned OldL = (Operation >> 2) & 1; 609 unsigned OldG = (Operation >> 1) & 1; 610 return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits 611 (OldL << 1) | // New G bit 612 (OldG << 2)); // New L bit. 613 } 614 615 static ISD::CondCode getSetCCInverseImpl(ISD::CondCode Op, bool isIntegerLike) { 616 unsigned Operation = Op; 617 if (isIntegerLike) 618 Operation ^= 7; // Flip L, G, E bits, but not U. 619 else 620 Operation ^= 15; // Flip all of the condition bits. 621 622 if (Operation > ISD::SETTRUE2) 623 Operation &= ~8; // Don't let N and U bits get set. 
624 625 return ISD::CondCode(Operation); 626 } 627 628 ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, EVT Type) { 629 return getSetCCInverseImpl(Op, Type.isInteger()); 630 } 631 632 ISD::CondCode ISD::GlobalISel::getSetCCInverse(ISD::CondCode Op, 633 bool isIntegerLike) { 634 return getSetCCInverseImpl(Op, isIntegerLike); 635 } 636 637 /// For an integer comparison, return 1 if the comparison is a signed operation 638 /// and 2 if the result is an unsigned comparison. Return zero if the operation 639 /// does not depend on the sign of the input (setne and seteq). 640 static int isSignedOp(ISD::CondCode Opcode) { 641 switch (Opcode) { 642 default: llvm_unreachable("Illegal integer setcc operation!"); 643 case ISD::SETEQ: 644 case ISD::SETNE: return 0; 645 case ISD::SETLT: 646 case ISD::SETLE: 647 case ISD::SETGT: 648 case ISD::SETGE: return 1; 649 case ISD::SETULT: 650 case ISD::SETULE: 651 case ISD::SETUGT: 652 case ISD::SETUGE: return 2; 653 } 654 } 655 656 ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, 657 EVT Type) { 658 bool IsInteger = Type.isInteger(); 659 if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) 660 // Cannot fold a signed integer setcc with an unsigned integer setcc. 661 return ISD::SETCC_INVALID; 662 663 unsigned Op = Op1 | Op2; // Combine all of the condition bits. 664 665 // If the N and U bits get set, then the resultant comparison DOES suddenly 666 // care about orderedness, and it is true when ordered. 667 if (Op > ISD::SETTRUE2) 668 Op &= ~16; // Clear the U bit if the N bit is set. 669 670 // Canonicalize illegal integer setcc's. 671 if (IsInteger && Op == ISD::SETUNE) // e.g. 
SETUGT | SETULT 672 Op = ISD::SETNE; 673 674 return ISD::CondCode(Op); 675 } 676 677 ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, 678 EVT Type) { 679 bool IsInteger = Type.isInteger(); 680 if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) 681 // Cannot fold a signed setcc with an unsigned setcc. 682 return ISD::SETCC_INVALID; 683 684 // Combine all of the condition bits. 685 ISD::CondCode Result = ISD::CondCode(Op1 & Op2); 686 687 // Canonicalize illegal integer setcc's. 688 if (IsInteger) { 689 switch (Result) { 690 default: break; 691 case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT 692 case ISD::SETOEQ: // SETEQ & SETU[LG]E 693 case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE 694 case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE 695 case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE 696 } 697 } 698 699 return Result; 700 } 701 702 //===----------------------------------------------------------------------===// 703 // SDNode Profile Support 704 //===----------------------------------------------------------------------===// 705 706 /// AddNodeIDOpcode - Add the node opcode to the NodeID data. 707 static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { 708 ID.AddInteger(OpC); 709 } 710 711 /// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them 712 /// solely with their pointer. 713 static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { 714 ID.AddPointer(VTList.VTs); 715 } 716 717 /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. 718 static void AddNodeIDOperands(FoldingSetNodeID &ID, 719 ArrayRef<SDValue> Ops) { 720 for (const auto &Op : Ops) { 721 ID.AddPointer(Op.getNode()); 722 ID.AddInteger(Op.getResNo()); 723 } 724 } 725 726 /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. 
727 static void AddNodeIDOperands(FoldingSetNodeID &ID, 728 ArrayRef<SDUse> Ops) { 729 for (const auto &Op : Ops) { 730 ID.AddPointer(Op.getNode()); 731 ID.AddInteger(Op.getResNo()); 732 } 733 } 734 735 static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned OpC, 736 SDVTList VTList, ArrayRef<SDValue> OpList) { 737 AddNodeIDOpcode(ID, OpC); 738 AddNodeIDValueTypes(ID, VTList); 739 AddNodeIDOperands(ID, OpList); 740 } 741 742 /// If this is an SDNode with special info, add this info to the NodeID data. 743 static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { 744 switch (N->getOpcode()) { 745 case ISD::TargetExternalSymbol: 746 case ISD::ExternalSymbol: 747 case ISD::MCSymbol: 748 llvm_unreachable("Should only be used on nodes with operands"); 749 default: break; // Normal nodes don't need extra info. 750 case ISD::TargetConstant: 751 case ISD::Constant: { 752 const ConstantSDNode *C = cast<ConstantSDNode>(N); 753 ID.AddPointer(C->getConstantIntValue()); 754 ID.AddBoolean(C->isOpaque()); 755 break; 756 } 757 case ISD::TargetConstantFP: 758 case ISD::ConstantFP: 759 ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); 760 break; 761 case ISD::TargetGlobalAddress: 762 case ISD::GlobalAddress: 763 case ISD::TargetGlobalTLSAddress: 764 case ISD::GlobalTLSAddress: { 765 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); 766 ID.AddPointer(GA->getGlobal()); 767 ID.AddInteger(GA->getOffset()); 768 ID.AddInteger(GA->getTargetFlags()); 769 break; 770 } 771 case ISD::BasicBlock: 772 ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock()); 773 break; 774 case ISD::Register: 775 ID.AddInteger(cast<RegisterSDNode>(N)->getReg().id()); 776 break; 777 case ISD::RegisterMask: 778 ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask()); 779 break; 780 case ISD::SRCVALUE: 781 ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); 782 break; 783 case ISD::FrameIndex: 784 case ISD::TargetFrameIndex: 785 
ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); 786 break; 787 case ISD::LIFETIME_START: 788 case ISD::LIFETIME_END: 789 if (cast<LifetimeSDNode>(N)->hasOffset()) { 790 ID.AddInteger(cast<LifetimeSDNode>(N)->getSize()); 791 ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset()); 792 } 793 break; 794 case ISD::PSEUDO_PROBE: 795 ID.AddInteger(cast<PseudoProbeSDNode>(N)->getGuid()); 796 ID.AddInteger(cast<PseudoProbeSDNode>(N)->getIndex()); 797 ID.AddInteger(cast<PseudoProbeSDNode>(N)->getAttributes()); 798 break; 799 case ISD::JumpTable: 800 case ISD::TargetJumpTable: 801 ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); 802 ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags()); 803 break; 804 case ISD::ConstantPool: 805 case ISD::TargetConstantPool: { 806 const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); 807 ID.AddInteger(CP->getAlign().value()); 808 ID.AddInteger(CP->getOffset()); 809 if (CP->isMachineConstantPoolEntry()) 810 CP->getMachineCPVal()->addSelectionDAGCSEId(ID); 811 else 812 ID.AddPointer(CP->getConstVal()); 813 ID.AddInteger(CP->getTargetFlags()); 814 break; 815 } 816 case ISD::TargetIndex: { 817 const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N); 818 ID.AddInteger(TI->getIndex()); 819 ID.AddInteger(TI->getOffset()); 820 ID.AddInteger(TI->getTargetFlags()); 821 break; 822 } 823 case ISD::LOAD: { 824 const LoadSDNode *LD = cast<LoadSDNode>(N); 825 ID.AddInteger(LD->getMemoryVT().getRawBits()); 826 ID.AddInteger(LD->getRawSubclassData()); 827 ID.AddInteger(LD->getPointerInfo().getAddrSpace()); 828 ID.AddInteger(LD->getMemOperand()->getFlags()); 829 break; 830 } 831 case ISD::STORE: { 832 const StoreSDNode *ST = cast<StoreSDNode>(N); 833 ID.AddInteger(ST->getMemoryVT().getRawBits()); 834 ID.AddInteger(ST->getRawSubclassData()); 835 ID.AddInteger(ST->getPointerInfo().getAddrSpace()); 836 ID.AddInteger(ST->getMemOperand()->getFlags()); 837 break; 838 } 839 case ISD::VP_LOAD: { 840 const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N); 
841 ID.AddInteger(ELD->getMemoryVT().getRawBits()); 842 ID.AddInteger(ELD->getRawSubclassData()); 843 ID.AddInteger(ELD->getPointerInfo().getAddrSpace()); 844 ID.AddInteger(ELD->getMemOperand()->getFlags()); 845 break; 846 } 847 case ISD::VP_STORE: { 848 const VPStoreSDNode *EST = cast<VPStoreSDNode>(N); 849 ID.AddInteger(EST->getMemoryVT().getRawBits()); 850 ID.AddInteger(EST->getRawSubclassData()); 851 ID.AddInteger(EST->getPointerInfo().getAddrSpace()); 852 ID.AddInteger(EST->getMemOperand()->getFlags()); 853 break; 854 } 855 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: { 856 const VPStridedLoadSDNode *SLD = cast<VPStridedLoadSDNode>(N); 857 ID.AddInteger(SLD->getMemoryVT().getRawBits()); 858 ID.AddInteger(SLD->getRawSubclassData()); 859 ID.AddInteger(SLD->getPointerInfo().getAddrSpace()); 860 break; 861 } 862 case ISD::EXPERIMENTAL_VP_STRIDED_STORE: { 863 const VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N); 864 ID.AddInteger(SST->getMemoryVT().getRawBits()); 865 ID.AddInteger(SST->getRawSubclassData()); 866 ID.AddInteger(SST->getPointerInfo().getAddrSpace()); 867 break; 868 } 869 case ISD::VP_GATHER: { 870 const VPGatherSDNode *EG = cast<VPGatherSDNode>(N); 871 ID.AddInteger(EG->getMemoryVT().getRawBits()); 872 ID.AddInteger(EG->getRawSubclassData()); 873 ID.AddInteger(EG->getPointerInfo().getAddrSpace()); 874 ID.AddInteger(EG->getMemOperand()->getFlags()); 875 break; 876 } 877 case ISD::VP_SCATTER: { 878 const VPScatterSDNode *ES = cast<VPScatterSDNode>(N); 879 ID.AddInteger(ES->getMemoryVT().getRawBits()); 880 ID.AddInteger(ES->getRawSubclassData()); 881 ID.AddInteger(ES->getPointerInfo().getAddrSpace()); 882 ID.AddInteger(ES->getMemOperand()->getFlags()); 883 break; 884 } 885 case ISD::MLOAD: { 886 const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); 887 ID.AddInteger(MLD->getMemoryVT().getRawBits()); 888 ID.AddInteger(MLD->getRawSubclassData()); 889 ID.AddInteger(MLD->getPointerInfo().getAddrSpace()); 890 
ID.AddInteger(MLD->getMemOperand()->getFlags()); 891 break; 892 } 893 case ISD::MSTORE: { 894 const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); 895 ID.AddInteger(MST->getMemoryVT().getRawBits()); 896 ID.AddInteger(MST->getRawSubclassData()); 897 ID.AddInteger(MST->getPointerInfo().getAddrSpace()); 898 ID.AddInteger(MST->getMemOperand()->getFlags()); 899 break; 900 } 901 case ISD::MGATHER: { 902 const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N); 903 ID.AddInteger(MG->getMemoryVT().getRawBits()); 904 ID.AddInteger(MG->getRawSubclassData()); 905 ID.AddInteger(MG->getPointerInfo().getAddrSpace()); 906 ID.AddInteger(MG->getMemOperand()->getFlags()); 907 break; 908 } 909 case ISD::MSCATTER: { 910 const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N); 911 ID.AddInteger(MS->getMemoryVT().getRawBits()); 912 ID.AddInteger(MS->getRawSubclassData()); 913 ID.AddInteger(MS->getPointerInfo().getAddrSpace()); 914 ID.AddInteger(MS->getMemOperand()->getFlags()); 915 break; 916 } 917 case ISD::ATOMIC_CMP_SWAP: 918 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: 919 case ISD::ATOMIC_SWAP: 920 case ISD::ATOMIC_LOAD_ADD: 921 case ISD::ATOMIC_LOAD_SUB: 922 case ISD::ATOMIC_LOAD_AND: 923 case ISD::ATOMIC_LOAD_CLR: 924 case ISD::ATOMIC_LOAD_OR: 925 case ISD::ATOMIC_LOAD_XOR: 926 case ISD::ATOMIC_LOAD_NAND: 927 case ISD::ATOMIC_LOAD_MIN: 928 case ISD::ATOMIC_LOAD_MAX: 929 case ISD::ATOMIC_LOAD_UMIN: 930 case ISD::ATOMIC_LOAD_UMAX: 931 case ISD::ATOMIC_LOAD: 932 case ISD::ATOMIC_STORE: { 933 const AtomicSDNode *AT = cast<AtomicSDNode>(N); 934 ID.AddInteger(AT->getMemoryVT().getRawBits()); 935 ID.AddInteger(AT->getRawSubclassData()); 936 ID.AddInteger(AT->getPointerInfo().getAddrSpace()); 937 ID.AddInteger(AT->getMemOperand()->getFlags()); 938 break; 939 } 940 case ISD::VECTOR_SHUFFLE: { 941 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask(); 942 for (int M : Mask) 943 ID.AddInteger(M); 944 break; 945 } 946 case ISD::ADDRSPACECAST: { 947 const AddrSpaceCastSDNode 
*ASC = cast<AddrSpaceCastSDNode>(N); 948 ID.AddInteger(ASC->getSrcAddressSpace()); 949 ID.AddInteger(ASC->getDestAddressSpace()); 950 break; 951 } 952 case ISD::TargetBlockAddress: 953 case ISD::BlockAddress: { 954 const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N); 955 ID.AddPointer(BA->getBlockAddress()); 956 ID.AddInteger(BA->getOffset()); 957 ID.AddInteger(BA->getTargetFlags()); 958 break; 959 } 960 case ISD::AssertAlign: 961 ID.AddInteger(cast<AssertAlignSDNode>(N)->getAlign().value()); 962 break; 963 case ISD::PREFETCH: 964 case ISD::INTRINSIC_VOID: 965 case ISD::INTRINSIC_W_CHAIN: 966 // Handled by MemIntrinsicSDNode check after the switch. 967 break; 968 case ISD::MDNODE_SDNODE: 969 ID.AddPointer(cast<MDNodeSDNode>(N)->getMD()); 970 break; 971 } // end switch (N->getOpcode()) 972 973 // MemIntrinsic nodes could also have subclass data, address spaces, and flags 974 // to check. 975 if (auto *MN = dyn_cast<MemIntrinsicSDNode>(N)) { 976 ID.AddInteger(MN->getRawSubclassData()); 977 ID.AddInteger(MN->getPointerInfo().getAddrSpace()); 978 ID.AddInteger(MN->getMemOperand()->getFlags()); 979 ID.AddInteger(MN->getMemoryVT().getRawBits()); 980 } 981 } 982 983 /// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID 984 /// data. 985 static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { 986 AddNodeIDOpcode(ID, N->getOpcode()); 987 // Add the return value info. 988 AddNodeIDValueTypes(ID, N->getVTList()); 989 // Add the operand info. 990 AddNodeIDOperands(ID, N->ops()); 991 992 // Handle SDNode leafs with special info. 993 AddNodeIDCustom(ID, N); 994 } 995 996 //===----------------------------------------------------------------------===// 997 // SelectionDAG Class 998 //===----------------------------------------------------------------------===// 999 1000 /// doNotCSE - Return true if CSE should not be performed for this node. 
1001 static bool doNotCSE(SDNode *N) { 1002 if (N->getValueType(0) == MVT::Glue) 1003 return true; // Never CSE anything that produces a glue result. 1004 1005 switch (N->getOpcode()) { 1006 default: break; 1007 case ISD::HANDLENODE: 1008 case ISD::EH_LABEL: 1009 return true; // Never CSE these nodes. 1010 } 1011 1012 // Check that remaining values produced are not flags. 1013 for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) 1014 if (N->getValueType(i) == MVT::Glue) 1015 return true; // Never CSE anything that produces a glue result. 1016 1017 return false; 1018 } 1019 1020 /// RemoveDeadNodes - This method deletes all unreachable nodes in the 1021 /// SelectionDAG. 1022 void SelectionDAG::RemoveDeadNodes() { 1023 // Create a dummy node (which is not added to allnodes), that adds a reference 1024 // to the root node, preventing it from being deleted. 1025 HandleSDNode Dummy(getRoot()); 1026 1027 SmallVector<SDNode*, 128> DeadNodes; 1028 1029 // Add all obviously-dead nodes to the DeadNodes worklist. 1030 for (SDNode &Node : allnodes()) 1031 if (Node.use_empty()) 1032 DeadNodes.push_back(&Node); 1033 1034 RemoveDeadNodes(DeadNodes); 1035 1036 // If the root changed (e.g. it was a dead load, update the root). 1037 setRoot(Dummy.getValue()); 1038 } 1039 1040 /// RemoveDeadNodes - This method deletes the unreachable nodes in the 1041 /// given list, and any nodes that become unreachable as a result. 1042 void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) { 1043 1044 // Process the worklist, deleting the nodes and adding their uses to the 1045 // worklist. 1046 while (!DeadNodes.empty()) { 1047 SDNode *N = DeadNodes.pop_back_val(); 1048 // Skip to next node if we've already managed to delete the node. This could 1049 // happen if replacing a node causes a node previously added to the node to 1050 // be deleted. 
1051 if (N->getOpcode() == ISD::DELETED_NODE) 1052 continue; 1053 1054 for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) 1055 DUL->NodeDeleted(N, nullptr); 1056 1057 // Take the node out of the appropriate CSE map. 1058 RemoveNodeFromCSEMaps(N); 1059 1060 // Next, brutally remove the operand list. This is safe to do, as there are 1061 // no cycles in the graph. 1062 for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { 1063 SDUse &Use = *I++; 1064 SDNode *Operand = Use.getNode(); 1065 Use.set(SDValue()); 1066 1067 // Now that we removed this operand, see if there are no uses of it left. 1068 if (Operand->use_empty()) 1069 DeadNodes.push_back(Operand); 1070 } 1071 1072 DeallocateNode(N); 1073 } 1074 } 1075 1076 void SelectionDAG::RemoveDeadNode(SDNode *N){ 1077 SmallVector<SDNode*, 16> DeadNodes(1, N); 1078 1079 // Create a dummy node that adds a reference to the root node, preventing 1080 // it from being deleted. (This matters if the root is an operand of the 1081 // dead node.) 1082 HandleSDNode Dummy(getRoot()); 1083 1084 RemoveDeadNodes(DeadNodes); 1085 } 1086 1087 void SelectionDAG::DeleteNode(SDNode *N) { 1088 // First take this out of the appropriate CSE map. 1089 RemoveNodeFromCSEMaps(N); 1090 1091 // Finally, remove uses due to operands of this node, remove from the 1092 // AllNodes list, and delete the node. 1093 DeleteNodeNotInCSEMaps(N); 1094 } 1095 1096 void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { 1097 assert(N->getIterator() != AllNodes.begin() && 1098 "Cannot delete the entry node!"); 1099 assert(N->use_empty() && "Cannot delete a node that is not dead!"); 1100 1101 // Drop all of the operands and decrement used node's use counts. 
1102 N->DropOperands(); 1103 1104 DeallocateNode(N); 1105 } 1106 1107 void SDDbgInfo::add(SDDbgValue *V, bool isParameter) { 1108 assert(!(V->isVariadic() && isParameter)); 1109 if (isParameter) 1110 ByvalParmDbgValues.push_back(V); 1111 else 1112 DbgValues.push_back(V); 1113 for (const SDNode *Node : V->getSDNodes()) 1114 if (Node) 1115 DbgValMap[Node].push_back(V); 1116 } 1117 1118 void SDDbgInfo::erase(const SDNode *Node) { 1119 DbgValMapType::iterator I = DbgValMap.find(Node); 1120 if (I == DbgValMap.end()) 1121 return; 1122 for (auto &Val: I->second) 1123 Val->setIsInvalidated(); 1124 DbgValMap.erase(I); 1125 } 1126 1127 void SelectionDAG::DeallocateNode(SDNode *N) { 1128 // If we have operands, deallocate them. 1129 removeOperands(N); 1130 1131 NodeAllocator.Deallocate(AllNodes.remove(N)); 1132 1133 // Set the opcode to DELETED_NODE to help catch bugs when node 1134 // memory is reallocated. 1135 // FIXME: There are places in SDag that have grown a dependency on the opcode 1136 // value in the released node. 1137 __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType)); 1138 N->NodeType = ISD::DELETED_NODE; 1139 1140 // If any of the SDDbgValue nodes refer to this SDNode, invalidate 1141 // them and forget about that node. 1142 DbgInfo->erase(N); 1143 1144 // Invalidate extra info. 1145 SDEI.erase(N); 1146 } 1147 1148 #ifndef NDEBUG 1149 /// VerifySDNode - Check the given SDNode. Aborts if it is invalid. 
1150 void SelectionDAG::verifyNode(SDNode *N) const { 1151 switch (N->getOpcode()) { 1152 default: 1153 if (N->isTargetOpcode()) 1154 getSelectionDAGInfo().verifyTargetNode(*this, N); 1155 break; 1156 case ISD::BUILD_PAIR: { 1157 EVT VT = N->getValueType(0); 1158 assert(N->getNumValues() == 1 && "Too many results!"); 1159 assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && 1160 "Wrong return type!"); 1161 assert(N->getNumOperands() == 2 && "Wrong number of operands!"); 1162 assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && 1163 "Mismatched operand types!"); 1164 assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && 1165 "Wrong operand type!"); 1166 assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && 1167 "Wrong return type size"); 1168 break; 1169 } 1170 case ISD::BUILD_VECTOR: { 1171 assert(N->getNumValues() == 1 && "Too many results!"); 1172 assert(N->getValueType(0).isVector() && "Wrong return type!"); 1173 assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && 1174 "Wrong number of operands!"); 1175 EVT EltVT = N->getValueType(0).getVectorElementType(); 1176 for (const SDUse &Op : N->ops()) { 1177 assert((Op.getValueType() == EltVT || 1178 (EltVT.isInteger() && Op.getValueType().isInteger() && 1179 EltVT.bitsLE(Op.getValueType()))) && 1180 "Wrong operand type!"); 1181 assert(Op.getValueType() == N->getOperand(0).getValueType() && 1182 "Operands must all have the same type"); 1183 } 1184 break; 1185 } 1186 } 1187 } 1188 #endif // NDEBUG 1189 1190 /// Insert a newly allocated node into the DAG. 1191 /// 1192 /// Handles insertion into the all nodes list and CSE map, as well as 1193 /// verification and other common operations when a new node is allocated. 
1194 void SelectionDAG::InsertNode(SDNode *N) { 1195 AllNodes.push_back(N); 1196 #ifndef NDEBUG 1197 N->PersistentId = NextPersistentId++; 1198 verifyNode(N); 1199 #endif 1200 for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) 1201 DUL->NodeInserted(N); 1202 } 1203 1204 /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that 1205 /// correspond to it. This is useful when we're about to delete or repurpose 1206 /// the node. We don't want future request for structurally identical nodes 1207 /// to return N anymore. 1208 bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { 1209 bool Erased = false; 1210 switch (N->getOpcode()) { 1211 case ISD::HANDLENODE: return false; // noop. 1212 case ISD::CONDCODE: 1213 assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && 1214 "Cond code doesn't exist!"); 1215 Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr; 1216 CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr; 1217 break; 1218 case ISD::ExternalSymbol: 1219 Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); 1220 break; 1221 case ISD::TargetExternalSymbol: { 1222 ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N); 1223 Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>( 1224 ESN->getSymbol(), ESN->getTargetFlags())); 1225 break; 1226 } 1227 case ISD::MCSymbol: { 1228 auto *MCSN = cast<MCSymbolSDNode>(N); 1229 Erased = MCSymbols.erase(MCSN->getMCSymbol()); 1230 break; 1231 } 1232 case ISD::VALUETYPE: { 1233 EVT VT = cast<VTSDNode>(N)->getVT(); 1234 if (VT.isExtended()) { 1235 Erased = ExtendedValueTypeNodes.erase(VT); 1236 } else { 1237 Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr; 1238 ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr; 1239 } 1240 break; 1241 } 1242 default: 1243 // Remove it from the CSE Map. 
1244 assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"); 1245 assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!"); 1246 Erased = CSEMap.RemoveNode(N); 1247 break; 1248 } 1249 #ifndef NDEBUG 1250 // Verify that the node was actually in one of the CSE maps, unless it has a 1251 // glue result (which cannot be CSE'd) or is one of the special cases that are 1252 // not subject to CSE. 1253 if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue && 1254 !N->isMachineOpcode() && !doNotCSE(N)) { 1255 N->dump(this); 1256 dbgs() << "\n"; 1257 llvm_unreachable("Node is not in map!"); 1258 } 1259 #endif 1260 return Erased; 1261 } 1262 1263 /// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE 1264 /// maps and modified in place. Add it back to the CSE maps, unless an identical 1265 /// node already exists, in which case transfer all its users to the existing 1266 /// node. This transfer can potentially trigger recursive merging. 1267 void 1268 SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { 1269 // For node types that aren't CSE'd, just act as if no identical node 1270 // already exists. 1271 if (!doNotCSE(N)) { 1272 SDNode *Existing = CSEMap.GetOrInsertNode(N); 1273 if (Existing != N) { 1274 // If there was already an existing matching node, use ReplaceAllUsesWith 1275 // to replace the dead one with the existing one. This can cause 1276 // recursive merging of other unrelated nodes down the line. 1277 Existing->intersectFlagsWith(N->getFlags()); 1278 if (auto *MemNode = dyn_cast<MemSDNode>(Existing)) 1279 MemNode->refineRanges(cast<MemSDNode>(N)->getMemOperand()); 1280 ReplaceAllUsesWith(N, Existing); 1281 1282 // N is now dead. Inform the listeners and delete it. 1283 for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) 1284 DUL->NodeDeleted(N, Existing); 1285 DeleteNodeNotInCSEMaps(N); 1286 return; 1287 } 1288 } 1289 1290 // If the node doesn't already exist, we updated it. 
Inform listeners. 1291 for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) 1292 DUL->NodeUpdated(N); 1293 } 1294 1295 /// FindModifiedNodeSlot - Find a slot for the specified node if its operands 1296 /// were replaced with those specified. If this node is never memoized, 1297 /// return null, otherwise return a pointer to the slot it would take. If a 1298 /// node already exists with these operands, the slot will be non-null. 1299 SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, 1300 void *&InsertPos) { 1301 if (doNotCSE(N)) 1302 return nullptr; 1303 1304 SDValue Ops[] = { Op }; 1305 FoldingSetNodeID ID; 1306 AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); 1307 AddNodeIDCustom(ID, N); 1308 SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); 1309 if (Node) 1310 Node->intersectFlagsWith(N->getFlags()); 1311 return Node; 1312 } 1313 1314 /// FindModifiedNodeSlot - Find a slot for the specified node if its operands 1315 /// were replaced with those specified. If this node is never memoized, 1316 /// return null, otherwise return a pointer to the slot it would take. If a 1317 /// node already exists with these operands, the slot will be non-null. 1318 SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, 1319 SDValue Op1, SDValue Op2, 1320 void *&InsertPos) { 1321 if (doNotCSE(N)) 1322 return nullptr; 1323 1324 SDValue Ops[] = { Op1, Op2 }; 1325 FoldingSetNodeID ID; 1326 AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); 1327 AddNodeIDCustom(ID, N); 1328 SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); 1329 if (Node) 1330 Node->intersectFlagsWith(N->getFlags()); 1331 return Node; 1332 } 1333 1334 /// FindModifiedNodeSlot - Find a slot for the specified node if its operands 1335 /// were replaced with those specified. If this node is never memoized, 1336 /// return null, otherwise return a pointer to the slot it would take. 
If a 1337 /// node already exists with these operands, the slot will be non-null. 1338 SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, 1339 void *&InsertPos) { 1340 if (doNotCSE(N)) 1341 return nullptr; 1342 1343 FoldingSetNodeID ID; 1344 AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); 1345 AddNodeIDCustom(ID, N); 1346 SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); 1347 if (Node) 1348 Node->intersectFlagsWith(N->getFlags()); 1349 return Node; 1350 } 1351 1352 Align SelectionDAG::getEVTAlign(EVT VT) const { 1353 Type *Ty = VT == MVT::iPTR ? PointerType::get(*getContext(), 0) 1354 : VT.getTypeForEVT(*getContext()); 1355 1356 return getDataLayout().getABITypeAlign(Ty); 1357 } 1358 1359 // EntryNode could meaningfully have debug info if we can find it... 1360 SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOptLevel OL) 1361 : TM(tm), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), 1362 getVTList(MVT::Other, MVT::Glue)), 1363 Root(getEntryNode()) { 1364 InsertNode(&EntryNode); 1365 DbgInfo = new SDDbgInfo(); 1366 } 1367 1368 void SelectionDAG::init(MachineFunction &NewMF, 1369 OptimizationRemarkEmitter &NewORE, Pass *PassPtr, 1370 const TargetLibraryInfo *LibraryInfo, 1371 UniformityInfo *NewUA, ProfileSummaryInfo *PSIin, 1372 BlockFrequencyInfo *BFIin, MachineModuleInfo &MMIin, 1373 FunctionVarLocs const *VarLocs, bool HasDivergency) { 1374 MF = &NewMF; 1375 SDAGISelPass = PassPtr; 1376 ORE = &NewORE; 1377 TLI = getSubtarget().getTargetLowering(); 1378 TSI = getSubtarget().getSelectionDAGInfo(); 1379 LibInfo = LibraryInfo; 1380 Context = &MF->getFunction().getContext(); 1381 UA = NewUA; 1382 PSI = PSIin; 1383 BFI = BFIin; 1384 MMI = &MMIin; 1385 FnVarLocs = VarLocs; 1386 DivergentTarget = HasDivergency; 1387 } 1388 1389 SelectionDAG::~SelectionDAG() { 1390 assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); 1391 allnodes_clear(); 1392 OperandRecycler.clear(OperandAllocator); 1393 
delete DbgInfo; 1394 } 1395 1396 bool SelectionDAG::shouldOptForSize() const { 1397 return llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI); 1398 } 1399 1400 void SelectionDAG::allnodes_clear() { 1401 assert(&*AllNodes.begin() == &EntryNode); 1402 AllNodes.remove(AllNodes.begin()); 1403 while (!AllNodes.empty()) 1404 DeallocateNode(&AllNodes.front()); 1405 #ifndef NDEBUG 1406 NextPersistentId = 0; 1407 #endif 1408 } 1409 1410 SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, 1411 void *&InsertPos) { 1412 SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); 1413 if (N) { 1414 switch (N->getOpcode()) { 1415 default: break; 1416 case ISD::Constant: 1417 case ISD::ConstantFP: 1418 llvm_unreachable("Querying for Constant and ConstantFP nodes requires " 1419 "debug location. Use another overload."); 1420 } 1421 } 1422 return N; 1423 } 1424 1425 SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, 1426 const SDLoc &DL, void *&InsertPos) { 1427 SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); 1428 if (N) { 1429 switch (N->getOpcode()) { 1430 case ISD::Constant: 1431 case ISD::ConstantFP: 1432 // Erase debug location from the node if the node is used at several 1433 // different places. Do not propagate one location to all uses as it 1434 // will cause a worse single stepping debugging experience. 1435 if (N->getDebugLoc() != DL.getDebugLoc()) 1436 N->setDebugLoc(DebugLoc()); 1437 break; 1438 default: 1439 // When the node's point of use is located earlier in the instruction 1440 // sequence than its prior point of use, update its debug info to the 1441 // earlier location. 
1442 if (DL.getIROrder() && DL.getIROrder() < N->getIROrder()) 1443 N->setDebugLoc(DL.getDebugLoc()); 1444 break; 1445 } 1446 } 1447 return N; 1448 } 1449 1450 void SelectionDAG::clear() { 1451 allnodes_clear(); 1452 OperandRecycler.clear(OperandAllocator); 1453 OperandAllocator.Reset(); 1454 CSEMap.clear(); 1455 1456 ExtendedValueTypeNodes.clear(); 1457 ExternalSymbols.clear(); 1458 TargetExternalSymbols.clear(); 1459 MCSymbols.clear(); 1460 SDEI.clear(); 1461 llvm::fill(CondCodeNodes, nullptr); 1462 llvm::fill(ValueTypeNodes, nullptr); 1463 1464 EntryNode.UseList = nullptr; 1465 InsertNode(&EntryNode); 1466 Root = getEntryNode(); 1467 DbgInfo->clear(); 1468 } 1469 1470 SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { 1471 return VT.bitsGT(Op.getValueType()) 1472 ? getNode(ISD::FP_EXTEND, DL, VT, Op) 1473 : getNode(ISD::FP_ROUND, DL, VT, Op, 1474 getIntPtrConstant(0, DL, /*isTarget=*/true)); 1475 } 1476 1477 std::pair<SDValue, SDValue> 1478 SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain, 1479 const SDLoc &DL, EVT VT) { 1480 assert(!VT.bitsEq(Op.getValueType()) && 1481 "Strict no-op FP extend/round not allowed."); 1482 SDValue Res = 1483 VT.bitsGT(Op.getValueType()) 1484 ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op}) 1485 : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other}, 1486 {Chain, Op, getIntPtrConstant(0, DL, /*isTarget=*/true)}); 1487 1488 return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1)); 1489 } 1490 1491 SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { 1492 return VT.bitsGT(Op.getValueType()) ? 1493 getNode(ISD::ANY_EXTEND, DL, VT, Op) : 1494 getNode(ISD::TRUNCATE, DL, VT, Op); 1495 } 1496 1497 SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { 1498 return VT.bitsGT(Op.getValueType()) ? 
1499 getNode(ISD::SIGN_EXTEND, DL, VT, Op) : 1500 getNode(ISD::TRUNCATE, DL, VT, Op); 1501 } 1502 1503 SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { 1504 return VT.bitsGT(Op.getValueType()) ? 1505 getNode(ISD::ZERO_EXTEND, DL, VT, Op) : 1506 getNode(ISD::TRUNCATE, DL, VT, Op); 1507 } 1508 1509 SDValue SelectionDAG::getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL, 1510 EVT VT) { 1511 assert(!VT.isVector()); 1512 auto Type = Op.getValueType(); 1513 SDValue DestOp; 1514 if (Type == VT) 1515 return Op; 1516 auto Size = Op.getValueSizeInBits(); 1517 DestOp = getBitcast(EVT::getIntegerVT(*Context, Size), Op); 1518 if (DestOp.getValueType() == VT) 1519 return DestOp; 1520 1521 return getAnyExtOrTrunc(DestOp, DL, VT); 1522 } 1523 1524 SDValue SelectionDAG::getBitcastedSExtOrTrunc(SDValue Op, const SDLoc &DL, 1525 EVT VT) { 1526 assert(!VT.isVector()); 1527 auto Type = Op.getValueType(); 1528 SDValue DestOp; 1529 if (Type == VT) 1530 return Op; 1531 auto Size = Op.getValueSizeInBits(); 1532 DestOp = getBitcast(MVT::getIntegerVT(Size), Op); 1533 if (DestOp.getValueType() == VT) 1534 return DestOp; 1535 1536 return getSExtOrTrunc(DestOp, DL, VT); 1537 } 1538 1539 SDValue SelectionDAG::getBitcastedZExtOrTrunc(SDValue Op, const SDLoc &DL, 1540 EVT VT) { 1541 assert(!VT.isVector()); 1542 auto Type = Op.getValueType(); 1543 SDValue DestOp; 1544 if (Type == VT) 1545 return Op; 1546 auto Size = Op.getValueSizeInBits(); 1547 DestOp = getBitcast(MVT::getIntegerVT(Size), Op); 1548 if (DestOp.getValueType() == VT) 1549 return DestOp; 1550 1551 return getZExtOrTrunc(DestOp, DL, VT); 1552 } 1553 1554 SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, 1555 EVT OpVT) { 1556 if (VT.bitsLE(Op.getValueType())) 1557 return getNode(ISD::TRUNCATE, SL, VT, Op); 1558 1559 TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT); 1560 return getNode(TLI->getExtendForContent(BType), SL, VT, Op); 1561 } 1562 1563 SDValue 
SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { 1564 EVT OpVT = Op.getValueType(); 1565 assert(VT.isInteger() && OpVT.isInteger() && 1566 "Cannot getZeroExtendInReg FP types"); 1567 assert(VT.isVector() == OpVT.isVector() && 1568 "getZeroExtendInReg type should be vector iff the operand " 1569 "type is vector!"); 1570 assert((!VT.isVector() || 1571 VT.getVectorElementCount() == OpVT.getVectorElementCount()) && 1572 "Vector element counts must match in getZeroExtendInReg"); 1573 assert(VT.bitsLE(OpVT) && "Not extending!"); 1574 if (OpVT == VT) 1575 return Op; 1576 APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(), 1577 VT.getScalarSizeInBits()); 1578 return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT)); 1579 } 1580 1581 SDValue SelectionDAG::getVPZeroExtendInReg(SDValue Op, SDValue Mask, 1582 SDValue EVL, const SDLoc &DL, 1583 EVT VT) { 1584 EVT OpVT = Op.getValueType(); 1585 assert(VT.isInteger() && OpVT.isInteger() && 1586 "Cannot getVPZeroExtendInReg FP types"); 1587 assert(VT.isVector() && OpVT.isVector() && 1588 "getVPZeroExtendInReg type and operand type should be vector!"); 1589 assert(VT.getVectorElementCount() == OpVT.getVectorElementCount() && 1590 "Vector element counts must match in getZeroExtendInReg"); 1591 assert(VT.bitsLE(OpVT) && "Not extending!"); 1592 if (OpVT == VT) 1593 return Op; 1594 APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(), 1595 VT.getScalarSizeInBits()); 1596 return getNode(ISD::VP_AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT), Mask, 1597 EVL); 1598 } 1599 1600 SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { 1601 // Only unsigned pointer semantics are supported right now. In the future this 1602 // might delegate to TLI to check pointer signedness. 1603 return getZExtOrTrunc(Op, DL, VT); 1604 } 1605 1606 SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { 1607 // Only unsigned pointer semantics are supported right now. 
In the future this 1608 // might delegate to TLI to check pointer signedness. 1609 return getZeroExtendInReg(Op, DL, VT); 1610 } 1611 1612 SDValue SelectionDAG::getNegative(SDValue Val, const SDLoc &DL, EVT VT) { 1613 return getNode(ISD::SUB, DL, VT, getConstant(0, DL, VT), Val); 1614 } 1615 1616 /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). 1617 SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { 1618 return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT)); 1619 } 1620 1621 SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { 1622 SDValue TrueValue = getBoolConstant(true, DL, VT, VT); 1623 return getNode(ISD::XOR, DL, VT, Val, TrueValue); 1624 } 1625 1626 SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val, 1627 SDValue Mask, SDValue EVL, EVT VT) { 1628 SDValue TrueValue = getBoolConstant(true, DL, VT, VT); 1629 return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL); 1630 } 1631 1632 SDValue SelectionDAG::getVPPtrExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, 1633 SDValue Mask, SDValue EVL) { 1634 return getVPZExtOrTrunc(DL, VT, Op, Mask, EVL); 1635 } 1636 1637 SDValue SelectionDAG::getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, 1638 SDValue Mask, SDValue EVL) { 1639 if (VT.bitsGT(Op.getValueType())) 1640 return getNode(ISD::VP_ZERO_EXTEND, DL, VT, Op, Mask, EVL); 1641 if (VT.bitsLT(Op.getValueType())) 1642 return getNode(ISD::VP_TRUNCATE, DL, VT, Op, Mask, EVL); 1643 return Op; 1644 } 1645 1646 SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, 1647 EVT OpVT) { 1648 if (!V) 1649 return getConstant(0, DL, VT); 1650 1651 switch (TLI->getBooleanContents(OpVT)) { 1652 case TargetLowering::ZeroOrOneBooleanContent: 1653 case TargetLowering::UndefinedBooleanContent: 1654 return getConstant(1, DL, VT); 1655 case TargetLowering::ZeroOrNegativeOneBooleanContent: 1656 return getAllOnesConstant(DL, VT); 1657 } 1658 llvm_unreachable("Unexpected boolean content 
enum!"); 1659 } 1660 1661 SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT, 1662 bool isT, bool isO) { 1663 return getConstant(APInt(VT.getScalarSizeInBits(), Val, /*isSigned=*/false), 1664 DL, VT, isT, isO); 1665 } 1666 1667 SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT, 1668 bool isT, bool isO) { 1669 return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO); 1670 } 1671 1672 SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, 1673 EVT VT, bool isT, bool isO) { 1674 assert(VT.isInteger() && "Cannot create FP integer constant!"); 1675 1676 EVT EltVT = VT.getScalarType(); 1677 const ConstantInt *Elt = &Val; 1678 1679 // Vector splats are explicit within the DAG, with ConstantSDNode holding the 1680 // to-be-splatted scalar ConstantInt. 1681 if (isa<VectorType>(Elt->getType())) 1682 Elt = ConstantInt::get(*getContext(), Elt->getValue()); 1683 1684 // In some cases the vector type is legal but the element type is illegal and 1685 // needs to be promoted, for example v8i8 on ARM. In this case, promote the 1686 // inserted value (the type does not need to match the vector element type). 1687 // Any extra bits introduced will be truncated away. 1688 if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == 1689 TargetLowering::TypePromoteInteger) { 1690 EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); 1691 APInt NewVal; 1692 if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT)) 1693 NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits()); 1694 else 1695 NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); 1696 Elt = ConstantInt::get(*getContext(), NewVal); 1697 } 1698 // In other cases the element type is illegal and needs to be expanded, for 1699 // example v2i64 on MIPS32. In this case, find the nearest legal type, split 1700 // the value into n parts and use a vector type with n-times the elements. 1701 // Then bitcast to the type requested. 
1702 // Legalizing constants too early makes the DAGCombiner's job harder so we 1703 // only legalize if the DAG tells us we must produce legal types. 1704 else if (NewNodesMustHaveLegalTypes && VT.isVector() && 1705 TLI->getTypeAction(*getContext(), EltVT) == 1706 TargetLowering::TypeExpandInteger) { 1707 const APInt &NewVal = Elt->getValue(); 1708 EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); 1709 unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); 1710 1711 // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node. 1712 if (VT.isScalableVector() || 1713 TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) { 1714 assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 && 1715 "Can only handle an even split!"); 1716 unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits; 1717 1718 SmallVector<SDValue, 2> ScalarParts; 1719 for (unsigned i = 0; i != Parts; ++i) 1720 ScalarParts.push_back(getConstant( 1721 NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL, 1722 ViaEltVT, isT, isO)); 1723 1724 return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts); 1725 } 1726 1727 unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; 1728 EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); 1729 1730 // Check the temporary vector is the correct size. If this fails then 1731 // getTypeToTransformTo() probably returned a type whose size (in bits) 1732 // isn't a power-of-2 factor of the requested type size. 1733 assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); 1734 1735 SmallVector<SDValue, 2> EltParts; 1736 for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) 1737 EltParts.push_back(getConstant( 1738 NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL, 1739 ViaEltVT, isT, isO)); 1740 1741 // EltParts is currently in little endian order. If we actually want 1742 // big-endian order then reverse it now. 
1743 if (getDataLayout().isBigEndian()) 1744 std::reverse(EltParts.begin(), EltParts.end()); 1745 1746 // The elements must be reversed when the element order is different 1747 // to the endianness of the elements (because the BITCAST is itself a 1748 // vector shuffle in this situation). However, we do not need any code to 1749 // perform this reversal because getConstant() is producing a vector 1750 // splat. 1751 // This situation occurs in MIPS MSA. 1752 1753 SmallVector<SDValue, 8> Ops; 1754 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) 1755 llvm::append_range(Ops, EltParts); 1756 1757 SDValue V = 1758 getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); 1759 return V; 1760 } 1761 1762 assert(Elt->getBitWidth() == EltVT.getSizeInBits() && 1763 "APInt size does not match type size!"); 1764 unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; 1765 SDVTList VTs = getVTList(EltVT); 1766 FoldingSetNodeID ID; 1767 AddNodeIDNode(ID, Opc, VTs, {}); 1768 ID.AddPointer(Elt); 1769 ID.AddBoolean(isO); 1770 void *IP = nullptr; 1771 SDNode *N = nullptr; 1772 if ((N = FindNodeOrInsertPos(ID, DL, IP))) 1773 if (!VT.isVector()) 1774 return SDValue(N, 0); 1775 1776 if (!N) { 1777 N = newSDNode<ConstantSDNode>(isT, isO, Elt, VTs); 1778 CSEMap.InsertNode(N, IP); 1779 InsertNode(N); 1780 NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); 1781 } 1782 1783 SDValue Result(N, 0); 1784 if (VT.isVector()) 1785 Result = getSplat(VT, DL, Result); 1786 return Result; 1787 } 1788 1789 SDValue SelectionDAG::getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, 1790 bool isT, bool isO) { 1791 unsigned Size = VT.getScalarSizeInBits(); 1792 return getConstant(APInt(Size, Val, /*isSigned=*/true), DL, VT, isT, isO); 1793 } 1794 1795 SDValue SelectionDAG::getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget, 1796 bool IsOpaque) { 1797 return getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT, 1798 IsTarget, IsOpaque); 1799 } 1800 
1801 SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, 1802 bool isTarget) { 1803 return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget); 1804 } 1805 1806 SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT, 1807 const SDLoc &DL) { 1808 assert(VT.isInteger() && "Shift amount is not an integer type!"); 1809 EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout()); 1810 return getConstant(Val, DL, ShiftVT); 1811 } 1812 1813 SDValue SelectionDAG::getShiftAmountConstant(const APInt &Val, EVT VT, 1814 const SDLoc &DL) { 1815 assert(Val.ult(VT.getScalarSizeInBits()) && "Out of range shift"); 1816 return getShiftAmountConstant(Val.getZExtValue(), VT, DL); 1817 } 1818 1819 SDValue SelectionDAG::getVectorIdxConstant(uint64_t Val, const SDLoc &DL, 1820 bool isTarget) { 1821 return getConstant(Val, DL, TLI->getVectorIdxTy(getDataLayout()), isTarget); 1822 } 1823 1824 SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT, 1825 bool isTarget) { 1826 return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); 1827 } 1828 1829 SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, 1830 EVT VT, bool isTarget) { 1831 assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); 1832 1833 EVT EltVT = VT.getScalarType(); 1834 const ConstantFP *Elt = &V; 1835 1836 // Vector splats are explicit within the DAG, with ConstantFPSDNode holding 1837 // the to-be-splatted scalar ConstantFP. 1838 if (isa<VectorType>(Elt->getType())) 1839 Elt = ConstantFP::get(*getContext(), Elt->getValue()); 1840 1841 // Do the map lookup using the actual bit pattern for the floating point 1842 // value, so that we don't have problems with 0.0 comparing equal to -0.0, and 1843 // we don't have issues with SNANs. 1844 unsigned Opc = isTarget ? 
ISD::TargetConstantFP : ISD::ConstantFP; 1845 SDVTList VTs = getVTList(EltVT); 1846 FoldingSetNodeID ID; 1847 AddNodeIDNode(ID, Opc, VTs, {}); 1848 ID.AddPointer(Elt); 1849 void *IP = nullptr; 1850 SDNode *N = nullptr; 1851 if ((N = FindNodeOrInsertPos(ID, DL, IP))) 1852 if (!VT.isVector()) 1853 return SDValue(N, 0); 1854 1855 if (!N) { 1856 N = newSDNode<ConstantFPSDNode>(isTarget, Elt, VTs); 1857 CSEMap.InsertNode(N, IP); 1858 InsertNode(N); 1859 } 1860 1861 SDValue Result(N, 0); 1862 if (VT.isVector()) 1863 Result = getSplat(VT, DL, Result); 1864 NewSDValueDbgMsg(Result, "Creating fp constant: ", this); 1865 return Result; 1866 } 1867 1868 SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, 1869 bool isTarget) { 1870 EVT EltVT = VT.getScalarType(); 1871 if (EltVT == MVT::f32) 1872 return getConstantFP(APFloat((float)Val), DL, VT, isTarget); 1873 if (EltVT == MVT::f64) 1874 return getConstantFP(APFloat(Val), DL, VT, isTarget); 1875 if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 || 1876 EltVT == MVT::f16 || EltVT == MVT::bf16) { 1877 bool Ignored; 1878 APFloat APF = APFloat(Val); 1879 APF.convert(EltVT.getFltSemantics(), APFloat::rmNearestTiesToEven, 1880 &Ignored); 1881 return getConstantFP(APF, DL, VT, isTarget); 1882 } 1883 llvm_unreachable("Unsupported type in getConstantFP"); 1884 } 1885 1886 SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, 1887 EVT VT, int64_t Offset, bool isTargetGA, 1888 unsigned TargetFlags) { 1889 assert((TargetFlags == 0 || isTargetGA) && 1890 "Cannot set target flags on target-independent globals"); 1891 1892 // Truncate (with sign-extension) the offset value to the pointer size. 1893 unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); 1894 if (BitWidth < 64) 1895 Offset = SignExtend64(Offset, BitWidth); 1896 1897 unsigned Opc; 1898 if (GV->isThreadLocal()) 1899 Opc = isTargetGA ? 
ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; 1900 else 1901 Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; 1902 1903 SDVTList VTs = getVTList(VT); 1904 FoldingSetNodeID ID; 1905 AddNodeIDNode(ID, Opc, VTs, {}); 1906 ID.AddPointer(GV); 1907 ID.AddInteger(Offset); 1908 ID.AddInteger(TargetFlags); 1909 void *IP = nullptr; 1910 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) 1911 return SDValue(E, 0); 1912 1913 auto *N = newSDNode<GlobalAddressSDNode>( 1914 Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VTs, Offset, TargetFlags); 1915 CSEMap.InsertNode(N, IP); 1916 InsertNode(N); 1917 return SDValue(N, 0); 1918 } 1919 1920 SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { 1921 unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; 1922 SDVTList VTs = getVTList(VT); 1923 FoldingSetNodeID ID; 1924 AddNodeIDNode(ID, Opc, VTs, {}); 1925 ID.AddInteger(FI); 1926 void *IP = nullptr; 1927 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 1928 return SDValue(E, 0); 1929 1930 auto *N = newSDNode<FrameIndexSDNode>(FI, VTs, isTarget); 1931 CSEMap.InsertNode(N, IP); 1932 InsertNode(N); 1933 return SDValue(N, 0); 1934 } 1935 1936 SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, 1937 unsigned TargetFlags) { 1938 assert((TargetFlags == 0 || isTarget) && 1939 "Cannot set target flags on target-independent jump tables"); 1940 unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; 1941 SDVTList VTs = getVTList(VT); 1942 FoldingSetNodeID ID; 1943 AddNodeIDNode(ID, Opc, VTs, {}); 1944 ID.AddInteger(JTI); 1945 ID.AddInteger(TargetFlags); 1946 void *IP = nullptr; 1947 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 1948 return SDValue(E, 0); 1949 1950 auto *N = newSDNode<JumpTableSDNode>(JTI, VTs, isTarget, TargetFlags); 1951 CSEMap.InsertNode(N, IP); 1952 InsertNode(N); 1953 return SDValue(N, 0); 1954 } 1955 1956 SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain, 1957 const SDLoc &DL) { 1958 EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout()); 1959 return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain, 1960 getTargetConstant(static_cast<uint64_t>(JTI), DL, PTy, true)); 1961 } 1962 1963 SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, 1964 MaybeAlign Alignment, int Offset, 1965 bool isTarget, unsigned TargetFlags) { 1966 assert((TargetFlags == 0 || isTarget) && 1967 "Cannot set target flags on target-independent globals"); 1968 if (!Alignment) 1969 Alignment = shouldOptForSize() 1970 ? getDataLayout().getABITypeAlign(C->getType()) 1971 : getDataLayout().getPrefTypeAlign(C->getType()); 1972 unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; 1973 SDVTList VTs = getVTList(VT); 1974 FoldingSetNodeID ID; 1975 AddNodeIDNode(ID, Opc, VTs, {}); 1976 ID.AddInteger(Alignment->value()); 1977 ID.AddInteger(Offset); 1978 ID.AddPointer(C); 1979 ID.AddInteger(TargetFlags); 1980 void *IP = nullptr; 1981 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 1982 return SDValue(E, 0); 1983 1984 auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VTs, Offset, *Alignment, 1985 TargetFlags); 1986 CSEMap.InsertNode(N, IP); 1987 InsertNode(N); 1988 SDValue V = SDValue(N, 0); 1989 NewSDValueDbgMsg(V, "Creating new constant pool: ", this); 1990 return V; 1991 } 1992 1993 SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, 1994 MaybeAlign Alignment, int Offset, 1995 bool isTarget, unsigned TargetFlags) { 1996 assert((TargetFlags == 0 || isTarget) && 1997 "Cannot set target flags on target-independent globals"); 1998 if (!Alignment) 1999 Alignment = getDataLayout().getPrefTypeAlign(C->getType()); 2000 unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; 2001 SDVTList VTs = getVTList(VT); 2002 FoldingSetNodeID ID; 2003 AddNodeIDNode(ID, Opc, VTs, {}); 2004 ID.AddInteger(Alignment->value()); 2005 ID.AddInteger(Offset); 2006 C->addSelectionDAGCSEId(ID); 2007 ID.AddInteger(TargetFlags); 2008 void *IP = nullptr; 2009 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2010 return SDValue(E, 0); 2011 2012 auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VTs, Offset, *Alignment, 2013 TargetFlags); 2014 CSEMap.InsertNode(N, IP); 2015 InsertNode(N); 2016 return SDValue(N, 0); 2017 } 2018 2019 SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { 2020 FoldingSetNodeID ID; 2021 AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), {}); 2022 ID.AddPointer(MBB); 2023 void *IP = nullptr; 2024 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2025 return SDValue(E, 0); 2026 2027 auto *N = newSDNode<BasicBlockSDNode>(MBB); 2028 CSEMap.InsertNode(N, IP); 2029 InsertNode(N); 2030 return SDValue(N, 0); 2031 } 2032 2033 SDValue SelectionDAG::getValueType(EVT VT) { 2034 if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= 2035 ValueTypeNodes.size()) 2036 ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1); 2037 2038 SDNode *&N = VT.isExtended() ? 
2039 ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; 2040 2041 if (N) return SDValue(N, 0); 2042 N = newSDNode<VTSDNode>(VT); 2043 InsertNode(N); 2044 return SDValue(N, 0); 2045 } 2046 2047 SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { 2048 SDNode *&N = ExternalSymbols[Sym]; 2049 if (N) return SDValue(N, 0); 2050 N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, getVTList(VT)); 2051 InsertNode(N); 2052 return SDValue(N, 0); 2053 } 2054 2055 SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { 2056 SDNode *&N = MCSymbols[Sym]; 2057 if (N) 2058 return SDValue(N, 0); 2059 N = newSDNode<MCSymbolSDNode>(Sym, getVTList(VT)); 2060 InsertNode(N); 2061 return SDValue(N, 0); 2062 } 2063 2064 SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, 2065 unsigned TargetFlags) { 2066 SDNode *&N = 2067 TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)]; 2068 if (N) return SDValue(N, 0); 2069 N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, getVTList(VT)); 2070 InsertNode(N); 2071 return SDValue(N, 0); 2072 } 2073 2074 SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { 2075 if ((unsigned)Cond >= CondCodeNodes.size()) 2076 CondCodeNodes.resize(Cond+1); 2077 2078 if (!CondCodeNodes[Cond]) { 2079 auto *N = newSDNode<CondCodeSDNode>(Cond); 2080 CondCodeNodes[Cond] = N; 2081 InsertNode(N); 2082 } 2083 2084 return SDValue(CondCodeNodes[Cond], 0); 2085 } 2086 2087 SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm, 2088 bool ConstantFold) { 2089 assert(MulImm.getBitWidth() == VT.getSizeInBits() && 2090 "APInt size does not match type size!"); 2091 2092 if (MulImm == 0) 2093 return getConstant(0, DL, VT); 2094 2095 if (ConstantFold) { 2096 const MachineFunction &MF = getMachineFunction(); 2097 const Function &F = MF.getFunction(); 2098 ConstantRange CR = getVScaleRange(&F, 64); 2099 if (const APInt *C = CR.getSingleElement()) 2100 return getConstant(MulImm * 
C->getZExtValue(), DL, VT); 2101 } 2102 2103 return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT)); 2104 } 2105 2106 SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, 2107 bool ConstantFold) { 2108 if (EC.isScalable()) 2109 return getVScale(DL, VT, 2110 APInt(VT.getSizeInBits(), EC.getKnownMinValue())); 2111 2112 return getConstant(EC.getKnownMinValue(), DL, VT); 2113 } 2114 2115 SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) { 2116 APInt One(ResVT.getScalarSizeInBits(), 1); 2117 return getStepVector(DL, ResVT, One); 2118 } 2119 2120 SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, 2121 const APInt &StepVal) { 2122 assert(ResVT.getScalarSizeInBits() == StepVal.getBitWidth()); 2123 if (ResVT.isScalableVector()) 2124 return getNode( 2125 ISD::STEP_VECTOR, DL, ResVT, 2126 getTargetConstant(StepVal, DL, ResVT.getVectorElementType())); 2127 2128 SmallVector<SDValue, 16> OpsStepConstants; 2129 for (uint64_t i = 0; i < ResVT.getVectorNumElements(); i++) 2130 OpsStepConstants.push_back( 2131 getConstant(StepVal * i, DL, ResVT.getVectorElementType())); 2132 return getBuildVector(ResVT, DL, OpsStepConstants); 2133 } 2134 2135 /// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that 2136 /// point at N1 to point at N2 and indices that point at N2 to point at N1. 
2137 static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) { 2138 std::swap(N1, N2); 2139 ShuffleVectorSDNode::commuteMask(M); 2140 } 2141 2142 SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, 2143 SDValue N2, ArrayRef<int> Mask) { 2144 assert(VT.getVectorNumElements() == Mask.size() && 2145 "Must have the same number of vector elements as mask elements!"); 2146 assert(VT == N1.getValueType() && VT == N2.getValueType() && 2147 "Invalid VECTOR_SHUFFLE"); 2148 2149 // Canonicalize shuffle undef, undef -> undef 2150 if (N1.isUndef() && N2.isUndef()) 2151 return getUNDEF(VT); 2152 2153 // Validate that all indices in Mask are within the range of the elements 2154 // input to the shuffle. 2155 int NElts = Mask.size(); 2156 assert(llvm::all_of(Mask, 2157 [&](int M) { return M < (NElts * 2) && M >= -1; }) && 2158 "Index out of range"); 2159 2160 // Copy the mask so we can do any needed cleanup. 2161 SmallVector<int, 8> MaskVec(Mask); 2162 2163 // Canonicalize shuffle v, v -> v, undef 2164 if (N1 == N2) { 2165 N2 = getUNDEF(VT); 2166 for (int i = 0; i != NElts; ++i) 2167 if (MaskVec[i] >= NElts) MaskVec[i] -= NElts; 2168 } 2169 2170 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. 2171 if (N1.isUndef()) 2172 commuteShuffle(N1, N2, MaskVec); 2173 2174 if (TLI->hasVectorBlend()) { 2175 // If shuffling a splat, try to blend the splat instead. We do this here so 2176 // that even when this arises during lowering we don't have to re-handle it. 2177 auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { 2178 BitVector UndefElements; 2179 SDValue Splat = BV->getSplatValue(&UndefElements); 2180 if (!Splat) 2181 return; 2182 2183 for (int i = 0; i < NElts; ++i) { 2184 if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) 2185 continue; 2186 2187 // If this input comes from undef, mark it as such. 
2188 if (UndefElements[MaskVec[i] - Offset]) { 2189 MaskVec[i] = -1; 2190 continue; 2191 } 2192 2193 // If we can blend a non-undef lane, use that instead. 2194 if (!UndefElements[i]) 2195 MaskVec[i] = i + Offset; 2196 } 2197 }; 2198 if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) 2199 BlendSplat(N1BV, 0); 2200 if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) 2201 BlendSplat(N2BV, NElts); 2202 } 2203 2204 // Canonicalize all index into lhs, -> shuffle lhs, undef 2205 // Canonicalize all index into rhs, -> shuffle rhs, undef 2206 bool AllLHS = true, AllRHS = true; 2207 bool N2Undef = N2.isUndef(); 2208 for (int i = 0; i != NElts; ++i) { 2209 if (MaskVec[i] >= NElts) { 2210 if (N2Undef) 2211 MaskVec[i] = -1; 2212 else 2213 AllLHS = false; 2214 } else if (MaskVec[i] >= 0) { 2215 AllRHS = false; 2216 } 2217 } 2218 if (AllLHS && AllRHS) 2219 return getUNDEF(VT); 2220 if (AllLHS && !N2Undef) 2221 N2 = getUNDEF(VT); 2222 if (AllRHS) { 2223 N1 = getUNDEF(VT); 2224 commuteShuffle(N1, N2, MaskVec); 2225 } 2226 // Reset our undef status after accounting for the mask. 2227 N2Undef = N2.isUndef(); 2228 // Re-check whether both sides ended up undef. 2229 if (N1.isUndef() && N2Undef) 2230 return getUNDEF(VT); 2231 2232 // If Identity shuffle return that node. 2233 bool Identity = true, AllSame = true; 2234 for (int i = 0; i != NElts; ++i) { 2235 if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false; 2236 if (MaskVec[i] != MaskVec[0]) AllSame = false; 2237 } 2238 if (Identity && NElts) 2239 return N1; 2240 2241 // Shuffling a constant splat doesn't change the result. 2242 if (N2Undef) { 2243 SDValue V = N1; 2244 2245 // Look through any bitcasts. We check that these don't change the number 2246 // (and size) of elements and just changes their types. 2247 while (V.getOpcode() == ISD::BITCAST) 2248 V = V->getOperand(0); 2249 2250 // A splat should always show up as a build vector node. 
2251 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) { 2252 BitVector UndefElements; 2253 SDValue Splat = BV->getSplatValue(&UndefElements); 2254 // If this is a splat of an undef, shuffling it is also undef. 2255 if (Splat && Splat.isUndef()) 2256 return getUNDEF(VT); 2257 2258 bool SameNumElts = 2259 V.getValueType().getVectorNumElements() == VT.getVectorNumElements(); 2260 2261 // We only have a splat which can skip shuffles if there is a splatted 2262 // value and no undef lanes rearranged by the shuffle. 2263 if (Splat && UndefElements.none()) { 2264 // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the 2265 // number of elements match or the value splatted is a zero constant. 2266 if (SameNumElts || isNullConstant(Splat)) 2267 return N1; 2268 } 2269 2270 // If the shuffle itself creates a splat, build the vector directly. 2271 if (AllSame && SameNumElts) { 2272 EVT BuildVT = BV->getValueType(0); 2273 const SDValue &Splatted = BV->getOperand(MaskVec[0]); 2274 SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted); 2275 2276 // We may have jumped through bitcasts, so the type of the 2277 // BUILD_VECTOR may not match the type of the shuffle. 2278 if (BuildVT != VT) 2279 NewBV = getNode(ISD::BITCAST, dl, VT, NewBV); 2280 return NewBV; 2281 } 2282 } 2283 } 2284 2285 SDVTList VTs = getVTList(VT); 2286 FoldingSetNodeID ID; 2287 SDValue Ops[2] = { N1, N2 }; 2288 AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, VTs, Ops); 2289 for (int i = 0; i != NElts; ++i) 2290 ID.AddInteger(MaskVec[i]); 2291 2292 void* IP = nullptr; 2293 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 2294 return SDValue(E, 0); 2295 2296 // Allocate the mask array for the node out of the BumpPtrAllocator, since 2297 // SDNode doesn't have access to it. This memory will be "leaked" when 2298 // the node is deallocated, but recovered when the NodeAllocator is released. 
2299 int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); 2300 llvm::copy(MaskVec, MaskAlloc); 2301 2302 auto *N = newSDNode<ShuffleVectorSDNode>(VTs, dl.getIROrder(), 2303 dl.getDebugLoc(), MaskAlloc); 2304 createOperands(N, Ops); 2305 2306 CSEMap.InsertNode(N, IP); 2307 InsertNode(N); 2308 SDValue V = SDValue(N, 0); 2309 NewSDValueDbgMsg(V, "Creating new node: ", this); 2310 return V; 2311 } 2312 2313 SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { 2314 EVT VT = SV.getValueType(0); 2315 SmallVector<int, 8> MaskVec(SV.getMask()); 2316 ShuffleVectorSDNode::commuteMask(MaskVec); 2317 2318 SDValue Op0 = SV.getOperand(0); 2319 SDValue Op1 = SV.getOperand(1); 2320 return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec); 2321 } 2322 2323 SDValue SelectionDAG::getRegister(Register Reg, EVT VT) { 2324 SDVTList VTs = getVTList(VT); 2325 FoldingSetNodeID ID; 2326 AddNodeIDNode(ID, ISD::Register, VTs, {}); 2327 ID.AddInteger(Reg.id()); 2328 void *IP = nullptr; 2329 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2330 return SDValue(E, 0); 2331 2332 auto *N = newSDNode<RegisterSDNode>(Reg, VTs); 2333 N->SDNodeBits.IsDivergent = 2334 DivergentTarget && TLI->isSDNodeSourceOfDivergence(N, FLI, UA); 2335 CSEMap.InsertNode(N, IP); 2336 InsertNode(N); 2337 return SDValue(N, 0); 2338 } 2339 2340 SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { 2341 FoldingSetNodeID ID; 2342 AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), {}); 2343 ID.AddPointer(RegMask); 2344 void *IP = nullptr; 2345 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2346 return SDValue(E, 0); 2347 2348 auto *N = newSDNode<RegisterMaskSDNode>(RegMask); 2349 CSEMap.InsertNode(N, IP); 2350 InsertNode(N); 2351 return SDValue(N, 0); 2352 } 2353 2354 SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root, 2355 MCSymbol *Label) { 2356 return getLabelNode(ISD::EH_LABEL, dl, Root, Label); 2357 } 2358 2359 SDValue SelectionDAG::getLabelNode(unsigned Opcode, 
const SDLoc &dl, 2360 SDValue Root, MCSymbol *Label) { 2361 FoldingSetNodeID ID; 2362 SDValue Ops[] = { Root }; 2363 AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops); 2364 ID.AddPointer(Label); 2365 void *IP = nullptr; 2366 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2367 return SDValue(E, 0); 2368 2369 auto *N = 2370 newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label); 2371 createOperands(N, Ops); 2372 2373 CSEMap.InsertNode(N, IP); 2374 InsertNode(N); 2375 return SDValue(N, 0); 2376 } 2377 2378 SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, 2379 int64_t Offset, bool isTarget, 2380 unsigned TargetFlags) { 2381 unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; 2382 SDVTList VTs = getVTList(VT); 2383 2384 FoldingSetNodeID ID; 2385 AddNodeIDNode(ID, Opc, VTs, {}); 2386 ID.AddPointer(BA); 2387 ID.AddInteger(Offset); 2388 ID.AddInteger(TargetFlags); 2389 void *IP = nullptr; 2390 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2391 return SDValue(E, 0); 2392 2393 auto *N = newSDNode<BlockAddressSDNode>(Opc, VTs, BA, Offset, TargetFlags); 2394 CSEMap.InsertNode(N, IP); 2395 InsertNode(N); 2396 return SDValue(N, 0); 2397 } 2398 2399 SDValue SelectionDAG::getSrcValue(const Value *V) { 2400 FoldingSetNodeID ID; 2401 AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), {}); 2402 ID.AddPointer(V); 2403 2404 void *IP = nullptr; 2405 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2406 return SDValue(E, 0); 2407 2408 auto *N = newSDNode<SrcValueSDNode>(V); 2409 CSEMap.InsertNode(N, IP); 2410 InsertNode(N); 2411 return SDValue(N, 0); 2412 } 2413 2414 SDValue SelectionDAG::getMDNode(const MDNode *MD) { 2415 FoldingSetNodeID ID; 2416 AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), {}); 2417 ID.AddPointer(MD); 2418 2419 void *IP = nullptr; 2420 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2421 return SDValue(E, 0); 2422 2423 auto *N = newSDNode<MDNodeSDNode>(MD); 2424 CSEMap.InsertNode(N, IP); 2425 
InsertNode(N); 2426 return SDValue(N, 0); 2427 } 2428 2429 SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) { 2430 if (VT == V.getValueType()) 2431 return V; 2432 2433 return getNode(ISD::BITCAST, SDLoc(V), VT, V); 2434 } 2435 2436 SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, 2437 unsigned SrcAS, unsigned DestAS) { 2438 SDVTList VTs = getVTList(VT); 2439 SDValue Ops[] = {Ptr}; 2440 FoldingSetNodeID ID; 2441 AddNodeIDNode(ID, ISD::ADDRSPACECAST, VTs, Ops); 2442 ID.AddInteger(SrcAS); 2443 ID.AddInteger(DestAS); 2444 2445 void *IP = nullptr; 2446 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 2447 return SDValue(E, 0); 2448 2449 auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(), 2450 VTs, SrcAS, DestAS); 2451 createOperands(N, Ops); 2452 2453 CSEMap.InsertNode(N, IP); 2454 InsertNode(N); 2455 return SDValue(N, 0); 2456 } 2457 2458 SDValue SelectionDAG::getFreeze(SDValue V) { 2459 return getNode(ISD::FREEZE, SDLoc(V), V.getValueType(), V); 2460 } 2461 2462 /// getShiftAmountOperand - Return the specified value casted to 2463 /// the target's desired shift amount type. 2464 SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { 2465 EVT OpTy = Op.getValueType(); 2466 EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout()); 2467 if (OpTy == ShTy || OpTy.isVector()) return Op; 2468 2469 return getZExtOrTrunc(Op, SDLoc(Op), ShTy); 2470 } 2471 2472 /// Given a store node \p StoreNode, return true if it is safe to fold that node 2473 /// into \p FPNode, which expands to a library call with output pointers. 2474 static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, 2475 SDNode *FPNode) { 2476 SmallVector<const SDNode *, 8> Worklist; 2477 SmallVector<const SDNode *, 8> DeferredNodes; 2478 SmallPtrSet<const SDNode *, 16> Visited; 2479 2480 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode). 
2481 for (SDValue Op : StoreNode->ops()) 2482 if (Op.getNode() != FPNode) 2483 Worklist.push_back(Op.getNode()); 2484 2485 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); 2486 while (!Worklist.empty()) { 2487 const SDNode *Node = Worklist.pop_back_val(); 2488 auto [_, Inserted] = Visited.insert(Node); 2489 if (!Inserted) 2490 continue; 2491 2492 if (MaxSteps > 0 && Visited.size() >= MaxSteps) 2493 return false; 2494 2495 // Reached the FPNode (would result in a cycle). 2496 // OR Reached CALLSEQ_START (would result in nested call sequences). 2497 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START) 2498 return false; 2499 2500 if (Node->getOpcode() == ISD::CALLSEQ_END) { 2501 // Defer looking into call sequences (so we can check we're outside one). 2502 // We still need to look through these for the predecessor check. 2503 DeferredNodes.push_back(Node); 2504 continue; 2505 } 2506 2507 for (SDValue Op : Node->ops()) 2508 Worklist.push_back(Op.getNode()); 2509 } 2510 2511 // True if we're outside a call sequence and don't have the FPNode as a 2512 // predecessor. No cycles or nested call sequences possible. 
2513 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes, 2514 MaxSteps); 2515 } 2516 2517 bool SelectionDAG::expandMultipleResultFPLibCall( 2518 RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results, 2519 std::optional<unsigned> CallRetResNo) { 2520 LLVMContext &Ctx = *getContext(); 2521 EVT VT = Node->getValueType(0); 2522 unsigned NumResults = Node->getNumValues(); 2523 2524 if (LC == RTLIB::UNKNOWN_LIBCALL) 2525 return false; 2526 2527 const char *LCName = TLI->getLibcallName(LC); 2528 if (!LCName) 2529 return false; 2530 2531 auto getVecDesc = [&]() -> VecDesc const * { 2532 for (bool Masked : {false, true}) { 2533 if (VecDesc const *VD = getLibInfo().getVectorMappingInfo( 2534 LCName, VT.getVectorElementCount(), Masked)) { 2535 return VD; 2536 } 2537 } 2538 return nullptr; 2539 }; 2540 2541 // For vector types, we must find a vector mapping for the libcall. 2542 VecDesc const *VD = nullptr; 2543 if (VT.isVector() && !(VD = getVecDesc())) 2544 return false; 2545 2546 // Find users of the node that store the results (and share input chains). The 2547 // destination pointers can be used instead of creating stack allocations. 2548 SDValue StoresInChain; 2549 SmallVector<StoreSDNode *, 2> ResultStores(NumResults); 2550 for (SDNode *User : Node->users()) { 2551 if (!ISD::isNormalStore(User)) 2552 continue; 2553 auto *ST = cast<StoreSDNode>(User); 2554 SDValue StoreValue = ST->getValue(); 2555 unsigned ResNo = StoreValue.getResNo(); 2556 // Ensure the store corresponds to an output pointer. 2557 if (CallRetResNo == ResNo) 2558 continue; 2559 // Ensure the store to the default address space and not atomic or volatile. 2560 if (!ST->isSimple() || ST->getAddressSpace() != 0) 2561 continue; 2562 // Ensure all store chains are the same (so they don't alias). 2563 if (StoresInChain && ST->getChain() != StoresInChain) 2564 continue; 2565 // Ensure the store is properly aligned. 
2566 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx); 2567 if (ST->getAlign() < 2568 getDataLayout().getABITypeAlign(StoreType->getScalarType())) 2569 continue; 2570 // Avoid: 2571 // 1. Creating cyclic dependencies. 2572 // 2. Expanding the node to a call within a call sequence. 2573 if (!canFoldStoreIntoLibCallOutputPointers(ST, Node)) 2574 continue; 2575 ResultStores[ResNo] = ST; 2576 StoresInChain = ST->getChain(); 2577 } 2578 2579 TargetLowering::ArgListTy Args; 2580 auto AddArgListEntry = [&](SDValue Node, Type *Ty) { 2581 TargetLowering::ArgListEntry Entry{}; 2582 Entry.Ty = Ty; 2583 Entry.Node = Node; 2584 Args.push_back(Entry); 2585 }; 2586 2587 // Pass the arguments. 2588 for (const SDValue &Op : Node->op_values()) { 2589 EVT ArgVT = Op.getValueType(); 2590 Type *ArgTy = ArgVT.getTypeForEVT(Ctx); 2591 AddArgListEntry(Op, ArgTy); 2592 } 2593 2594 // Pass the output pointers. 2595 SmallVector<SDValue, 2> ResultPtrs(NumResults); 2596 Type *PointerTy = PointerType::getUnqual(Ctx); 2597 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) { 2598 if (ResNo == CallRetResNo) 2599 continue; 2600 EVT ResVT = Node->getValueType(ResNo); 2601 SDValue ResultPtr = ST ? ST->getBasePtr() : CreateStackTemporary(ResVT); 2602 ResultPtrs[ResNo] = ResultPtr; 2603 AddArgListEntry(ResultPtr, PointerTy); 2604 } 2605 2606 SDLoc DL(Node); 2607 2608 // Pass the vector mask (if required). 2609 if (VD && VD->isMasked()) { 2610 EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT); 2611 SDValue Mask = getBoolConstant(true, DL, MaskVT, VT); 2612 AddArgListEntry(Mask, MaskVT.getTypeForEVT(Ctx)); 2613 } 2614 2615 Type *RetType = CallRetResNo.has_value() 2616 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) 2617 : Type::getVoidTy(Ctx); 2618 SDValue InChain = StoresInChain ? StoresInChain : getEntryNode(); 2619 SDValue Callee = getExternalSymbol(VD ? 
VD->getVectorFnName().data() : LCName, 2620 TLI->getPointerTy(getDataLayout())); 2621 TargetLowering::CallLoweringInfo CLI(*this); 2622 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( 2623 TLI->getLibcallCallingConv(LC), RetType, Callee, std::move(Args)); 2624 2625 auto [Call, CallChain] = TLI->LowerCallTo(CLI); 2626 2627 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) { 2628 if (ResNo == CallRetResNo) { 2629 Results.push_back(Call); 2630 continue; 2631 } 2632 MachinePointerInfo PtrInfo; 2633 SDValue LoadResult = 2634 getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo); 2635 SDValue OutChain = LoadResult.getValue(1); 2636 2637 if (StoreSDNode *ST = ResultStores[ResNo]) { 2638 // Replace store with the library call. 2639 ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain); 2640 PtrInfo = ST->getPointerInfo(); 2641 } else { 2642 PtrInfo = MachinePointerInfo::getFixedStack( 2643 getMachineFunction(), cast<FrameIndexSDNode>(ResultPtr)->getIndex()); 2644 } 2645 2646 Results.push_back(LoadResult); 2647 } 2648 2649 return true; 2650 } 2651 2652 SDValue SelectionDAG::expandVAArg(SDNode *Node) { 2653 SDLoc dl(Node); 2654 const TargetLowering &TLI = getTargetLoweringInfo(); 2655 const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 2656 EVT VT = Node->getValueType(0); 2657 SDValue Tmp1 = Node->getOperand(0); 2658 SDValue Tmp2 = Node->getOperand(1); 2659 const MaybeAlign MA(Node->getConstantOperandVal(3)); 2660 2661 SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, 2662 Tmp2, MachinePointerInfo(V)); 2663 SDValue VAList = VAListLoad; 2664 2665 if (MA && *MA > TLI.getMinStackArgumentAlignment()) { 2666 VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, 2667 getConstant(MA->value() - 1, dl, VAList.getValueType())); 2668 2669 VAList = getNode( 2670 ISD::AND, dl, VAList.getValueType(), VAList, 2671 getSignedConstant(-(int64_t)MA->value(), dl, VAList.getValueType())); 2672 } 2673 2674 // 
Increment the pointer, VAList, to the next vaarg 2675 Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, 2676 getConstant(getDataLayout().getTypeAllocSize( 2677 VT.getTypeForEVT(*getContext())), 2678 dl, VAList.getValueType())); 2679 // Store the incremented VAList to the legalized pointer 2680 Tmp1 = 2681 getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V)); 2682 // Load the actual argument out of the pointer VAList 2683 return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo()); 2684 } 2685 2686 SDValue SelectionDAG::expandVACopy(SDNode *Node) { 2687 SDLoc dl(Node); 2688 const TargetLowering &TLI = getTargetLoweringInfo(); 2689 // This defaults to loading a pointer from the input and storing it to the 2690 // output, returning the chain. 2691 const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); 2692 const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); 2693 SDValue Tmp1 = 2694 getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0), 2695 Node->getOperand(2), MachinePointerInfo(VS)); 2696 return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), 2697 MachinePointerInfo(VD)); 2698 } 2699 2700 Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) { 2701 const DataLayout &DL = getDataLayout(); 2702 Type *Ty = VT.getTypeForEVT(*getContext()); 2703 Align RedAlign = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty); 2704 2705 if (TLI->isTypeLegal(VT) || !VT.isVector()) 2706 return RedAlign; 2707 2708 const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); 2709 const Align StackAlign = TFI->getStackAlign(); 2710 2711 // See if we can choose a smaller ABI alignment in cases where it's an 2712 // illegal vector type that will get broken down. 
2713 if (RedAlign > StackAlign) { 2714 EVT IntermediateVT; 2715 MVT RegisterVT; 2716 unsigned NumIntermediates; 2717 TLI->getVectorTypeBreakdown(*getContext(), VT, IntermediateVT, 2718 NumIntermediates, RegisterVT); 2719 Ty = IntermediateVT.getTypeForEVT(*getContext()); 2720 Align RedAlign2 = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty); 2721 if (RedAlign2 < RedAlign) 2722 RedAlign = RedAlign2; 2723 2724 if (!getMachineFunction().getFrameInfo().isStackRealignable()) 2725 // If the stack is not realignable, the alignment should be limited to the 2726 // StackAlignment 2727 RedAlign = std::min(RedAlign, StackAlign); 2728 } 2729 2730 return RedAlign; 2731 } 2732 2733 SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) { 2734 MachineFrameInfo &MFI = MF->getFrameInfo(); 2735 const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); 2736 int StackID = 0; 2737 if (Bytes.isScalable()) 2738 StackID = TFI->getStackIDForScalableVectors(); 2739 // The stack id gives an indication of whether the object is scalable or 2740 // not, so it's safe to pass in the minimum size here. 
2741 int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinValue(), Alignment, 2742 false, nullptr, StackID); 2743 return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); 2744 } 2745 2746 SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { 2747 Type *Ty = VT.getTypeForEVT(*getContext()); 2748 Align StackAlign = 2749 std::max(getDataLayout().getPrefTypeAlign(Ty), Align(minAlign)); 2750 return CreateStackTemporary(VT.getStoreSize(), StackAlign); 2751 } 2752 2753 SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { 2754 TypeSize VT1Size = VT1.getStoreSize(); 2755 TypeSize VT2Size = VT2.getStoreSize(); 2756 assert(VT1Size.isScalable() == VT2Size.isScalable() && 2757 "Don't know how to choose the maximum size when creating a stack " 2758 "temporary"); 2759 TypeSize Bytes = VT1Size.getKnownMinValue() > VT2Size.getKnownMinValue() 2760 ? VT1Size 2761 : VT2Size; 2762 2763 Type *Ty1 = VT1.getTypeForEVT(*getContext()); 2764 Type *Ty2 = VT2.getTypeForEVT(*getContext()); 2765 const DataLayout &DL = getDataLayout(); 2766 Align Align = std::max(DL.getPrefTypeAlign(Ty1), DL.getPrefTypeAlign(Ty2)); 2767 return CreateStackTemporary(Bytes, Align); 2768 } 2769 2770 SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, 2771 ISD::CondCode Cond, const SDLoc &dl) { 2772 EVT OpVT = N1.getValueType(); 2773 2774 auto GetUndefBooleanConstant = [&]() { 2775 if (VT.getScalarType() == MVT::i1 || 2776 TLI->getBooleanContents(OpVT) == 2777 TargetLowering::UndefinedBooleanContent) 2778 return getUNDEF(VT); 2779 // ZeroOrOne / ZeroOrNegative require specific values for the high bits, 2780 // so we cannot use getUNDEF(). Return zero instead. 2781 return getConstant(0, dl, VT); 2782 }; 2783 2784 // These setcc operations always fold. 
2785 switch (Cond) { 2786 default: break; 2787 case ISD::SETFALSE: 2788 case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT); 2789 case ISD::SETTRUE: 2790 case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT); 2791 2792 case ISD::SETOEQ: 2793 case ISD::SETOGT: 2794 case ISD::SETOGE: 2795 case ISD::SETOLT: 2796 case ISD::SETOLE: 2797 case ISD::SETONE: 2798 case ISD::SETO: 2799 case ISD::SETUO: 2800 case ISD::SETUEQ: 2801 case ISD::SETUNE: 2802 assert(!OpVT.isInteger() && "Illegal setcc for integer!"); 2803 break; 2804 } 2805 2806 if (OpVT.isInteger()) { 2807 // For EQ and NE, we can always pick a value for the undef to make the 2808 // predicate pass or fail, so we can return undef. 2809 // Matches behavior in llvm::ConstantFoldCompareInstruction. 2810 // icmp eq/ne X, undef -> undef. 2811 if ((N1.isUndef() || N2.isUndef()) && 2812 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) 2813 return GetUndefBooleanConstant(); 2814 2815 // If both operands are undef, we can return undef for int comparison. 2816 // icmp undef, undef -> undef. 2817 if (N1.isUndef() && N2.isUndef()) 2818 return GetUndefBooleanConstant(); 2819 2820 // icmp X, X -> true/false 2821 // icmp X, undef -> true/false because undef could be X. 
2822 if (N1.isUndef() || N2.isUndef() || N1 == N2) 2823 return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT); 2824 } 2825 2826 if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) { 2827 const APInt &C2 = N2C->getAPIntValue(); 2828 if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { 2829 const APInt &C1 = N1C->getAPIntValue(); 2830 2831 return getBoolConstant(ICmpInst::compare(C1, C2, getICmpCondCode(Cond)), 2832 dl, VT, OpVT); 2833 } 2834 } 2835 2836 auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 2837 auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2); 2838 2839 if (N1CFP && N2CFP) { 2840 APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF()); 2841 switch (Cond) { 2842 default: break; 2843 case ISD::SETEQ: if (R==APFloat::cmpUnordered) 2844 return GetUndefBooleanConstant(); 2845 [[fallthrough]]; 2846 case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, 2847 OpVT); 2848 case ISD::SETNE: if (R==APFloat::cmpUnordered) 2849 return GetUndefBooleanConstant(); 2850 [[fallthrough]]; 2851 case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || 2852 R==APFloat::cmpLessThan, dl, VT, 2853 OpVT); 2854 case ISD::SETLT: if (R==APFloat::cmpUnordered) 2855 return GetUndefBooleanConstant(); 2856 [[fallthrough]]; 2857 case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, 2858 OpVT); 2859 case ISD::SETGT: if (R==APFloat::cmpUnordered) 2860 return GetUndefBooleanConstant(); 2861 [[fallthrough]]; 2862 case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, 2863 VT, OpVT); 2864 case ISD::SETLE: if (R==APFloat::cmpUnordered) 2865 return GetUndefBooleanConstant(); 2866 [[fallthrough]]; 2867 case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || 2868 R==APFloat::cmpEqual, dl, VT, 2869 OpVT); 2870 case ISD::SETGE: if (R==APFloat::cmpUnordered) 2871 return GetUndefBooleanConstant(); 2872 [[fallthrough]]; 2873 case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan 
|| 2874 R==APFloat::cmpEqual, dl, VT, OpVT); 2875 case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, 2876 OpVT); 2877 case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, 2878 OpVT); 2879 case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || 2880 R==APFloat::cmpEqual, dl, VT, 2881 OpVT); 2882 case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, 2883 OpVT); 2884 case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || 2885 R==APFloat::cmpLessThan, dl, VT, 2886 OpVT); 2887 case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || 2888 R==APFloat::cmpUnordered, dl, VT, 2889 OpVT); 2890 case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, 2891 VT, OpVT); 2892 case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, 2893 OpVT); 2894 } 2895 } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) { 2896 // Ensure that the constant occurs on the RHS. 2897 ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); 2898 if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT())) 2899 return SDValue(); 2900 return getSetCC(dl, VT, N2, N1, SwappedCond); 2901 } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) || 2902 (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) { 2903 // If an operand is known to be a nan (or undef that could be a nan), we can 2904 // fold it. 2905 // Choosing NaN for the undef will always make unordered comparison succeed 2906 // and ordered comparison fails. 2907 // Matches behavior in llvm::ConstantFoldCompareInstruction. 2908 switch (ISD::getUnorderedFlavor(Cond)) { 2909 default: 2910 llvm_unreachable("Unknown flavor!"); 2911 case 0: // Known false. 2912 return getBoolConstant(false, dl, VT, OpVT); 2913 case 1: // Known true. 2914 return getBoolConstant(true, dl, VT, OpVT); 2915 case 2: // Undefined. 2916 return GetUndefBooleanConstant(); 2917 } 2918 } 2919 2920 // Could not fold it. 
2921 return SDValue(); 2922 } 2923 2924 /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We 2925 /// use this predicate to simplify operations downstream. 2926 bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { 2927 unsigned BitWidth = Op.getScalarValueSizeInBits(); 2928 return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth); 2929 } 2930 2931 /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use 2932 /// this predicate to simplify operations downstream. Mask is known to be zero 2933 /// for bits that V cannot have. 2934 bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, 2935 unsigned Depth) const { 2936 return Mask.isSubsetOf(computeKnownBits(V, Depth).Zero); 2937 } 2938 2939 /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in 2940 /// DemandedElts. We use this predicate to simplify operations downstream. 2941 /// Mask is known to be zero for bits that V cannot have. 2942 bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, 2943 const APInt &DemandedElts, 2944 unsigned Depth) const { 2945 return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero); 2946 } 2947 2948 /// MaskedVectorIsZero - Return true if 'Op' is known to be zero in 2949 /// DemandedElts. We use this predicate to simplify operations downstream. 2950 bool SelectionDAG::MaskedVectorIsZero(SDValue V, const APInt &DemandedElts, 2951 unsigned Depth /* = 0 */) const { 2952 return computeKnownBits(V, DemandedElts, Depth).isZero(); 2953 } 2954 2955 /// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'. 
2956 bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, 2957 unsigned Depth) const { 2958 return Mask.isSubsetOf(computeKnownBits(V, Depth).One); 2959 } 2960 2961 APInt SelectionDAG::computeVectorKnownZeroElements(SDValue Op, 2962 const APInt &DemandedElts, 2963 unsigned Depth) const { 2964 EVT VT = Op.getValueType(); 2965 assert(VT.isVector() && !VT.isScalableVector() && "Only for fixed vectors!"); 2966 2967 unsigned NumElts = VT.getVectorNumElements(); 2968 assert(DemandedElts.getBitWidth() == NumElts && "Unexpected demanded mask."); 2969 2970 APInt KnownZeroElements = APInt::getZero(NumElts); 2971 for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) { 2972 if (!DemandedElts[EltIdx]) 2973 continue; // Don't query elements that are not demanded. 2974 APInt Mask = APInt::getOneBitSet(NumElts, EltIdx); 2975 if (MaskedVectorIsZero(Op, Mask, Depth)) 2976 KnownZeroElements.setBit(EltIdx); 2977 } 2978 return KnownZeroElements; 2979 } 2980 2981 /// isSplatValue - Return true if the vector V has the same value 2982 /// across all DemandedElts. For scalable vectors, we don't know the 2983 /// number of lanes at compile time. Instead, we use a 1 bit APInt 2984 /// to represent a conservative value for all lanes; that is, that 2985 /// one bit value is implicitly splatted across all lanes. 2986 bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, 2987 APInt &UndefElts, unsigned Depth) const { 2988 unsigned Opcode = V.getOpcode(); 2989 EVT VT = V.getValueType(); 2990 assert(VT.isVector() && "Vector type expected"); 2991 assert((!VT.isScalableVector() || DemandedElts.getBitWidth() == 1) && 2992 "scalable demanded bits are ignored"); 2993 2994 if (!DemandedElts) 2995 return false; // No demanded elts, better to assume we don't know anything. 2996 2997 if (Depth >= MaxRecursionDepth) 2998 return false; // Limit search depth. 2999 3000 // Deal with some common cases here that work for both fixed and scalable 3001 // vector types. 
3002 switch (Opcode) { 3003 case ISD::SPLAT_VECTOR: 3004 UndefElts = V.getOperand(0).isUndef() 3005 ? APInt::getAllOnes(DemandedElts.getBitWidth()) 3006 : APInt(DemandedElts.getBitWidth(), 0); 3007 return true; 3008 case ISD::ADD: 3009 case ISD::SUB: 3010 case ISD::AND: 3011 case ISD::XOR: 3012 case ISD::OR: { 3013 APInt UndefLHS, UndefRHS; 3014 SDValue LHS = V.getOperand(0); 3015 SDValue RHS = V.getOperand(1); 3016 // Only recognize splats with the same demanded undef elements for both 3017 // operands, otherwise we might fail to handle binop-specific undef 3018 // handling. 3019 // e.g. (and undef, 0) -> 0 etc. 3020 if (isSplatValue(LHS, DemandedElts, UndefLHS, Depth + 1) && 3021 isSplatValue(RHS, DemandedElts, UndefRHS, Depth + 1) && 3022 (DemandedElts & UndefLHS) == (DemandedElts & UndefRHS)) { 3023 UndefElts = UndefLHS | UndefRHS; 3024 return true; 3025 } 3026 return false; 3027 } 3028 case ISD::ABS: 3029 case ISD::TRUNCATE: 3030 case ISD::SIGN_EXTEND: 3031 case ISD::ZERO_EXTEND: 3032 return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1); 3033 default: 3034 if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || 3035 Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) 3036 return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, *this, 3037 Depth); 3038 break; 3039 } 3040 3041 // We don't support other cases than those above for scalable vectors at 3042 // the moment. 
3043 if (VT.isScalableVector()) 3044 return false; 3045 3046 unsigned NumElts = VT.getVectorNumElements(); 3047 assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); 3048 UndefElts = APInt::getZero(NumElts); 3049 3050 switch (Opcode) { 3051 case ISD::BUILD_VECTOR: { 3052 SDValue Scl; 3053 for (unsigned i = 0; i != NumElts; ++i) { 3054 SDValue Op = V.getOperand(i); 3055 if (Op.isUndef()) { 3056 UndefElts.setBit(i); 3057 continue; 3058 } 3059 if (!DemandedElts[i]) 3060 continue; 3061 if (Scl && Scl != Op) 3062 return false; 3063 Scl = Op; 3064 } 3065 return true; 3066 } 3067 case ISD::VECTOR_SHUFFLE: { 3068 // Check if this is a shuffle node doing a splat or a shuffle of a splat. 3069 APInt DemandedLHS = APInt::getZero(NumElts); 3070 APInt DemandedRHS = APInt::getZero(NumElts); 3071 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask(); 3072 for (int i = 0; i != (int)NumElts; ++i) { 3073 int M = Mask[i]; 3074 if (M < 0) { 3075 UndefElts.setBit(i); 3076 continue; 3077 } 3078 if (!DemandedElts[i]) 3079 continue; 3080 if (M < (int)NumElts) 3081 DemandedLHS.setBit(M); 3082 else 3083 DemandedRHS.setBit(M - NumElts); 3084 } 3085 3086 // If we aren't demanding either op, assume there's no splat. 3087 // If we are demanding both ops, assume there's no splat. 3088 if ((DemandedLHS.isZero() && DemandedRHS.isZero()) || 3089 (!DemandedLHS.isZero() && !DemandedRHS.isZero())) 3090 return false; 3091 3092 // See if the demanded elts of the source op is a splat or we only demand 3093 // one element, which should always be a splat. 3094 // TODO: Handle source ops splats with undefs. 
3095 auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) { 3096 APInt SrcUndefs; 3097 return (SrcElts.popcount() == 1) || 3098 (isSplatValue(Src, SrcElts, SrcUndefs, Depth + 1) && 3099 (SrcElts & SrcUndefs).isZero()); 3100 }; 3101 if (!DemandedLHS.isZero()) 3102 return CheckSplatSrc(V.getOperand(0), DemandedLHS); 3103 return CheckSplatSrc(V.getOperand(1), DemandedRHS); 3104 } 3105 case ISD::EXTRACT_SUBVECTOR: { 3106 // Offset the demanded elts by the subvector index. 3107 SDValue Src = V.getOperand(0); 3108 // We don't support scalable vectors at the moment. 3109 if (Src.getValueType().isScalableVector()) 3110 return false; 3111 uint64_t Idx = V.getConstantOperandVal(1); 3112 unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); 3113 APInt UndefSrcElts; 3114 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); 3115 if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) { 3116 UndefElts = UndefSrcElts.extractBits(NumElts, Idx); 3117 return true; 3118 } 3119 break; 3120 } 3121 case ISD::ANY_EXTEND_VECTOR_INREG: 3122 case ISD::SIGN_EXTEND_VECTOR_INREG: 3123 case ISD::ZERO_EXTEND_VECTOR_INREG: { 3124 // Widen the demanded elts by the src element count. 3125 SDValue Src = V.getOperand(0); 3126 // We don't support scalable vectors at the moment. 3127 if (Src.getValueType().isScalableVector()) 3128 return false; 3129 unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); 3130 APInt UndefSrcElts; 3131 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts); 3132 if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) { 3133 UndefElts = UndefSrcElts.trunc(NumElts); 3134 return true; 3135 } 3136 break; 3137 } 3138 case ISD::BITCAST: { 3139 SDValue Src = V.getOperand(0); 3140 EVT SrcVT = Src.getValueType(); 3141 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits(); 3142 unsigned BitWidth = VT.getScalarSizeInBits(); 3143 3144 // Ignore bitcasts from unsupported types. 3145 // TODO: Add fp support? 
3146 if (!SrcVT.isVector() || !SrcVT.isInteger() || !VT.isInteger()) 3147 break; 3148 3149 // Bitcast 'small element' vector to 'large element' vector. 3150 if ((BitWidth % SrcBitWidth) == 0) { 3151 // See if each sub element is a splat. 3152 unsigned Scale = BitWidth / SrcBitWidth; 3153 unsigned NumSrcElts = SrcVT.getVectorNumElements(); 3154 APInt ScaledDemandedElts = 3155 APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); 3156 for (unsigned I = 0; I != Scale; ++I) { 3157 APInt SubUndefElts; 3158 APInt SubDemandedElt = APInt::getOneBitSet(Scale, I); 3159 APInt SubDemandedElts = APInt::getSplat(NumSrcElts, SubDemandedElt); 3160 SubDemandedElts &= ScaledDemandedElts; 3161 if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1)) 3162 return false; 3163 // TODO: Add support for merging sub undef elements. 3164 if (!SubUndefElts.isZero()) 3165 return false; 3166 } 3167 return true; 3168 } 3169 break; 3170 } 3171 } 3172 3173 return false; 3174 } 3175 3176 /// Helper wrapper to main isSplatValue function. 3177 bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) const { 3178 EVT VT = V.getValueType(); 3179 assert(VT.isVector() && "Vector type expected"); 3180 3181 APInt UndefElts; 3182 // Since the number of lanes in a scalable vector is unknown at compile time, 3183 // we track one bit which is implicitly broadcast to all lanes. This means 3184 // that all lanes in a scalable vector are considered demanded. 3185 APInt DemandedElts 3186 = APInt::getAllOnes(VT.isScalableVector() ? 
1 : VT.getVectorNumElements()); 3187 return isSplatValue(V, DemandedElts, UndefElts) && 3188 (AllowUndefs || !UndefElts); 3189 } 3190 3191 SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { 3192 V = peekThroughExtractSubvectors(V); 3193 3194 EVT VT = V.getValueType(); 3195 unsigned Opcode = V.getOpcode(); 3196 switch (Opcode) { 3197 default: { 3198 APInt UndefElts; 3199 // Since the number of lanes in a scalable vector is unknown at compile time, 3200 // we track one bit which is implicitly broadcast to all lanes. This means 3201 // that all lanes in a scalable vector are considered demanded. 3202 APInt DemandedElts 3203 = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements()); 3204 3205 if (isSplatValue(V, DemandedElts, UndefElts)) { 3206 if (VT.isScalableVector()) { 3207 // DemandedElts and UndefElts are ignored for scalable vectors, since 3208 // the only supported cases are SPLAT_VECTOR nodes. 3209 SplatIdx = 0; 3210 } else { 3211 // Handle case where all demanded elements are UNDEF. 3212 if (DemandedElts.isSubsetOf(UndefElts)) { 3213 SplatIdx = 0; 3214 return getUNDEF(VT); 3215 } 3216 SplatIdx = (UndefElts & DemandedElts).countr_one(); 3217 } 3218 return V; 3219 } 3220 break; 3221 } 3222 case ISD::SPLAT_VECTOR: 3223 SplatIdx = 0; 3224 return V; 3225 case ISD::VECTOR_SHUFFLE: { 3226 assert(!VT.isScalableVector()); 3227 // Check if this is a shuffle node doing a splat. 3228 // TODO - remove this and rely purely on SelectionDAG::isSplatValue, 3229 // getTargetVShiftNode currently struggles without the splat source. 
3230 auto *SVN = cast<ShuffleVectorSDNode>(V); 3231 if (!SVN->isSplat()) 3232 break; 3233 int Idx = SVN->getSplatIndex(); 3234 int NumElts = V.getValueType().getVectorNumElements(); 3235 SplatIdx = Idx % NumElts; 3236 return V.getOperand(Idx / NumElts); 3237 } 3238 } 3239 3240 return SDValue(); 3241 } 3242 3243 SDValue SelectionDAG::getSplatValue(SDValue V, bool LegalTypes) { 3244 int SplatIdx; 3245 if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) { 3246 EVT SVT = SrcVector.getValueType().getScalarType(); 3247 EVT LegalSVT = SVT; 3248 if (LegalTypes && !TLI->isTypeLegal(SVT)) { 3249 if (!SVT.isInteger()) 3250 return SDValue(); 3251 LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); 3252 if (LegalSVT.bitsLT(SVT)) 3253 return SDValue(); 3254 } 3255 return getExtractVectorElt(SDLoc(V), LegalSVT, SrcVector, SplatIdx); 3256 } 3257 return SDValue(); 3258 } 3259 3260 std::optional<ConstantRange> 3261 SelectionDAG::getValidShiftAmountRange(SDValue V, const APInt &DemandedElts, 3262 unsigned Depth) const { 3263 assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || 3264 V.getOpcode() == ISD::SRA) && 3265 "Unknown shift node"); 3266 // Shifting more than the bitwidth is not valid. 
3267 unsigned BitWidth = V.getScalarValueSizeInBits(); 3268 3269 if (auto *Cst = dyn_cast<ConstantSDNode>(V.getOperand(1))) { 3270 const APInt &ShAmt = Cst->getAPIntValue(); 3271 if (ShAmt.uge(BitWidth)) 3272 return std::nullopt; 3273 return ConstantRange(ShAmt); 3274 } 3275 3276 if (auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1))) { 3277 const APInt *MinAmt = nullptr, *MaxAmt = nullptr; 3278 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { 3279 if (!DemandedElts[i]) 3280 continue; 3281 auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); 3282 if (!SA) { 3283 MinAmt = MaxAmt = nullptr; 3284 break; 3285 } 3286 const APInt &ShAmt = SA->getAPIntValue(); 3287 if (ShAmt.uge(BitWidth)) 3288 return std::nullopt; 3289 if (!MinAmt || MinAmt->ugt(ShAmt)) 3290 MinAmt = &ShAmt; 3291 if (!MaxAmt || MaxAmt->ult(ShAmt)) 3292 MaxAmt = &ShAmt; 3293 } 3294 assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) && 3295 "Failed to find matching min/max shift amounts"); 3296 if (MinAmt && MaxAmt) 3297 return ConstantRange(*MinAmt, *MaxAmt + 1); 3298 } 3299 3300 // Use computeKnownBits to find a hidden constant/knownbits (usually type 3301 // legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc. 
3302 KnownBits KnownAmt = computeKnownBits(V.getOperand(1), DemandedElts, Depth); 3303 if (KnownAmt.getMaxValue().ult(BitWidth)) 3304 return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false); 3305 3306 return std::nullopt; 3307 } 3308 3309 std::optional<uint64_t> 3310 SelectionDAG::getValidShiftAmount(SDValue V, const APInt &DemandedElts, 3311 unsigned Depth) const { 3312 assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || 3313 V.getOpcode() == ISD::SRA) && 3314 "Unknown shift node"); 3315 if (std::optional<ConstantRange> AmtRange = 3316 getValidShiftAmountRange(V, DemandedElts, Depth)) 3317 if (const APInt *ShAmt = AmtRange->getSingleElement()) 3318 return ShAmt->getZExtValue(); 3319 return std::nullopt; 3320 } 3321 3322 std::optional<uint64_t> 3323 SelectionDAG::getValidShiftAmount(SDValue V, unsigned Depth) const { 3324 EVT VT = V.getValueType(); 3325 APInt DemandedElts = VT.isFixedLengthVector() 3326 ? APInt::getAllOnes(VT.getVectorNumElements()) 3327 : APInt(1, 1); 3328 return getValidShiftAmount(V, DemandedElts, Depth); 3329 } 3330 3331 std::optional<uint64_t> 3332 SelectionDAG::getValidMinimumShiftAmount(SDValue V, const APInt &DemandedElts, 3333 unsigned Depth) const { 3334 assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || 3335 V.getOpcode() == ISD::SRA) && 3336 "Unknown shift node"); 3337 if (std::optional<ConstantRange> AmtRange = 3338 getValidShiftAmountRange(V, DemandedElts, Depth)) 3339 return AmtRange->getUnsignedMin().getZExtValue(); 3340 return std::nullopt; 3341 } 3342 3343 std::optional<uint64_t> 3344 SelectionDAG::getValidMinimumShiftAmount(SDValue V, unsigned Depth) const { 3345 EVT VT = V.getValueType(); 3346 APInt DemandedElts = VT.isFixedLengthVector() 3347 ? 
APInt::getAllOnes(VT.getVectorNumElements()) 3348 : APInt(1, 1); 3349 return getValidMinimumShiftAmount(V, DemandedElts, Depth); 3350 } 3351 3352 std::optional<uint64_t> 3353 SelectionDAG::getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, 3354 unsigned Depth) const { 3355 assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || 3356 V.getOpcode() == ISD::SRA) && 3357 "Unknown shift node"); 3358 if (std::optional<ConstantRange> AmtRange = 3359 getValidShiftAmountRange(V, DemandedElts, Depth)) 3360 return AmtRange->getUnsignedMax().getZExtValue(); 3361 return std::nullopt; 3362 } 3363 3364 std::optional<uint64_t> 3365 SelectionDAG::getValidMaximumShiftAmount(SDValue V, unsigned Depth) const { 3366 EVT VT = V.getValueType(); 3367 APInt DemandedElts = VT.isFixedLengthVector() 3368 ? APInt::getAllOnes(VT.getVectorNumElements()) 3369 : APInt(1, 1); 3370 return getValidMaximumShiftAmount(V, DemandedElts, Depth); 3371 } 3372 3373 /// Determine which bits of Op are known to be either zero or one and return 3374 /// them in Known. For vectors, the known bits are those that are shared by 3375 /// every vector element. 3376 KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { 3377 EVT VT = Op.getValueType(); 3378 3379 // Since the number of lanes in a scalable vector is unknown at compile time, 3380 // we track one bit which is implicitly broadcast to all lanes. This means 3381 // that all lanes in a scalable vector are considered demanded. 3382 APInt DemandedElts = VT.isFixedLengthVector() 3383 ? APInt::getAllOnes(VT.getVectorNumElements()) 3384 : APInt(1, 1); 3385 return computeKnownBits(Op, DemandedElts, Depth); 3386 } 3387 3388 /// Determine which bits of Op are known to be either zero or one and return 3389 /// them in Known. The DemandedElts argument allows us to only collect the known 3390 /// bits that are shared by the requested vector elements. 
3391 KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, 3392 unsigned Depth) const { 3393 unsigned BitWidth = Op.getScalarValueSizeInBits(); 3394 3395 KnownBits Known(BitWidth); // Don't know anything. 3396 3397 if (auto OptAPInt = Op->bitcastToAPInt()) { 3398 // We know all of the bits for a constant! 3399 return KnownBits::makeConstant(*std::move(OptAPInt)); 3400 } 3401 3402 if (Depth >= MaxRecursionDepth) 3403 return Known; // Limit search depth. 3404 3405 KnownBits Known2; 3406 unsigned NumElts = DemandedElts.getBitWidth(); 3407 assert((!Op.getValueType().isFixedLengthVector() || 3408 NumElts == Op.getValueType().getVectorNumElements()) && 3409 "Unexpected vector size"); 3410 3411 if (!DemandedElts) 3412 return Known; // No demanded elts, better to assume we don't know anything. 3413 3414 unsigned Opcode = Op.getOpcode(); 3415 switch (Opcode) { 3416 case ISD::MERGE_VALUES: 3417 return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts, 3418 Depth + 1); 3419 case ISD::SPLAT_VECTOR: { 3420 SDValue SrcOp = Op.getOperand(0); 3421 assert(SrcOp.getValueSizeInBits() >= BitWidth && 3422 "Expected SPLAT_VECTOR implicit truncation"); 3423 // Implicitly truncate the bits to match the official semantics of 3424 // SPLAT_VECTOR. 
3425 Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth); 3426 break; 3427 } 3428 case ISD::SPLAT_VECTOR_PARTS: { 3429 unsigned ScalarSize = Op.getOperand(0).getScalarValueSizeInBits(); 3430 assert(ScalarSize * Op.getNumOperands() == BitWidth && 3431 "Expected SPLAT_VECTOR_PARTS scalars to cover element width"); 3432 for (auto [I, SrcOp] : enumerate(Op->ops())) { 3433 Known.insertBits(computeKnownBits(SrcOp, Depth + 1), ScalarSize * I); 3434 } 3435 break; 3436 } 3437 case ISD::STEP_VECTOR: { 3438 const APInt &Step = Op.getConstantOperandAPInt(0); 3439 3440 if (Step.isPowerOf2()) 3441 Known.Zero.setLowBits(Step.logBase2()); 3442 3443 const Function &F = getMachineFunction().getFunction(); 3444 3445 if (!isUIntN(BitWidth, Op.getValueType().getVectorMinNumElements())) 3446 break; 3447 const APInt MinNumElts = 3448 APInt(BitWidth, Op.getValueType().getVectorMinNumElements()); 3449 3450 bool Overflow; 3451 const APInt MaxNumElts = getVScaleRange(&F, BitWidth) 3452 .getUnsignedMax() 3453 .umul_ov(MinNumElts, Overflow); 3454 if (Overflow) 3455 break; 3456 3457 const APInt MaxValue = (MaxNumElts - 1).umul_ov(Step, Overflow); 3458 if (Overflow) 3459 break; 3460 3461 Known.Zero.setHighBits(MaxValue.countl_zero()); 3462 break; 3463 } 3464 case ISD::BUILD_VECTOR: 3465 assert(!Op.getValueType().isScalableVector()); 3466 // Collect the known bits that are shared by every demanded vector element. 3467 Known.Zero.setAllBits(); Known.One.setAllBits(); 3468 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { 3469 if (!DemandedElts[i]) 3470 continue; 3471 3472 SDValue SrcOp = Op.getOperand(i); 3473 Known2 = computeKnownBits(SrcOp, Depth + 1); 3474 3475 // BUILD_VECTOR can implicitly truncate sources, we must handle this. 
3476 if (SrcOp.getValueSizeInBits() != BitWidth) { 3477 assert(SrcOp.getValueSizeInBits() > BitWidth && 3478 "Expected BUILD_VECTOR implicit truncation"); 3479 Known2 = Known2.trunc(BitWidth); 3480 } 3481 3482 // Known bits are the values that are shared by every demanded element. 3483 Known = Known.intersectWith(Known2); 3484 3485 // If we don't know any bits, early out. 3486 if (Known.isUnknown()) 3487 break; 3488 } 3489 break; 3490 case ISD::VECTOR_SHUFFLE: { 3491 assert(!Op.getValueType().isScalableVector()); 3492 // Collect the known bits that are shared by every vector element referenced 3493 // by the shuffle. 3494 APInt DemandedLHS, DemandedRHS; 3495 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); 3496 assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); 3497 if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts, 3498 DemandedLHS, DemandedRHS)) 3499 break; 3500 3501 // Known bits are the values that are shared by every demanded element. 3502 Known.Zero.setAllBits(); Known.One.setAllBits(); 3503 if (!!DemandedLHS) { 3504 SDValue LHS = Op.getOperand(0); 3505 Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1); 3506 Known = Known.intersectWith(Known2); 3507 } 3508 // If we don't know any bits, early out. 3509 if (Known.isUnknown()) 3510 break; 3511 if (!!DemandedRHS) { 3512 SDValue RHS = Op.getOperand(1); 3513 Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1); 3514 Known = Known.intersectWith(Known2); 3515 } 3516 break; 3517 } 3518 case ISD::VSCALE: { 3519 const Function &F = getMachineFunction().getFunction(); 3520 const APInt &Multiplier = Op.getConstantOperandAPInt(0); 3521 Known = getVScaleRange(&F, BitWidth).multiply(Multiplier).toKnownBits(); 3522 break; 3523 } 3524 case ISD::CONCAT_VECTORS: { 3525 if (Op.getValueType().isScalableVector()) 3526 break; 3527 // Split DemandedElts and test each of the demanded subvectors. 
3528 Known.Zero.setAllBits(); Known.One.setAllBits(); 3529 EVT SubVectorVT = Op.getOperand(0).getValueType(); 3530 unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); 3531 unsigned NumSubVectors = Op.getNumOperands(); 3532 for (unsigned i = 0; i != NumSubVectors; ++i) { 3533 APInt DemandedSub = 3534 DemandedElts.extractBits(NumSubVectorElts, i * NumSubVectorElts); 3535 if (!!DemandedSub) { 3536 SDValue Sub = Op.getOperand(i); 3537 Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1); 3538 Known = Known.intersectWith(Known2); 3539 } 3540 // If we don't know any bits, early out. 3541 if (Known.isUnknown()) 3542 break; 3543 } 3544 break; 3545 } 3546 case ISD::INSERT_SUBVECTOR: { 3547 if (Op.getValueType().isScalableVector()) 3548 break; 3549 // Demand any elements from the subvector and the remainder from the src its 3550 // inserted into. 3551 SDValue Src = Op.getOperand(0); 3552 SDValue Sub = Op.getOperand(1); 3553 uint64_t Idx = Op.getConstantOperandVal(2); 3554 unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); 3555 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); 3556 APInt DemandedSrcElts = DemandedElts; 3557 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts); 3558 3559 Known.One.setAllBits(); 3560 Known.Zero.setAllBits(); 3561 if (!!DemandedSubElts) { 3562 Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1); 3563 if (Known.isUnknown()) 3564 break; // early-out. 3565 } 3566 if (!!DemandedSrcElts) { 3567 Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1); 3568 Known = Known.intersectWith(Known2); 3569 } 3570 break; 3571 } 3572 case ISD::EXTRACT_SUBVECTOR: { 3573 // Offset the demanded elts by the subvector index. 3574 SDValue Src = Op.getOperand(0); 3575 // Bail until we can represent demanded elements for scalable vectors. 
3576 if (Op.getValueType().isScalableVector() || Src.getValueType().isScalableVector()) 3577 break; 3578 uint64_t Idx = Op.getConstantOperandVal(1); 3579 unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); 3580 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); 3581 Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1); 3582 break; 3583 } 3584 case ISD::SCALAR_TO_VECTOR: { 3585 if (Op.getValueType().isScalableVector()) 3586 break; 3587 // We know about scalar_to_vector as much as we know about it source, 3588 // which becomes the first element of otherwise unknown vector. 3589 if (DemandedElts != 1) 3590 break; 3591 3592 SDValue N0 = Op.getOperand(0); 3593 Known = computeKnownBits(N0, Depth + 1); 3594 if (N0.getValueSizeInBits() != BitWidth) 3595 Known = Known.trunc(BitWidth); 3596 3597 break; 3598 } 3599 case ISD::BITCAST: { 3600 if (Op.getValueType().isScalableVector()) 3601 break; 3602 3603 SDValue N0 = Op.getOperand(0); 3604 EVT SubVT = N0.getValueType(); 3605 unsigned SubBitWidth = SubVT.getScalarSizeInBits(); 3606 3607 // Ignore bitcasts from unsupported types. 3608 if (!(SubVT.isInteger() || SubVT.isFloatingPoint())) 3609 break; 3610 3611 // Fast handling of 'identity' bitcasts. 3612 if (BitWidth == SubBitWidth) { 3613 Known = computeKnownBits(N0, DemandedElts, Depth + 1); 3614 break; 3615 } 3616 3617 bool IsLE = getDataLayout().isLittleEndian(); 3618 3619 // Bitcast 'small element' vector to 'large element' scalar/vector. 3620 if ((BitWidth % SubBitWidth) == 0) { 3621 assert(N0.getValueType().isVector() && "Expected bitcast from vector"); 3622 3623 // Collect known bits for the (larger) output by collecting the known 3624 // bits from each set of sub elements and shift these into place. 3625 // We need to separately call computeKnownBits for each set of 3626 // sub elements as the knownbits for each is likely to be different. 
3627 unsigned SubScale = BitWidth / SubBitWidth; 3628 APInt SubDemandedElts(NumElts * SubScale, 0); 3629 for (unsigned i = 0; i != NumElts; ++i) 3630 if (DemandedElts[i]) 3631 SubDemandedElts.setBit(i * SubScale); 3632 3633 for (unsigned i = 0; i != SubScale; ++i) { 3634 Known2 = computeKnownBits(N0, SubDemandedElts.shl(i), 3635 Depth + 1); 3636 unsigned Shifts = IsLE ? i : SubScale - 1 - i; 3637 Known.insertBits(Known2, SubBitWidth * Shifts); 3638 } 3639 } 3640 3641 // Bitcast 'large element' scalar/vector to 'small element' vector. 3642 if ((SubBitWidth % BitWidth) == 0) { 3643 assert(Op.getValueType().isVector() && "Expected bitcast to vector"); 3644 3645 // Collect known bits for the (smaller) output by collecting the known 3646 // bits from the overlapping larger input elements and extracting the 3647 // sub sections we actually care about. 3648 unsigned SubScale = SubBitWidth / BitWidth; 3649 APInt SubDemandedElts = 3650 APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale); 3651 Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1); 3652 3653 Known.Zero.setAllBits(); Known.One.setAllBits(); 3654 for (unsigned i = 0; i != NumElts; ++i) 3655 if (DemandedElts[i]) { 3656 unsigned Shifts = IsLE ? i : NumElts - 1 - i; 3657 unsigned Offset = (Shifts % SubScale) * BitWidth; 3658 Known = Known.intersectWith(Known2.extractBits(BitWidth, Offset)); 3659 // If we don't know any bits, early out. 
3660 if (Known.isUnknown()) 3661 break; 3662 } 3663 } 3664 break; 3665 } 3666 case ISD::AND: 3667 Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3668 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3669 3670 Known &= Known2; 3671 break; 3672 case ISD::OR: 3673 Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3674 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3675 3676 Known |= Known2; 3677 break; 3678 case ISD::XOR: 3679 Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3680 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3681 3682 Known ^= Known2; 3683 break; 3684 case ISD::MUL: { 3685 Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3686 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3687 bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); 3688 // TODO: SelfMultiply can be poison, but not undef. 3689 if (SelfMultiply) 3690 SelfMultiply &= isGuaranteedNotToBeUndefOrPoison( 3691 Op.getOperand(0), DemandedElts, false, Depth + 1); 3692 Known = KnownBits::mul(Known, Known2, SelfMultiply); 3693 3694 // If the multiplication is known not to overflow, the product of a number 3695 // with itself is non-negative. Only do this if we didn't already computed 3696 // the opposite value for the sign bit. 
3697 if (Op->getFlags().hasNoSignedWrap() && 3698 Op.getOperand(0) == Op.getOperand(1) && 3699 !Known.isNegative()) 3700 Known.makeNonNegative(); 3701 break; 3702 } 3703 case ISD::MULHU: { 3704 Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3705 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3706 Known = KnownBits::mulhu(Known, Known2); 3707 break; 3708 } 3709 case ISD::MULHS: { 3710 Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3711 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3712 Known = KnownBits::mulhs(Known, Known2); 3713 break; 3714 } 3715 case ISD::ABDU: { 3716 Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3717 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3718 Known = KnownBits::abdu(Known, Known2); 3719 break; 3720 } 3721 case ISD::ABDS: { 3722 Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3723 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3724 Known = KnownBits::abds(Known, Known2); 3725 unsigned SignBits1 = 3726 ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); 3727 if (SignBits1 == 1) 3728 break; 3729 unsigned SignBits0 = 3730 ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 3731 Known.Zero.setHighBits(std::min(SignBits0, SignBits1) - 1); 3732 break; 3733 } 3734 case ISD::UMUL_LOHI: { 3735 assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); 3736 Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3737 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3738 bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); 3739 if (Op.getResNo() == 0) 3740 Known = KnownBits::mul(Known, Known2, SelfMultiply); 3741 else 3742 Known = KnownBits::mulhu(Known, Known2); 3743 break; 3744 } 3745 case ISD::SMUL_LOHI: { 3746 assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); 3747 Known = 
computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3748 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3749 bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); 3750 if (Op.getResNo() == 0) 3751 Known = KnownBits::mul(Known, Known2, SelfMultiply); 3752 else 3753 Known = KnownBits::mulhs(Known, Known2); 3754 break; 3755 } 3756 case ISD::AVGFLOORU: { 3757 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3758 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3759 Known = KnownBits::avgFloorU(Known, Known2); 3760 break; 3761 } 3762 case ISD::AVGCEILU: { 3763 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3764 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3765 Known = KnownBits::avgCeilU(Known, Known2); 3766 break; 3767 } 3768 case ISD::AVGFLOORS: { 3769 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3770 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3771 Known = KnownBits::avgFloorS(Known, Known2); 3772 break; 3773 } 3774 case ISD::AVGCEILS: { 3775 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3776 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3777 Known = KnownBits::avgCeilS(Known, Known2); 3778 break; 3779 } 3780 case ISD::SELECT: 3781 case ISD::VSELECT: 3782 Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); 3783 // If we don't know any bits, early out. 3784 if (Known.isUnknown()) 3785 break; 3786 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1); 3787 3788 // Only known if known in both the LHS and RHS. 3789 Known = Known.intersectWith(Known2); 3790 break; 3791 case ISD::SELECT_CC: 3792 Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1); 3793 // If we don't know any bits, early out. 
3794 if (Known.isUnknown()) 3795 break; 3796 Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); 3797 3798 // Only known if known in both the LHS and RHS. 3799 Known = Known.intersectWith(Known2); 3800 break; 3801 case ISD::SMULO: 3802 case ISD::UMULO: 3803 if (Op.getResNo() != 1) 3804 break; 3805 // The boolean result conforms to getBooleanContents. 3806 // If we know the result of a setcc has the top bits zero, use this info. 3807 // We know that we have an integer-based boolean since these operations 3808 // are only available for integer. 3809 if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == 3810 TargetLowering::ZeroOrOneBooleanContent && 3811 BitWidth > 1) 3812 Known.Zero.setBitsFrom(1); 3813 break; 3814 case ISD::SETCC: 3815 case ISD::SETCCCARRY: 3816 case ISD::STRICT_FSETCC: 3817 case ISD::STRICT_FSETCCS: { 3818 unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; 3819 // If we know the result of a setcc has the top bits zero, use this info. 3820 if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) == 3821 TargetLowering::ZeroOrOneBooleanContent && 3822 BitWidth > 1) 3823 Known.Zero.setBitsFrom(1); 3824 break; 3825 } 3826 case ISD::SHL: { 3827 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3828 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3829 3830 bool NUW = Op->getFlags().hasNoUnsignedWrap(); 3831 bool NSW = Op->getFlags().hasNoSignedWrap(); 3832 3833 bool ShAmtNonZero = Known2.isNonZero(); 3834 3835 Known = KnownBits::shl(Known, Known2, NUW, NSW, ShAmtNonZero); 3836 3837 // Minimum shift low bits are known zero. 
3838 if (std::optional<uint64_t> ShMinAmt = 3839 getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) 3840 Known.Zero.setLowBits(*ShMinAmt); 3841 break; 3842 } 3843 case ISD::SRL: 3844 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3845 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3846 Known = KnownBits::lshr(Known, Known2, /*ShAmtNonZero=*/false, 3847 Op->getFlags().hasExact()); 3848 3849 // Minimum shift high bits are known zero. 3850 if (std::optional<uint64_t> ShMinAmt = 3851 getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) 3852 Known.Zero.setHighBits(*ShMinAmt); 3853 break; 3854 case ISD::SRA: 3855 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3856 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3857 Known = KnownBits::ashr(Known, Known2, /*ShAmtNonZero=*/false, 3858 Op->getFlags().hasExact()); 3859 break; 3860 case ISD::FSHL: 3861 case ISD::FSHR: 3862 if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) { 3863 unsigned Amt = C->getAPIntValue().urem(BitWidth); 3864 3865 // For fshl, 0-shift returns the 1st arg. 3866 // For fshr, 0-shift returns the 2nd arg. 3867 if (Amt == 0) { 3868 Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 
0 : 1), 3869 DemandedElts, Depth + 1); 3870 break; 3871 } 3872 3873 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) 3874 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) 3875 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3876 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3877 if (Opcode == ISD::FSHL) { 3878 Known.One <<= Amt; 3879 Known.Zero <<= Amt; 3880 Known2.One.lshrInPlace(BitWidth - Amt); 3881 Known2.Zero.lshrInPlace(BitWidth - Amt); 3882 } else { 3883 Known.One <<= BitWidth - Amt; 3884 Known.Zero <<= BitWidth - Amt; 3885 Known2.One.lshrInPlace(Amt); 3886 Known2.Zero.lshrInPlace(Amt); 3887 } 3888 Known = Known.unionWith(Known2); 3889 } 3890 break; 3891 case ISD::SHL_PARTS: 3892 case ISD::SRA_PARTS: 3893 case ISD::SRL_PARTS: { 3894 assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); 3895 3896 // Collect lo/hi source values and concatenate. 3897 unsigned LoBits = Op.getOperand(0).getScalarValueSizeInBits(); 3898 unsigned HiBits = Op.getOperand(1).getScalarValueSizeInBits(); 3899 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3900 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3901 Known = Known2.concat(Known); 3902 3903 // Collect shift amount. 3904 Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); 3905 3906 if (Opcode == ISD::SHL_PARTS) 3907 Known = KnownBits::shl(Known, Known2); 3908 else if (Opcode == ISD::SRA_PARTS) 3909 Known = KnownBits::ashr(Known, Known2); 3910 else // if (Opcode == ISD::SRL_PARTS) 3911 Known = KnownBits::lshr(Known, Known2); 3912 3913 // TODO: Minimum shift low/high bits are known zero. 
3914 3915 if (Op.getResNo() == 0) 3916 Known = Known.extractBits(LoBits, 0); 3917 else 3918 Known = Known.extractBits(HiBits, LoBits); 3919 break; 3920 } 3921 case ISD::SIGN_EXTEND_INREG: { 3922 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3923 EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 3924 Known = Known.sextInReg(EVT.getScalarSizeInBits()); 3925 break; 3926 } 3927 case ISD::CTTZ: 3928 case ISD::CTTZ_ZERO_UNDEF: { 3929 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3930 // If we have a known 1, its position is our upper bound. 3931 unsigned PossibleTZ = Known2.countMaxTrailingZeros(); 3932 unsigned LowBits = llvm::bit_width(PossibleTZ); 3933 Known.Zero.setBitsFrom(LowBits); 3934 break; 3935 } 3936 case ISD::CTLZ: 3937 case ISD::CTLZ_ZERO_UNDEF: { 3938 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3939 // If we have a known 1, its position is our upper bound. 3940 unsigned PossibleLZ = Known2.countMaxLeadingZeros(); 3941 unsigned LowBits = llvm::bit_width(PossibleLZ); 3942 Known.Zero.setBitsFrom(LowBits); 3943 break; 3944 } 3945 case ISD::CTPOP: { 3946 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3947 // If we know some of the bits are zero, they can't be one. 3948 unsigned PossibleOnes = Known2.countMaxPopulation(); 3949 Known.Zero.setBitsFrom(llvm::bit_width(PossibleOnes)); 3950 break; 3951 } 3952 case ISD::PARITY: { 3953 // Parity returns 0 everywhere but the LSB. 3954 Known.Zero.setBitsFrom(1); 3955 break; 3956 } 3957 case ISD::MGATHER: 3958 case ISD::MLOAD: { 3959 ISD::LoadExtType ETy = 3960 (Opcode == ISD::MGATHER) 3961 ? 
cast<MaskedGatherSDNode>(Op)->getExtensionType() 3962 : cast<MaskedLoadSDNode>(Op)->getExtensionType(); 3963 if (ETy == ISD::ZEXTLOAD) { 3964 EVT MemVT = cast<MemSDNode>(Op)->getMemoryVT(); 3965 KnownBits Known0(MemVT.getScalarSizeInBits()); 3966 return Known0.zext(BitWidth); 3967 } 3968 break; 3969 } 3970 case ISD::LOAD: { 3971 LoadSDNode *LD = cast<LoadSDNode>(Op); 3972 const Constant *Cst = TLI->getTargetConstantFromLoad(LD); 3973 if (ISD::isNON_EXTLoad(LD) && Cst) { 3974 // Determine any common known bits from the loaded constant pool value. 3975 Type *CstTy = Cst->getType(); 3976 if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits() && 3977 !Op.getValueType().isScalableVector()) { 3978 // If its a vector splat, then we can (quickly) reuse the scalar path. 3979 // NOTE: We assume all elements match and none are UNDEF. 3980 if (CstTy->isVectorTy()) { 3981 if (const Constant *Splat = Cst->getSplatValue()) { 3982 Cst = Splat; 3983 CstTy = Cst->getType(); 3984 } 3985 } 3986 // TODO - do we need to handle different bitwidths? 3987 if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) { 3988 // Iterate across all vector elements finding common known bits. 
3989 Known.One.setAllBits(); 3990 Known.Zero.setAllBits(); 3991 for (unsigned i = 0; i != NumElts; ++i) { 3992 if (!DemandedElts[i]) 3993 continue; 3994 if (Constant *Elt = Cst->getAggregateElement(i)) { 3995 if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { 3996 const APInt &Value = CInt->getValue(); 3997 Known.One &= Value; 3998 Known.Zero &= ~Value; 3999 continue; 4000 } 4001 if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { 4002 APInt Value = CFP->getValueAPF().bitcastToAPInt(); 4003 Known.One &= Value; 4004 Known.Zero &= ~Value; 4005 continue; 4006 } 4007 } 4008 Known.One.clearAllBits(); 4009 Known.Zero.clearAllBits(); 4010 break; 4011 } 4012 } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) { 4013 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { 4014 Known = KnownBits::makeConstant(CInt->getValue()); 4015 } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { 4016 Known = 4017 KnownBits::makeConstant(CFP->getValueAPF().bitcastToAPInt()); 4018 } 4019 } 4020 } 4021 } else if (Op.getResNo() == 0) { 4022 unsigned ScalarMemorySize = LD->getMemoryVT().getScalarSizeInBits(); 4023 KnownBits KnownScalarMemory(ScalarMemorySize); 4024 if (const MDNode *MD = LD->getRanges()) 4025 computeKnownBitsFromRangeMetadata(*MD, KnownScalarMemory); 4026 4027 // Extend the Known bits from memory to the size of the scalar result. 
4028 if (ISD::isZEXTLoad(Op.getNode())) 4029 Known = KnownScalarMemory.zext(BitWidth); 4030 else if (ISD::isSEXTLoad(Op.getNode())) 4031 Known = KnownScalarMemory.sext(BitWidth); 4032 else if (ISD::isEXTLoad(Op.getNode())) 4033 Known = KnownScalarMemory.anyext(BitWidth); 4034 else 4035 Known = KnownScalarMemory; 4036 assert(Known.getBitWidth() == BitWidth); 4037 return Known; 4038 } 4039 break; 4040 } 4041 case ISD::ZERO_EXTEND_VECTOR_INREG: { 4042 if (Op.getValueType().isScalableVector()) 4043 break; 4044 EVT InVT = Op.getOperand(0).getValueType(); 4045 APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); 4046 Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); 4047 Known = Known.zext(BitWidth); 4048 break; 4049 } 4050 case ISD::ZERO_EXTEND: { 4051 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4052 Known = Known.zext(BitWidth); 4053 break; 4054 } 4055 case ISD::SIGN_EXTEND_VECTOR_INREG: { 4056 if (Op.getValueType().isScalableVector()) 4057 break; 4058 EVT InVT = Op.getOperand(0).getValueType(); 4059 APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); 4060 Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); 4061 // If the sign bit is known to be zero or one, then sext will extend 4062 // it to the top bits, else it will just zext. 4063 Known = Known.sext(BitWidth); 4064 break; 4065 } 4066 case ISD::SIGN_EXTEND: { 4067 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4068 // If the sign bit is known to be zero or one, then sext will extend 4069 // it to the top bits, else it will just zext. 
4070 Known = Known.sext(BitWidth); 4071 break; 4072 } 4073 case ISD::ANY_EXTEND_VECTOR_INREG: { 4074 if (Op.getValueType().isScalableVector()) 4075 break; 4076 EVT InVT = Op.getOperand(0).getValueType(); 4077 APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); 4078 Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); 4079 Known = Known.anyext(BitWidth); 4080 break; 4081 } 4082 case ISD::ANY_EXTEND: { 4083 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4084 Known = Known.anyext(BitWidth); 4085 break; 4086 } 4087 case ISD::TRUNCATE: { 4088 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4089 Known = Known.trunc(BitWidth); 4090 break; 4091 } 4092 case ISD::AssertZext: { 4093 EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 4094 APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); 4095 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4096 Known.Zero |= (~InMask); 4097 Known.One &= (~Known.Zero); 4098 break; 4099 } 4100 case ISD::AssertAlign: { 4101 unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign()); 4102 assert(LogOfAlign != 0); 4103 4104 // TODO: Should use maximum with source 4105 // If a node is guaranteed to be aligned, set low zero bits accordingly as 4106 // well as clearing one bits. 4107 Known.Zero.setLowBits(LogOfAlign); 4108 Known.One.clearLowBits(LogOfAlign); 4109 break; 4110 } 4111 case ISD::FGETSIGN: 4112 // All bits are zero except the low bit. 
4113 Known.Zero.setBitsFrom(1); 4114 break; 4115 case ISD::ADD: 4116 case ISD::SUB: { 4117 SDNodeFlags Flags = Op.getNode()->getFlags(); 4118 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4119 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4120 Known = KnownBits::computeForAddSub( 4121 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(), 4122 Flags.hasNoUnsignedWrap(), Known, Known2); 4123 break; 4124 } 4125 case ISD::USUBO: 4126 case ISD::SSUBO: 4127 case ISD::USUBO_CARRY: 4128 case ISD::SSUBO_CARRY: 4129 if (Op.getResNo() == 1) { 4130 // If we know the result of a setcc has the top bits zero, use this info. 4131 if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == 4132 TargetLowering::ZeroOrOneBooleanContent && 4133 BitWidth > 1) 4134 Known.Zero.setBitsFrom(1); 4135 break; 4136 } 4137 [[fallthrough]]; 4138 case ISD::SUBC: { 4139 assert(Op.getResNo() == 0 && 4140 "We only compute knownbits for the difference here."); 4141 4142 // With USUBO_CARRY and SSUBO_CARRY a borrow bit may be added in. 4143 KnownBits Borrow(1); 4144 if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) { 4145 Borrow = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); 4146 // Borrow has bit width 1 4147 Borrow = Borrow.trunc(1); 4148 } else { 4149 Borrow.setAllZero(); 4150 } 4151 4152 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4153 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4154 Known = KnownBits::computeForSubBorrow(Known, Known2, Borrow); 4155 break; 4156 } 4157 case ISD::UADDO: 4158 case ISD::SADDO: 4159 case ISD::UADDO_CARRY: 4160 case ISD::SADDO_CARRY: 4161 if (Op.getResNo() == 1) { 4162 // If we know the result of a setcc has the top bits zero, use this info. 
4163 if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == 4164 TargetLowering::ZeroOrOneBooleanContent && 4165 BitWidth > 1) 4166 Known.Zero.setBitsFrom(1); 4167 break; 4168 } 4169 [[fallthrough]]; 4170 case ISD::ADDC: 4171 case ISD::ADDE: { 4172 assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here."); 4173 4174 // With ADDE and UADDO_CARRY, a carry bit may be added in. 4175 KnownBits Carry(1); 4176 if (Opcode == ISD::ADDE) 4177 // Can't track carry from glue, set carry to unknown. 4178 Carry.resetAll(); 4179 else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY) { 4180 Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); 4181 // Carry has bit width 1 4182 Carry = Carry.trunc(1); 4183 } else { 4184 Carry.setAllZero(); 4185 } 4186 4187 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4188 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4189 Known = KnownBits::computeForAddCarry(Known, Known2, Carry); 4190 break; 4191 } 4192 case ISD::UDIV: { 4193 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4194 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4195 Known = KnownBits::udiv(Known, Known2, Op->getFlags().hasExact()); 4196 break; 4197 } 4198 case ISD::SDIV: { 4199 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4200 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4201 Known = KnownBits::sdiv(Known, Known2, Op->getFlags().hasExact()); 4202 break; 4203 } 4204 case ISD::SREM: { 4205 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4206 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4207 Known = KnownBits::srem(Known, Known2); 4208 break; 4209 } 4210 case ISD::UREM: { 4211 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4212 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4213 Known = 
KnownBits::urem(Known, Known2); 4214 break; 4215 } 4216 case ISD::EXTRACT_ELEMENT: { 4217 Known = computeKnownBits(Op.getOperand(0), Depth+1); 4218 const unsigned Index = Op.getConstantOperandVal(1); 4219 const unsigned EltBitWidth = Op.getValueSizeInBits(); 4220 4221 // Remove low part of known bits mask 4222 Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth); 4223 Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth); 4224 4225 // Remove high part of known bit mask 4226 Known = Known.trunc(EltBitWidth); 4227 break; 4228 } 4229 case ISD::EXTRACT_VECTOR_ELT: { 4230 SDValue InVec = Op.getOperand(0); 4231 SDValue EltNo = Op.getOperand(1); 4232 EVT VecVT = InVec.getValueType(); 4233 // computeKnownBits not yet implemented for scalable vectors. 4234 if (VecVT.isScalableVector()) 4235 break; 4236 const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); 4237 const unsigned NumSrcElts = VecVT.getVectorNumElements(); 4238 4239 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know 4240 // anything about the extended bits. 4241 if (BitWidth > EltBitWidth) 4242 Known = Known.trunc(EltBitWidth); 4243 4244 // If we know the element index, just demand that vector element, else for 4245 // an unknown element index, ignore DemandedElts and demand them all. 
4246 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); 4247 auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); 4248 if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) 4249 DemandedSrcElts = 4250 APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); 4251 4252 Known = computeKnownBits(InVec, DemandedSrcElts, Depth + 1); 4253 if (BitWidth > EltBitWidth) 4254 Known = Known.anyext(BitWidth); 4255 break; 4256 } 4257 case ISD::INSERT_VECTOR_ELT: { 4258 if (Op.getValueType().isScalableVector()) 4259 break; 4260 4261 // If we know the element index, split the demand between the 4262 // source vector and the inserted element, otherwise assume we need 4263 // the original demanded vector elements and the value. 4264 SDValue InVec = Op.getOperand(0); 4265 SDValue InVal = Op.getOperand(1); 4266 SDValue EltNo = Op.getOperand(2); 4267 bool DemandedVal = true; 4268 APInt DemandedVecElts = DemandedElts; 4269 auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo); 4270 if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { 4271 unsigned EltIdx = CEltNo->getZExtValue(); 4272 DemandedVal = !!DemandedElts[EltIdx]; 4273 DemandedVecElts.clearBit(EltIdx); 4274 } 4275 Known.One.setAllBits(); 4276 Known.Zero.setAllBits(); 4277 if (DemandedVal) { 4278 Known2 = computeKnownBits(InVal, Depth + 1); 4279 Known = Known.intersectWith(Known2.zextOrTrunc(BitWidth)); 4280 } 4281 if (!!DemandedVecElts) { 4282 Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1); 4283 Known = Known.intersectWith(Known2); 4284 } 4285 break; 4286 } 4287 case ISD::BITREVERSE: { 4288 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4289 Known = Known2.reverseBits(); 4290 break; 4291 } 4292 case ISD::BSWAP: { 4293 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4294 Known = Known2.byteSwap(); 4295 break; 4296 } 4297 case ISD::ABS: { 4298 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4299 Known = Known2.abs(); 4300 
Known.Zero.setHighBits( 4301 ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1) - 1); 4302 break; 4303 } 4304 case ISD::USUBSAT: { 4305 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4306 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4307 Known = KnownBits::usub_sat(Known, Known2); 4308 break; 4309 } 4310 case ISD::UMIN: { 4311 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4312 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4313 Known = KnownBits::umin(Known, Known2); 4314 break; 4315 } 4316 case ISD::UMAX: { 4317 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4318 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4319 Known = KnownBits::umax(Known, Known2); 4320 break; 4321 } 4322 case ISD::SMIN: 4323 case ISD::SMAX: { 4324 // If we have a clamp pattern, we know that the number of sign bits will be 4325 // the minimum of the clamp min/max range. 4326 bool IsMax = (Opcode == ISD::SMAX); 4327 ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; 4328 if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts))) 4329 if (Op.getOperand(0).getOpcode() == (IsMax ? 
ISD::SMIN : ISD::SMAX)) 4330 CstHigh = 4331 isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts); 4332 if (CstLow && CstHigh) { 4333 if (!IsMax) 4334 std::swap(CstLow, CstHigh); 4335 4336 const APInt &ValueLow = CstLow->getAPIntValue(); 4337 const APInt &ValueHigh = CstHigh->getAPIntValue(); 4338 if (ValueLow.sle(ValueHigh)) { 4339 unsigned LowSignBits = ValueLow.getNumSignBits(); 4340 unsigned HighSignBits = ValueHigh.getNumSignBits(); 4341 unsigned MinSignBits = std::min(LowSignBits, HighSignBits); 4342 if (ValueLow.isNegative() && ValueHigh.isNegative()) { 4343 Known.One.setHighBits(MinSignBits); 4344 break; 4345 } 4346 if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) { 4347 Known.Zero.setHighBits(MinSignBits); 4348 break; 4349 } 4350 } 4351 } 4352 4353 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4354 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4355 if (IsMax) 4356 Known = KnownBits::smax(Known, Known2); 4357 else 4358 Known = KnownBits::smin(Known, Known2); 4359 4360 // For SMAX, if CstLow is non-negative we know the result will be 4361 // non-negative and thus all sign bits are 0. 4362 // TODO: There's an equivalent of this for smin with negative constant for 4363 // known ones. 
4364 if (IsMax && CstLow) { 4365 const APInt &ValueLow = CstLow->getAPIntValue(); 4366 if (ValueLow.isNonNegative()) { 4367 unsigned SignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); 4368 Known.Zero.setHighBits(std::min(SignBits, ValueLow.getNumSignBits())); 4369 } 4370 } 4371 4372 break; 4373 } 4374 case ISD::UINT_TO_FP: { 4375 Known.makeNonNegative(); 4376 break; 4377 } 4378 case ISD::SINT_TO_FP: { 4379 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4380 if (Known2.isNonNegative()) 4381 Known.makeNonNegative(); 4382 else if (Known2.isNegative()) 4383 Known.makeNegative(); 4384 break; 4385 } 4386 case ISD::FP_TO_UINT_SAT: { 4387 // FP_TO_UINT_SAT produces an unsigned value that fits in the saturating VT. 4388 EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 4389 Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits()); 4390 break; 4391 } 4392 case ISD::ATOMIC_LOAD: { 4393 // If we are looking at the loaded value. 4394 if (Op.getResNo() == 0) { 4395 auto *AT = cast<AtomicSDNode>(Op); 4396 unsigned ScalarMemorySize = AT->getMemoryVT().getScalarSizeInBits(); 4397 KnownBits KnownScalarMemory(ScalarMemorySize); 4398 if (const MDNode *MD = AT->getRanges()) 4399 computeKnownBitsFromRangeMetadata(*MD, KnownScalarMemory); 4400 4401 switch (AT->getExtensionType()) { 4402 case ISD::ZEXTLOAD: 4403 Known = KnownScalarMemory.zext(BitWidth); 4404 break; 4405 case ISD::SEXTLOAD: 4406 Known = KnownScalarMemory.sext(BitWidth); 4407 break; 4408 case ISD::EXTLOAD: 4409 switch (TLI->getExtendForAtomicOps()) { 4410 case ISD::ZERO_EXTEND: 4411 Known = KnownScalarMemory.zext(BitWidth); 4412 break; 4413 case ISD::SIGN_EXTEND: 4414 Known = KnownScalarMemory.sext(BitWidth); 4415 break; 4416 default: 4417 Known = KnownScalarMemory.anyext(BitWidth); 4418 break; 4419 } 4420 break; 4421 case ISD::NON_EXTLOAD: 4422 Known = KnownScalarMemory; 4423 break; 4424 } 4425 assert(Known.getBitWidth() == BitWidth); 4426 } 4427 break; 4428 } 4429 case 
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: 4430 if (Op.getResNo() == 1) { 4431 // The boolean result conforms to getBooleanContents. 4432 // If we know the result of a setcc has the top bits zero, use this info. 4433 // We know that we have an integer-based boolean since these operations 4434 // are only available for integer. 4435 if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == 4436 TargetLowering::ZeroOrOneBooleanContent && 4437 BitWidth > 1) 4438 Known.Zero.setBitsFrom(1); 4439 break; 4440 } 4441 [[fallthrough]]; 4442 case ISD::ATOMIC_CMP_SWAP: 4443 case ISD::ATOMIC_SWAP: 4444 case ISD::ATOMIC_LOAD_ADD: 4445 case ISD::ATOMIC_LOAD_SUB: 4446 case ISD::ATOMIC_LOAD_AND: 4447 case ISD::ATOMIC_LOAD_CLR: 4448 case ISD::ATOMIC_LOAD_OR: 4449 case ISD::ATOMIC_LOAD_XOR: 4450 case ISD::ATOMIC_LOAD_NAND: 4451 case ISD::ATOMIC_LOAD_MIN: 4452 case ISD::ATOMIC_LOAD_MAX: 4453 case ISD::ATOMIC_LOAD_UMIN: 4454 case ISD::ATOMIC_LOAD_UMAX: { 4455 // If we are looking at the loaded value. 4456 if (Op.getResNo() == 0) { 4457 auto *AT = cast<AtomicSDNode>(Op); 4458 unsigned MemBits = AT->getMemoryVT().getScalarSizeInBits(); 4459 4460 if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) 4461 Known.Zero.setBitsFrom(MemBits); 4462 } 4463 break; 4464 } 4465 case ISD::FrameIndex: 4466 case ISD::TargetFrameIndex: 4467 TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(), 4468 Known, getMachineFunction()); 4469 break; 4470 4471 default: 4472 if (Opcode < ISD::BUILTIN_OP_END) 4473 break; 4474 [[fallthrough]]; 4475 case ISD::INTRINSIC_WO_CHAIN: 4476 case ISD::INTRINSIC_W_CHAIN: 4477 case ISD::INTRINSIC_VOID: 4478 // TODO: Probably okay to remove after audit; here to reduce change size 4479 // in initial enablement patch for scalable vectors 4480 if (Op.getValueType().isScalableVector()) 4481 break; 4482 4483 // Allow the target to implement this method for its nodes. 
4484 TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth); 4485 break; 4486 } 4487 4488 return Known; 4489 } 4490 4491 /// Convert ConstantRange OverflowResult into SelectionDAG::OverflowKind. 4492 static SelectionDAG::OverflowKind mapOverflowResult(ConstantRange::OverflowResult OR) { 4493 switch (OR) { 4494 case ConstantRange::OverflowResult::MayOverflow: 4495 return SelectionDAG::OFK_Sometime; 4496 case ConstantRange::OverflowResult::AlwaysOverflowsLow: 4497 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: 4498 return SelectionDAG::OFK_Always; 4499 case ConstantRange::OverflowResult::NeverOverflows: 4500 return SelectionDAG::OFK_Never; 4501 } 4502 llvm_unreachable("Unknown OverflowResult"); 4503 } 4504 4505 SelectionDAG::OverflowKind 4506 SelectionDAG::computeOverflowForSignedAdd(SDValue N0, SDValue N1) const { 4507 // X + 0 never overflow 4508 if (isNullConstant(N1)) 4509 return OFK_Never; 4510 4511 // If both operands each have at least two sign bits, the addition 4512 // cannot overflow. 4513 if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1) 4514 return OFK_Never; 4515 4516 // TODO: Add ConstantRange::signedAddMayOverflow handling. 4517 return OFK_Sometime; 4518 } 4519 4520 SelectionDAG::OverflowKind 4521 SelectionDAG::computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const { 4522 // X + 0 never overflow 4523 if (isNullConstant(N1)) 4524 return OFK_Never; 4525 4526 // mulhi + 1 never overflow 4527 KnownBits N1Known = computeKnownBits(N1); 4528 if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && 4529 N1Known.getMaxValue().ult(2)) 4530 return OFK_Never; 4531 4532 KnownBits N0Known = computeKnownBits(N0); 4533 if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1 && 4534 N0Known.getMaxValue().ult(2)) 4535 return OFK_Never; 4536 4537 // Fallback to ConstantRange::unsignedAddMayOverflow handling. 
4538 ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false); 4539 ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false); 4540 return mapOverflowResult(N0Range.unsignedAddMayOverflow(N1Range)); 4541 } 4542 4543 SelectionDAG::OverflowKind 4544 SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const { 4545 // X - 0 never overflow 4546 if (isNullConstant(N1)) 4547 return OFK_Never; 4548 4549 // If both operands each have at least two sign bits, the subtraction 4550 // cannot overflow. 4551 if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1) 4552 return OFK_Never; 4553 4554 KnownBits N0Known = computeKnownBits(N0); 4555 KnownBits N1Known = computeKnownBits(N1); 4556 ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true); 4557 ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true); 4558 return mapOverflowResult(N0Range.signedSubMayOverflow(N1Range)); 4559 } 4560 4561 SelectionDAG::OverflowKind 4562 SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const { 4563 // X - 0 never overflow 4564 if (isNullConstant(N1)) 4565 return OFK_Never; 4566 4567 KnownBits N0Known = computeKnownBits(N0); 4568 KnownBits N1Known = computeKnownBits(N1); 4569 ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false); 4570 ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false); 4571 return mapOverflowResult(N0Range.unsignedSubMayOverflow(N1Range)); 4572 } 4573 4574 SelectionDAG::OverflowKind 4575 SelectionDAG::computeOverflowForUnsignedMul(SDValue N0, SDValue N1) const { 4576 // X * 0 and X * 1 never overflow. 
4577 if (isNullConstant(N1) || isOneConstant(N1)) 4578 return OFK_Never; 4579 4580 KnownBits N0Known = computeKnownBits(N0); 4581 KnownBits N1Known = computeKnownBits(N1); 4582 ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false); 4583 ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false); 4584 return mapOverflowResult(N0Range.unsignedMulMayOverflow(N1Range)); 4585 } 4586 4587 SelectionDAG::OverflowKind 4588 SelectionDAG::computeOverflowForSignedMul(SDValue N0, SDValue N1) const { 4589 // X * 0 and X * 1 never overflow. 4590 if (isNullConstant(N1) || isOneConstant(N1)) 4591 return OFK_Never; 4592 4593 // Get the size of the result. 4594 unsigned BitWidth = N0.getScalarValueSizeInBits(); 4595 4596 // Sum of the sign bits. 4597 unsigned SignBits = ComputeNumSignBits(N0) + ComputeNumSignBits(N1); 4598 4599 // If we have enough sign bits, then there's no overflow. 4600 if (SignBits > BitWidth + 1) 4601 return OFK_Never; 4602 4603 if (SignBits == BitWidth + 1) { 4604 // The overflow occurs when the true multiplication of the 4605 // the operands is the minimum negative number. 4606 KnownBits N0Known = computeKnownBits(N0); 4607 KnownBits N1Known = computeKnownBits(N1); 4608 // If one of the operands is non-negative, then there's no 4609 // overflow. 4610 if (N0Known.isNonNegative() || N1Known.isNonNegative()) 4611 return OFK_Never; 4612 } 4613 4614 return OFK_Sometime; 4615 } 4616 4617 bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const { 4618 if (Depth >= MaxRecursionDepth) 4619 return false; // Limit search depth. 4620 4621 EVT OpVT = Val.getValueType(); 4622 unsigned BitWidth = OpVT.getScalarSizeInBits(); 4623 4624 // Is the constant a known power of 2? 
4625 if (ISD::matchUnaryPredicate(Val, [BitWidth](ConstantSDNode *C) { 4626 return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); 4627 })) 4628 return true; 4629 4630 // A left-shift of a constant one will have exactly one bit set because 4631 // shifting the bit off the end is undefined. 4632 if (Val.getOpcode() == ISD::SHL) { 4633 auto *C = isConstOrConstSplat(Val.getOperand(0)); 4634 if (C && C->getAPIntValue() == 1) 4635 return true; 4636 return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) && 4637 isKnownNeverZero(Val, Depth); 4638 } 4639 4640 // Similarly, a logical right-shift of a constant sign-bit will have exactly 4641 // one bit set. 4642 if (Val.getOpcode() == ISD::SRL) { 4643 auto *C = isConstOrConstSplat(Val.getOperand(0)); 4644 if (C && C->getAPIntValue().isSignMask()) 4645 return true; 4646 return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) && 4647 isKnownNeverZero(Val, Depth); 4648 } 4649 4650 if (Val.getOpcode() == ISD::ROTL || Val.getOpcode() == ISD::ROTR) 4651 return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); 4652 4653 // Are all operands of a build vector constant powers of two? 4654 if (Val.getOpcode() == ISD::BUILD_VECTOR) 4655 if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) { 4656 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E)) 4657 return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); 4658 return false; 4659 })) 4660 return true; 4661 4662 // Is the operand of a splat vector a constant power of two? 
4663 if (Val.getOpcode() == ISD::SPLAT_VECTOR) 4664 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0))) 4665 if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2()) 4666 return true; 4667 4668 // vscale(power-of-two) is a power-of-two for some targets 4669 if (Val.getOpcode() == ISD::VSCALE && 4670 getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() && 4671 isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1)) 4672 return true; 4673 4674 if (Val.getOpcode() == ISD::SMIN || Val.getOpcode() == ISD::SMAX || 4675 Val.getOpcode() == ISD::UMIN || Val.getOpcode() == ISD::UMAX) 4676 return isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1) && 4677 isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); 4678 4679 if (Val.getOpcode() == ISD::SELECT || Val.getOpcode() == ISD::VSELECT) 4680 return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) && 4681 isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1); 4682 4683 // Looking for `x & -x` pattern: 4684 // If x == 0: 4685 // x & -x -> 0 4686 // If x != 0: 4687 // x & -x -> non-zero pow2 4688 // so if we find the pattern return whether we know `x` is non-zero. 4689 SDValue X; 4690 if (sd_match(Val, m_And(m_Value(X), m_Neg(m_Deferred(X))))) 4691 return isKnownNeverZero(X, Depth); 4692 4693 if (Val.getOpcode() == ISD::ZERO_EXTEND) 4694 return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); 4695 4696 // More could be done here, though the above checks are enough 4697 // to handle some common cases. 
4698 return false; 4699 } 4700 4701 bool SelectionDAG::isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth) const { 4702 if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Val, true)) 4703 return C1->getValueAPF().getExactLog2Abs() >= 0; 4704 4705 if (Val.getOpcode() == ISD::UINT_TO_FP || Val.getOpcode() == ISD::SINT_TO_FP) 4706 return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); 4707 4708 return false; 4709 } 4710 4711 unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { 4712 EVT VT = Op.getValueType(); 4713 4714 // Since the number of lanes in a scalable vector is unknown at compile time, 4715 // we track one bit which is implicitly broadcast to all lanes. This means 4716 // that all lanes in a scalable vector are considered demanded. 4717 APInt DemandedElts = VT.isFixedLengthVector() 4718 ? APInt::getAllOnes(VT.getVectorNumElements()) 4719 : APInt(1, 1); 4720 return ComputeNumSignBits(Op, DemandedElts, Depth); 4721 } 4722 4723 unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, 4724 unsigned Depth) const { 4725 EVT VT = Op.getValueType(); 4726 assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!"); 4727 unsigned VTBits = VT.getScalarSizeInBits(); 4728 unsigned NumElts = DemandedElts.getBitWidth(); 4729 unsigned Tmp, Tmp2; 4730 unsigned FirstAnswer = 1; 4731 4732 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 4733 const APInt &Val = C->getAPIntValue(); 4734 return Val.getNumSignBits(); 4735 } 4736 4737 if (Depth >= MaxRecursionDepth) 4738 return 1; // Limit search depth. 4739 4740 if (!DemandedElts) 4741 return 1; // No demanded elts, better to assume we don't know anything. 
4742 4743 unsigned Opcode = Op.getOpcode(); 4744 switch (Opcode) { 4745 default: break; 4746 case ISD::AssertSext: 4747 Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); 4748 return VTBits-Tmp+1; 4749 case ISD::AssertZext: 4750 Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); 4751 return VTBits-Tmp; 4752 case ISD::MERGE_VALUES: 4753 return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts, 4754 Depth + 1); 4755 case ISD::SPLAT_VECTOR: { 4756 // Check if the sign bits of source go down as far as the truncated value. 4757 unsigned NumSrcBits = Op.getOperand(0).getValueSizeInBits(); 4758 unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); 4759 if (NumSrcSignBits > (NumSrcBits - VTBits)) 4760 return NumSrcSignBits - (NumSrcBits - VTBits); 4761 break; 4762 } 4763 case ISD::BUILD_VECTOR: 4764 assert(!VT.isScalableVector()); 4765 Tmp = VTBits; 4766 for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) { 4767 if (!DemandedElts[i]) 4768 continue; 4769 4770 SDValue SrcOp = Op.getOperand(i); 4771 // BUILD_VECTOR can implicitly truncate sources, we handle this specially 4772 // for constant nodes to ensure we only look at the sign bits. 4773 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SrcOp)) { 4774 APInt T = C->getAPIntValue().trunc(VTBits); 4775 Tmp2 = T.getNumSignBits(); 4776 } else { 4777 Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1); 4778 4779 if (SrcOp.getValueSizeInBits() != VTBits) { 4780 assert(SrcOp.getValueSizeInBits() > VTBits && 4781 "Expected BUILD_VECTOR implicit truncation"); 4782 unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits; 4783 Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1); 4784 } 4785 } 4786 Tmp = std::min(Tmp, Tmp2); 4787 } 4788 return Tmp; 4789 4790 case ISD::VECTOR_SHUFFLE: { 4791 // Collect the minimum number of sign bits that are shared by every vector 4792 // element referenced by the shuffle. 
4793 APInt DemandedLHS, DemandedRHS; 4794 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); 4795 assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); 4796 if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts, 4797 DemandedLHS, DemandedRHS)) 4798 return 1; 4799 4800 Tmp = std::numeric_limits<unsigned>::max(); 4801 if (!!DemandedLHS) 4802 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); 4803 if (!!DemandedRHS) { 4804 Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1); 4805 Tmp = std::min(Tmp, Tmp2); 4806 } 4807 // If we don't know anything, early out and try computeKnownBits fall-back. 4808 if (Tmp == 1) 4809 break; 4810 assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); 4811 return Tmp; 4812 } 4813 4814 case ISD::BITCAST: { 4815 if (VT.isScalableVector()) 4816 break; 4817 SDValue N0 = Op.getOperand(0); 4818 EVT SrcVT = N0.getValueType(); 4819 unsigned SrcBits = SrcVT.getScalarSizeInBits(); 4820 4821 // Ignore bitcasts from unsupported types.. 4822 if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint())) 4823 break; 4824 4825 // Fast handling of 'identity' bitcasts. 4826 if (VTBits == SrcBits) 4827 return ComputeNumSignBits(N0, DemandedElts, Depth + 1); 4828 4829 bool IsLE = getDataLayout().isLittleEndian(); 4830 4831 // Bitcast 'large element' scalar/vector to 'small element' vector. 4832 if ((SrcBits % VTBits) == 0) { 4833 assert(VT.isVector() && "Expected bitcast to vector"); 4834 4835 unsigned Scale = SrcBits / VTBits; 4836 APInt SrcDemandedElts = 4837 APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale); 4838 4839 // Fast case - sign splat can be simply split across the small elements. 4840 Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1); 4841 if (Tmp == SrcBits) 4842 return VTBits; 4843 4844 // Slow case - determine how far the sign extends into each sub-element. 
4845 Tmp2 = VTBits; 4846 for (unsigned i = 0; i != NumElts; ++i) 4847 if (DemandedElts[i]) { 4848 unsigned SubOffset = i % Scale; 4849 SubOffset = (IsLE ? ((Scale - 1) - SubOffset) : SubOffset); 4850 SubOffset = SubOffset * VTBits; 4851 if (Tmp <= SubOffset) 4852 return 1; 4853 Tmp2 = std::min(Tmp2, Tmp - SubOffset); 4854 } 4855 return Tmp2; 4856 } 4857 break; 4858 } 4859 4860 case ISD::FP_TO_SINT_SAT: 4861 // FP_TO_SINT_SAT produces a signed value that fits in the saturating VT. 4862 Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits(); 4863 return VTBits - Tmp + 1; 4864 case ISD::SIGN_EXTEND: 4865 Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); 4866 return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp; 4867 case ISD::SIGN_EXTEND_INREG: 4868 // Max of the input and what this extends. 4869 Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits(); 4870 Tmp = VTBits-Tmp+1; 4871 Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); 4872 return std::max(Tmp, Tmp2); 4873 case ISD::SIGN_EXTEND_VECTOR_INREG: { 4874 if (VT.isScalableVector()) 4875 break; 4876 SDValue Src = Op.getOperand(0); 4877 EVT SrcVT = Src.getValueType(); 4878 APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements()); 4879 Tmp = VTBits - SrcVT.getScalarSizeInBits(); 4880 return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; 4881 } 4882 case ISD::SRA: 4883 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 4884 // SRA X, C -> adds C sign bits. 
4885 if (std::optional<uint64_t> ShAmt = 4886 getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) 4887 Tmp = std::min<uint64_t>(Tmp + *ShAmt, VTBits); 4888 return Tmp; 4889 case ISD::SHL: 4890 if (std::optional<ConstantRange> ShAmtRange = 4891 getValidShiftAmountRange(Op, DemandedElts, Depth + 1)) { 4892 uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue(); 4893 uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue(); 4894 // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are 4895 // shifted out, then we can compute the number of sign bits for the 4896 // operand being extended. A future improvement could be to pass along the 4897 // "shifted left by" information in the recursive calls to 4898 // ComputeKnownSignBits. Allowing us to handle this more generically. 4899 if (ISD::isExtOpcode(Op.getOperand(0).getOpcode())) { 4900 SDValue Ext = Op.getOperand(0); 4901 EVT ExtVT = Ext.getValueType(); 4902 SDValue Extendee = Ext.getOperand(0); 4903 EVT ExtendeeVT = Extendee.getValueType(); 4904 uint64_t SizeDifference = 4905 ExtVT.getScalarSizeInBits() - ExtendeeVT.getScalarSizeInBits(); 4906 if (SizeDifference <= MinShAmt) { 4907 Tmp = SizeDifference + 4908 ComputeNumSignBits(Extendee, DemandedElts, Depth + 1); 4909 if (MaxShAmt < Tmp) 4910 return Tmp - MaxShAmt; 4911 } 4912 } 4913 // shl destroys sign bits, ensure it doesn't shift out all sign bits. 4914 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 4915 if (MaxShAmt < Tmp) 4916 return Tmp - MaxShAmt; 4917 } 4918 break; 4919 case ISD::AND: 4920 case ISD::OR: 4921 case ISD::XOR: // NOT is handled here. 4922 // Logical binary ops preserve the number of sign bits at the worst. 4923 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); 4924 if (Tmp != 1) { 4925 Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); 4926 FirstAnswer = std::min(Tmp, Tmp2); 4927 // We computed what we know about the sign bits as our first 4928 // answer. 
Now proceed to the generic code that uses 4929 // computeKnownBits, and pick whichever answer is better. 4930 } 4931 break; 4932 4933 case ISD::SELECT: 4934 case ISD::VSELECT: 4935 Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); 4936 if (Tmp == 1) return 1; // Early out. 4937 Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); 4938 return std::min(Tmp, Tmp2); 4939 case ISD::SELECT_CC: 4940 Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); 4941 if (Tmp == 1) return 1; // Early out. 4942 Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth+1); 4943 return std::min(Tmp, Tmp2); 4944 4945 case ISD::SMIN: 4946 case ISD::SMAX: { 4947 // If we have a clamp pattern, we know that the number of sign bits will be 4948 // the minimum of the clamp min/max range. 4949 bool IsMax = (Opcode == ISD::SMAX); 4950 ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; 4951 if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts))) 4952 if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) 4953 CstHigh = 4954 isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts); 4955 if (CstLow && CstHigh) { 4956 if (!IsMax) 4957 std::swap(CstLow, CstHigh); 4958 if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) { 4959 Tmp = CstLow->getAPIntValue().getNumSignBits(); 4960 Tmp2 = CstHigh->getAPIntValue().getNumSignBits(); 4961 return std::min(Tmp, Tmp2); 4962 } 4963 } 4964 4965 // Fallback - just get the minimum number of sign bits of the operands. 4966 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 4967 if (Tmp == 1) 4968 return 1; // Early out. 4969 Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); 4970 return std::min(Tmp, Tmp2); 4971 } 4972 case ISD::UMIN: 4973 case ISD::UMAX: 4974 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 4975 if (Tmp == 1) 4976 return 1; // Early out. 
4977 Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); 4978 return std::min(Tmp, Tmp2); 4979 case ISD::SSUBO_CARRY: 4980 case ISD::USUBO_CARRY: 4981 // sub_carry(x,x,c) -> 0/-1 (sext carry) 4982 if (Op.getResNo() == 0 && Op.getOperand(0) == Op.getOperand(1)) 4983 return VTBits; 4984 [[fallthrough]]; 4985 case ISD::SADDO: 4986 case ISD::UADDO: 4987 case ISD::SADDO_CARRY: 4988 case ISD::UADDO_CARRY: 4989 case ISD::SSUBO: 4990 case ISD::USUBO: 4991 case ISD::SMULO: 4992 case ISD::UMULO: 4993 if (Op.getResNo() != 1) 4994 break; 4995 // The boolean result conforms to getBooleanContents. Fall through. 4996 // If setcc returns 0/-1, all bits are sign bits. 4997 // We know that we have an integer-based boolean since these operations 4998 // are only available for integer. 4999 if (TLI->getBooleanContents(VT.isVector(), false) == 5000 TargetLowering::ZeroOrNegativeOneBooleanContent) 5001 return VTBits; 5002 break; 5003 case ISD::SETCC: 5004 case ISD::SETCCCARRY: 5005 case ISD::STRICT_FSETCC: 5006 case ISD::STRICT_FSETCCS: { 5007 unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; 5008 // If setcc returns 0/-1, all bits are sign bits. 5009 if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) == 5010 TargetLowering::ZeroOrNegativeOneBooleanContent) 5011 return VTBits; 5012 break; 5013 } 5014 case ISD::ROTL: 5015 case ISD::ROTR: 5016 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 5017 5018 // If we're rotating an 0/-1 value, then it stays an 0/-1 value. 5019 if (Tmp == VTBits) 5020 return VTBits; 5021 5022 if (ConstantSDNode *C = 5023 isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { 5024 unsigned RotAmt = C->getAPIntValue().urem(VTBits); 5025 5026 // Handle rotate right by N like a rotate left by 32-N. 5027 if (Opcode == ISD::ROTR) 5028 RotAmt = (VTBits - RotAmt) % VTBits; 5029 5030 // If we aren't rotating out all of the known-in sign bits, return the 5031 // number that are left. 
This handles rotl(sext(x), 1) for example. 5032 if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt); 5033 } 5034 break; 5035 case ISD::ADD: 5036 case ISD::ADDC: 5037 // Add can have at most one carry bit. Thus we know that the output 5038 // is, at worst, one more bit than the inputs. 5039 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 5040 if (Tmp == 1) return 1; // Early out. 5041 5042 // Special case decrementing a value (ADD X, -1): 5043 if (ConstantSDNode *CRHS = 5044 isConstOrConstSplat(Op.getOperand(1), DemandedElts)) 5045 if (CRHS->isAllOnes()) { 5046 KnownBits Known = 5047 computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 5048 5049 // If the input is known to be 0 or 1, the output is 0/-1, which is all 5050 // sign bits set. 5051 if ((Known.Zero | 1).isAllOnes()) 5052 return VTBits; 5053 5054 // If we are subtracting one from a positive number, there is no carry 5055 // out of the result. 5056 if (Known.isNonNegative()) 5057 return Tmp; 5058 } 5059 5060 Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); 5061 if (Tmp2 == 1) return 1; // Early out. 5062 return std::min(Tmp, Tmp2) - 1; 5063 case ISD::SUB: 5064 Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); 5065 if (Tmp2 == 1) return 1; // Early out. 5066 5067 // Handle NEG. 5068 if (ConstantSDNode *CLHS = 5069 isConstOrConstSplat(Op.getOperand(0), DemandedElts)) 5070 if (CLHS->isZero()) { 5071 KnownBits Known = 5072 computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 5073 // If the input is known to be 0 or 1, the output is 0/-1, which is all 5074 // sign bits set. 5075 if ((Known.Zero | 1).isAllOnes()) 5076 return VTBits; 5077 5078 // If the input is known to be positive (the sign bit is known clear), 5079 // the output of the NEG has the same number of sign bits as the input. 5080 if (Known.isNonNegative()) 5081 return Tmp2; 5082 5083 // Otherwise, we treat this like a SUB. 5084 } 5085 5086 // Sub can have at most one carry bit. 
Thus we know that the output 5087 // is, at worst, one more bit than the inputs. 5088 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 5089 if (Tmp == 1) return 1; // Early out. 5090 return std::min(Tmp, Tmp2) - 1; 5091 case ISD::MUL: { 5092 // The output of the Mul can be at most twice the valid bits in the inputs. 5093 unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1); 5094 if (SignBitsOp0 == 1) 5095 break; 5096 unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); 5097 if (SignBitsOp1 == 1) 5098 break; 5099 unsigned OutValidBits = 5100 (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); 5101 return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1; 5102 } 5103 case ISD::AVGCEILS: 5104 case ISD::AVGFLOORS: 5105 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 5106 if (Tmp == 1) 5107 return 1; // Early out. 5108 Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); 5109 return std::min(Tmp, Tmp2); 5110 case ISD::SREM: 5111 // The sign bit is the LHS's sign bit, except when the result of the 5112 // remainder is zero. The magnitude of the result should be less than or 5113 // equal to the magnitude of the LHS. Therefore, the result should have 5114 // at least as many sign bits as the left hand side. 5115 return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 5116 case ISD::TRUNCATE: { 5117 // Check if the sign bits of source go down as far as the truncated value. 
5118 unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); 5119 unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); 5120 if (NumSrcSignBits > (NumSrcBits - VTBits)) 5121 return NumSrcSignBits - (NumSrcBits - VTBits); 5122 break; 5123 } 5124 case ISD::EXTRACT_ELEMENT: { 5125 if (VT.isScalableVector()) 5126 break; 5127 const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); 5128 const int BitWidth = Op.getValueSizeInBits(); 5129 const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth; 5130 5131 // Get reverse index (starting from 1), Op1 value indexes elements from 5132 // little end. Sign starts at big end. 5133 const int rIndex = Items - 1 - Op.getConstantOperandVal(1); 5134 5135 // If the sign portion ends in our element the subtraction gives correct 5136 // result. Otherwise it gives either negative or > bitwidth result 5137 return std::clamp(KnownSign - rIndex * BitWidth, 0, BitWidth); 5138 } 5139 case ISD::INSERT_VECTOR_ELT: { 5140 if (VT.isScalableVector()) 5141 break; 5142 // If we know the element index, split the demand between the 5143 // source vector and the inserted element, otherwise assume we need 5144 // the original demanded vector elements and the value. 5145 SDValue InVec = Op.getOperand(0); 5146 SDValue InVal = Op.getOperand(1); 5147 SDValue EltNo = Op.getOperand(2); 5148 bool DemandedVal = true; 5149 APInt DemandedVecElts = DemandedElts; 5150 auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo); 5151 if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { 5152 unsigned EltIdx = CEltNo->getZExtValue(); 5153 DemandedVal = !!DemandedElts[EltIdx]; 5154 DemandedVecElts.clearBit(EltIdx); 5155 } 5156 Tmp = std::numeric_limits<unsigned>::max(); 5157 if (DemandedVal) { 5158 // TODO - handle implicit truncation of inserted elements. 
5159 if (InVal.getScalarValueSizeInBits() != VTBits) 5160 break; 5161 Tmp2 = ComputeNumSignBits(InVal, Depth + 1); 5162 Tmp = std::min(Tmp, Tmp2); 5163 } 5164 if (!!DemandedVecElts) { 5165 Tmp2 = ComputeNumSignBits(InVec, DemandedVecElts, Depth + 1); 5166 Tmp = std::min(Tmp, Tmp2); 5167 } 5168 assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); 5169 return Tmp; 5170 } 5171 case ISD::EXTRACT_VECTOR_ELT: { 5172 assert(!VT.isScalableVector()); 5173 SDValue InVec = Op.getOperand(0); 5174 SDValue EltNo = Op.getOperand(1); 5175 EVT VecVT = InVec.getValueType(); 5176 // ComputeNumSignBits not yet implemented for scalable vectors. 5177 if (VecVT.isScalableVector()) 5178 break; 5179 const unsigned BitWidth = Op.getValueSizeInBits(); 5180 const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); 5181 const unsigned NumSrcElts = VecVT.getVectorNumElements(); 5182 5183 // If BitWidth > EltBitWidth the value is anyext:ed, and we do not know 5184 // anything about sign bits. But if the sizes match we can derive knowledge 5185 // about sign bits from the vector operand. 5186 if (BitWidth != EltBitWidth) 5187 break; 5188 5189 // If we know the element index, just demand that vector element, else for 5190 // an unknown element index, ignore DemandedElts and demand them all. 5191 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); 5192 auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); 5193 if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) 5194 DemandedSrcElts = 5195 APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); 5196 5197 return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1); 5198 } 5199 case ISD::EXTRACT_SUBVECTOR: { 5200 // Offset the demanded elts by the subvector index. 5201 SDValue Src = Op.getOperand(0); 5202 // Bail until we can represent demanded elements for scalable vectors. 
5203 if (Src.getValueType().isScalableVector()) 5204 break; 5205 uint64_t Idx = Op.getConstantOperandVal(1); 5206 unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); 5207 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); 5208 return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); 5209 } 5210 case ISD::CONCAT_VECTORS: { 5211 if (VT.isScalableVector()) 5212 break; 5213 // Determine the minimum number of sign bits across all demanded 5214 // elts of the input vectors. Early out if the result is already 1. 5215 Tmp = std::numeric_limits<unsigned>::max(); 5216 EVT SubVectorVT = Op.getOperand(0).getValueType(); 5217 unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); 5218 unsigned NumSubVectors = Op.getNumOperands(); 5219 for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) { 5220 APInt DemandedSub = 5221 DemandedElts.extractBits(NumSubVectorElts, i * NumSubVectorElts); 5222 if (!DemandedSub) 5223 continue; 5224 Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1); 5225 Tmp = std::min(Tmp, Tmp2); 5226 } 5227 assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); 5228 return Tmp; 5229 } 5230 case ISD::INSERT_SUBVECTOR: { 5231 if (VT.isScalableVector()) 5232 break; 5233 // Demand any elements from the subvector and the remainder from the src its 5234 // inserted into. 
5235 SDValue Src = Op.getOperand(0); 5236 SDValue Sub = Op.getOperand(1); 5237 uint64_t Idx = Op.getConstantOperandVal(2); 5238 unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); 5239 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); 5240 APInt DemandedSrcElts = DemandedElts; 5241 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts); 5242 5243 Tmp = std::numeric_limits<unsigned>::max(); 5244 if (!!DemandedSubElts) { 5245 Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1); 5246 if (Tmp == 1) 5247 return 1; // early-out 5248 } 5249 if (!!DemandedSrcElts) { 5250 Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); 5251 Tmp = std::min(Tmp, Tmp2); 5252 } 5253 assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); 5254 return Tmp; 5255 } 5256 case ISD::LOAD: { 5257 LoadSDNode *LD = cast<LoadSDNode>(Op); 5258 if (const MDNode *Ranges = LD->getRanges()) { 5259 if (DemandedElts != 1) 5260 break; 5261 5262 ConstantRange CR = getConstantRangeFromMetadata(*Ranges); 5263 if (VTBits > CR.getBitWidth()) { 5264 switch (LD->getExtensionType()) { 5265 case ISD::SEXTLOAD: 5266 CR = CR.signExtend(VTBits); 5267 break; 5268 case ISD::ZEXTLOAD: 5269 CR = CR.zeroExtend(VTBits); 5270 break; 5271 default: 5272 break; 5273 } 5274 } 5275 5276 if (VTBits != CR.getBitWidth()) 5277 break; 5278 return std::min(CR.getSignedMin().getNumSignBits(), 5279 CR.getSignedMax().getNumSignBits()); 5280 } 5281 5282 break; 5283 } 5284 case ISD::ATOMIC_CMP_SWAP: 5285 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: 5286 case ISD::ATOMIC_SWAP: 5287 case ISD::ATOMIC_LOAD_ADD: 5288 case ISD::ATOMIC_LOAD_SUB: 5289 case ISD::ATOMIC_LOAD_AND: 5290 case ISD::ATOMIC_LOAD_CLR: 5291 case ISD::ATOMIC_LOAD_OR: 5292 case ISD::ATOMIC_LOAD_XOR: 5293 case ISD::ATOMIC_LOAD_NAND: 5294 case ISD::ATOMIC_LOAD_MIN: 5295 case ISD::ATOMIC_LOAD_MAX: 5296 case ISD::ATOMIC_LOAD_UMIN: 5297 case ISD::ATOMIC_LOAD_UMAX: 5298 case ISD::ATOMIC_LOAD: { 5299 auto *AT = cast<AtomicSDNode>(Op); 5300 // 
If we are looking at the loaded value. 5301 if (Op.getResNo() == 0) { 5302 Tmp = AT->getMemoryVT().getScalarSizeInBits(); 5303 if (Tmp == VTBits) 5304 return 1; // early-out 5305 5306 // For atomic_load, prefer to use the extension type. 5307 if (Op->getOpcode() == ISD::ATOMIC_LOAD) { 5308 switch (AT->getExtensionType()) { 5309 default: 5310 break; 5311 case ISD::SEXTLOAD: 5312 return VTBits - Tmp + 1; 5313 case ISD::ZEXTLOAD: 5314 return VTBits - Tmp; 5315 } 5316 } 5317 5318 if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND) 5319 return VTBits - Tmp + 1; 5320 if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) 5321 return VTBits - Tmp; 5322 } 5323 break; 5324 } 5325 } 5326 5327 // If we are looking at the loaded value of the SDNode. 5328 if (Op.getResNo() == 0) { 5329 // Handle LOADX separately here. EXTLOAD case will fallthrough. 5330 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { 5331 unsigned ExtType = LD->getExtensionType(); 5332 switch (ExtType) { 5333 default: break; 5334 case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known. 5335 Tmp = LD->getMemoryVT().getScalarSizeInBits(); 5336 return VTBits - Tmp + 1; 5337 case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known. 5338 Tmp = LD->getMemoryVT().getScalarSizeInBits(); 5339 return VTBits - Tmp; 5340 case ISD::NON_EXTLOAD: 5341 if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) { 5342 // We only need to handle vectors - computeKnownBits should handle 5343 // scalar cases. 
5344 Type *CstTy = Cst->getType(); 5345 if (CstTy->isVectorTy() && !VT.isScalableVector() && 5346 (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits() && 5347 VTBits == CstTy->getScalarSizeInBits()) { 5348 Tmp = VTBits; 5349 for (unsigned i = 0; i != NumElts; ++i) { 5350 if (!DemandedElts[i]) 5351 continue; 5352 if (Constant *Elt = Cst->getAggregateElement(i)) { 5353 if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { 5354 const APInt &Value = CInt->getValue(); 5355 Tmp = std::min(Tmp, Value.getNumSignBits()); 5356 continue; 5357 } 5358 if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { 5359 APInt Value = CFP->getValueAPF().bitcastToAPInt(); 5360 Tmp = std::min(Tmp, Value.getNumSignBits()); 5361 continue; 5362 } 5363 } 5364 // Unknown type. Conservatively assume no bits match sign bit. 5365 return 1; 5366 } 5367 return Tmp; 5368 } 5369 } 5370 break; 5371 } 5372 } 5373 } 5374 5375 // Allow the target to implement this method for its nodes. 5376 if (Opcode >= ISD::BUILTIN_OP_END || 5377 Opcode == ISD::INTRINSIC_WO_CHAIN || 5378 Opcode == ISD::INTRINSIC_W_CHAIN || 5379 Opcode == ISD::INTRINSIC_VOID) { 5380 // TODO: This can probably be removed once target code is audited. This 5381 // is here purely to reduce patch size and review complexity. 5382 if (!VT.isScalableVector()) { 5383 unsigned NumBits = 5384 TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth); 5385 if (NumBits > 1) 5386 FirstAnswer = std::max(FirstAnswer, NumBits); 5387 } 5388 } 5389 5390 // Finally, if we can prove that the top bits of the result are 0's or 1's, 5391 // use this information. 
5392 KnownBits Known = computeKnownBits(Op, DemandedElts, Depth); 5393 return std::max(FirstAnswer, Known.countMinSignBits()); 5394 } 5395 5396 unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op, 5397 unsigned Depth) const { 5398 unsigned SignBits = ComputeNumSignBits(Op, Depth); 5399 return Op.getScalarValueSizeInBits() - SignBits + 1; 5400 } 5401 5402 unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op, 5403 const APInt &DemandedElts, 5404 unsigned Depth) const { 5405 unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth); 5406 return Op.getScalarValueSizeInBits() - SignBits + 1; 5407 } 5408 5409 bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly, 5410 unsigned Depth) const { 5411 // Early out for FREEZE. 5412 if (Op.getOpcode() == ISD::FREEZE) 5413 return true; 5414 5415 EVT VT = Op.getValueType(); 5416 APInt DemandedElts = VT.isFixedLengthVector() 5417 ? APInt::getAllOnes(VT.getVectorNumElements()) 5418 : APInt(1, 1); 5419 return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth); 5420 } 5421 5422 bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, 5423 const APInt &DemandedElts, 5424 bool PoisonOnly, 5425 unsigned Depth) const { 5426 unsigned Opcode = Op.getOpcode(); 5427 5428 // Early out for FREEZE. 5429 if (Opcode == ISD::FREEZE) 5430 return true; 5431 5432 if (Depth >= MaxRecursionDepth) 5433 return false; // Limit search depth. 5434 5435 if (isIntOrFPConstant(Op)) 5436 return true; 5437 5438 switch (Opcode) { 5439 case ISD::CONDCODE: 5440 case ISD::VALUETYPE: 5441 case ISD::FrameIndex: 5442 case ISD::TargetFrameIndex: 5443 case ISD::CopyFromReg: 5444 return true; 5445 5446 case ISD::POISON: 5447 return false; 5448 5449 case ISD::UNDEF: 5450 return PoisonOnly; 5451 5452 case ISD::BUILD_VECTOR: 5453 // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements - 5454 // this shouldn't affect the result. 
5455 for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) { 5456 if (!DemandedElts[i]) 5457 continue; 5458 if (!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), PoisonOnly, 5459 Depth + 1)) 5460 return false; 5461 } 5462 return true; 5463 5464 case ISD::SPLAT_VECTOR: 5465 return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly, 5466 Depth + 1); 5467 5468 case ISD::VECTOR_SHUFFLE: { 5469 APInt DemandedLHS, DemandedRHS; 5470 auto *SVN = cast<ShuffleVectorSDNode>(Op); 5471 if (!getShuffleDemandedElts(DemandedElts.getBitWidth(), SVN->getMask(), 5472 DemandedElts, DemandedLHS, DemandedRHS, 5473 /*AllowUndefElts=*/false)) 5474 return false; 5475 if (!DemandedLHS.isZero() && 5476 !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedLHS, 5477 PoisonOnly, Depth + 1)) 5478 return false; 5479 if (!DemandedRHS.isZero() && 5480 !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedRHS, 5481 PoisonOnly, Depth + 1)) 5482 return false; 5483 return true; 5484 } 5485 5486 // TODO: Search for noundef attributes from library functions. 5487 5488 // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef. 5489 5490 default: 5491 // Allow the target to implement this method for its nodes. 5492 if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || 5493 Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) 5494 return TLI->isGuaranteedNotToBeUndefOrPoisonForTargetNode( 5495 Op, DemandedElts, *this, PoisonOnly, Depth); 5496 break; 5497 } 5498 5499 // If Op can't create undef/poison and none of its operands are undef/poison 5500 // then Op is never undef/poison. 5501 // NOTE: TargetNodes can handle this in themselves in 5502 // isGuaranteedNotToBeUndefOrPoisonForTargetNode or let 5503 // TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode handle it. 
5504 return !canCreateUndefOrPoison(Op, PoisonOnly, /*ConsiderFlags*/ true, 5505 Depth) && 5506 all_of(Op->ops(), [&](SDValue V) { 5507 return isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly, Depth + 1); 5508 }); 5509 } 5510 5511 bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly, 5512 bool ConsiderFlags, 5513 unsigned Depth) const { 5514 EVT VT = Op.getValueType(); 5515 APInt DemandedElts = VT.isFixedLengthVector() 5516 ? APInt::getAllOnes(VT.getVectorNumElements()) 5517 : APInt(1, 1); 5518 return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags, 5519 Depth); 5520 } 5521 5522 bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, 5523 bool PoisonOnly, bool ConsiderFlags, 5524 unsigned Depth) const { 5525 if (ConsiderFlags && Op->hasPoisonGeneratingFlags()) 5526 return true; 5527 5528 unsigned Opcode = Op.getOpcode(); 5529 switch (Opcode) { 5530 case ISD::AssertSext: 5531 case ISD::AssertZext: 5532 case ISD::AssertAlign: 5533 case ISD::AssertNoFPClass: 5534 // Assertion nodes can create poison if the assertion fails. 
5535 return true; 5536 5537 case ISD::FREEZE: 5538 case ISD::CONCAT_VECTORS: 5539 case ISD::INSERT_SUBVECTOR: 5540 case ISD::EXTRACT_SUBVECTOR: 5541 case ISD::SADDSAT: 5542 case ISD::UADDSAT: 5543 case ISD::SSUBSAT: 5544 case ISD::USUBSAT: 5545 case ISD::MULHU: 5546 case ISD::MULHS: 5547 case ISD::SMIN: 5548 case ISD::SMAX: 5549 case ISD::UMIN: 5550 case ISD::UMAX: 5551 case ISD::AND: 5552 case ISD::XOR: 5553 case ISD::ROTL: 5554 case ISD::ROTR: 5555 case ISD::FSHL: 5556 case ISD::FSHR: 5557 case ISD::BSWAP: 5558 case ISD::CTTZ: 5559 case ISD::CTLZ: 5560 case ISD::CTPOP: 5561 case ISD::BITREVERSE: 5562 case ISD::PARITY: 5563 case ISD::SIGN_EXTEND: 5564 case ISD::TRUNCATE: 5565 case ISD::SIGN_EXTEND_INREG: 5566 case ISD::SIGN_EXTEND_VECTOR_INREG: 5567 case ISD::ZERO_EXTEND_VECTOR_INREG: 5568 case ISD::BITCAST: 5569 case ISD::BUILD_VECTOR: 5570 case ISD::BUILD_PAIR: 5571 case ISD::SPLAT_VECTOR: 5572 return false; 5573 5574 case ISD::ABS: 5575 // ISD::ABS defines abs(INT_MIN) -> INT_MIN and never generates poison. 5576 // Different to Intrinsic::abs. 5577 return false; 5578 5579 case ISD::ADDC: 5580 case ISD::SUBC: 5581 case ISD::ADDE: 5582 case ISD::SUBE: 5583 case ISD::SADDO: 5584 case ISD::SSUBO: 5585 case ISD::SMULO: 5586 case ISD::SADDO_CARRY: 5587 case ISD::SSUBO_CARRY: 5588 case ISD::UADDO: 5589 case ISD::USUBO: 5590 case ISD::UMULO: 5591 case ISD::UADDO_CARRY: 5592 case ISD::USUBO_CARRY: 5593 // No poison on result or overflow flags. 5594 return false; 5595 5596 case ISD::SELECT_CC: 5597 case ISD::SETCC: { 5598 // Integer setcc cannot create undef or poison. 5599 if (Op.getOperand(0).getValueType().isInteger()) 5600 return false; 5601 5602 // FP compares are more complicated. They can create poison for nan/infinity 5603 // based on options and flags. The options and flags also cause special 5604 // nonan condition codes to be used. Those condition codes may be preserved 5605 // even if the nonan flag is dropped somewhere. 
5606 unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4; 5607 ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(CCOp))->get(); 5608 if (((unsigned)CCCode & 0x10U)) 5609 return true; 5610 5611 const TargetOptions &Options = getTarget().Options; 5612 return Options.NoNaNsFPMath || Options.NoInfsFPMath; 5613 } 5614 5615 case ISD::OR: 5616 case ISD::ZERO_EXTEND: 5617 case ISD::SELECT: 5618 case ISD::VSELECT: 5619 case ISD::ADD: 5620 case ISD::SUB: 5621 case ISD::MUL: 5622 case ISD::FNEG: 5623 case ISD::FADD: 5624 case ISD::FSUB: 5625 case ISD::FMUL: 5626 case ISD::FDIV: 5627 case ISD::FREM: 5628 case ISD::FCOPYSIGN: 5629 // No poison except from flags (which is handled above) 5630 return false; 5631 5632 case ISD::SHL: 5633 case ISD::SRL: 5634 case ISD::SRA: 5635 // If the max shift amount isn't in range, then the shift can 5636 // create poison. 5637 return !getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1); 5638 5639 case ISD::CTTZ_ZERO_UNDEF: 5640 case ISD::CTLZ_ZERO_UNDEF: 5641 // If the amount is zero then the result will be poison. 5642 // TODO: Add isKnownNeverZero DemandedElts handling. 5643 return !isKnownNeverZero(Op.getOperand(0), Depth + 1); 5644 5645 case ISD::SCALAR_TO_VECTOR: 5646 // Check if we demand any upper (undef) elements. 5647 return !PoisonOnly && DemandedElts.ugt(1); 5648 5649 case ISD::INSERT_VECTOR_ELT: 5650 case ISD::EXTRACT_VECTOR_ELT: { 5651 // Ensure that the element index is in bounds. 5652 EVT VecVT = Op.getOperand(0).getValueType(); 5653 SDValue Idx = Op.getOperand(Opcode == ISD::INSERT_VECTOR_ELT ? 2 : 1); 5654 KnownBits KnownIdx = computeKnownBits(Idx, Depth + 1); 5655 return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements()); 5656 } 5657 5658 case ISD::VECTOR_SHUFFLE: { 5659 // Check for any demanded shuffle element that is undef. 
5660 auto *SVN = cast<ShuffleVectorSDNode>(Op); 5661 for (auto [Idx, Elt] : enumerate(SVN->getMask())) 5662 if (Elt < 0 && DemandedElts[Idx]) 5663 return true; 5664 return false; 5665 } 5666 5667 default: 5668 // Allow the target to implement this method for its nodes. 5669 if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || 5670 Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) 5671 return TLI->canCreateUndefOrPoisonForTargetNode( 5672 Op, DemandedElts, *this, PoisonOnly, ConsiderFlags, Depth); 5673 break; 5674 } 5675 5676 // Be conservative and return true. 5677 return true; 5678 } 5679 5680 bool SelectionDAG::isADDLike(SDValue Op, bool NoWrap) const { 5681 unsigned Opcode = Op.getOpcode(); 5682 if (Opcode == ISD::OR) 5683 return Op->getFlags().hasDisjoint() || 5684 haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1)); 5685 if (Opcode == ISD::XOR) 5686 return !NoWrap && isMinSignedConstant(Op.getOperand(1)); 5687 return false; 5688 } 5689 5690 bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { 5691 return Op.getNumOperands() == 2 && isa<ConstantSDNode>(Op.getOperand(1)) && 5692 (Op.isAnyAdd() || isADDLike(Op)); 5693 } 5694 5695 bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, 5696 unsigned Depth) const { 5697 EVT VT = Op.getValueType(); 5698 5699 // Since the number of lanes in a scalable vector is unknown at compile time, 5700 // we track one bit which is implicitly broadcast to all lanes. This means 5701 // that all lanes in a scalable vector are considered demanded. 5702 APInt DemandedElts = VT.isFixedLengthVector() 5703 ? 
APInt::getAllOnes(VT.getVectorNumElements()) 5704 : APInt(1, 1); 5705 5706 return isKnownNeverNaN(Op, DemandedElts, SNaN, Depth); 5707 } 5708 5709 bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, 5710 bool SNaN, unsigned Depth) const { 5711 assert(!DemandedElts.isZero() && "No demanded elements"); 5712 5713 // If we're told that NaNs won't happen, assume they won't. 5714 if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()) 5715 return true; 5716 5717 if (Depth >= MaxRecursionDepth) 5718 return false; // Limit search depth. 5719 5720 // If the value is a constant, we can obviously see if it is a NaN or not. 5721 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) { 5722 return !C->getValueAPF().isNaN() || 5723 (SNaN && !C->getValueAPF().isSignaling()); 5724 } 5725 5726 unsigned Opcode = Op.getOpcode(); 5727 switch (Opcode) { 5728 case ISD::FADD: 5729 case ISD::FSUB: 5730 case ISD::FMUL: 5731 case ISD::FDIV: 5732 case ISD::FREM: 5733 case ISD::FSIN: 5734 case ISD::FCOS: 5735 case ISD::FTAN: 5736 case ISD::FASIN: 5737 case ISD::FACOS: 5738 case ISD::FATAN: 5739 case ISD::FATAN2: 5740 case ISD::FSINH: 5741 case ISD::FCOSH: 5742 case ISD::FTANH: 5743 case ISD::FMA: 5744 case ISD::FMAD: { 5745 if (SNaN) 5746 return true; 5747 // TODO: Need isKnownNeverInfinity 5748 return false; 5749 } 5750 case ISD::FCANONICALIZE: 5751 case ISD::FEXP: 5752 case ISD::FEXP2: 5753 case ISD::FEXP10: 5754 case ISD::FTRUNC: 5755 case ISD::FFLOOR: 5756 case ISD::FCEIL: 5757 case ISD::FROUND: 5758 case ISD::FROUNDEVEN: 5759 case ISD::LROUND: 5760 case ISD::LLROUND: 5761 case ISD::FRINT: 5762 case ISD::LRINT: 5763 case ISD::LLRINT: 5764 case ISD::FNEARBYINT: 5765 case ISD::FLDEXP: { 5766 if (SNaN) 5767 return true; 5768 return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1); 5769 } 5770 case ISD::FABS: 5771 case ISD::FNEG: 5772 case ISD::FCOPYSIGN: { 5773 return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1); 
5774 } 5775 case ISD::SELECT: 5776 return isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1) && 5777 isKnownNeverNaN(Op.getOperand(2), DemandedElts, SNaN, Depth + 1); 5778 case ISD::FP_EXTEND: 5779 case ISD::FP_ROUND: { 5780 if (SNaN) 5781 return true; 5782 return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1); 5783 } 5784 case ISD::SINT_TO_FP: 5785 case ISD::UINT_TO_FP: 5786 return true; 5787 case ISD::FSQRT: // Need is known positive 5788 case ISD::FLOG: 5789 case ISD::FLOG2: 5790 case ISD::FLOG10: 5791 case ISD::FPOWI: 5792 case ISD::FPOW: { 5793 if (SNaN) 5794 return true; 5795 // TODO: Refine on operand 5796 return false; 5797 } 5798 case ISD::FMINNUM: 5799 case ISD::FMAXNUM: 5800 case ISD::FMINIMUMNUM: 5801 case ISD::FMAXIMUMNUM: { 5802 // Only one needs to be known not-nan, since it will be returned if the 5803 // other ends up being one. 5804 return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) || 5805 isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1); 5806 } 5807 case ISD::FMINNUM_IEEE: 5808 case ISD::FMAXNUM_IEEE: { 5809 if (SNaN) 5810 return true; 5811 // This can return a NaN if either operand is an sNaN, or if both operands 5812 // are NaN. 5813 return (isKnownNeverNaN(Op.getOperand(0), DemandedElts, false, Depth + 1) && 5814 isKnownNeverSNaN(Op.getOperand(1), DemandedElts, Depth + 1)) || 5815 (isKnownNeverNaN(Op.getOperand(1), DemandedElts, false, Depth + 1) && 5816 isKnownNeverSNaN(Op.getOperand(0), DemandedElts, Depth + 1)); 5817 } 5818 case ISD::FMINIMUM: 5819 case ISD::FMAXIMUM: { 5820 // TODO: Does this quiet or return the original NaN as-is? 
5821 return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1) && 5822 isKnownNeverNaN(Op.getOperand(1), DemandedElts, SNaN, Depth + 1); 5823 } 5824 case ISD::EXTRACT_VECTOR_ELT: { 5825 SDValue Src = Op.getOperand(0); 5826 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 5827 EVT SrcVT = Src.getValueType(); 5828 if (SrcVT.isFixedLengthVector() && Idx && 5829 Idx->getAPIntValue().ult(SrcVT.getVectorNumElements())) { 5830 APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 5831 Idx->getZExtValue()); 5832 return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1); 5833 } 5834 return isKnownNeverNaN(Src, SNaN, Depth + 1); 5835 } 5836 case ISD::EXTRACT_SUBVECTOR: { 5837 SDValue Src = Op.getOperand(0); 5838 if (Src.getValueType().isFixedLengthVector()) { 5839 unsigned Idx = Op.getConstantOperandVal(1); 5840 unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); 5841 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); 5842 return isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1); 5843 } 5844 return isKnownNeverNaN(Src, SNaN, Depth + 1); 5845 } 5846 case ISD::INSERT_SUBVECTOR: { 5847 SDValue BaseVector = Op.getOperand(0); 5848 SDValue SubVector = Op.getOperand(1); 5849 EVT BaseVectorVT = BaseVector.getValueType(); 5850 if (BaseVectorVT.isFixedLengthVector()) { 5851 unsigned Idx = Op.getConstantOperandVal(2); 5852 unsigned NumBaseElts = BaseVectorVT.getVectorNumElements(); 5853 unsigned NumSubElts = SubVector.getValueType().getVectorNumElements(); 5854 5855 // Clear/Extract the bits at the position where the subvector will be 5856 // inserted. 
5857 APInt DemandedMask = 5858 APInt::getBitsSet(NumBaseElts, Idx, Idx + NumSubElts); 5859 APInt DemandedSrcElts = DemandedElts & ~DemandedMask; 5860 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); 5861 5862 bool NeverNaN = true; 5863 if (!DemandedSrcElts.isZero()) 5864 NeverNaN &= 5865 isKnownNeverNaN(BaseVector, DemandedSrcElts, SNaN, Depth + 1); 5866 if (NeverNaN && !DemandedSubElts.isZero()) 5867 NeverNaN &= 5868 isKnownNeverNaN(SubVector, DemandedSubElts, SNaN, Depth + 1); 5869 return NeverNaN; 5870 } 5871 return isKnownNeverNaN(BaseVector, SNaN, Depth + 1) && 5872 isKnownNeverNaN(SubVector, SNaN, Depth + 1); 5873 } 5874 case ISD::BUILD_VECTOR: { 5875 unsigned NumElts = Op.getNumOperands(); 5876 for (unsigned I = 0; I != NumElts; ++I) 5877 if (DemandedElts[I] && 5878 !isKnownNeverNaN(Op.getOperand(I), SNaN, Depth + 1)) 5879 return false; 5880 return true; 5881 } 5882 case ISD::AssertNoFPClass: { 5883 FPClassTest NoFPClass = 5884 static_cast<FPClassTest>(Op.getConstantOperandVal(1)); 5885 if ((NoFPClass & fcNan) == fcNan) 5886 return true; 5887 if (SNaN && (NoFPClass & fcSNan) == fcSNan) 5888 return true; 5889 return isKnownNeverNaN(Op.getOperand(0), DemandedElts, SNaN, Depth + 1); 5890 } 5891 default: 5892 if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || 5893 Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) { 5894 return TLI->isKnownNeverNaNForTargetNode(Op, DemandedElts, *this, SNaN, 5895 Depth); 5896 } 5897 5898 return false; 5899 } 5900 } 5901 5902 bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { 5903 assert(Op.getValueType().isFloatingPoint() && 5904 "Floating point type expected"); 5905 5906 // If the value is a constant, we can obviously see if it is a zero or not. 
5907 return ISD::matchUnaryFpPredicate( 5908 Op, [](ConstantFPSDNode *C) { return !C->isZero(); }); 5909 } 5910 5911 bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const { 5912 if (Depth >= MaxRecursionDepth) 5913 return false; // Limit search depth. 5914 5915 assert(!Op.getValueType().isFloatingPoint() && 5916 "Floating point types unsupported - use isKnownNeverZeroFloat"); 5917 5918 // If the value is a constant, we can obviously see if it is a zero or not. 5919 if (ISD::matchUnaryPredicate(Op, 5920 [](ConstantSDNode *C) { return !C->isZero(); })) 5921 return true; 5922 5923 // TODO: Recognize more cases here. Most of the cases are also incomplete to 5924 // some degree. 5925 switch (Op.getOpcode()) { 5926 default: 5927 break; 5928 5929 case ISD::OR: 5930 return isKnownNeverZero(Op.getOperand(1), Depth + 1) || 5931 isKnownNeverZero(Op.getOperand(0), Depth + 1); 5932 5933 case ISD::VSELECT: 5934 case ISD::SELECT: 5935 return isKnownNeverZero(Op.getOperand(1), Depth + 1) && 5936 isKnownNeverZero(Op.getOperand(2), Depth + 1); 5937 5938 case ISD::SHL: { 5939 if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap()) 5940 return isKnownNeverZero(Op.getOperand(0), Depth + 1); 5941 KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1); 5942 // 1 << X is never zero. 5943 if (ValKnown.One[0]) 5944 return true; 5945 // If max shift cnt of known ones is non-zero, result is non-zero. 5946 APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue(); 5947 if (MaxCnt.ult(ValKnown.getBitWidth()) && 5948 !ValKnown.One.shl(MaxCnt).isZero()) 5949 return true; 5950 break; 5951 } 5952 case ISD::UADDSAT: 5953 case ISD::UMAX: 5954 return isKnownNeverZero(Op.getOperand(1), Depth + 1) || 5955 isKnownNeverZero(Op.getOperand(0), Depth + 1); 5956 5957 // For smin/smax: If either operand is known negative/positive 5958 // respectively we don't need the other to be known at all. 
5959 case ISD::SMAX: { 5960 KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1); 5961 if (Op1.isStrictlyPositive()) 5962 return true; 5963 5964 KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1); 5965 if (Op0.isStrictlyPositive()) 5966 return true; 5967 5968 if (Op1.isNonZero() && Op0.isNonZero()) 5969 return true; 5970 5971 return isKnownNeverZero(Op.getOperand(1), Depth + 1) && 5972 isKnownNeverZero(Op.getOperand(0), Depth + 1); 5973 } 5974 case ISD::SMIN: { 5975 KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1); 5976 if (Op1.isNegative()) 5977 return true; 5978 5979 KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1); 5980 if (Op0.isNegative()) 5981 return true; 5982 5983 if (Op1.isNonZero() && Op0.isNonZero()) 5984 return true; 5985 5986 return isKnownNeverZero(Op.getOperand(1), Depth + 1) && 5987 isKnownNeverZero(Op.getOperand(0), Depth + 1); 5988 } 5989 case ISD::UMIN: 5990 return isKnownNeverZero(Op.getOperand(1), Depth + 1) && 5991 isKnownNeverZero(Op.getOperand(0), Depth + 1); 5992 5993 case ISD::ROTL: 5994 case ISD::ROTR: 5995 case ISD::BITREVERSE: 5996 case ISD::BSWAP: 5997 case ISD::CTPOP: 5998 case ISD::ABS: 5999 return isKnownNeverZero(Op.getOperand(0), Depth + 1); 6000 6001 case ISD::SRA: 6002 case ISD::SRL: { 6003 if (Op->getFlags().hasExact()) 6004 return isKnownNeverZero(Op.getOperand(0), Depth + 1); 6005 KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1); 6006 if (ValKnown.isNegative()) 6007 return true; 6008 // If max shift cnt of known ones is non-zero, result is non-zero. 6009 APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue(); 6010 if (MaxCnt.ult(ValKnown.getBitWidth()) && 6011 !ValKnown.One.lshr(MaxCnt).isZero()) 6012 return true; 6013 break; 6014 } 6015 case ISD::UDIV: 6016 case ISD::SDIV: 6017 // div exact can only produce a zero if the dividend is zero. 
6018 // TODO: For udiv this is also true if Op1 u<= Op0 6019 if (Op->getFlags().hasExact()) 6020 return isKnownNeverZero(Op.getOperand(0), Depth + 1); 6021 break; 6022 6023 case ISD::ADD: 6024 if (Op->getFlags().hasNoUnsignedWrap()) 6025 if (isKnownNeverZero(Op.getOperand(1), Depth + 1) || 6026 isKnownNeverZero(Op.getOperand(0), Depth + 1)) 6027 return true; 6028 // TODO: There are a lot more cases we can prove for add. 6029 break; 6030 6031 case ISD::SUB: { 6032 if (isNullConstant(Op.getOperand(0))) 6033 return isKnownNeverZero(Op.getOperand(1), Depth + 1); 6034 6035 std::optional<bool> ne = 6036 KnownBits::ne(computeKnownBits(Op.getOperand(0), Depth + 1), 6037 computeKnownBits(Op.getOperand(1), Depth + 1)); 6038 return ne && *ne; 6039 } 6040 6041 case ISD::MUL: 6042 if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap()) 6043 if (isKnownNeverZero(Op.getOperand(1), Depth + 1) && 6044 isKnownNeverZero(Op.getOperand(0), Depth + 1)) 6045 return true; 6046 break; 6047 6048 case ISD::ZERO_EXTEND: 6049 case ISD::SIGN_EXTEND: 6050 return isKnownNeverZero(Op.getOperand(0), Depth + 1); 6051 case ISD::VSCALE: { 6052 const Function &F = getMachineFunction().getFunction(); 6053 const APInt &Multiplier = Op.getConstantOperandAPInt(0); 6054 ConstantRange CR = 6055 getVScaleRange(&F, Op.getScalarValueSizeInBits()).multiply(Multiplier); 6056 if (!CR.contains(APInt(CR.getBitWidth(), 0))) 6057 return true; 6058 break; 6059 } 6060 } 6061 6062 return computeKnownBits(Op, Depth).isNonZero(); 6063 } 6064 6065 bool SelectionDAG::cannotBeOrderedNegativeFP(SDValue Op) const { 6066 if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true)) 6067 return !C1->isNegative(); 6068 6069 return Op.getOpcode() == ISD::FABS; 6070 } 6071 6072 bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { 6073 // Check the obvious case. 6074 if (A == B) return true; 6075 6076 // For negative and positive zero. 
6077 if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A)) 6078 if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B)) 6079 if (CA->isZero() && CB->isZero()) return true; 6080 6081 // Otherwise they may not be equal. 6082 return false; 6083 } 6084 6085 // Only bits set in Mask must be negated, other bits may be arbitrary. 6086 SDValue llvm::getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs) { 6087 if (isBitwiseNot(V, AllowUndefs)) 6088 return V.getOperand(0); 6089 6090 // Handle any_extend (not (truncate X)) pattern, where Mask only sets 6091 // bits in the non-extended part. 6092 ConstantSDNode *MaskC = isConstOrConstSplat(Mask); 6093 if (!MaskC || V.getOpcode() != ISD::ANY_EXTEND) 6094 return SDValue(); 6095 SDValue ExtArg = V.getOperand(0); 6096 if (ExtArg.getScalarValueSizeInBits() >= 6097 MaskC->getAPIntValue().getActiveBits() && 6098 isBitwiseNot(ExtArg, AllowUndefs) && 6099 ExtArg.getOperand(0).getOpcode() == ISD::TRUNCATE && 6100 ExtArg.getOperand(0).getOperand(0).getValueType() == V.getValueType()) 6101 return ExtArg.getOperand(0).getOperand(0); 6102 return SDValue(); 6103 } 6104 6105 static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) { 6106 // Match masked merge pattern (X & ~M) op (Y & M) 6107 // Including degenerate case (X & ~M) op M 6108 auto MatchNoCommonBitsPattern = [&](SDValue Not, SDValue Mask, 6109 SDValue Other) { 6110 if (SDValue NotOperand = 6111 getBitwiseNotOperand(Not, Mask, /* AllowUndefs */ true)) { 6112 if (NotOperand->getOpcode() == ISD::ZERO_EXTEND || 6113 NotOperand->getOpcode() == ISD::TRUNCATE) 6114 NotOperand = NotOperand->getOperand(0); 6115 6116 if (Other == NotOperand) 6117 return true; 6118 if (Other->getOpcode() == ISD::AND) 6119 return NotOperand == Other->getOperand(0) || 6120 NotOperand == Other->getOperand(1); 6121 } 6122 return false; 6123 }; 6124 6125 if (A->getOpcode() == ISD::ZERO_EXTEND || A->getOpcode() == ISD::TRUNCATE) 6126 A = A->getOperand(0); 6127 6128 if 
(B->getOpcode() == ISD::ZERO_EXTEND || B->getOpcode() == ISD::TRUNCATE) 6129 B = B->getOperand(0); 6130 6131 if (A->getOpcode() == ISD::AND) 6132 return MatchNoCommonBitsPattern(A->getOperand(0), A->getOperand(1), B) || 6133 MatchNoCommonBitsPattern(A->getOperand(1), A->getOperand(0), B); 6134 return false; 6135 } 6136 6137 // FIXME: unify with llvm::haveNoCommonBitsSet. 6138 bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { 6139 assert(A.getValueType() == B.getValueType() && 6140 "Values must have the same type"); 6141 if (haveNoCommonBitsSetCommutative(A, B) || 6142 haveNoCommonBitsSetCommutative(B, A)) 6143 return true; 6144 return KnownBits::haveNoCommonBitsSet(computeKnownBits(A), 6145 computeKnownBits(B)); 6146 } 6147 6148 static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step, 6149 SelectionDAG &DAG) { 6150 if (cast<ConstantSDNode>(Step)->isZero()) 6151 return DAG.getConstant(0, DL, VT); 6152 6153 return SDValue(); 6154 } 6155 6156 static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, 6157 ArrayRef<SDValue> Ops, 6158 SelectionDAG &DAG) { 6159 int NumOps = Ops.size(); 6160 assert(NumOps != 0 && "Can't build an empty vector!"); 6161 assert(!VT.isScalableVector() && 6162 "BUILD_VECTOR cannot be used with scalable types"); 6163 assert(VT.getVectorNumElements() == (unsigned)NumOps && 6164 "Incorrect element count in BUILD_VECTOR!"); 6165 6166 // BUILD_VECTOR of UNDEFs is UNDEF. 6167 if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) 6168 return DAG.getUNDEF(VT); 6169 6170 // BUILD_VECTOR of seq extract/insert from the same vector + type is Identity. 
6171 SDValue IdentitySrc; 6172 bool IsIdentity = true; 6173 for (int i = 0; i != NumOps; ++i) { 6174 if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT || 6175 Ops[i].getOperand(0).getValueType() != VT || 6176 (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) || 6177 !isa<ConstantSDNode>(Ops[i].getOperand(1)) || 6178 Ops[i].getConstantOperandAPInt(1) != i) { 6179 IsIdentity = false; 6180 break; 6181 } 6182 IdentitySrc = Ops[i].getOperand(0); 6183 } 6184 if (IsIdentity) 6185 return IdentitySrc; 6186 6187 return SDValue(); 6188 } 6189 6190 /// Try to simplify vector concatenation to an input value, undef, or build 6191 /// vector. 6192 static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, 6193 ArrayRef<SDValue> Ops, 6194 SelectionDAG &DAG) { 6195 assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); 6196 assert(llvm::all_of(Ops, 6197 [Ops](SDValue Op) { 6198 return Ops[0].getValueType() == Op.getValueType(); 6199 }) && 6200 "Concatenation of vectors with inconsistent value types!"); 6201 assert((Ops[0].getValueType().getVectorElementCount() * Ops.size()) == 6202 VT.getVectorElementCount() && 6203 "Incorrect element count in vector concatenation!"); 6204 6205 if (Ops.size() == 1) 6206 return Ops[0]; 6207 6208 // Concat of UNDEFs is UNDEF. 6209 if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) 6210 return DAG.getUNDEF(VT); 6211 6212 // Scan the operands and look for extract operations from a single source 6213 // that correspond to insertion at the same location via this concatenation: 6214 // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ... 
6215 SDValue IdentitySrc; 6216 bool IsIdentity = true; 6217 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 6218 SDValue Op = Ops[i]; 6219 unsigned IdentityIndex = i * Op.getValueType().getVectorMinNumElements(); 6220 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR || 6221 Op.getOperand(0).getValueType() != VT || 6222 (IdentitySrc && Op.getOperand(0) != IdentitySrc) || 6223 Op.getConstantOperandVal(1) != IdentityIndex) { 6224 IsIdentity = false; 6225 break; 6226 } 6227 assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) && 6228 "Unexpected identity source vector for concat of extracts"); 6229 IdentitySrc = Op.getOperand(0); 6230 } 6231 if (IsIdentity) { 6232 assert(IdentitySrc && "Failed to set source vector of extracts"); 6233 return IdentitySrc; 6234 } 6235 6236 // The code below this point is only designed to work for fixed width 6237 // vectors, so we bail out for now. 6238 if (VT.isScalableVector()) 6239 return SDValue(); 6240 6241 // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be 6242 // simplified to one big BUILD_VECTOR. 6243 // FIXME: Add support for SCALAR_TO_VECTOR as well. 6244 EVT SVT = VT.getScalarType(); 6245 SmallVector<SDValue, 16> Elts; 6246 for (SDValue Op : Ops) { 6247 EVT OpVT = Op.getValueType(); 6248 if (Op.isUndef()) 6249 Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT)); 6250 else if (Op.getOpcode() == ISD::BUILD_VECTOR) 6251 Elts.append(Op->op_begin(), Op->op_end()); 6252 else 6253 return SDValue(); 6254 } 6255 6256 // BUILD_VECTOR requires all inputs to be of the same type, find the 6257 // maximum type and extend them all. 6258 for (SDValue Op : Elts) 6259 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); 6260 6261 if (SVT.bitsGT(VT.getScalarType())) { 6262 for (SDValue &Op : Elts) { 6263 if (Op.isUndef()) 6264 Op = DAG.getUNDEF(SVT); 6265 else 6266 Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT) 6267 ? 
DAG.getZExtOrTrunc(Op, DL, SVT)
                           : DAG.getSExtOrTrunc(Op, DL, SVT);
    }
  }

  SDValue V = DAG.getBuildVector(VT, DL, Elts);
  NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG);
  return V;
}

/// Gets or creates the specified node.
///
/// Operand-less overload: the node is identified only by \p Opcode and the
/// value-type list built from \p VT. If FindNodeOrInsertPos reports an
/// existing node with that identity it is returned; otherwise a new node is
/// allocated, added to the CSE map at the returned insert position, and
/// inserted into the DAG.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
  SDVTList VTs = getVTList(VT);
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, Opcode, VTs, {});
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
    return SDValue(E, 0);

  auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
  CSEMap.InsertNode(N, IP);

  InsertNode(N);
  SDValue V = SDValue(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

/// Unary convenience overload: forwards to the flag-taking overload, using
/// the flags reported by the installed Inserter when there is one and
/// default-constructed flags otherwise.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              SDValue N1) {
  SDNodeFlags Flags;
  if (Inserter)
    Flags = Inserter->getFlags();
  return getNode(Opcode, DL, VT, N1, Flags);
}

/// Gets or creates a unary node \p Opcode(\p N1) of type \p VT.
///
/// The function works in three stages:
///  1. For the opcodes listed in the first switch, try to constant fold via
///     FoldConstantArithmetic and return the folded value on success.
///  2. In the second switch, check per-opcode invariants with asserts and
///     apply local simplifications (no-op conversions, undef/poison
///     propagation, collapsing of nested extends/truncates/bitcasts, ...).
///     Many of these return an existing value without creating a node.
///  3. Otherwise look the node up in the CSE map (skipped when \p VT is
///     MVT::Glue) and create it if it does not exist yet.
///
/// \param Flags Flags set on a newly created, CSE-able node. When an
///              equivalent node already exists, its flags are intersected
///              with \p Flags instead.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              SDValue N1, const SDNodeFlags Flags) {
  assert(N1.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!");

  // Constant fold unary operations with a vector integer or float operand.
  switch (Opcode) {
  default:
    // FIXME: Entirely reasonable to perform folding of other unary
    // operations here as the need arises.
    break;
  case ISD::FNEG:
  case ISD::FABS:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FFLOOR:
  case ISD::FP_EXTEND:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_FP16:
  case ISD::FP_TO_BF16:
  case ISD::TRUNCATE:
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
  case ISD::FP16_TO_FP:
  case ISD::BF16_TO_FP:
  case ISD::BITCAST:
  case ISD::ABS:
  case ISD::BITREVERSE:
  case ISD::BSWAP:
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::CTPOP:
  case ISD::STEP_VECTOR: {
    SDValue Ops = {N1};
    if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
      return Fold;
    // No fold: this is the last case, so control leaves the switch and
    // continues with the opcode-specific handling below.
  }
  }

  // Opcode of the operand; used below to recognise nested patterns such as
  // (zext (zext x)) or (bswap (bswap x)).
  unsigned OpOpcode = N1.getNode()->getOpcode();
  switch (Opcode) {
  case ISD::STEP_VECTOR:
    assert(VT.isScalableVector() &&
           "STEP_VECTOR can only be used with scalable types");
    assert(OpOpcode == ISD::TargetConstant &&
           VT.getVectorElementType() == N1.getValueType() &&
           "Unexpected step operand");
    break;
  case ISD::FREEZE:
    assert(VT == N1.getValueType() && "Unexpected VT!");
    // Freezing a value that can be neither undef nor poison is a no-op.
    if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly*/ false,
                                         /*Depth*/ 1))
      return N1;
    break;
  case ISD::TokenFactor:
  case ISD::MERGE_VALUES:
  case ISD::CONCAT_VECTORS:
    return N1; // Factor, merge or concat of one node? No need.
  case ISD::BUILD_VECTOR: {
    // Attempt to simplify BUILD_VECTOR.
    SDValue Ops[] = {N1};
    if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
      return V;
    break;
  }
  case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
  case ISD::FP_EXTEND:
    assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() &&
           "Invalid FP cast!");
    if (N1.getValueType() == VT) return N1; // noop conversion.
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(N1.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!");
    if (N1.isUndef())
      return getUNDEF(VT);
    break;
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    if (N1.isUndef())
      return getUNDEF(VT);
    break;
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    // [us]itofp(undef) = 0, because the result value is bounded.
    if (N1.isUndef())
      return getConstantFP(0.0, DL, VT);
    break;
  case ISD::SIGN_EXTEND:
    assert(VT.isInteger() && N1.getValueType().isInteger() &&
           "Invalid SIGN_EXTEND!");
    assert(VT.isVector() == N1.getValueType().isVector() &&
           "SIGN_EXTEND result type type should be vector iff the operand "
           "type is vector!");
    if (N1.getValueType() == VT) return N1; // noop extension
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(N1.getValueType().bitsLT(VT) && "Invalid sext node, dst < src!");
    // (sext (sext x)) -> (sext x) and (sext (zext x)) -> (zext x): re-emit
    // the inner extend directly at the wider type. Only the nonneg flag of an
    // inner zext is carried over; the local Flags shadows the parameter.
    if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) {
      SDNodeFlags Flags;
      if (OpOpcode == ISD::ZERO_EXTEND)
        Flags.setNonNeg(N1->getFlags().hasNonNeg());
      SDValue NewVal = getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
      transferDbgValues(N1, NewVal);
      return NewVal;
    }

    // Poison is checked before the generic undef test so it stays poison
    // instead of being turned into the constant 0 below.
    if (OpOpcode == ISD::POISON)
      return getPOISON(VT);

    if (N1.isUndef())
      // sext(undef) = 0, because the top bits will all be the same.
      return getConstant(0, DL, VT);
    break;
  case ISD::ZERO_EXTEND:
    assert(VT.isInteger() && N1.getValueType().isInteger() &&
           "Invalid ZERO_EXTEND!");
    assert(VT.isVector() == N1.getValueType().isVector() &&
           "ZERO_EXTEND result type type should be vector iff the operand "
           "type is vector!");
    if (N1.getValueType() == VT) return N1; // noop extension
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(N1.getValueType().bitsLT(VT) && "Invalid zext node, dst < src!");
    if (OpOpcode == ISD::ZERO_EXTEND) { // (zext (zext x)) -> (zext x)
      SDNodeFlags Flags;
      Flags.setNonNeg(N1->getFlags().hasNonNeg());
      SDValue NewVal =
          getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0), Flags);
      transferDbgValues(N1, NewVal);
      return NewVal;
    }

    if (OpOpcode == ISD::POISON)
      return getPOISON(VT);

    if (N1.isUndef())
      // zext(undef) = 0, because the top bits will be zero.
      return getConstant(0, DL, VT);

    // Skip unnecessary zext_inreg pattern:
    // (zext (trunc x)) -> x iff the upper bits are known zero.
    // TODO: Remove (zext (trunc (and x, c))) exception which some targets
    // use to recognise zext_inreg patterns.
    if (OpOpcode == ISD::TRUNCATE) {
      SDValue OpOp = N1.getOperand(0);
      if (OpOp.getValueType() == VT) {
        if (OpOp.getOpcode() != ISD::AND) {
          // HiBits covers every bit of the wide type at or above the width
          // of the truncated value.
          APInt HiBits = APInt::getBitsSetFrom(VT.getScalarSizeInBits(),
                                               N1.getScalarValueSizeInBits());
          if (MaskedValueIsZero(OpOp, HiBits)) {
            transferDbgValues(N1, OpOp);
            return OpOp;
          }
        }
      }
    }
    break;
  case ISD::ANY_EXTEND:
    assert(VT.isInteger() && N1.getValueType().isInteger() &&
           "Invalid ANY_EXTEND!");
    assert(VT.isVector() == N1.getValueType().isVector() &&
           "ANY_EXTEND result type type should be vector iff the operand "
           "type is vector!");
    if (N1.getValueType() == VT) return N1; // noop extension
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(N1.getValueType().bitsLT(VT) && "Invalid anyext node, dst < src!");

    if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
        OpOpcode == ISD::ANY_EXTEND) {
      SDNodeFlags Flags;
      if (OpOpcode == ISD::ZERO_EXTEND)
        Flags.setNonNeg(N1->getFlags().hasNonNeg());
      // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
      return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
    }
    if (N1.isUndef())
      return getUNDEF(VT);

    // (ext (trunc x)) -> x
    if (OpOpcode == ISD::TRUNCATE) {
      SDValue OpOp = N1.getOperand(0);
      if (OpOp.getValueType() == VT) {
        transferDbgValues(N1, OpOp);
        return OpOp;
      }
    }
    break;
  case ISD::TRUNCATE:
    assert(VT.isInteger() && N1.getValueType().isInteger() &&
           "Invalid TRUNCATE!");
    assert(VT.isVector() == N1.getValueType().isVector() &&
           "TRUNCATE result type type should be vector iff the operand "
           "type is vector!");
    if (N1.getValueType() == VT) return N1; // noop truncate
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(N1.getValueType().bitsGT(VT) && "Invalid truncate node, src < dst!");
    // (trunc (trunc x)) -> (trunc x)
    if (OpOpcode == ISD::TRUNCATE)
      return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
    // (trunc (ext x)): depending on how the type of x compares with VT this
    // becomes an extend of x, a truncate of x, or x itself.
    if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
        OpOpcode == ISD::ANY_EXTEND) {
      // If the source is smaller than the dest, we still need an extend.
      if (N1.getOperand(0).getValueType().getScalarType().bitsLT(
              VT.getScalarType())) {
        SDNodeFlags Flags;
        if (OpOpcode == ISD::ZERO_EXTEND)
          Flags.setNonNeg(N1->getFlags().hasNonNeg());
        return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
      }
      if (N1.getOperand(0).getValueType().bitsGT(VT))
        return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
      return N1.getOperand(0);
    }
    if (N1.isUndef())
      return getUNDEF(VT);
    // (trunc (vscale C)) -> (vscale trunc(C)), only while new nodes are still
    // allowed to have illegal types.
    if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)
      return getVScale(DL, VT,
                       N1.getConstantOperandAPInt(0).trunc(VT.getSizeInBits()));
    break;
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    assert(VT.isVector() && "This DAG node is restricted to vector types.");
    assert(N1.getValueType().bitsLE(VT) &&
           "The input must be the same size or smaller than the result.");
    assert(VT.getVectorMinNumElements() <
               N1.getValueType().getVectorMinNumElements() &&
           "The destination vector type must have fewer lanes than the input.");
    break;
  case ISD::ABS:
    assert(VT.isInteger() && VT == N1.getValueType() && "Invalid ABS!");
    if (N1.isUndef())
      return getConstant(0, DL, VT);
    break;
  case ISD::BSWAP:
    assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BSWAP!");
    assert((VT.getScalarSizeInBits() % 16 == 0) &&
           "BSWAP types must be a multiple of 16 bits!");
    if (N1.isUndef())
      return getUNDEF(VT);
    // bswap(bswap(X)) -> X.
    if (OpOpcode == ISD::BSWAP)
      return N1.getOperand(0);
    break;
  case ISD::BITREVERSE:
    assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BITREVERSE!");
    if (N1.isUndef())
      return getUNDEF(VT);
    break;
  case ISD::BITCAST:
    assert(VT.getSizeInBits() == N1.getValueSizeInBits() &&
           "Cannot BITCAST between types of different sizes!");
    if (VT == N1.getValueType()) return N1; // noop conversion.
    if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
      return getNode(ISD::BITCAST, DL, VT, N1.getOperand(0));
    if (N1.isUndef())
      return getUNDEF(VT);
    break;
  case ISD::SCALAR_TO_VECTOR:
    assert(VT.isVector() && !N1.getValueType().isVector() &&
           (VT.getVectorElementType() == N1.getValueType() ||
            (VT.getVectorElementType().isInteger() &&
             N1.getValueType().isInteger() &&
             VT.getVectorElementType().bitsLE(N1.getValueType()))) &&
           "Illegal SCALAR_TO_VECTOR node!");
    if (N1.isUndef())
      return getUNDEF(VT);
    // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
    if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
        isa<ConstantSDNode>(N1.getOperand(1)) &&
        N1.getConstantOperandVal(1) == 0 &&
        N1.getOperand(0).getValueType() == VT)
      return N1.getOperand(0);
    break;
  case ISD::FNEG:
    // Negation of an unknown bag of bits is still completely undefined.
    if (N1.isUndef())
      return getUNDEF(VT);

    if (OpOpcode == ISD::FNEG) // --X -> X
      return N1.getOperand(0);
    break;
  case ISD::FABS:
    if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
      return getNode(ISD::FABS, DL, VT, N1.getOperand(0));
    break;
  case ISD::VSCALE:
    assert(VT == N1.getValueType() && "Unexpected VT!");
    break;
  // The remaining cases special-case i1 element types, where the operation
  // collapses to something simpler.
  case ISD::CTPOP:
    // The population count of a single bit is the bit itself.
    if (N1.getValueType().getScalarType() == MVT::i1)
      return N1;
    break;
  case ISD::CTLZ:
  case ISD::CTTZ:
    // For one bit the zero count is 1 when the bit is clear and 0 when it is
    // set, i.e. the logical NOT of the bit.
    if (N1.getValueType().getScalarType() == MVT::i1)
      return getNOT(DL, N1, N1.getValueType());
    break;
  case ISD::VECREDUCE_ADD:
    if (N1.getValueType().getScalarType() == MVT::i1)
      return getNode(ISD::VECREDUCE_XOR, DL, VT, N1);
    break;
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
    if (N1.getValueType().getScalarType() == MVT::i1)
      return getNode(ISD::VECREDUCE_OR, DL, VT, N1);
    break;
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMIN:
    if (N1.getValueType().getScalarType() == MVT::i1)
      return getNode(ISD::VECREDUCE_AND, DL, VT, N1);
    break;
  case ISD::SPLAT_VECTOR:
    assert(VT.isVector() && "Wrong return type!");
    // FIXME: Hexagon uses i32 scalar for a floating point zero vector so allow
    // that for now.
    assert((VT.getVectorElementType() == N1.getValueType() ||
            (VT.isFloatingPoint() && N1.getValueType() == MVT::i32) ||
            (VT.getVectorElementType().isInteger() &&
             N1.getValueType().isInteger() &&
             VT.getVectorElementType().bitsLE(N1.getValueType()))) &&
           "Wrong operand type!");
    break;
  }

  // No simplification applied: find an existing equivalent node or create a
  // new one.
  SDNode *N;
  SDVTList VTs = getVTList(VT);
  SDValue Ops[] = {N1};
  if (VT != MVT::Glue) { // Don't CSE glue producing nodes
    FoldingSetNodeID ID;
    AddNodeIDNode(ID, Opcode, VTs, Ops);
    void *IP = nullptr;
    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
      // Reusing an existing node: keep only the flags that both the existing
      // node and this request have.
      E->intersectFlagsWith(Flags);
      return SDValue(E, 0);
    }

    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
    N->setFlags(Flags);
    createOperands(N, Ops);
    CSEMap.InsertNode(N, IP);
  } else {
    // NOTE(review): Flags is not applied to the node on this glue path -
    // confirm this is intentional.
    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
    createOperands(N, Ops);
  }

  InsertNode(N);
  SDValue V = SDValue(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

/// Constant fold the binary integer operation \p Opcode on the constants
/// \p C1 and \p C2.
///
/// \returns the folded value, or std::nullopt when \p Opcode is not one of
/// the opcodes handled here or when a division/remainder has a zero divisor.
static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
                                      const APInt &C2) {
  switch (Opcode) {
  case ISD::ADD: return C1 + C2;
  case ISD::SUB: return C1 - C2;
  case ISD::MUL: return C1 * C2;
  case ISD::AND: return C1 & C2;
  case ISD::OR: return C1 | C2;
  case ISD::XOR: return C1 ^ C2;
  case ISD::SHL: return C1 << C2;
  case ISD::SRL: return C1.lshr(C2);
  case ISD::SRA: return C1.ashr(C2);
  case ISD::ROTL: return C1.rotl(C2);
  case ISD::ROTR: return C1.rotr(C2);
  case ISD::SMIN: return C1.sle(C2) ? C1 : C2;
  case ISD::SMAX: return C1.sge(C2) ? C1 : C2;
  case ISD::UMIN: return C1.ule(C2) ? C1 : C2;
  case ISD::UMAX: return C1.uge(C2) ? C1 : C2;
  case ISD::SADDSAT: return C1.sadd_sat(C2);
  case ISD::UADDSAT: return C1.uadd_sat(C2);
  case ISD::SSUBSAT: return C1.ssub_sat(C2);
  case ISD::USUBSAT: return C1.usub_sat(C2);
  case ISD::SSHLSAT: return C1.sshl_sat(C2);
  case ISD::USHLSAT: return C1.ushl_sat(C2);
  // Division and remainder by zero are not folded: break out of the switch
  // and fall through to the std::nullopt return below.
  case ISD::UDIV:
    if (!C2.getBoolValue())
      break;
    return C1.udiv(C2);
  case ISD::UREM:
    if (!C2.getBoolValue())
      break;
    return C1.urem(C2);
  case ISD::SDIV:
    if (!C2.getBoolValue())
      break;
    return C1.sdiv(C2);
  case ISD::SREM:
    if (!C2.getBoolValue())
      break;
    return C1.srem(C2);
  case ISD::AVGFLOORS:
    return APIntOps::avgFloorS(C1, C2);
  case ISD::AVGFLOORU:
    return APIntOps::avgFloorU(C1, C2);
  case ISD::AVGCEILS:
    return APIntOps::avgCeilS(C1, C2);
  case ISD::AVGCEILU:
    return APIntOps::avgCeilU(C1, C2);
  case ISD::ABDS:
    return APIntOps::abds(C1, C2);
  case ISD::ABDU:
    return APIntOps::abdu(C1, C2);
  case ISD::MULHS:
    return APIntOps::mulhs(C1, C2);
  case ISD::MULHU:
    return APIntOps::mulhu(C1, C2);
  }
  return std::nullopt;
}
// Handle constant folding with UNDEF.
// TODO: Handle more cases.
static std::optional<APInt> FoldValueWithUndef(unsigned Opcode, const APInt &C1,
                                               bool IsUndef1, const APInt &C2,
                                               bool IsUndef2) {
  // With no undef operand this is ordinary constant folding.
  if (!(IsUndef1 || IsUndef2))
    return FoldValue(Opcode, C1, C2);

  // At least one operand is undef. Only AND and MUL are folded in that case;
  // the result is zero regardless of the values in C1 and C2.
  // Fold and(x, undef) -> 0
  // Fold mul(x, undef) -> 0
  if (Opcode == ISD::AND || Opcode == ISD::MUL)
    return APInt::getZero(C1.getBitWidth());

  return std::nullopt;
}

/// Fold (add GA, C) or (sub GA, C) into a single global address whose offset
/// absorbs the constant.
///
/// \returns an empty SDValue when \p GA is not an ISD::GlobalAddress node,
/// when the target reports that offset folding is not legal for it, when
/// \p N2 is not a ConstantSDNode, or when \p Opcode is neither ISD::ADD nor
/// ISD::SUB.
SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
                                       const GlobalAddressSDNode *GA,
                                       const SDNode *N2) {
  if (GA->getOpcode() != ISD::GlobalAddress)
    return SDValue();
  if (!TLI->isOffsetFoldingLegal(GA))
    return SDValue();
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C2)
    return SDValue();
  int64_t Offset = C2->getSExtValue();
  switch (Opcode) {
  case ISD::ADD: break;
  // Negation and the addition below are carried out on uint64_t.
  case ISD::SUB: Offset = -uint64_t(Offset); break;
  default: return SDValue();
  }
  return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT,
                          GA->getOffset() + uint64_t(Offset));
}

/// Returns true when the operation \p Opcode applied to \p Ops is known to
/// produce undef. Only integer division and remainder are handled; every
/// other opcode returns false.
bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
  switch (Opcode) {
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM: {
    // If a divisor is zero/undef or any element of a divisor vector is
    // zero/undef, the whole op is undef.
    assert(Ops.size() == 2 && "Div/rem should have 2 operands");
    SDValue Divisor = Ops[1];
    if (Divisor.isUndef() || isNullConstant(Divisor))
      return true;

    return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
           llvm::any_of(Divisor->op_values(),
                        [](SDValue V) { return V.isUndef() ||
                                        isNullConstant(V); });
    // TODO: Handle signed overflow.
  }
  // TODO: Handle oversized shifts.
  default:
    return false;
  }
}

/// Try to constant fold the operation \p Opcode over the operands \p Ops
/// into a value of type \p VT.
///
/// The stages are, in order:
///  - bail out for target-specific opcodes, CONCAT_VECTORS and empty operand
///    lists, and return UNDEF when isUndef() says the operation is undef;
///  - unary folds of a ConstantSDNode or ConstantFPSDNode operand;
///  - binary folds: FP constants, integer constants, symbol + offset, and
///    SIGN_EXTEND_INREG of constants;
///  - vector-only folds: integer binops through bitcasted build vectors,
///    MUL/SHL of a STEP_VECTOR by a constant splat, and finally a generic
///    lane-by-lane fold of BUILD_VECTOR / SPLAT_VECTOR operands.
///
/// \param Flags Forwarded to the per-lane getNode() call of the generic
///              vector fold.
/// \returns the folded value, or an empty SDValue when nothing could be
///          folded.
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
                                             EVT VT, ArrayRef<SDValue> Ops,
                                             SDNodeFlags Flags) {
  // If the opcode is a target-specific ISD node, there's nothing we can
  // do here and the operand rules may not line up with the below, so
  // bail early.
  // We can't create a scalar CONCAT_VECTORS so skip it. It will break
  // for concats involving SPLAT_VECTOR. Concats of BUILD_VECTORS are handled by
  // foldCONCAT_VECTORS in getNode before this is called.
  if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
    return SDValue();

  unsigned NumOps = Ops.size();
  if (NumOps == 0)
    return SDValue();

  if (isUndef(Opcode, Ops))
    return getUNDEF(VT);

  // Handle unary special cases.
  if (NumOps == 1) {
    SDValue N1 = Ops[0];

    // Constant fold unary operations with an integer constant operand. Even
    // opaque constant will be folded, because the folding of unary operations
    // doesn't create new constants with different values. Nevertheless, the
    // opaque flag is preserved during folding to prevent future folding with
    // other constants.
    if (auto *C = dyn_cast<ConstantSDNode>(N1)) {
      const APInt &Val = C->getAPIntValue();
      switch (Opcode) {
      case ISD::SIGN_EXTEND:
        return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
                           C->isTargetOpcode(), C->isOpaque());
      case ISD::TRUNCATE:
        // Unlike the other unary folds, truncation of an opaque constant is
        // not folded.
        if (C->isOpaque())
          break;
        [[fallthrough]];
      case ISD::ZERO_EXTEND:
        return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
                           C->isTargetOpcode(), C->isOpaque());
      case ISD::ANY_EXTEND:
        // Some targets like RISCV prefer to sign extend some types.
        if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
          return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
                             C->isTargetOpcode(), C->isOpaque());
        return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
                           C->isTargetOpcode(), C->isOpaque());
      case ISD::ABS:
        return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::BITREVERSE:
        return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::BSWAP:
        return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::CTPOP:
        return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::CTLZ:
      case ISD::CTLZ_ZERO_UNDEF:
        return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::CTTZ:
      case ISD::CTTZ_ZERO_UNDEF:
        return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
                           C->isOpaque());
      case ISD::UINT_TO_FP:
      case ISD::SINT_TO_FP: {
        // Start from an all-zero value in the destination semantics and
        // convert the integer into it; the conversion status is ignored.
        APFloat FPV(VT.getFltSemantics(), APInt::getZero(VT.getSizeInBits()));
        (void)FPV.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP,
                                   APFloat::rmNearestTiesToEven);
        return getConstantFP(FPV, DL, VT);
      }
      case ISD::FP16_TO_FP:
      case ISD::BF16_TO_FP: {
        bool Ignored;
        // Only the low 16 bits of the integer constant carry the
        // half/bfloat payload.
        APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
                                              : APFloat::BFloat(),
                    (Val.getBitWidth() == 16) ? Val : Val.trunc(16));

        // This can return overflow, underflow, or inexact; we don't care.
        // FIXME need to be more flexible about rounding mode.
        (void)FPV.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven,
                          &Ignored);
        return getConstantFP(FPV, DL, VT);
      }
      case ISD::STEP_VECTOR:
        if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
          return V;
        break;
      case ISD::BITCAST:
        // Integer -> FP bitcasts are folded only for these same-width pairs.
        if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
          return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
        if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
          return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
        if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
          return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
        if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
          return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
        break;
      }
    }

    // Constant fold unary operations with a floating point constant operand.
    if (auto *C = dyn_cast<ConstantFPSDNode>(N1)) {
      APFloat V = C->getValueAPF(); // make copy
      switch (Opcode) {
      case ISD::FNEG:
        V.changeSign();
        return getConstantFP(V, DL, VT);
      case ISD::FABS:
        V.clearSign();
        return getConstantFP(V, DL, VT);
      // The three rounding folds only succeed when roundToIntegral reports
      // opOK or opInexact; any other status abandons the fold.
      case ISD::FCEIL: {
        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
        if (fs == APFloat::opOK || fs == APFloat::opInexact)
          return getConstantFP(V, DL, VT);
        return SDValue();
      }
      case ISD::FTRUNC: {
        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
        if (fs == APFloat::opOK || fs == APFloat::opInexact)
          return getConstantFP(V, DL, VT);
        return SDValue();
      }
      case ISD::FFLOOR: {
        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
        if (fs == APFloat::opOK || fs == APFloat::opInexact)
          return getConstantFP(V, DL, VT);
        return SDValue();
      }
      case ISD::FP_EXTEND: {
        bool ignored;
        // This can return overflow, underflow, or inexact; we don't care.
        // FIXME need to be more flexible about rounding mode.
        (void)V.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven,
                        &ignored);
        return getConstantFP(V, DL, VT);
      }
      case ISD::FP_TO_SINT:
      case ISD::FP_TO_UINT: {
        bool ignored;
        APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
        // FIXME need to be more flexible about rounding mode.
        APFloat::opStatus s =
            V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
        if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
          break;
        return getConstant(IntVal, DL, VT);
      }
      case ISD::FP_TO_FP16:
      case ISD::FP_TO_BF16: {
        bool Ignored;
        // This can return overflow, underflow, or inexact; we don't care.
        // FIXME need to be more flexible about rounding mode.
        (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
                                                  : APFloat::BFloat(),
                        APFloat::rmNearestTiesToEven, &Ignored);
        return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
      }
      case ISD::BITCAST:
        // FP -> integer bitcasts are folded only for these same-width pairs.
        if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
          return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
                             VT);
        if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
          return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
                             VT);
        if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
          return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL,
                             VT);
        if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
          return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
        break;
      }
    }

    // Early-out if we failed to constant fold a bitcast.
    if (Opcode == ISD::BITCAST)
      return SDValue();
  }

  // Handle binops special cases.
  if (NumOps == 2) {
    if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops))
      return CFP;

    if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
      if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {
        // Opaque constants are never combined with another constant.
        if (C1->isOpaque() || C2->isOpaque())
          return SDValue();

        std::optional<APInt> FoldAttempt =
            FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
        if (!FoldAttempt)
          return SDValue();

        SDValue Folded = getConstant(*FoldAttempt, DL, VT);
        assert((!Folded || !VT.isVector()) &&
               "Can't fold vectors ops with scalar operands");
        return Folded;
      }
    }

    // fold (add Sym, c) -> Sym+c
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0]))
      return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode());
    if (TLI->isCommutativeBinOp(Opcode))
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1]))
        return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());

    // fold (sext_in_reg c1) -> c2
    if (Opcode == ISD::SIGN_EXTEND_INREG) {
      // NOTE(review): this local is named `EVT` and shadows the type of the
      // same name for the rest of this scope, which is why the type has to be
      // spelled `llvm::EVT` below. Consider renaming the variable.
      EVT EVT = cast<VTSDNode>(Ops[1])->getVT();

      // Sign-extend the low FromBits bits of Val across its full width by
      // shifting them to the top and arithmetic-shifting them back down.
      auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) {
        unsigned FromBits = EVT.getScalarSizeInBits();
        Val <<= Val.getBitWidth() - FromBits;
        Val.ashrInPlace(Val.getBitWidth() - FromBits);
        return getConstant(Val, DL, ConstantVT);
      };

      if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
        const APInt &Val = C1->getAPIntValue();
        return SignExtendInReg(Val, VT);
      }

      if (ISD::isBuildVectorOfConstantSDNodes(Ops[0].getNode())) {
        SmallVector<SDValue, 8> ScalarOps;
        // The type of the first build-vector operand is used for every
        // rebuilt element, including undef ones.
        llvm::EVT OpVT = Ops[0].getOperand(0).getValueType();
        for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) {
          SDValue Op = Ops[0].getOperand(I);
          if (Op.isUndef()) {
            ScalarOps.push_back(getUNDEF(OpVT));
            continue;
          }
          const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
          ScalarOps.push_back(SignExtendInReg(Val, OpVT));
        }
        return getBuildVector(VT, DL, ScalarOps);
      }

      if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR &&
          isa<ConstantSDNode>(Ops[0].getOperand(0)))
        return getNode(ISD::SPLAT_VECTOR, DL, VT,
                       SignExtendInReg(Ops[0].getConstantOperandAPInt(0),
                                       Ops[0].getOperand(0).getValueType()));
    }
  }

  // This is for vector folding only from here on.
  if (!VT.isVector())
    return SDValue();

  ElementCount NumElts = VT.getVectorElementCount();

  // See if we can fold through any bitcasted integer ops.
  if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
      Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
      (Ops[0].getOpcode() == ISD::BITCAST ||
       Ops[1].getOpcode() == ISD::BITCAST)) {
    SDValue N1 = peekThroughBitcasts(Ops[0]);
    SDValue N2 = peekThroughBitcasts(Ops[1]);
    auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
    auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
    if (BV1 && BV2 && N1.getValueType().isInteger() &&
        N2.getValueType().isInteger()) {
      bool IsLE = getDataLayout().isLittleEndian();
      unsigned EltBits = VT.getScalarSizeInBits();
      SmallVector<APInt> RawBits1, RawBits2;
      BitVector UndefElts1, UndefElts2;
      // Re-read both build vectors as constants with VT's element width.
      if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
          BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2)) {
        SmallVector<APInt> RawBits;
        for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
          std::optional<APInt> Fold = FoldValueWithUndef(
              Opcode, RawBits1[I], UndefElts1[I], RawBits2[I], UndefElts2[I]);
          if (!Fold)
            break;
          RawBits.push_back(*Fold);
        }
        // The loop stops at the first lane that cannot be folded, so a full
        // size here means every lane folded.
        if (RawBits.size() == NumElts.getFixedValue()) {
          // We have constant folded, but we might need to cast this again back
          // to the original (possibly legalized) type.
          EVT BVVT, BVEltVT;
          if (N1.getValueType() == VT) {
            BVVT = N1.getValueType();
            BVEltVT = BV1->getOperand(0).getValueType();
          } else {
            BVVT = N2.getValueType();
            BVEltVT = BV2->getOperand(0).getValueType();
          }
          unsigned BVEltBits = BVEltVT.getSizeInBits();
          SmallVector<APInt> DstBits;
          BitVector DstUndefs;
          BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(),
                                           DstBits, RawBits, DstUndefs,
                                           BitVector(RawBits.size(), false));
          // NOTE(review): this local `Ops` shadows the function parameter of
          // the same name within this scope.
          SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT));
          for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
            if (DstUndefs[I])
              continue;
            Ops[I] = getConstant(DstBits[I].sext(BVEltBits), DL, BVEltVT);
          }
          return getBitcast(VT, getBuildVector(BVVT, DL, Ops));
        }
      }
    }
  }

  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
  //      (shl step_vector(C0), C1) -> (step_vector(C0 << C1))
  if ((Opcode == ISD::MUL || Opcode == ISD::SHL) &&
      Ops[0].getOpcode() == ISD::STEP_VECTOR) {
    APInt RHSVal;
    if (ISD::isConstantSplatVector(Ops[1].getNode(), RHSVal)) {
      APInt NewStep = Opcode == ISD::MUL
                          ? Ops[0].getConstantOperandAPInt(0) * RHSVal
                          : Ops[0].getConstantOperandAPInt(0) << RHSVal;
      return getStepVector(DL, VT, NewStep);
    }
  }

  auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
    return !Op.getValueType().isVector() ||
           Op.getValueType().getVectorElementCount() == NumElts;
  };

  // Besides undef, build vectors and splat vectors, a CONDCODE operand is
  // accepted here as well.
  auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
    return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
           Op.getOpcode() == ISD::BUILD_VECTOR ||
           Op.getOpcode() == ISD::SPLAT_VECTOR;
  };

  // All operands must be vector types with the same number of elements as
  // the result type and must be either UNDEF or a build/splat vector
  // or UNDEF scalars.
  if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||
      !llvm::all_of(Ops, IsScalarOrSameVectorSize))
    return SDValue();

  // If we are comparing vectors, then the result needs to be a i1 boolean that
  // is then extended back to the legal result type depending on how booleans
  // are represented.
  EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
  ISD::NodeType ExtendCode =
      (Opcode == ISD::SETCC && SVT != VT.getScalarType())
          ? TargetLowering::getExtendForContent(TLI->getBooleanContents(VT))
          : ISD::SIGN_EXTEND;

  // Find legal integer scalar type for constant promotion and
  // ensure that its scalar size is at least as large as source.
  EVT LegalSVT = VT.getScalarType();
  if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
    LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
    if (LegalSVT.bitsLT(VT.getScalarType()))
      return SDValue();
  }

  // For scalable vector types we know we're dealing with SPLAT_VECTORs. We
  // only have one operand to check. For fixed-length vector types we may have
  // a combination of BUILD_VECTOR and SPLAT_VECTOR.
  unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();

  // Constant fold each scalar lane separately.
  SmallVector<SDValue, 4> ScalarResults;
  for (unsigned I = 0; I != NumVectorElts; I++) {
    SmallVector<SDValue, 4> ScalarOps;
    for (SDValue Op : Ops) {
      EVT InSVT = Op.getValueType().getScalarType();
      // Operands that are neither build nor splat vectors are passed through
      // unchanged, except that undef is re-created with the scalar type.
      if (Op.getOpcode() != ISD::BUILD_VECTOR &&
          Op.getOpcode() != ISD::SPLAT_VECTOR) {
        if (Op.isUndef())
          ScalarOps.push_back(getUNDEF(InSVT));
        else
          ScalarOps.push_back(Op);
        continue;
      }

      // A splat has a single scalar operand shared by all lanes; a build
      // vector has one operand per lane.
      SDValue ScalarOp =
          Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I);
      EVT ScalarVT = ScalarOp.getValueType();

      // Build vector (integer) scalar operands may need implicit
      // truncation - do this before constant folding.
      if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) {
        // Don't create illegally-typed nodes unless they're constants or undef
        // - if we fail to constant fold we can't guarantee the (dead) nodes
        // we're creating will be cleaned up before being visited for
        // legalization.
        if (NewNodesMustHaveLegalTypes && !ScalarOp.isUndef() &&
            !isa<ConstantSDNode>(ScalarOp) &&
            TLI->getTypeAction(*getContext(), InSVT) !=
                TargetLowering::TypeLegal)
          return SDValue();
        ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
      }

      ScalarOps.push_back(ScalarOp);
    }

    // Constant fold the scalar operands.
    SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);

    // Scalar folding only succeeded if the result is a constant or UNDEF.
    if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
        ScalarResult.getOpcode() != ISD::ConstantFP)
      return SDValue();

    // Legalize the (integer) scalar constant if necessary. We only do
    // this once we know the folding succeeded, since otherwise we would
    // get a node with illegal type which has a user.
    if (LegalSVT != SVT)
      ScalarResult = getNode(ExtendCode, DL, LegalSVT, ScalarResult);

    ScalarResults.push_back(ScalarResult);
  }

  SDValue V = NumElts.isScalable() ? getSplatVector(VT, DL, ScalarResults[0])
                                   : getBuildVector(VT, DL, ScalarResults);
  NewSDValueDbgMsg(V, "New node fold constant vector: ", this);
  return V;
}

SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
                                         EVT VT, ArrayRef<SDValue> Ops) {
  // TODO: Add support for unary/ternary fp opcodes.
7214 if (Ops.size() != 2) 7215 return SDValue(); 7216 7217 // TODO: We don't do any constant folding for strict FP opcodes here, but we 7218 // should. That will require dealing with a potentially non-default 7219 // rounding mode, checking the "opStatus" return value from the APFloat 7220 // math calculations, and possibly other variations. 7221 SDValue N1 = Ops[0]; 7222 SDValue N2 = Ops[1]; 7223 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false); 7224 ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false); 7225 if (N1CFP && N2CFP) { 7226 APFloat C1 = N1CFP->getValueAPF(); // make copy 7227 const APFloat &C2 = N2CFP->getValueAPF(); 7228 switch (Opcode) { 7229 case ISD::FADD: 7230 C1.add(C2, APFloat::rmNearestTiesToEven); 7231 return getConstantFP(C1, DL, VT); 7232 case ISD::FSUB: 7233 C1.subtract(C2, APFloat::rmNearestTiesToEven); 7234 return getConstantFP(C1, DL, VT); 7235 case ISD::FMUL: 7236 C1.multiply(C2, APFloat::rmNearestTiesToEven); 7237 return getConstantFP(C1, DL, VT); 7238 case ISD::FDIV: 7239 C1.divide(C2, APFloat::rmNearestTiesToEven); 7240 return getConstantFP(C1, DL, VT); 7241 case ISD::FREM: 7242 C1.mod(C2); 7243 return getConstantFP(C1, DL, VT); 7244 case ISD::FCOPYSIGN: 7245 C1.copySign(C2); 7246 return getConstantFP(C1, DL, VT); 7247 case ISD::FMINNUM: 7248 return getConstantFP(minnum(C1, C2), DL, VT); 7249 case ISD::FMAXNUM: 7250 return getConstantFP(maxnum(C1, C2), DL, VT); 7251 case ISD::FMINIMUM: 7252 return getConstantFP(minimum(C1, C2), DL, VT); 7253 case ISD::FMAXIMUM: 7254 return getConstantFP(maximum(C1, C2), DL, VT); 7255 case ISD::FMINIMUMNUM: 7256 return getConstantFP(minimumnum(C1, C2), DL, VT); 7257 case ISD::FMAXIMUMNUM: 7258 return getConstantFP(maximumnum(C1, C2), DL, VT); 7259 default: break; 7260 } 7261 } 7262 if (N1CFP && Opcode == ISD::FP_ROUND) { 7263 APFloat C1 = N1CFP->getValueAPF(); // make copy 7264 bool Unused; 7265 // This can return overflow, underflow, or inexact; we don't 
care. 7266 // FIXME need to be more flexible about rounding mode. 7267 (void)C1.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven, 7268 &Unused); 7269 return getConstantFP(C1, DL, VT); 7270 } 7271 7272 switch (Opcode) { 7273 case ISD::FSUB: 7274 // -0.0 - undef --> undef (consistent with "fneg undef") 7275 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true)) 7276 if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef()) 7277 return getUNDEF(VT); 7278 [[fallthrough]]; 7279 7280 case ISD::FADD: 7281 case ISD::FMUL: 7282 case ISD::FDIV: 7283 case ISD::FREM: 7284 // If both operands are undef, the result is undef. If 1 operand is undef, 7285 // the result is NaN. This should match the behavior of the IR optimizer. 7286 if (N1.isUndef() && N2.isUndef()) 7287 return getUNDEF(VT); 7288 if (N1.isUndef() || N2.isUndef()) 7289 return getConstantFP(APFloat::getNaN(VT.getFltSemantics()), DL, VT); 7290 } 7291 return SDValue(); 7292 } 7293 7294 SDValue SelectionDAG::FoldConstantBuildVector(BuildVectorSDNode *BV, 7295 const SDLoc &DL, EVT DstEltVT) { 7296 EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); 7297 7298 // If this is already the right type, we're done. 7299 if (SrcEltVT == DstEltVT) 7300 return SDValue(BV, 0); 7301 7302 unsigned SrcBitSize = SrcEltVT.getSizeInBits(); 7303 unsigned DstBitSize = DstEltVT.getSizeInBits(); 7304 7305 // If this is a conversion of N elements of one type to N elements of another 7306 // type, convert each element. This handles FP<->INT cases. 7307 if (SrcBitSize == DstBitSize) { 7308 SmallVector<SDValue, 8> Ops; 7309 for (SDValue Op : BV->op_values()) { 7310 // If the vector element type is not legal, the BUILD_VECTOR operands 7311 // are promoted and implicitly truncated. Make that explicit here. 
7312 if (Op.getValueType() != SrcEltVT) 7313 Op = getNode(ISD::TRUNCATE, DL, SrcEltVT, Op); 7314 Ops.push_back(getBitcast(DstEltVT, Op)); 7315 } 7316 EVT VT = EVT::getVectorVT(*getContext(), DstEltVT, 7317 BV->getValueType(0).getVectorNumElements()); 7318 return getBuildVector(VT, DL, Ops); 7319 } 7320 7321 // Otherwise, we're growing or shrinking the elements. To avoid having to 7322 // handle annoying details of growing/shrinking FP values, we convert them to 7323 // int first. 7324 if (SrcEltVT.isFloatingPoint()) { 7325 // Convert the input float vector to a int vector where the elements are the 7326 // same sizes. 7327 EVT IntEltVT = EVT::getIntegerVT(*getContext(), SrcEltVT.getSizeInBits()); 7328 if (SDValue Tmp = FoldConstantBuildVector(BV, DL, IntEltVT)) 7329 return FoldConstantBuildVector(cast<BuildVectorSDNode>(Tmp), DL, 7330 DstEltVT); 7331 return SDValue(); 7332 } 7333 7334 // Now we know the input is an integer vector. If the output is a FP type, 7335 // convert to integer first, then to FP of the right size. 7336 if (DstEltVT.isFloatingPoint()) { 7337 EVT IntEltVT = EVT::getIntegerVT(*getContext(), DstEltVT.getSizeInBits()); 7338 if (SDValue Tmp = FoldConstantBuildVector(BV, DL, IntEltVT)) 7339 return FoldConstantBuildVector(cast<BuildVectorSDNode>(Tmp), DL, 7340 DstEltVT); 7341 return SDValue(); 7342 } 7343 7344 // Okay, we know the src/dst types are both integers of differing types. 7345 assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); 7346 7347 // Extract the constant raw bit data. 
7348 BitVector UndefElements; 7349 SmallVector<APInt> RawBits; 7350 bool IsLE = getDataLayout().isLittleEndian(); 7351 if (!BV->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements)) 7352 return SDValue(); 7353 7354 SmallVector<SDValue, 8> Ops; 7355 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) { 7356 if (UndefElements[I]) 7357 Ops.push_back(getUNDEF(DstEltVT)); 7358 else 7359 Ops.push_back(getConstant(RawBits[I], DL, DstEltVT)); 7360 } 7361 7362 EVT VT = EVT::getVectorVT(*getContext(), DstEltVT, Ops.size()); 7363 return getBuildVector(VT, DL, Ops); 7364 } 7365 7366 SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) { 7367 assert(Val.getValueType().isInteger() && "Invalid AssertAlign!"); 7368 7369 // There's no need to assert on a byte-aligned pointer. All pointers are at 7370 // least byte aligned. 7371 if (A == Align(1)) 7372 return Val; 7373 7374 SDVTList VTs = getVTList(Val.getValueType()); 7375 FoldingSetNodeID ID; 7376 AddNodeIDNode(ID, ISD::AssertAlign, VTs, {Val}); 7377 ID.AddInteger(A.value()); 7378 7379 void *IP = nullptr; 7380 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) 7381 return SDValue(E, 0); 7382 7383 auto *N = 7384 newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, A); 7385 createOperands(N, {Val}); 7386 7387 CSEMap.InsertNode(N, IP); 7388 InsertNode(N); 7389 7390 SDValue V(N, 0); 7391 NewSDValueDbgMsg(V, "Creating new node: ", this); 7392 return V; 7393 } 7394 7395 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 7396 SDValue N1, SDValue N2) { 7397 SDNodeFlags Flags; 7398 if (Inserter) 7399 Flags = Inserter->getFlags(); 7400 return getNode(Opcode, DL, VT, N1, N2, Flags); 7401 } 7402 7403 void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1, 7404 SDValue &N2) const { 7405 if (!TLI->isCommutativeBinOp(Opcode)) 7406 return; 7407 7408 // Canonicalize: 7409 // binop(const, nonconst) -> binop(nonconst, const) 7410 bool N1C = 
isConstantIntBuildVectorOrConstantInt(N1);
  bool N2C = isConstantIntBuildVectorOrConstantInt(N2);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  bool N2CFP = isConstantFPBuildVectorOrConstantFP(N2);
  if ((N1C && !N2C) || (N1CFP && !N2CFP))
    std::swap(N1, N2);

  // Canonicalize:
  //  binop(splat(x), step_vector) -> binop(step_vector, splat(x))
  else if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
           N2.getOpcode() == ISD::STEP_VECTOR)
    std::swap(N1, N2);
}

// Get or create a two-operand node for Opcode with result type VT.
//
// Before creating anything this function:
//   1. canonicalizes the operand order of commutative opcodes,
//   2. checks operand/result types and applies the opcode-specific
//      simplifications in the switch below,
//   3. attempts constant folding through FoldConstantArithmetic, and
//   4. folds undef/poison operands for a fixed set of opcodes.
// Only when none of these yields a value is a node looked up in, or added to,
// the CSE map. The returned value may therefore be one of the operands, a
// constant, UNDEF/POISON, or a node with a different opcode than requested.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              SDValue N1, SDValue N2, const SDNodeFlags Flags) {
  assert(N1.getOpcode() != ISD::DELETED_NODE &&
         N2.getOpcode() != ISD::DELETED_NODE &&
         "Operand is DELETED_NODE!");

  // May swap N1 and N2; everything below sees the canonical order.
  canonicalizeCommutativeBinop(Opcode, N1, N2);

  // Non-null only when the operand itself is a scalar constant node.
  auto *N1C = dyn_cast<ConstantSDNode>(N1);
  auto *N2C = dyn_cast<ConstantSDNode>(N2);

  // Don't allow undefs in vector splats - we might be returning N2 when folding
  // to zero etc.
  ConstantSDNode *N2CV =
      isConstOrConstSplat(N2, /*AllowUndefs*/ false, /*AllowTruncation*/ true);

  switch (Opcode) {
  default: break;
  case ISD::TokenFactor:
    assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
           N2.getValueType() == MVT::Other && "Invalid token factor!");
    // Fold trivial token factors.
    if (N1.getOpcode() == ISD::EntryToken) return N2;
    if (N2.getOpcode() == ISD::EntryToken) return N1;
    if (N1 == N2) return N1;
    break;
  case ISD::BUILD_VECTOR: {
    // Attempt to simplify BUILD_VECTOR.
    SDValue Ops[] = {N1, N2};
    if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
      return V;
    break;
  }
  case ISD::CONCAT_VECTORS: {
    SDValue Ops[] = {N1, N2};
    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
      return V;
    break;
  }
  case ISD::AND:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    // (X & 0) -> 0.  This commonly occurs when legalizing i64 values, so it's
    // worth handling here.
    if (N2CV && N2CV->isZero())
      return N2;
    if (N2CV && N2CV->isAllOnes()) // X & -1 -> X
      return N1;
    break;
  case ISD::OR:
  case ISD::XOR:
  case ISD::ADD:
  case ISD::PTRADD:
  case ISD::SUB:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    // The equal operand types requirement is unnecessarily strong for PTRADD.
    // However, the SelectionDAGBuilder does not generate PTRADDs with different
    // operand types, and we'd need to re-implement GEP's non-standard wrapping
    // logic everywhere where PTRADDs may be folded or combined to properly
    // support them. If/when we introduce pointer types to the SDAG, we will
    // need to relax this constraint.

    // (X ^|+- 0) -> X.  This commonly occurs when legalizing i64 values, so
    // it's worth handling here.
    if (N2CV && N2CV->isZero())
      return N1;
    // On i1 (scalar or vector element), add and sub are both emitted as xor.
    if ((Opcode == ISD::ADD || Opcode == ISD::SUB) &&
        VT.getScalarType() == MVT::i1)
      return getNode(ISD::XOR, DL, VT, N1, N2);
    // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
    if (Opcode == ISD::ADD && N1.getOpcode() == ISD::VSCALE &&
        N2.getOpcode() == ISD::VSCALE) {
      const APInt &C1 = N1->getConstantOperandAPInt(0);
      const APInt &C2 = N2->getConstantOperandAPInt(0);
      return getVScale(DL, VT, C1 + C2);
    }
    break;
  case ISD::MUL:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    // On i1, mul is emitted as and.
    if (VT.getScalarType() == MVT::i1)
      return getNode(ISD::AND, DL, VT, N1, N2);
    // Fold (mul nsw (vscale * C0), C1) to (vscale * (C0 * C1)).
    if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
      const APInt &MulImm = N1->getConstantOperandAPInt(0);
      const APInt &N2CImm = N2C->getAPIntValue();
      return getVScale(DL, VT, MulImm * N2CImm);
    }
    break;
  case ISD::UDIV:
  case ISD::UREM:
  case ISD::MULHU:
  case ISD::MULHS:
  case ISD::SDIV:
  case ISD::SREM:
  case ISD::SADDSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::USUBSAT:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    if (VT.getScalarType() == MVT::i1) {
      // fold (add_sat x, y) -> (or x, y) for bool types.
      if (Opcode == ISD::SADDSAT || Opcode == ISD::UADDSAT)
        return getNode(ISD::OR, DL, VT, N1, N2);
      // fold (sub_sat x, y) -> (and x, ~y) for bool types.
      if (Opcode == ISD::SSUBSAT || Opcode == ISD::USUBSAT)
        return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT));
    }
    break;
  case ISD::SCMP:
  case ISD::UCMP:
    // Type checks only; no simplification is attempted for these opcodes here.
    assert(N1.getValueType() == N2.getValueType() &&
           "Types of operands of UCMP/SCMP must match");
    assert(N1.getValueType().isVector() == VT.isVector() &&
           "Operands and return type of must both be scalars or vectors");
    if (VT.isVector())
      assert(VT.getVectorElementCount() ==
                 N1.getValueType().getVectorElementCount() &&
             "Result and operands must have the same number of elements");
    break;
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    break;
  case ISD::ABDS:
  case ISD::ABDU:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    // On i1, absolute difference is emitted as xor.
    if (VT.getScalarType() == MVT::i1)
      return getNode(ISD::XOR, DL, VT, N1, N2);
    break;
  case ISD::SMIN:
  case ISD::UMAX:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    // On i1, smin and umax are emitted as or.
    if (VT.getScalarType() == MVT::i1)
      return getNode(ISD::OR, DL, VT, N1, N2);
    break;
  case ISD::SMAX:
  case ISD::UMIN:
    assert(VT.isInteger() && "This operator does not apply to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    // On i1, smax and umin are emitted as and.
    if (VT.getScalarType() == MVT::i1)
      return getNode(ISD::AND, DL, VT, N1, N2);
    break;
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
    assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    if (SDValue V = simplifyFPBinop(Opcode, N1, N2, Flags))
      return V;
    break;
  case ISD::FCOPYSIGN:   // N1 and result must match.  N1/N2 need not match.
    assert(N1.getValueType() == VT &&
           N1.getValueType().isFloatingPoint() &&
           N2.getValueType().isFloatingPoint() &&
           "Invalid FCOPYSIGN!");
    break;
  case ISD::SHL:
    // Fold (shl nsw (vscale * C0), C1) to (vscale * (C0 << C1)).
    if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
      const APInt &MulImm = N1->getConstantOperandAPInt(0);
      const APInt &ShiftImm = N2C->getAPIntValue();
      return getVScale(DL, VT, MulImm << ShiftImm);
    }
    // SHL shares the generic shift simplification and the checks below.
    [[fallthrough]];
  case ISD::SRA:
  case ISD::SRL:
    if (SDValue V = simplifyShift(N1, N2))
      return V;
    // Shifts share the type checks and trivial folds with rotates.
    [[fallthrough]];
  case ISD::ROTL:
  case ISD::ROTR:
    assert(VT == N1.getValueType() &&
           "Shift operators return type must be the same as their first arg");
    assert(VT.isInteger() && N2.getValueType().isInteger() &&
           "Shifts only work on integers");
    assert((!VT.isVector() || VT == N2.getValueType()) &&
           "Vector shift amounts must be in the same as their first arg");
    // Verify that the shift amount VT is big enough to hold valid shift
    // amounts.  This catches things like trying to shift an i1024 value by an
    // i8, which is easy to fall into in generic code that uses
    // TLI.getShiftAmount().
    assert(N2.getValueType().getScalarSizeInBits() >=
               Log2_32_Ceil(VT.getScalarSizeInBits()) &&
           "Invalid use of small shift amount with oversized value!");

    // Always fold shifts of i1 values so the code generator doesn't need to
    // handle them.  Since we know the size of the shift has to be less than the
    // size of the value, the shift/rotate count is guaranteed to be zero.
    if (VT == MVT::i1)
      return N1;
    if (N2CV && N2CV->isZero())
      return N1;
    break;
  case ISD::FP_ROUND:
    assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() &&
           VT.bitsLE(N1.getValueType()) && N2C &&
           (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) &&
           N2.getOpcode() == ISD::TargetConstant && "Invalid FP_ROUND!");
    if (N1.getValueType() == VT) return N1;  // noop conversion.
    break;
  case ISD::AssertNoFPClass: {
    // Validation only: N2 must be a constant holding an FPClassTest mask that
    // fits in the enum's bitmask range.
    assert(N1.getValueType().isFloatingPoint() &&
           "AssertNoFPClass is used for a non-floating type");
    assert(isa<ConstantSDNode>(N2) && "NoFPClass is not Constant");
    FPClassTest NoFPClass = static_cast<FPClassTest>(N2->getAsZExtVal());
    assert(llvm::to_underlying(NoFPClass) <=
               BitmaskEnumDetail::Mask<FPClassTest>() &&
           "FPClassTest value too large");
    (void)NoFPClass;
    break;
  }
  case ISD::AssertSext:
  case ISD::AssertZext: {
    EVT EVT = cast<VTSDNode>(N2)->getVT();
    assert(VT == N1.getValueType() && "Not an inreg extend!");
    assert(VT.isInteger() && EVT.isInteger() &&
           "Cannot *_EXTEND_INREG FP types");
    assert(!EVT.isVector() &&
           "AssertSExt/AssertZExt type should be the vector element type "
           "rather than the vector type!");
    assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
    if (VT.getScalarType() == EVT) return N1; // noop assertion.
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    EVT EVT = cast<VTSDNode>(N2)->getVT();
    assert(VT == N1.getValueType() && "Not an inreg extend!");
    assert(VT.isInteger() && EVT.isInteger() &&
           "Cannot *_EXTEND_INREG FP types");
    assert(EVT.isVector() == VT.isVector() &&
           "SIGN_EXTEND_INREG type should be vector iff the operand "
           "type is vector!");
    assert((!EVT.isVector() ||
            EVT.getVectorElementCount() == VT.getVectorElementCount()) &&
           "Vector element counts must match in SIGN_EXTEND_INREG");
    assert(EVT.bitsLE(VT) && "Not extending!");
    if (EVT == VT) return N1;  // Not actually extending
    break;
  }
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT: {
    // Type checks only; N2 is a VTSDNode naming the scalar type to saturate to.
    assert(VT.isInteger() && cast<VTSDNode>(N2)->getVT().isInteger() &&
           N1.getValueType().isFloatingPoint() && "Invalid FP_TO_*INT_SAT");
    assert(N1.getValueType().isVector() == VT.isVector() &&
           "FP_TO_*INT_SAT type should be vector iff the operand type is "
           "vector!");
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element counts must match in FP_TO_*INT_SAT");
    assert(!cast<VTSDNode>(N2)->getVT().isVector() &&
           "Type to saturate to must be a scalar.");
    assert(cast<VTSDNode>(N2)->getVT().bitsLE(VT.getScalarType()) &&
           "Not extending!");
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT:
    assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() &&
           "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \
             element type of the vector.");

    // Extract from an undefined value or using an undefined index is undefined.
    if (N1.isUndef() || N2.isUndef())
      return getUNDEF(VT);

    // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF for fixed length
    // vectors. For scalable vectors we will provide appropriate support for
    // dealing with arbitrary indices.
    if (N2C && N1.getValueType().isFixedLengthVector() &&
        N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
      return getUNDEF(VT);

    // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
    // expanding copies of large vectors from registers. This only works for
    // fixed length vectors, since we need to know the exact number of
    // elements.
    if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
        N1.getOperand(0).getValueType().isFixedLengthVector()) {
      // Index / Factor selects the concat operand, Index % Factor the element
      // within that operand.
      unsigned Factor = N1.getOperand(0).getValueType().getVectorNumElements();
      return getExtractVectorElt(DL, VT,
                                 N1.getOperand(N2C->getZExtValue() / Factor),
                                 N2C->getZExtValue() % Factor);
    }

    // EXTRACT_VECTOR_ELT of BUILD_VECTOR or SPLAT_VECTOR is often formed while
    // lowering is expanding large vector constants.
    if (N2C && (N1.getOpcode() == ISD::BUILD_VECTOR ||
                N1.getOpcode() == ISD::SPLAT_VECTOR)) {
      assert((N1.getOpcode() != ISD::BUILD_VECTOR ||
              N1.getValueType().isFixedLengthVector()) &&
             "BUILD_VECTOR used for scalable vectors");
      // A SPLAT_VECTOR has a single operand regardless of the index.
      unsigned Index =
          N1.getOpcode() == ISD::BUILD_VECTOR ? N2C->getZExtValue() : 0;
      SDValue Elt = N1.getOperand(Index);

      if (VT != Elt.getValueType())
        // If the vector element type is not legal, the BUILD_VECTOR operands
        // are promoted and implicitly truncated, and the result implicitly
        // extended. Make that explicit here.
        Elt = getAnyExtOrTrunc(Elt, DL, VT);

      return Elt;
    }

    // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
    // operations are lowered to scalars.
    if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
      // If the indices are the same, return the inserted element else
      // if the indices are known different, extract the element from
      // the original vector.
      SDValue N1Op2 = N1.getOperand(2);
      ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2);

      if (N1Op2C && N2C) {
        if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
          if (VT == N1.getOperand(1).getValueType())
            return N1.getOperand(1);
          if (VT.isFloatingPoint()) {
            assert(VT.getSizeInBits() > N1.getOperand(1).getValueType().getSizeInBits());
            return getFPExtendOrRound(N1.getOperand(1), DL, VT);
          }
          return getSExtOrTrunc(N1.getOperand(1), DL, VT);
        }
        return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
      }
    }

    // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed
    // when vector types are scalarized and v1iX is legal.
    // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx).
    // Here we are completely ignoring the extract element index (N2),
    // which is fine for fixed width vectors, since any index other than 0
    // is undefined anyway. However, this cannot be ignored for scalable
    // vectors - in theory we could support this, but we don't want to do this
    // without a profitability check.
    if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        N1.getValueType().isFixedLengthVector() &&
        N1.getValueType().getVectorNumElements() == 1) {
      return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),
                     N1.getOperand(1));
    }
    break;
  case ISD::EXTRACT_ELEMENT:
    assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
    assert(!N1.getValueType().isVector() && !VT.isVector() &&
           (N1.getValueType().isInteger() == VT.isInteger()) &&
           N1.getValueType() != VT &&
           "Wrong types for EXTRACT_ELEMENT!");

    // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
    // 64-bit integers into 32-bit parts.  Instead of building the extract of
    // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
    if (N1.getOpcode() == ISD::BUILD_PAIR)
      return N1.getOperand(N2C->getZExtValue());

    // EXTRACT_ELEMENT of a constant int is also very common.
    if (N1C) {
      // Take ElementSize bits of the constant starting at bit
      // ElementSize * index.
      unsigned ElementSize = VT.getSizeInBits();
      unsigned Shift = ElementSize * N2C->getZExtValue();
      const APInt &Val = N1C->getAPIntValue();
      return getConstant(Val.extractBits(ElementSize, Shift), DL, VT);
    }
    break;
  case ISD::EXTRACT_SUBVECTOR: {
    EVT N1VT = N1.getValueType();
    assert(VT.isVector() && N1VT.isVector() &&
           "Extract subvector VTs must be vectors!");
    assert(VT.getVectorElementType() == N1VT.getVectorElementType() &&
           "Extract subvector VTs must have the same element type!");
    assert((VT.isFixedLengthVector() || N1VT.isScalableVector()) &&
           "Cannot extract a scalable vector from a fixed length vector!");
    assert((VT.isScalableVector() != N1VT.isScalableVector() ||
            VT.getVectorMinNumElements() <= N1VT.getVectorMinNumElements()) &&
           "Extract subvector must be from larger vector to smaller vector!");
    assert(N2C && "Extract subvector index must be a constant");
    assert((VT.isScalableVector() != N1VT.isScalableVector() ||
            (VT.getVectorMinNumElements() + N2C->getZExtValue()) <=
                N1VT.getVectorMinNumElements()) &&
           "Extract subvector overflow!");
    assert(N2C->getAPIntValue().getBitWidth() ==
               TLI->getVectorIdxWidth(getDataLayout()) &&
           "Constant index for EXTRACT_SUBVECTOR has an invalid size");

    // Trivial extraction.
    if (VT == N1VT)
      return N1;

    // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
    if (N1.isUndef())
      return getUNDEF(VT);

    // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
    // the concat have the same type as the extract.
    if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
        VT == N1.getOperand(0).getValueType()) {
      unsigned Factor = VT.getVectorMinNumElements();
      return N1.getOperand(N2C->getZExtValue() / Factor);
    }

    // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
    // during shuffle legalization.
    if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
        VT == N1.getOperand(1).getValueType())
      return N1.getOperand(1);
    break;
  }
  }

  // Perform trivial constant folding.
  if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
    return SV;

  // Canonicalize an UNDEF to the RHS, even over a constant.
  if (N1.isUndef()) {
    if (TLI->isCommutativeBinOp(Opcode)) {
      // The RHS-undef folds below then handle the swapped operands.
      std::swap(N1, N2);
    } else {
      switch (Opcode) {
      case ISD::PTRADD:
      case ISD::SUB:
        // fold op(undef, arg2) -> undef, fold op(poison, arg2) -> poison.
        return N1.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT);
      case ISD::SIGN_EXTEND_INREG:
      case ISD::UDIV:
      case ISD::SDIV:
      case ISD::UREM:
      case ISD::SREM:
      case ISD::SSUBSAT:
      case ISD::USUBSAT:
        // fold op(undef, arg2) -> 0, fold op(poison, arg2) -> poison.
        return N1.getOpcode() == ISD::POISON ? getPOISON(VT)
                                             : getConstant(0, DL, VT);
      }
    }
  }

  // Fold a bunch of operators when the RHS is undef.
  if (N2.isUndef()) {
    switch (Opcode) {
    case ISD::XOR:
      if (N1.isUndef())
        // Handle undef ^ undef -> 0 special case. This is a common
        // idiom (misuse).
        return getConstant(0, DL, VT);
      [[fallthrough]];
    case ISD::ADD:
    case ISD::PTRADD:
    case ISD::SUB:
    case ISD::UDIV:
    case ISD::SDIV:
    case ISD::UREM:
    case ISD::SREM:
      // fold op(arg1, undef) -> undef, fold op(arg1, poison) -> poison.
      return N2.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT);
    case ISD::MUL:
    case ISD::AND:
    case ISD::SSUBSAT:
    case ISD::USUBSAT:
      // fold op(arg1, undef) -> 0, fold op(arg1, poison) -> poison.
      return N2.getOpcode() == ISD::POISON ? getPOISON(VT)
                                           : getConstant(0, DL, VT);
    case ISD::OR:
    case ISD::SADDSAT:
    case ISD::UADDSAT:
      // fold op(arg1, undef) -> an all-ones constant, fold op(arg1, poison) ->
      // poison.
      return N2.getOpcode() == ISD::POISON ? getPOISON(VT)
                                           : getAllOnesConstant(DL, VT);
    }
  }

  // Memoize this node if possible.
  // Nodes whose result type is MVT::Glue bypass the CSE map: a fresh node is
  // always created for them. Note that Flags are only applied on the CSE path.
  SDNode *N;
  SDVTList VTs = getVTList(VT);
  SDValue Ops[] = {N1, N2};
  if (VT != MVT::Glue) {
    FoldingSetNodeID ID;
    AddNodeIDNode(ID, Opcode, VTs, Ops);
    void *IP = nullptr;
    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
      // An equivalent node already exists: reuse it, intersecting its flags
      // with the ones requested here.
      E->intersectFlagsWith(Flags);
      return SDValue(E, 0);
    }

    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
    N->setFlags(Flags);
    createOperands(N, Ops);
    CSEMap.InsertNode(N, IP);
  } else {
    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
    createOperands(N, Ops);
  }

  InsertNode(N);
  SDValue V = SDValue(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

// Three-operand getNode without explicit flags: uses the flags supplied by the
// current Inserter when one is set, otherwise default-constructed flags.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              SDValue N1, SDValue N2, SDValue N3) {
  SDNodeFlags Flags;
  if (Inserter)
    Flags = Inserter->getFlags();
  return getNode(Opcode, DL, VT, N1, N2, N3, Flags);
}

SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              SDValue N1, SDValue N2, SDValue N3,
                              const SDNodeFlags Flags) {
  assert(N1.getOpcode() != ISD::DELETED_NODE &&
         N2.getOpcode() != ISD::DELETED_NODE &&
         N3.getOpcode() != ISD::DELETED_NODE &&
         "Operand is DELETED_NODE!");
  // Perform various simplifications.
7955 switch (Opcode) { 7956 case ISD::FMA: 7957 case ISD::FMAD: { 7958 assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); 7959 assert(N1.getValueType() == VT && N2.getValueType() == VT && 7960 N3.getValueType() == VT && "FMA types must match!"); 7961 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 7962 ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); 7963 ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); 7964 if (N1CFP && N2CFP && N3CFP) { 7965 APFloat V1 = N1CFP->getValueAPF(); 7966 const APFloat &V2 = N2CFP->getValueAPF(); 7967 const APFloat &V3 = N3CFP->getValueAPF(); 7968 if (Opcode == ISD::FMAD) { 7969 V1.multiply(V2, APFloat::rmNearestTiesToEven); 7970 V1.add(V3, APFloat::rmNearestTiesToEven); 7971 } else 7972 V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); 7973 return getConstantFP(V1, DL, VT); 7974 } 7975 break; 7976 } 7977 case ISD::BUILD_VECTOR: { 7978 // Attempt to simplify BUILD_VECTOR. 7979 SDValue Ops[] = {N1, N2, N3}; 7980 if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) 7981 return V; 7982 break; 7983 } 7984 case ISD::CONCAT_VECTORS: { 7985 SDValue Ops[] = {N1, N2, N3}; 7986 if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) 7987 return V; 7988 break; 7989 } 7990 case ISD::SETCC: { 7991 assert(VT.isInteger() && "SETCC result type must be an integer!"); 7992 assert(N1.getValueType() == N2.getValueType() && 7993 "SETCC operands must have the same type!"); 7994 assert(VT.isVector() == N1.getValueType().isVector() && 7995 "SETCC type should be vector iff the operand type is vector!"); 7996 assert((!VT.isVector() || VT.getVectorElementCount() == 7997 N1.getValueType().getVectorElementCount()) && 7998 "SETCC vector element counts must match!"); 7999 // Use FoldSetCC to simplify SETCC's. 8000 if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) 8001 return V; 8002 // Vector constant folding. 
8003 SDValue Ops[] = {N1, N2, N3}; 8004 if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) { 8005 NewSDValueDbgMsg(V, "New node vector constant folding: ", this); 8006 return V; 8007 } 8008 break; 8009 } 8010 case ISD::SELECT: 8011 case ISD::VSELECT: 8012 if (SDValue V = simplifySelect(N1, N2, N3)) 8013 return V; 8014 break; 8015 case ISD::VECTOR_SHUFFLE: 8016 llvm_unreachable("should use getVectorShuffle constructor!"); 8017 case ISD::VECTOR_SPLICE: { 8018 if (cast<ConstantSDNode>(N3)->isZero()) 8019 return N1; 8020 break; 8021 } 8022 case ISD::INSERT_VECTOR_ELT: { 8023 assert(VT.isVector() && VT == N1.getValueType() && 8024 "INSERT_VECTOR_ELT vector type mismatch"); 8025 assert(VT.isFloatingPoint() == N2.getValueType().isFloatingPoint() && 8026 "INSERT_VECTOR_ELT scalar fp/int mismatch"); 8027 assert((!VT.isFloatingPoint() || 8028 VT.getVectorElementType() == N2.getValueType()) && 8029 "INSERT_VECTOR_ELT fp scalar type mismatch"); 8030 assert((!VT.isInteger() || 8031 VT.getScalarSizeInBits() <= N2.getScalarValueSizeInBits()) && 8032 "INSERT_VECTOR_ELT int scalar size mismatch"); 8033 8034 auto *N3C = dyn_cast<ConstantSDNode>(N3); 8035 // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except 8036 // for scalable vectors where we will generate appropriate code to 8037 // deal with out-of-bounds cases correctly. 8038 if (N3C && N1.getValueType().isFixedLengthVector() && 8039 N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) 8040 return getUNDEF(VT); 8041 8042 // Undefined index can be assumed out-of-bounds, so that's UNDEF too. 8043 if (N3.isUndef()) 8044 return getUNDEF(VT); 8045 8046 // If the inserted element is an UNDEF, just use the input vector. 8047 if (N2.isUndef()) 8048 return N1; 8049 8050 break; 8051 } 8052 case ISD::INSERT_SUBVECTOR: { 8053 // Inserting undef into undef is still undef. 
8054 if (N1.isUndef() && N2.isUndef()) 8055 return getUNDEF(VT); 8056 8057 EVT N2VT = N2.getValueType(); 8058 assert(VT == N1.getValueType() && 8059 "Dest and insert subvector source types must match!"); 8060 assert(VT.isVector() && N2VT.isVector() && 8061 "Insert subvector VTs must be vectors!"); 8062 assert(VT.getVectorElementType() == N2VT.getVectorElementType() && 8063 "Insert subvector VTs must have the same element type!"); 8064 assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) && 8065 "Cannot insert a scalable vector into a fixed length vector!"); 8066 assert((VT.isScalableVector() != N2VT.isScalableVector() || 8067 VT.getVectorMinNumElements() >= N2VT.getVectorMinNumElements()) && 8068 "Insert subvector must be from smaller vector to larger vector!"); 8069 assert(isa<ConstantSDNode>(N3) && 8070 "Insert subvector index must be constant"); 8071 assert((VT.isScalableVector() != N2VT.isScalableVector() || 8072 (N2VT.getVectorMinNumElements() + N3->getAsZExtVal()) <= 8073 VT.getVectorMinNumElements()) && 8074 "Insert subvector overflow!"); 8075 assert(N3->getAsAPIntVal().getBitWidth() == 8076 TLI->getVectorIdxWidth(getDataLayout()) && 8077 "Constant index for INSERT_SUBVECTOR has an invalid size"); 8078 8079 // Trivial insertion. 8080 if (VT == N2VT) 8081 return N2; 8082 8083 // If this is an insert of an extracted vector into an undef vector, we 8084 // can just use the input to the extract. 8085 if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR && 8086 N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) 8087 return N2.getOperand(0); 8088 break; 8089 } 8090 case ISD::BITCAST: 8091 // Fold bit_convert nodes from a type to themselves. 8092 if (N1.getValueType() == VT) 8093 return N1; 8094 break; 8095 case ISD::VP_TRUNCATE: 8096 case ISD::VP_SIGN_EXTEND: 8097 case ISD::VP_ZERO_EXTEND: 8098 // Don't create noop casts. 
8099 if (N1.getValueType() == VT) 8100 return N1; 8101 break; 8102 case ISD::VECTOR_COMPRESS: { 8103 [[maybe_unused]] EVT VecVT = N1.getValueType(); 8104 [[maybe_unused]] EVT MaskVT = N2.getValueType(); 8105 [[maybe_unused]] EVT PassthruVT = N3.getValueType(); 8106 assert(VT == VecVT && "Vector and result type don't match."); 8107 assert(VecVT.isVector() && MaskVT.isVector() && PassthruVT.isVector() && 8108 "All inputs must be vectors."); 8109 assert(VecVT == PassthruVT && "Vector and passthru types don't match."); 8110 assert(VecVT.getVectorElementCount() == MaskVT.getVectorElementCount() && 8111 "Vector and mask must have same number of elements."); 8112 8113 if (N1.isUndef() || N2.isUndef()) 8114 return N3; 8115 8116 break; 8117 } 8118 case ISD::PARTIAL_REDUCE_UMLA: 8119 case ISD::PARTIAL_REDUCE_SMLA: 8120 case ISD::PARTIAL_REDUCE_SUMLA: { 8121 [[maybe_unused]] EVT AccVT = N1.getValueType(); 8122 [[maybe_unused]] EVT Input1VT = N2.getValueType(); 8123 [[maybe_unused]] EVT Input2VT = N3.getValueType(); 8124 assert(Input1VT.isVector() && Input1VT == Input2VT && 8125 "Expected the second and third operands of the PARTIAL_REDUCE_MLA " 8126 "node to have the same type!"); 8127 assert(VT.isVector() && VT == AccVT && 8128 "Expected the first operand of the PARTIAL_REDUCE_MLA node to have " 8129 "the same type as its result!"); 8130 assert(Input1VT.getVectorElementCount().hasKnownScalarFactor( 8131 AccVT.getVectorElementCount()) && 8132 "Expected the element count of the second and third operands of the " 8133 "PARTIAL_REDUCE_MLA node to be a positive integer multiple of the " 8134 "element count of the first operand and the result!"); 8135 assert(N2.getScalarValueSizeInBits() <= N1.getScalarValueSizeInBits() && 8136 "Expected the second and third operands of the PARTIAL_REDUCE_MLA " 8137 "node to have an element type which is the same as or smaller than " 8138 "the element type of the first operand and result!"); 8139 break; 8140 } 8141 } 8142 8143 // Memoize node if 
it doesn't produce a glue result. 8144 SDNode *N; 8145 SDVTList VTs = getVTList(VT); 8146 SDValue Ops[] = {N1, N2, N3}; 8147 if (VT != MVT::Glue) { 8148 FoldingSetNodeID ID; 8149 AddNodeIDNode(ID, Opcode, VTs, Ops); 8150 void *IP = nullptr; 8151 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 8152 E->intersectFlagsWith(Flags); 8153 return SDValue(E, 0); 8154 } 8155 8156 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 8157 N->setFlags(Flags); 8158 createOperands(N, Ops); 8159 CSEMap.InsertNode(N, IP); 8160 } else { 8161 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 8162 createOperands(N, Ops); 8163 } 8164 8165 InsertNode(N); 8166 SDValue V = SDValue(N, 0); 8167 NewSDValueDbgMsg(V, "Creating new node: ", this); 8168 return V; 8169 } 8170 8171 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 8172 SDValue N1, SDValue N2, SDValue N3, SDValue N4, 8173 const SDNodeFlags Flags) { 8174 SDValue Ops[] = { N1, N2, N3, N4 }; 8175 return getNode(Opcode, DL, VT, Ops, Flags); 8176 } 8177 8178 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 8179 SDValue N1, SDValue N2, SDValue N3, SDValue N4) { 8180 SDNodeFlags Flags; 8181 if (Inserter) 8182 Flags = Inserter->getFlags(); 8183 return getNode(Opcode, DL, VT, N1, N2, N3, N4, Flags); 8184 } 8185 8186 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 8187 SDValue N1, SDValue N2, SDValue N3, SDValue N4, 8188 SDValue N5, const SDNodeFlags Flags) { 8189 SDValue Ops[] = { N1, N2, N3, N4, N5 }; 8190 return getNode(Opcode, DL, VT, Ops, Flags); 8191 } 8192 8193 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 8194 SDValue N1, SDValue N2, SDValue N3, SDValue N4, 8195 SDValue N5) { 8196 SDNodeFlags Flags; 8197 if (Inserter) 8198 Flags = Inserter->getFlags(); 8199 return getNode(Opcode, DL, VT, N1, N2, N3, N4, N5, Flags); 8200 } 8201 8202 /// getStackArgumentTokenFactor - Compute a TokenFactor to force all 
8203 /// the incoming stack arguments to be loaded from the stack. 8204 SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { 8205 SmallVector<SDValue, 8> ArgChains; 8206 8207 // Include the original chain at the beginning of the list. When this is 8208 // used by target LowerCall hooks, this helps legalize find the 8209 // CALLSEQ_BEGIN node. 8210 ArgChains.push_back(Chain); 8211 8212 // Add a chain value for each stack argument. 8213 for (SDNode *U : getEntryNode().getNode()->users()) 8214 if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) 8215 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) 8216 if (FI->getIndex() < 0) 8217 ArgChains.push_back(SDValue(L, 1)); 8218 8219 // Build a tokenfactor for all the chains. 8220 return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); 8221 } 8222 8223 /// getMemsetValue - Vectorized representation of the memset value 8224 /// operand. 8225 static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, 8226 const SDLoc &dl) { 8227 assert(!Value.isUndef()); 8228 8229 unsigned NumBits = VT.getScalarSizeInBits(); 8230 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { 8231 assert(C->getAPIntValue().getBitWidth() == 8); 8232 APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); 8233 if (VT.isInteger()) { 8234 bool IsOpaque = VT.getSizeInBits() > 64 || 8235 !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue()); 8236 return DAG.getConstant(Val, dl, VT, false, IsOpaque); 8237 } 8238 return DAG.getConstantFP(APFloat(VT.getFltSemantics(), Val), dl, VT); 8239 } 8240 8241 assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); 8242 EVT IntVT = VT.getScalarType(); 8243 if (!IntVT.isInteger()) 8244 IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits()); 8245 8246 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value); 8247 if (NumBits > 8) { 8248 // Use a multiplication with 0x010101... 
to extend the input to the 8249 // required length. 8250 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); 8251 Value = DAG.getNode(ISD::MUL, dl, IntVT, Value, 8252 DAG.getConstant(Magic, dl, IntVT)); 8253 } 8254 8255 if (VT != Value.getValueType() && !VT.isInteger()) 8256 Value = DAG.getBitcast(VT.getScalarType(), Value); 8257 if (VT != Value.getValueType()) 8258 Value = DAG.getSplatBuildVector(VT, dl, Value); 8259 8260 return Value; 8261 } 8262 8263 /// getMemsetStringVal - Similar to getMemsetValue. Except this is only 8264 /// used when a memcpy is turned into a memset when the source is a constant 8265 /// string ptr. 8266 static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, 8267 const TargetLowering &TLI, 8268 const ConstantDataArraySlice &Slice) { 8269 // Handle vector with all elements zero. 8270 if (Slice.Array == nullptr) { 8271 if (VT.isInteger()) 8272 return DAG.getConstant(0, dl, VT); 8273 return DAG.getNode(ISD::BITCAST, dl, VT, 8274 DAG.getConstant(0, dl, VT.changeTypeToInteger())); 8275 } 8276 8277 assert(!VT.isVector() && "Can't handle vector type here!"); 8278 unsigned NumVTBits = VT.getSizeInBits(); 8279 unsigned NumVTBytes = NumVTBits / 8; 8280 unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length)); 8281 8282 APInt Val(NumVTBits, 0); 8283 if (DAG.getDataLayout().isLittleEndian()) { 8284 for (unsigned i = 0; i != NumBytes; ++i) 8285 Val |= (uint64_t)(unsigned char)Slice[i] << i*8; 8286 } else { 8287 for (unsigned i = 0; i != NumBytes; ++i) 8288 Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8; 8289 } 8290 8291 // If the "cost" of materializing the integer immediate is less than the cost 8292 // of a load, then it is cost effective to turn the load into the immediate. 
8293 Type *Ty = VT.getTypeForEVT(*DAG.getContext()); 8294 if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) 8295 return DAG.getConstant(Val, dl, VT); 8296 return SDValue(); 8297 } 8298 8299 SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset, 8300 const SDLoc &DL, 8301 const SDNodeFlags Flags) { 8302 EVT VT = Base.getValueType(); 8303 SDValue Index; 8304 8305 if (Offset.isScalable()) 8306 Index = getVScale(DL, Base.getValueType(), 8307 APInt(Base.getValueSizeInBits().getFixedValue(), 8308 Offset.getKnownMinValue())); 8309 else 8310 Index = getConstant(Offset.getFixedValue(), DL, VT); 8311 8312 return getMemBasePlusOffset(Base, Index, DL, Flags); 8313 } 8314 8315 SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset, 8316 const SDLoc &DL, 8317 const SDNodeFlags Flags) { 8318 assert(Offset.getValueType().isInteger()); 8319 EVT BasePtrVT = Ptr.getValueType(); 8320 if (TLI->shouldPreservePtrArith(this->getMachineFunction().getFunction(), 8321 BasePtrVT)) 8322 return getNode(ISD::PTRADD, DL, BasePtrVT, Ptr, Offset, Flags); 8323 return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags); 8324 } 8325 8326 /// Returns true if memcpy source is constant data. 
8327 static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { 8328 uint64_t SrcDelta = 0; 8329 GlobalAddressSDNode *G = nullptr; 8330 if (Src.getOpcode() == ISD::GlobalAddress) 8331 G = cast<GlobalAddressSDNode>(Src); 8332 else if (Src.getOpcode() == ISD::ADD && 8333 Src.getOperand(0).getOpcode() == ISD::GlobalAddress && 8334 Src.getOperand(1).getOpcode() == ISD::Constant) { 8335 G = cast<GlobalAddressSDNode>(Src.getOperand(0)); 8336 SrcDelta = Src.getConstantOperandVal(1); 8337 } 8338 if (!G) 8339 return false; 8340 8341 return getConstantDataArrayInfo(G->getGlobal(), Slice, 8, 8342 SrcDelta + G->getOffset()); 8343 } 8344 8345 static bool shouldLowerMemFuncForSize(const MachineFunction &MF, 8346 SelectionDAG &DAG) { 8347 // On Darwin, -Os means optimize for size without hurting performance, so 8348 // only really optimize for size when -Oz (MinSize) is used. 8349 if (MF.getTarget().getTargetTriple().isOSDarwin()) 8350 return MF.getFunction().hasMinSize(); 8351 return DAG.shouldOptForSize(); 8352 } 8353 8354 static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, 8355 SmallVector<SDValue, 32> &OutChains, unsigned From, 8356 unsigned To, SmallVector<SDValue, 16> &OutLoadChains, 8357 SmallVector<SDValue, 16> &OutStoreChains) { 8358 assert(OutLoadChains.size() && "Missing loads in memcpy inlining"); 8359 assert(OutStoreChains.size() && "Missing stores in memcpy inlining"); 8360 SmallVector<SDValue, 16> GluedLoadChains; 8361 for (unsigned i = From; i < To; ++i) { 8362 OutChains.push_back(OutLoadChains[i]); 8363 GluedLoadChains.push_back(OutLoadChains[i]); 8364 } 8365 8366 // Chain for all loads. 
8367 SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 8368 GluedLoadChains); 8369 8370 for (unsigned i = From; i < To; ++i) { 8371 StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]); 8372 SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(), 8373 ST->getBasePtr(), ST->getMemoryVT(), 8374 ST->getMemOperand()); 8375 OutChains.push_back(NewStore); 8376 } 8377 } 8378 8379 static SDValue getMemcpyLoadsAndStores( 8380 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, 8381 uint64_t Size, Align Alignment, bool isVol, bool AlwaysInline, 8382 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, 8383 const AAMDNodes &AAInfo, BatchAAResults *BatchAA) { 8384 // Turn a memcpy of undef to nop. 8385 // FIXME: We need to honor volatile even is Src is undef. 8386 if (Src.isUndef()) 8387 return Chain; 8388 8389 // Expand memcpy to a series of load and store ops if the size operand falls 8390 // below a certain threshold. 8391 // TODO: In the AlwaysInline case, if the size is big then generate a loop 8392 // rather than maybe a humongous number of loads and stores. 8393 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8394 const DataLayout &DL = DAG.getDataLayout(); 8395 LLVMContext &C = *DAG.getContext(); 8396 std::vector<EVT> MemOps; 8397 bool DstAlignCanChange = false; 8398 MachineFunction &MF = DAG.getMachineFunction(); 8399 MachineFrameInfo &MFI = MF.getFrameInfo(); 8400 bool OptSize = shouldLowerMemFuncForSize(MF, DAG); 8401 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); 8402 if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) 8403 DstAlignCanChange = true; 8404 MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); 8405 if (!SrcAlign || Alignment > *SrcAlign) 8406 SrcAlign = Alignment; 8407 assert(SrcAlign && "SrcAlign must be set"); 8408 ConstantDataArraySlice Slice; 8409 // If marked as volatile, perform a copy even when marked as constant. 
8410 bool CopyFromConstant = !isVol && isMemSrcFromConstant(Src, Slice); 8411 bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; 8412 unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); 8413 const MemOp Op = isZeroConstant 8414 ? MemOp::Set(Size, DstAlignCanChange, Alignment, 8415 /*IsZeroMemset*/ true, isVol) 8416 : MemOp::Copy(Size, DstAlignCanChange, Alignment, 8417 *SrcAlign, isVol, CopyFromConstant); 8418 if (!TLI.findOptimalMemOpLowering( 8419 C, MemOps, Limit, Op, DstPtrInfo.getAddrSpace(), 8420 SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) 8421 return SDValue(); 8422 8423 if (DstAlignCanChange) { 8424 Type *Ty = MemOps[0].getTypeForEVT(C); 8425 Align NewAlign = DL.getABITypeAlign(Ty); 8426 8427 // Don't promote to an alignment that would require dynamic stack 8428 // realignment which may conflict with optimizations such as tail call 8429 // optimization. 8430 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 8431 if (!TRI->hasStackRealignment(MF)) 8432 if (MaybeAlign StackAlign = DL.getStackAlignment()) 8433 NewAlign = std::min(NewAlign, *StackAlign); 8434 8435 if (NewAlign > Alignment) { 8436 // Give the stack frame object a larger alignment if needed. 8437 if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) 8438 MFI.setObjectAlignment(FI->getIndex(), NewAlign); 8439 Alignment = NewAlign; 8440 } 8441 } 8442 8443 // Prepare AAInfo for loads/stores after lowering this memcpy. 8444 AAMDNodes NewAAInfo = AAInfo; 8445 NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; 8446 8447 const Value *SrcVal = dyn_cast_if_present<const Value *>(SrcPtrInfo.V); 8448 bool isConstant = 8449 BatchAA && SrcVal && 8450 BatchAA->pointsToConstantMemory(MemoryLocation(SrcVal, Size, AAInfo)); 8451 8452 MachineMemOperand::Flags MMOFlags = 8453 isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; 8454 SmallVector<SDValue, 16> OutLoadChains; 8455 SmallVector<SDValue, 16> OutStoreChains; 8456 SmallVector<SDValue, 32> OutChains; 8457 unsigned NumMemOps = MemOps.size(); 8458 uint64_t SrcOff = 0, DstOff = 0; 8459 for (unsigned i = 0; i != NumMemOps; ++i) { 8460 EVT VT = MemOps[i]; 8461 unsigned VTSize = VT.getSizeInBits() / 8; 8462 SDValue Value, Store; 8463 8464 if (VTSize > Size) { 8465 // Issuing an unaligned load / store pair that overlaps with the previous 8466 // pair. Adjust the offset accordingly. 8467 assert(i == NumMemOps-1 && i != 0); 8468 SrcOff -= VTSize - Size; 8469 DstOff -= VTSize - Size; 8470 } 8471 8472 if (CopyFromConstant && 8473 (isZeroConstant || (VT.isInteger() && !VT.isVector()))) { 8474 // It's unlikely a store of a vector immediate can be done in a single 8475 // instruction. It would require a load from a constantpool first. 8476 // We only handle zero vectors here. 8477 // FIXME: Handle other cases where store of vector immediate is done in 8478 // a single instruction. 8479 ConstantDataArraySlice SubSlice; 8480 if (SrcOff < Slice.Length) { 8481 SubSlice = Slice; 8482 SubSlice.move(SrcOff); 8483 } else { 8484 // This is an out-of-bounds access and hence UB. Pretend we read zero. 8485 SubSlice.Array = nullptr; 8486 SubSlice.Offset = 0; 8487 SubSlice.Length = VTSize; 8488 } 8489 Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); 8490 if (Value.getNode()) { 8491 Store = DAG.getStore( 8492 Chain, dl, Value, 8493 DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), 8494 DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); 8495 OutChains.push_back(Store); 8496 } 8497 } 8498 8499 if (!Store.getNode()) { 8500 // The type might not be legal for the target. This should only happen 8501 // if the type is smaller than a legal type, as on PPC, so the right 8502 // thing to do is generate a LoadExt/StoreTrunc pair. 
These simplify 8503 // to Load/Store if NVT==VT. 8504 // FIXME does the case above also need this? 8505 EVT NVT = TLI.getTypeToTransformTo(C, VT); 8506 assert(NVT.bitsGE(VT)); 8507 8508 bool isDereferenceable = 8509 SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); 8510 MachineMemOperand::Flags SrcMMOFlags = MMOFlags; 8511 if (isDereferenceable) 8512 SrcMMOFlags |= MachineMemOperand::MODereferenceable; 8513 if (isConstant) 8514 SrcMMOFlags |= MachineMemOperand::MOInvariant; 8515 8516 Value = DAG.getExtLoad( 8517 ISD::EXTLOAD, dl, NVT, Chain, 8518 DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), 8519 SrcPtrInfo.getWithOffset(SrcOff), VT, 8520 commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo); 8521 OutLoadChains.push_back(Value.getValue(1)); 8522 8523 Store = DAG.getTruncStore( 8524 Chain, dl, Value, 8525 DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), 8526 DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo); 8527 OutStoreChains.push_back(Store); 8528 } 8529 SrcOff += VTSize; 8530 DstOff += VTSize; 8531 Size -= VTSize; 8532 } 8533 8534 unsigned GluedLdStLimit = MaxLdStGlue == 0 ? 8535 TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue; 8536 unsigned NumLdStInMemcpy = OutStoreChains.size(); 8537 8538 if (NumLdStInMemcpy) { 8539 // It may be that memcpy might be converted to memset if it's memcpy 8540 // of constants. In such a case, we won't have loads and stores, but 8541 // just stores. In the absence of loads, there is nothing to gang up. 8542 if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) { 8543 // If target does not care, just leave as it. 8544 for (unsigned i = 0; i < NumLdStInMemcpy; ++i) { 8545 OutChains.push_back(OutLoadChains[i]); 8546 OutChains.push_back(OutStoreChains[i]); 8547 } 8548 } else { 8549 // Ld/St less than/equal limit set by target. 
8550 if (NumLdStInMemcpy <= GluedLdStLimit) { 8551 chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, 8552 NumLdStInMemcpy, OutLoadChains, 8553 OutStoreChains); 8554 } else { 8555 unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit; 8556 unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit; 8557 unsigned GlueIter = 0; 8558 8559 for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) { 8560 unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit; 8561 unsigned IndexTo = NumLdStInMemcpy - GlueIter; 8562 8563 chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo, 8564 OutLoadChains, OutStoreChains); 8565 GlueIter += GluedLdStLimit; 8566 } 8567 8568 // Residual ld/st. 8569 if (RemainingLdStInMemcpy) { 8570 chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, 8571 RemainingLdStInMemcpy, OutLoadChains, 8572 OutStoreChains); 8573 } 8574 } 8575 } 8576 } 8577 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); 8578 } 8579 8580 static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, 8581 SDValue Chain, SDValue Dst, SDValue Src, 8582 uint64_t Size, Align Alignment, 8583 bool isVol, bool AlwaysInline, 8584 MachinePointerInfo DstPtrInfo, 8585 MachinePointerInfo SrcPtrInfo, 8586 const AAMDNodes &AAInfo) { 8587 // Turn a memmove of undef to nop. 8588 // FIXME: We need to honor volatile even is Src is undef. 8589 if (Src.isUndef()) 8590 return Chain; 8591 8592 // Expand memmove to a series of load and store ops if the size operand falls 8593 // below a certain threshold. 
8594 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8595 const DataLayout &DL = DAG.getDataLayout(); 8596 LLVMContext &C = *DAG.getContext(); 8597 std::vector<EVT> MemOps; 8598 bool DstAlignCanChange = false; 8599 MachineFunction &MF = DAG.getMachineFunction(); 8600 MachineFrameInfo &MFI = MF.getFrameInfo(); 8601 bool OptSize = shouldLowerMemFuncForSize(MF, DAG); 8602 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); 8603 if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) 8604 DstAlignCanChange = true; 8605 MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); 8606 if (!SrcAlign || Alignment > *SrcAlign) 8607 SrcAlign = Alignment; 8608 assert(SrcAlign && "SrcAlign must be set"); 8609 unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); 8610 if (!TLI.findOptimalMemOpLowering( 8611 C, MemOps, Limit, 8612 MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign, 8613 /*IsVolatile*/ true), 8614 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), 8615 MF.getFunction().getAttributes())) 8616 return SDValue(); 8617 8618 if (DstAlignCanChange) { 8619 Type *Ty = MemOps[0].getTypeForEVT(C); 8620 Align NewAlign = DL.getABITypeAlign(Ty); 8621 8622 // Don't promote to an alignment that would require dynamic stack 8623 // realignment which may conflict with optimizations such as tail call 8624 // optimization. 8625 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 8626 if (!TRI->hasStackRealignment(MF)) 8627 if (MaybeAlign StackAlign = DL.getStackAlignment()) 8628 NewAlign = std::min(NewAlign, *StackAlign); 8629 8630 if (NewAlign > Alignment) { 8631 // Give the stack frame object a larger alignment if needed. 8632 if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) 8633 MFI.setObjectAlignment(FI->getIndex(), NewAlign); 8634 Alignment = NewAlign; 8635 } 8636 } 8637 8638 // Prepare AAInfo for loads/stores after lowering this memmove. 
8639 AAMDNodes NewAAInfo = AAInfo; 8640 NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; 8641 8642 MachineMemOperand::Flags MMOFlags = 8643 isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; 8644 uint64_t SrcOff = 0, DstOff = 0; 8645 SmallVector<SDValue, 8> LoadValues; 8646 SmallVector<SDValue, 8> LoadChains; 8647 SmallVector<SDValue, 8> OutChains; 8648 unsigned NumMemOps = MemOps.size(); 8649 for (unsigned i = 0; i < NumMemOps; i++) { 8650 EVT VT = MemOps[i]; 8651 unsigned VTSize = VT.getSizeInBits() / 8; 8652 SDValue Value; 8653 8654 bool isDereferenceable = 8655 SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); 8656 MachineMemOperand::Flags SrcMMOFlags = MMOFlags; 8657 if (isDereferenceable) 8658 SrcMMOFlags |= MachineMemOperand::MODereferenceable; 8659 8660 Value = DAG.getLoad( 8661 VT, dl, Chain, 8662 DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), 8663 SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo); 8664 LoadValues.push_back(Value); 8665 LoadChains.push_back(Value.getValue(1)); 8666 SrcOff += VTSize; 8667 } 8668 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 8669 OutChains.clear(); 8670 for (unsigned i = 0; i < NumMemOps; i++) { 8671 EVT VT = MemOps[i]; 8672 unsigned VTSize = VT.getSizeInBits() / 8; 8673 SDValue Store; 8674 8675 Store = DAG.getStore( 8676 Chain, dl, LoadValues[i], 8677 DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), 8678 DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); 8679 OutChains.push_back(Store); 8680 DstOff += VTSize; 8681 } 8682 8683 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); 8684 } 8685 8686 /// Lower the call to 'memset' intrinsic function into a series of store 8687 /// operations. 8688 /// 8689 /// \param DAG Selection DAG where lowered code is placed. 8690 /// \param dl Link to corresponding IR location. 8691 /// \param Chain Control flow dependency. 
8692 /// \param Dst Pointer to destination memory location. 8693 /// \param Src Value of byte to write into the memory. 8694 /// \param Size Number of bytes to write. 8695 /// \param Alignment Alignment of the destination in bytes. 8696 /// \param isVol True if destination is volatile. 8697 /// \param AlwaysInline Makes sure no function call is generated. 8698 /// \param DstPtrInfo IR information on the memory pointer. 8699 /// \returns New head in the control flow, if lowering was successful, empty 8700 /// SDValue otherwise. 8701 /// 8702 /// The function tries to replace 'llvm.memset' intrinsic with several store 8703 /// operations and value calculation code. This is usually profitable for small 8704 /// memory size or when the semantic requires inlining. 8705 static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, 8706 SDValue Chain, SDValue Dst, SDValue Src, 8707 uint64_t Size, Align Alignment, bool isVol, 8708 bool AlwaysInline, MachinePointerInfo DstPtrInfo, 8709 const AAMDNodes &AAInfo) { 8710 // Turn a memset of undef to nop. 8711 // FIXME: We need to honor volatile even is Src is undef. 8712 if (Src.isUndef()) 8713 return Chain; 8714 8715 // Expand memset to a series of load/store ops if the size operand 8716 // falls below a certain threshold. 8717 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8718 std::vector<EVT> MemOps; 8719 bool DstAlignCanChange = false; 8720 LLVMContext &C = *DAG.getContext(); 8721 MachineFunction &MF = DAG.getMachineFunction(); 8722 MachineFrameInfo &MFI = MF.getFrameInfo(); 8723 bool OptSize = shouldLowerMemFuncForSize(MF, DAG); 8724 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); 8725 if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) 8726 DstAlignCanChange = true; 8727 bool IsZeroVal = isNullConstant(Src); 8728 unsigned Limit = AlwaysInline ? 
~0 : TLI.getMaxStoresPerMemset(OptSize); 8729 8730 if (!TLI.findOptimalMemOpLowering( 8731 C, MemOps, Limit, 8732 MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol), 8733 DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes())) 8734 return SDValue(); 8735 8736 if (DstAlignCanChange) { 8737 Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); 8738 const DataLayout &DL = DAG.getDataLayout(); 8739 Align NewAlign = DL.getABITypeAlign(Ty); 8740 8741 // Don't promote to an alignment that would require dynamic stack 8742 // realignment which may conflict with optimizations such as tail call 8743 // optimization. 8744 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 8745 if (!TRI->hasStackRealignment(MF)) 8746 if (MaybeAlign StackAlign = DL.getStackAlignment()) 8747 NewAlign = std::min(NewAlign, *StackAlign); 8748 8749 if (NewAlign > Alignment) { 8750 // Give the stack frame object a larger alignment if needed. 8751 if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) 8752 MFI.setObjectAlignment(FI->getIndex(), NewAlign); 8753 Alignment = NewAlign; 8754 } 8755 } 8756 8757 SmallVector<SDValue, 8> OutChains; 8758 uint64_t DstOff = 0; 8759 unsigned NumMemOps = MemOps.size(); 8760 8761 // Find the largest store and generate the bit pattern for it. 8762 EVT LargestVT = MemOps[0]; 8763 for (unsigned i = 1; i < NumMemOps; i++) 8764 if (MemOps[i].bitsGT(LargestVT)) 8765 LargestVT = MemOps[i]; 8766 SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl); 8767 8768 // Prepare AAInfo for loads/stores after lowering this memset. 8769 AAMDNodes NewAAInfo = AAInfo; 8770 NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; 8771 8772 for (unsigned i = 0; i < NumMemOps; i++) { 8773 EVT VT = MemOps[i]; 8774 unsigned VTSize = VT.getSizeInBits() / 8; 8775 if (VTSize > Size) { 8776 // Issuing an unaligned load / store pair that overlaps with the previous 8777 // pair. Adjust the offset accordingly. 
8778 assert(i == NumMemOps-1 && i != 0); 8779 DstOff -= VTSize - Size; 8780 } 8781 8782 // If this store is smaller than the largest store see whether we can get 8783 // the smaller value for free with a truncate or extract vector element and 8784 // then store. 8785 SDValue Value = MemSetValue; 8786 if (VT.bitsLT(LargestVT)) { 8787 unsigned Index; 8788 unsigned NElts = LargestVT.getSizeInBits() / VT.getSizeInBits(); 8789 EVT SVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), NElts); 8790 if (!LargestVT.isVector() && !VT.isVector() && 8791 TLI.isTruncateFree(LargestVT, VT)) 8792 Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); 8793 else if (LargestVT.isVector() && !VT.isVector() && 8794 TLI.shallExtractConstSplatVectorElementToStore( 8795 LargestVT.getTypeForEVT(*DAG.getContext()), 8796 VT.getSizeInBits(), Index) && 8797 TLI.isTypeLegal(SVT) && 8798 LargestVT.getSizeInBits() == SVT.getSizeInBits()) { 8799 // Target which can combine store(extractelement VectorTy, Idx) can get 8800 // the smaller value for free. 8801 SDValue TailValue = DAG.getNode(ISD::BITCAST, dl, SVT, MemSetValue); 8802 Value = DAG.getExtractVectorElt(dl, VT, TailValue, Index); 8803 } else 8804 Value = getMemsetValue(Src, VT, DAG, dl); 8805 } 8806 assert(Value.getValueType() == VT && "Value with wrong type."); 8807 SDValue Store = DAG.getStore( 8808 Chain, dl, Value, 8809 DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), 8810 DstPtrInfo.getWithOffset(DstOff), Alignment, 8811 isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone, 8812 NewAAInfo); 8813 OutChains.push_back(Store); 8814 DstOff += VT.getSizeInBits() / 8; 8815 Size -= VTSize; 8816 } 8817 8818 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); 8819 } 8820 8821 static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, 8822 unsigned AS) { 8823 // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all 8824 // pointer operands can be losslessly bitcasted to pointers of address space 0 8825 if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(AS, 0)) { 8826 report_fatal_error("cannot lower memory intrinsic in address space " + 8827 Twine(AS)); 8828 } 8829 } 8830 8831 SDValue SelectionDAG::getMemcpy( 8832 SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, 8833 Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, 8834 std::optional<bool> OverrideTailCall, MachinePointerInfo DstPtrInfo, 8835 MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo, 8836 BatchAAResults *BatchAA) { 8837 // Check to see if we should lower the memcpy to loads and stores first. 8838 // For cases within the target-specified limits, this is the best choice. 8839 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 8840 if (ConstantSize) { 8841 // Memcpy with size zero? Just return the original chain. 8842 if (ConstantSize->isZero()) 8843 return Chain; 8844 8845 SDValue Result = getMemcpyLoadsAndStores( 8846 *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, 8847 isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo, BatchAA); 8848 if (Result.getNode()) 8849 return Result; 8850 } 8851 8852 // Then check to see if we should lower the memcpy with target-specific 8853 // code. If the target chooses to do this, this is the next best. 
8854 if (TSI) { 8855 SDValue Result = TSI->EmitTargetCodeForMemcpy( 8856 *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, 8857 DstPtrInfo, SrcPtrInfo); 8858 if (Result.getNode()) 8859 return Result; 8860 } 8861 8862 // If we really need inline code and the target declined to provide it, 8863 // use a (potentially long) sequence of loads and stores. 8864 if (AlwaysInline) { 8865 assert(ConstantSize && "AlwaysInline requires a constant size!"); 8866 return getMemcpyLoadsAndStores( 8867 *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, 8868 isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo, BatchAA); 8869 } 8870 8871 checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); 8872 checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); 8873 8874 // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc 8875 // memcpy is not guaranteed to be safe. libc memcpys aren't required to 8876 // respect volatile, so they may do things like read or write memory 8877 // beyond the given memory regions. But fixing this isn't easy, and most 8878 // people don't care. 8879 8880 // Emit a library call. 
8881 TargetLowering::ArgListTy Args; 8882 TargetLowering::ArgListEntry Entry; 8883 Entry.Ty = PointerType::getUnqual(*getContext()); 8884 Entry.Node = Dst; Args.push_back(Entry); 8885 Entry.Node = Src; Args.push_back(Entry); 8886 8887 Entry.Ty = getDataLayout().getIntPtrType(*getContext()); 8888 Entry.Node = Size; Args.push_back(Entry); 8889 // FIXME: pass in SDLoc 8890 TargetLowering::CallLoweringInfo CLI(*this); 8891 bool IsTailCall = false; 8892 const char *MemCpyName = TLI->getMemcpyName(); 8893 8894 if (OverrideTailCall.has_value()) { 8895 IsTailCall = *OverrideTailCall; 8896 } else { 8897 bool LowersToMemcpy = StringRef(MemCpyName) == StringRef("memcpy"); 8898 bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI); 8899 IsTailCall = CI && CI->isTailCall() && 8900 isInTailCallPosition(*CI, getTarget(), 8901 ReturnsFirstArg && LowersToMemcpy); 8902 } 8903 8904 CLI.setDebugLoc(dl) 8905 .setChain(Chain) 8906 .setLibCallee( 8907 TLI->getLibcallCallingConv(RTLIB::MEMCPY), 8908 Dst.getValueType().getTypeForEVT(*getContext()), 8909 getExternalSymbol(MemCpyName, TLI->getPointerTy(getDataLayout())), 8910 std::move(Args)) 8911 .setDiscardResult() 8912 .setTailCall(IsTailCall); 8913 8914 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); 8915 return CallResult.second; 8916 } 8917 8918 SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, 8919 SDValue Dst, SDValue Src, SDValue Size, 8920 Type *SizeTy, unsigned ElemSz, 8921 bool isTailCall, 8922 MachinePointerInfo DstPtrInfo, 8923 MachinePointerInfo SrcPtrInfo) { 8924 // Emit a library call. 
8925 TargetLowering::ArgListTy Args; 8926 TargetLowering::ArgListEntry Entry; 8927 Entry.Ty = getDataLayout().getIntPtrType(*getContext()); 8928 Entry.Node = Dst; 8929 Args.push_back(Entry); 8930 8931 Entry.Node = Src; 8932 Args.push_back(Entry); 8933 8934 Entry.Ty = SizeTy; 8935 Entry.Node = Size; 8936 Args.push_back(Entry); 8937 8938 RTLIB::Libcall LibraryCall = 8939 RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz); 8940 if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) 8941 report_fatal_error("Unsupported element size"); 8942 8943 TargetLowering::CallLoweringInfo CLI(*this); 8944 CLI.setDebugLoc(dl) 8945 .setChain(Chain) 8946 .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), 8947 Type::getVoidTy(*getContext()), 8948 getExternalSymbol(TLI->getLibcallName(LibraryCall), 8949 TLI->getPointerTy(getDataLayout())), 8950 std::move(Args)) 8951 .setDiscardResult() 8952 .setTailCall(isTailCall); 8953 8954 std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); 8955 return CallResult.second; 8956 } 8957 8958 SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, 8959 SDValue Src, SDValue Size, Align Alignment, 8960 bool isVol, const CallInst *CI, 8961 std::optional<bool> OverrideTailCall, 8962 MachinePointerInfo DstPtrInfo, 8963 MachinePointerInfo SrcPtrInfo, 8964 const AAMDNodes &AAInfo, 8965 BatchAAResults *BatchAA) { 8966 // Check to see if we should lower the memmove to loads and stores first. 8967 // For cases within the target-specified limits, this is the best choice. 8968 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 8969 if (ConstantSize) { 8970 // Memmove with size zero? Just return the original chain. 
8971 if (ConstantSize->isZero()) 8972 return Chain; 8973 8974 SDValue Result = getMemmoveLoadsAndStores( 8975 *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, 8976 isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo); 8977 if (Result.getNode()) 8978 return Result; 8979 } 8980 8981 // Then check to see if we should lower the memmove with target-specific 8982 // code. If the target chooses to do this, this is the next best. 8983 if (TSI) { 8984 SDValue Result = 8985 TSI->EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, 8986 Alignment, isVol, DstPtrInfo, SrcPtrInfo); 8987 if (Result.getNode()) 8988 return Result; 8989 } 8990 8991 checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); 8992 checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); 8993 8994 // FIXME: If the memmove is volatile, lowering it to plain libc memmove may 8995 // not be safe. See memcpy above for more details. 8996 8997 // Emit a library call. 8998 TargetLowering::ArgListTy Args; 8999 TargetLowering::ArgListEntry Entry; 9000 Entry.Ty = PointerType::getUnqual(*getContext()); 9001 Entry.Node = Dst; Args.push_back(Entry); 9002 Entry.Node = Src; Args.push_back(Entry); 9003 9004 Entry.Ty = getDataLayout().getIntPtrType(*getContext()); 9005 Entry.Node = Size; Args.push_back(Entry); 9006 // FIXME: pass in SDLoc 9007 TargetLowering::CallLoweringInfo CLI(*this); 9008 9009 bool IsTailCall = false; 9010 if (OverrideTailCall.has_value()) { 9011 IsTailCall = *OverrideTailCall; 9012 } else { 9013 bool LowersToMemmove = 9014 TLI->getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove"); 9015 bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI); 9016 IsTailCall = CI && CI->isTailCall() && 9017 isInTailCallPosition(*CI, getTarget(), 9018 ReturnsFirstArg && LowersToMemmove); 9019 } 9020 9021 CLI.setDebugLoc(dl) 9022 .setChain(Chain) 9023 .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), 9024 Dst.getValueType().getTypeForEVT(*getContext()), 9025 
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), 9026 TLI->getPointerTy(getDataLayout())), 9027 std::move(Args)) 9028 .setDiscardResult() 9029 .setTailCall(IsTailCall); 9030 9031 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); 9032 return CallResult.second; 9033 } 9034 9035 SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, 9036 SDValue Dst, SDValue Src, SDValue Size, 9037 Type *SizeTy, unsigned ElemSz, 9038 bool isTailCall, 9039 MachinePointerInfo DstPtrInfo, 9040 MachinePointerInfo SrcPtrInfo) { 9041 // Emit a library call. 9042 TargetLowering::ArgListTy Args; 9043 TargetLowering::ArgListEntry Entry; 9044 Entry.Ty = getDataLayout().getIntPtrType(*getContext()); 9045 Entry.Node = Dst; 9046 Args.push_back(Entry); 9047 9048 Entry.Node = Src; 9049 Args.push_back(Entry); 9050 9051 Entry.Ty = SizeTy; 9052 Entry.Node = Size; 9053 Args.push_back(Entry); 9054 9055 RTLIB::Libcall LibraryCall = 9056 RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz); 9057 if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) 9058 report_fatal_error("Unsupported element size"); 9059 9060 TargetLowering::CallLoweringInfo CLI(*this); 9061 CLI.setDebugLoc(dl) 9062 .setChain(Chain) 9063 .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), 9064 Type::getVoidTy(*getContext()), 9065 getExternalSymbol(TLI->getLibcallName(LibraryCall), 9066 TLI->getPointerTy(getDataLayout())), 9067 std::move(Args)) 9068 .setDiscardResult() 9069 .setTailCall(isTailCall); 9070 9071 std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); 9072 return CallResult.second; 9073 } 9074 9075 SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, 9076 SDValue Src, SDValue Size, Align Alignment, 9077 bool isVol, bool AlwaysInline, 9078 const CallInst *CI, 9079 MachinePointerInfo DstPtrInfo, 9080 const AAMDNodes &AAInfo) { 9081 // Check to see if we should lower the memset to stores first. 9082 // For cases within the target-specified limits, this is the best choice. 
9083 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 9084 if (ConstantSize) { 9085 // Memset with size zero? Just return the original chain. 9086 if (ConstantSize->isZero()) 9087 return Chain; 9088 9089 SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, 9090 ConstantSize->getZExtValue(), Alignment, 9091 isVol, false, DstPtrInfo, AAInfo); 9092 9093 if (Result.getNode()) 9094 return Result; 9095 } 9096 9097 // Then check to see if we should lower the memset with target-specific 9098 // code. If the target chooses to do this, this is the next best. 9099 if (TSI) { 9100 SDValue Result = TSI->EmitTargetCodeForMemset( 9101 *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, DstPtrInfo); 9102 if (Result.getNode()) 9103 return Result; 9104 } 9105 9106 // If we really need inline code and the target declined to provide it, 9107 // use a (potentially long) sequence of loads and stores. 9108 if (AlwaysInline) { 9109 assert(ConstantSize && "AlwaysInline requires a constant size!"); 9110 SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, 9111 ConstantSize->getZExtValue(), Alignment, 9112 isVol, true, DstPtrInfo, AAInfo); 9113 assert(Result && 9114 "getMemsetStores must return a valid sequence when AlwaysInline"); 9115 return Result; 9116 } 9117 9118 checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); 9119 9120 // Emit a library call. 9121 auto &Ctx = *getContext(); 9122 const auto& DL = getDataLayout(); 9123 9124 TargetLowering::CallLoweringInfo CLI(*this); 9125 // FIXME: pass in SDLoc 9126 CLI.setDebugLoc(dl).setChain(Chain); 9127 9128 const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO); 9129 9130 // Helper function to create an Entry from Node and Type. 
9131 const auto CreateEntry = [](SDValue Node, Type *Ty) { 9132 TargetLowering::ArgListEntry Entry; 9133 Entry.Node = Node; 9134 Entry.Ty = Ty; 9135 return Entry; 9136 }; 9137 9138 bool UseBZero = isNullConstant(Src) && BzeroName; 9139 // If zeroing out and bzero is present, use it. 9140 if (UseBZero) { 9141 TargetLowering::ArgListTy Args; 9142 Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx))); 9143 Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); 9144 CLI.setLibCallee( 9145 TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx), 9146 getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args)); 9147 } else { 9148 TargetLowering::ArgListTy Args; 9149 Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx))); 9150 Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx))); 9151 Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); 9152 CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), 9153 Dst.getValueType().getTypeForEVT(Ctx), 9154 getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), 9155 TLI->getPointerTy(DL)), 9156 std::move(Args)); 9157 } 9158 bool LowersToMemset = 9159 TLI->getLibcallName(RTLIB::MEMSET) == StringRef("memset"); 9160 // If we're going to use bzero, make sure not to tail call unless the 9161 // subsequent return doesn't need a value, as bzero doesn't return the first 9162 // arg unlike memset. 
9163 bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI) && !UseBZero; 9164 bool IsTailCall = 9165 CI && CI->isTailCall() && 9166 isInTailCallPosition(*CI, getTarget(), ReturnsFirstArg && LowersToMemset); 9167 CLI.setDiscardResult().setTailCall(IsTailCall); 9168 9169 std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); 9170 return CallResult.second; 9171 } 9172 9173 SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, 9174 SDValue Dst, SDValue Value, SDValue Size, 9175 Type *SizeTy, unsigned ElemSz, 9176 bool isTailCall, 9177 MachinePointerInfo DstPtrInfo) { 9178 // Emit a library call. 9179 TargetLowering::ArgListTy Args; 9180 TargetLowering::ArgListEntry Entry; 9181 Entry.Ty = getDataLayout().getIntPtrType(*getContext()); 9182 Entry.Node = Dst; 9183 Args.push_back(Entry); 9184 9185 Entry.Ty = Type::getInt8Ty(*getContext()); 9186 Entry.Node = Value; 9187 Args.push_back(Entry); 9188 9189 Entry.Ty = SizeTy; 9190 Entry.Node = Size; 9191 Args.push_back(Entry); 9192 9193 RTLIB::Libcall LibraryCall = 9194 RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz); 9195 if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) 9196 report_fatal_error("Unsupported element size"); 9197 9198 TargetLowering::CallLoweringInfo CLI(*this); 9199 CLI.setDebugLoc(dl) 9200 .setChain(Chain) 9201 .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), 9202 Type::getVoidTy(*getContext()), 9203 getExternalSymbol(TLI->getLibcallName(LibraryCall), 9204 TLI->getPointerTy(getDataLayout())), 9205 std::move(Args)) 9206 .setDiscardResult() 9207 .setTailCall(isTailCall); 9208 9209 std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); 9210 return CallResult.second; 9211 } 9212 9213 SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, 9214 SDVTList VTList, ArrayRef<SDValue> Ops, 9215 MachineMemOperand *MMO, 9216 ISD::LoadExtType ExtType) { 9217 FoldingSetNodeID ID; 9218 AddNodeIDNode(ID, Opcode, VTList, Ops); 9219 ID.AddInteger(MemVT.getRawBits()); 
9220 ID.AddInteger(getSyntheticNodeSubclassData<AtomicSDNode>( 9221 dl.getIROrder(), Opcode, VTList, MemVT, MMO, ExtType)); 9222 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9223 ID.AddInteger(MMO->getFlags()); 9224 void* IP = nullptr; 9225 if (auto *E = cast_or_null<AtomicSDNode>(FindNodeOrInsertPos(ID, dl, IP))) { 9226 E->refineAlignment(MMO); 9227 E->refineRanges(MMO); 9228 return SDValue(E, 0); 9229 } 9230 9231 auto *N = newSDNode<AtomicSDNode>(dl.getIROrder(), dl.getDebugLoc(), Opcode, 9232 VTList, MemVT, MMO, ExtType); 9233 createOperands(N, Ops); 9234 9235 CSEMap.InsertNode(N, IP); 9236 InsertNode(N); 9237 SDValue V(N, 0); 9238 NewSDValueDbgMsg(V, "Creating new node: ", this); 9239 return V; 9240 } 9241 9242 SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, 9243 EVT MemVT, SDVTList VTs, SDValue Chain, 9244 SDValue Ptr, SDValue Cmp, SDValue Swp, 9245 MachineMemOperand *MMO) { 9246 assert(Opcode == ISD::ATOMIC_CMP_SWAP || 9247 Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); 9248 assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); 9249 9250 SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; 9251 return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); 9252 } 9253 9254 SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, 9255 SDValue Chain, SDValue Ptr, SDValue Val, 9256 MachineMemOperand *MMO) { 9257 assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || 9258 Opcode == ISD::ATOMIC_LOAD_AND || Opcode == ISD::ATOMIC_LOAD_CLR || 9259 Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR || 9260 Opcode == ISD::ATOMIC_LOAD_NAND || Opcode == ISD::ATOMIC_LOAD_MIN || 9261 Opcode == ISD::ATOMIC_LOAD_MAX || Opcode == ISD::ATOMIC_LOAD_UMIN || 9262 Opcode == ISD::ATOMIC_LOAD_UMAX || Opcode == ISD::ATOMIC_LOAD_FADD || 9263 Opcode == ISD::ATOMIC_LOAD_FSUB || Opcode == ISD::ATOMIC_LOAD_FMAX || 9264 Opcode == ISD::ATOMIC_LOAD_FMIN || 9265 Opcode == ISD::ATOMIC_LOAD_FMINIMUM || 9266 
Opcode == ISD::ATOMIC_LOAD_FMAXIMUM || 9267 Opcode == ISD::ATOMIC_LOAD_UINC_WRAP || 9268 Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP || 9269 Opcode == ISD::ATOMIC_LOAD_USUB_COND || 9270 Opcode == ISD::ATOMIC_LOAD_USUB_SAT || Opcode == ISD::ATOMIC_SWAP || 9271 Opcode == ISD::ATOMIC_STORE) && 9272 "Invalid Atomic Op"); 9273 9274 EVT VT = Val.getValueType(); 9275 9276 SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) : 9277 getVTList(VT, MVT::Other); 9278 SDValue Ops[] = {Chain, Ptr, Val}; 9279 return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); 9280 } 9281 9282 SDValue SelectionDAG::getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, 9283 EVT MemVT, EVT VT, SDValue Chain, 9284 SDValue Ptr, MachineMemOperand *MMO) { 9285 SDVTList VTs = getVTList(VT, MVT::Other); 9286 SDValue Ops[] = {Chain, Ptr}; 9287 return getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, VTs, Ops, MMO, ExtType); 9288 } 9289 9290 /// getMergeValues - Create a MERGE_VALUES node from the given operands. 9291 SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { 9292 if (Ops.size() == 1) 9293 return Ops[0]; 9294 9295 SmallVector<EVT, 4> VTs; 9296 VTs.reserve(Ops.size()); 9297 for (const SDValue &Op : Ops) 9298 VTs.push_back(Op.getValueType()); 9299 return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops); 9300 } 9301 9302 SDValue SelectionDAG::getMemIntrinsicNode( 9303 unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, 9304 EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, 9305 MachineMemOperand::Flags Flags, LocationSize Size, 9306 const AAMDNodes &AAInfo) { 9307 if (Size.hasValue() && !Size.getValue()) 9308 Size = LocationSize::precise(MemVT.getStoreSize()); 9309 9310 MachineFunction &MF = getMachineFunction(); 9311 MachineMemOperand *MMO = 9312 MF.getMachineMemOperand(PtrInfo, Flags, Size, Alignment, AAInfo); 9313 9314 return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); 9315 } 9316 9317 SDValue 
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, 9318 SDVTList VTList, 9319 ArrayRef<SDValue> Ops, EVT MemVT, 9320 MachineMemOperand *MMO) { 9321 assert( 9322 (Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || 9323 Opcode == ISD::PREFETCH || 9324 (Opcode <= (unsigned)std::numeric_limits<int>::max() && 9325 Opcode >= ISD::BUILTIN_OP_END && TSI->isTargetMemoryOpcode(Opcode))) && 9326 "Opcode is not a memory-accessing opcode!"); 9327 9328 // Memoize the node unless it returns a glue result. 9329 MemIntrinsicSDNode *N; 9330 if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { 9331 FoldingSetNodeID ID; 9332 AddNodeIDNode(ID, Opcode, VTList, Ops); 9333 ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>( 9334 Opcode, dl.getIROrder(), VTList, MemVT, MMO)); 9335 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9336 ID.AddInteger(MMO->getFlags()); 9337 ID.AddInteger(MemVT.getRawBits()); 9338 void *IP = nullptr; 9339 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9340 cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); 9341 return SDValue(E, 0); 9342 } 9343 9344 N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), 9345 VTList, MemVT, MMO); 9346 createOperands(N, Ops); 9347 9348 CSEMap.InsertNode(N, IP); 9349 } else { 9350 N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), 9351 VTList, MemVT, MMO); 9352 createOperands(N, Ops); 9353 } 9354 InsertNode(N); 9355 SDValue V(N, 0); 9356 NewSDValueDbgMsg(V, "Creating new node: ", this); 9357 return V; 9358 } 9359 9360 SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, 9361 SDValue Chain, int FrameIndex, 9362 int64_t Size, int64_t Offset) { 9363 const unsigned Opcode = IsStart ? 
ISD::LIFETIME_START : ISD::LIFETIME_END; 9364 const auto VTs = getVTList(MVT::Other); 9365 SDValue Ops[2] = { 9366 Chain, 9367 getFrameIndex(FrameIndex, 9368 getTargetLoweringInfo().getFrameIndexTy(getDataLayout()), 9369 true)}; 9370 9371 FoldingSetNodeID ID; 9372 AddNodeIDNode(ID, Opcode, VTs, Ops); 9373 ID.AddInteger(FrameIndex); 9374 ID.AddInteger(Size); 9375 ID.AddInteger(Offset); 9376 void *IP = nullptr; 9377 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 9378 return SDValue(E, 0); 9379 9380 LifetimeSDNode *N = newSDNode<LifetimeSDNode>( 9381 Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset); 9382 createOperands(N, Ops); 9383 CSEMap.InsertNode(N, IP); 9384 InsertNode(N); 9385 SDValue V(N, 0); 9386 NewSDValueDbgMsg(V, "Creating new node: ", this); 9387 return V; 9388 } 9389 9390 SDValue SelectionDAG::getPseudoProbeNode(const SDLoc &Dl, SDValue Chain, 9391 uint64_t Guid, uint64_t Index, 9392 uint32_t Attr) { 9393 const unsigned Opcode = ISD::PSEUDO_PROBE; 9394 const auto VTs = getVTList(MVT::Other); 9395 SDValue Ops[] = {Chain}; 9396 FoldingSetNodeID ID; 9397 AddNodeIDNode(ID, Opcode, VTs, Ops); 9398 ID.AddInteger(Guid); 9399 ID.AddInteger(Index); 9400 void *IP = nullptr; 9401 if (SDNode *E = FindNodeOrInsertPos(ID, Dl, IP)) 9402 return SDValue(E, 0); 9403 9404 auto *N = newSDNode<PseudoProbeSDNode>( 9405 Opcode, Dl.getIROrder(), Dl.getDebugLoc(), VTs, Guid, Index, Attr); 9406 createOperands(N, Ops); 9407 CSEMap.InsertNode(N, IP); 9408 InsertNode(N); 9409 SDValue V(N, 0); 9410 NewSDValueDbgMsg(V, "Creating new node: ", this); 9411 return V; 9412 } 9413 9414 /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a 9415 /// MachinePointerInfo record from it. This is particularly useful because the 9416 /// code generator has many cases where it doesn't bother passing in a 9417 /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". 
9418 static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, 9419 SelectionDAG &DAG, SDValue Ptr, 9420 int64_t Offset = 0) { 9421 // If this is FI+Offset, we can model it. 9422 if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) 9423 return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), 9424 FI->getIndex(), Offset); 9425 9426 // If this is (FI+Offset1)+Offset2, we can model it. 9427 if (Ptr.getOpcode() != ISD::ADD || 9428 !isa<ConstantSDNode>(Ptr.getOperand(1)) || 9429 !isa<FrameIndexSDNode>(Ptr.getOperand(0))) 9430 return Info; 9431 9432 int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); 9433 return MachinePointerInfo::getFixedStack( 9434 DAG.getMachineFunction(), FI, 9435 Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); 9436 } 9437 9438 /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a 9439 /// MachinePointerInfo record from it. This is particularly useful because the 9440 /// code generator has many cases where it doesn't bother passing in a 9441 /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". 9442 static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, 9443 SelectionDAG &DAG, SDValue Ptr, 9444 SDValue OffsetOp) { 9445 // If the 'Offset' value isn't a constant, we can't handle this. 
9446 if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) 9447 return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue()); 9448 if (OffsetOp.isUndef()) 9449 return InferPointerInfo(Info, DAG, Ptr); 9450 return Info; 9451 } 9452 9453 SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, 9454 EVT VT, const SDLoc &dl, SDValue Chain, 9455 SDValue Ptr, SDValue Offset, 9456 MachinePointerInfo PtrInfo, EVT MemVT, 9457 Align Alignment, 9458 MachineMemOperand::Flags MMOFlags, 9459 const AAMDNodes &AAInfo, const MDNode *Ranges) { 9460 assert(Chain.getValueType() == MVT::Other && 9461 "Invalid chain type"); 9462 9463 MMOFlags |= MachineMemOperand::MOLoad; 9464 assert((MMOFlags & MachineMemOperand::MOStore) == 0); 9465 // If we don't have a PtrInfo, infer the trivial frame index case to simplify 9466 // clients. 9467 if (PtrInfo.V.isNull()) 9468 PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); 9469 9470 TypeSize Size = MemVT.getStoreSize(); 9471 MachineFunction &MF = getMachineFunction(); 9472 MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, 9473 Alignment, AAInfo, Ranges); 9474 return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); 9475 } 9476 9477 SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, 9478 EVT VT, const SDLoc &dl, SDValue Chain, 9479 SDValue Ptr, SDValue Offset, EVT MemVT, 9480 MachineMemOperand *MMO) { 9481 if (VT == MemVT) { 9482 ExtType = ISD::NON_EXTLOAD; 9483 } else if (ExtType == ISD::NON_EXTLOAD) { 9484 assert(VT == MemVT && "Non-extending load from different memory type!"); 9485 } else { 9486 // Extending load. 
9487 assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && 9488 "Should only be an extending load, not truncating!"); 9489 assert(VT.isInteger() == MemVT.isInteger() && 9490 "Cannot convert from FP to Int or Int -> FP!"); 9491 assert(VT.isVector() == MemVT.isVector() && 9492 "Cannot use an ext load to convert to or from a vector!"); 9493 assert((!VT.isVector() || 9494 VT.getVectorElementCount() == MemVT.getVectorElementCount()) && 9495 "Cannot use an ext load to change the number of vector elements!"); 9496 } 9497 9498 assert((!MMO->getRanges() || 9499 (mdconst::extract<ConstantInt>(MMO->getRanges()->getOperand(0)) 9500 ->getBitWidth() == MemVT.getScalarSizeInBits() && 9501 MemVT.isInteger())) && 9502 "Range metadata and load type must match!"); 9503 9504 bool Indexed = AM != ISD::UNINDEXED; 9505 assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); 9506 9507 SDVTList VTs = Indexed ? 9508 getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other); 9509 SDValue Ops[] = { Chain, Ptr, Offset }; 9510 FoldingSetNodeID ID; 9511 AddNodeIDNode(ID, ISD::LOAD, VTs, Ops); 9512 ID.AddInteger(MemVT.getRawBits()); 9513 ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>( 9514 dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO)); 9515 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9516 ID.AddInteger(MMO->getFlags()); 9517 void *IP = nullptr; 9518 if (auto *E = cast_or_null<LoadSDNode>(FindNodeOrInsertPos(ID, dl, IP))) { 9519 E->refineAlignment(MMO); 9520 E->refineRanges(MMO); 9521 return SDValue(E, 0); 9522 } 9523 auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, 9524 ExtType, MemVT, MMO); 9525 createOperands(N, Ops); 9526 9527 CSEMap.InsertNode(N, IP); 9528 InsertNode(N); 9529 SDValue V(N, 0); 9530 NewSDValueDbgMsg(V, "Creating new node: ", this); 9531 return V; 9532 } 9533 9534 SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, 9535 SDValue Ptr, MachinePointerInfo PtrInfo, 9536 MaybeAlign 
Alignment, 9537 MachineMemOperand::Flags MMOFlags, 9538 const AAMDNodes &AAInfo, const MDNode *Ranges) { 9539 SDValue Undef = getUNDEF(Ptr.getValueType()); 9540 return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, 9541 PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges); 9542 } 9543 9544 SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, 9545 SDValue Ptr, MachineMemOperand *MMO) { 9546 SDValue Undef = getUNDEF(Ptr.getValueType()); 9547 return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, 9548 VT, MMO); 9549 } 9550 9551 SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, 9552 EVT VT, SDValue Chain, SDValue Ptr, 9553 MachinePointerInfo PtrInfo, EVT MemVT, 9554 MaybeAlign Alignment, 9555 MachineMemOperand::Flags MMOFlags, 9556 const AAMDNodes &AAInfo) { 9557 SDValue Undef = getUNDEF(Ptr.getValueType()); 9558 return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo, 9559 MemVT, Alignment, MMOFlags, AAInfo); 9560 } 9561 9562 SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, 9563 EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT, 9564 MachineMemOperand *MMO) { 9565 SDValue Undef = getUNDEF(Ptr.getValueType()); 9566 return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, 9567 MemVT, MMO); 9568 } 9569 9570 SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, 9571 SDValue Base, SDValue Offset, 9572 ISD::MemIndexedMode AM) { 9573 LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); 9574 assert(LD->getOffset().isUndef() && "Load is already a indexed load!"); 9575 // Don't propagate the invariant or dereferenceable flags. 
9576 auto MMOFlags = 9577 LD->getMemOperand()->getFlags() & 9578 ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); 9579 return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, 9580 LD->getChain(), Base, Offset, LD->getPointerInfo(), 9581 LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo()); 9582 } 9583 9584 SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, 9585 SDValue Ptr, MachinePointerInfo PtrInfo, 9586 Align Alignment, 9587 MachineMemOperand::Flags MMOFlags, 9588 const AAMDNodes &AAInfo) { 9589 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9590 9591 MMOFlags |= MachineMemOperand::MOStore; 9592 assert((MMOFlags & MachineMemOperand::MOLoad) == 0); 9593 9594 if (PtrInfo.V.isNull()) 9595 PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); 9596 9597 MachineFunction &MF = getMachineFunction(); 9598 TypeSize Size = Val.getValueType().getStoreSize(); 9599 MachineMemOperand *MMO = 9600 MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); 9601 return getStore(Chain, dl, Val, Ptr, MMO); 9602 } 9603 9604 SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, 9605 SDValue Ptr, MachineMemOperand *MMO) { 9606 SDValue Undef = getUNDEF(Ptr.getValueType()); 9607 return getStore(Chain, dl, Val, Ptr, Undef, Val.getValueType(), MMO, 9608 ISD::UNINDEXED); 9609 } 9610 9611 SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, 9612 SDValue Ptr, SDValue Offset, EVT SVT, 9613 MachineMemOperand *MMO, ISD::MemIndexedMode AM, 9614 bool IsTruncating) { 9615 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9616 EVT VT = Val.getValueType(); 9617 if (VT == SVT) { 9618 IsTruncating = false; 9619 } else if (!IsTruncating) { 9620 assert(VT == SVT && "No-truncating store from different memory type!"); 9621 } else { 9622 assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && 9623 "Should only be a truncating store, not 
extending!"); 9624 assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); 9625 assert(VT.isVector() == SVT.isVector() && 9626 "Cannot use trunc store to convert to or from a vector!"); 9627 assert((!VT.isVector() || 9628 VT.getVectorElementCount() == SVT.getVectorElementCount()) && 9629 "Cannot use trunc store to change the number of vector elements!"); 9630 } 9631 9632 bool Indexed = AM != ISD::UNINDEXED; 9633 assert((Indexed || Offset.isUndef()) && "Unindexed store with an offset!"); 9634 SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other) 9635 : getVTList(MVT::Other); 9636 SDValue Ops[] = {Chain, Val, Ptr, Offset}; 9637 FoldingSetNodeID ID; 9638 AddNodeIDNode(ID, ISD::STORE, VTs, Ops); 9639 ID.AddInteger(SVT.getRawBits()); 9640 ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>( 9641 dl.getIROrder(), VTs, AM, IsTruncating, SVT, MMO)); 9642 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9643 ID.AddInteger(MMO->getFlags()); 9644 void *IP = nullptr; 9645 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9646 cast<StoreSDNode>(E)->refineAlignment(MMO); 9647 return SDValue(E, 0); 9648 } 9649 auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, 9650 IsTruncating, SVT, MMO); 9651 createOperands(N, Ops); 9652 9653 CSEMap.InsertNode(N, IP); 9654 InsertNode(N); 9655 SDValue V(N, 0); 9656 NewSDValueDbgMsg(V, "Creating new node: ", this); 9657 return V; 9658 } 9659 9660 SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, 9661 SDValue Ptr, MachinePointerInfo PtrInfo, 9662 EVT SVT, Align Alignment, 9663 MachineMemOperand::Flags MMOFlags, 9664 const AAMDNodes &AAInfo) { 9665 assert(Chain.getValueType() == MVT::Other && 9666 "Invalid chain type"); 9667 9668 MMOFlags |= MachineMemOperand::MOStore; 9669 assert((MMOFlags & MachineMemOperand::MOLoad) == 0); 9670 9671 if (PtrInfo.V.isNull()) 9672 PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); 9673 9674 MachineFunction &MF = 
getMachineFunction(); 9675 MachineMemOperand *MMO = MF.getMachineMemOperand( 9676 PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo); 9677 return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); 9678 } 9679 9680 SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, 9681 SDValue Ptr, EVT SVT, 9682 MachineMemOperand *MMO) { 9683 SDValue Undef = getUNDEF(Ptr.getValueType()); 9684 return getStore(Chain, dl, Val, Ptr, Undef, SVT, MMO, ISD::UNINDEXED, true); 9685 } 9686 9687 SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, 9688 SDValue Base, SDValue Offset, 9689 ISD::MemIndexedMode AM) { 9690 StoreSDNode *ST = cast<StoreSDNode>(OrigStore); 9691 assert(ST->getOffset().isUndef() && "Store is already a indexed store!"); 9692 return getStore(ST->getChain(), dl, ST->getValue(), Base, Offset, 9693 ST->getMemoryVT(), ST->getMemOperand(), AM, 9694 ST->isTruncatingStore()); 9695 } 9696 9697 SDValue SelectionDAG::getLoadVP( 9698 ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, 9699 SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, 9700 MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, 9701 MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, 9702 const MDNode *Ranges, bool IsExpanding) { 9703 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9704 9705 MMOFlags |= MachineMemOperand::MOLoad; 9706 assert((MMOFlags & MachineMemOperand::MOStore) == 0); 9707 // If we don't have a PtrInfo, infer the trivial frame index case to simplify 9708 // clients. 
9709 if (PtrInfo.V.isNull()) 9710 PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); 9711 9712 TypeSize Size = MemVT.getStoreSize(); 9713 MachineFunction &MF = getMachineFunction(); 9714 MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, 9715 Alignment, AAInfo, Ranges); 9716 return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT, 9717 MMO, IsExpanding); 9718 } 9719 9720 SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM, 9721 ISD::LoadExtType ExtType, EVT VT, 9722 const SDLoc &dl, SDValue Chain, SDValue Ptr, 9723 SDValue Offset, SDValue Mask, SDValue EVL, 9724 EVT MemVT, MachineMemOperand *MMO, 9725 bool IsExpanding) { 9726 bool Indexed = AM != ISD::UNINDEXED; 9727 assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); 9728 9729 SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other) 9730 : getVTList(VT, MVT::Other); 9731 SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL}; 9732 FoldingSetNodeID ID; 9733 AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops); 9734 ID.AddInteger(MemVT.getRawBits()); 9735 ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>( 9736 dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO)); 9737 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9738 ID.AddInteger(MMO->getFlags()); 9739 void *IP = nullptr; 9740 if (auto *E = cast_or_null<VPLoadSDNode>(FindNodeOrInsertPos(ID, dl, IP))) { 9741 E->refineAlignment(MMO); 9742 E->refineRanges(MMO); 9743 return SDValue(E, 0); 9744 } 9745 auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, 9746 ExtType, IsExpanding, MemVT, MMO); 9747 createOperands(N, Ops); 9748 9749 CSEMap.InsertNode(N, IP); 9750 InsertNode(N); 9751 SDValue V(N, 0); 9752 NewSDValueDbgMsg(V, "Creating new node: ", this); 9753 return V; 9754 } 9755 9756 SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, 9757 SDValue Ptr, SDValue Mask, SDValue EVL, 9758 MachinePointerInfo PtrInfo, 9759 MaybeAlign 
Alignment, 9760 MachineMemOperand::Flags MMOFlags, 9761 const AAMDNodes &AAInfo, const MDNode *Ranges, 9762 bool IsExpanding) { 9763 SDValue Undef = getUNDEF(Ptr.getValueType()); 9764 return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, 9765 Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges, 9766 IsExpanding); 9767 } 9768 9769 SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, 9770 SDValue Ptr, SDValue Mask, SDValue EVL, 9771 MachineMemOperand *MMO, bool IsExpanding) { 9772 SDValue Undef = getUNDEF(Ptr.getValueType()); 9773 return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, 9774 Mask, EVL, VT, MMO, IsExpanding); 9775 } 9776 9777 SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, 9778 EVT VT, SDValue Chain, SDValue Ptr, 9779 SDValue Mask, SDValue EVL, 9780 MachinePointerInfo PtrInfo, EVT MemVT, 9781 MaybeAlign Alignment, 9782 MachineMemOperand::Flags MMOFlags, 9783 const AAMDNodes &AAInfo, bool IsExpanding) { 9784 SDValue Undef = getUNDEF(Ptr.getValueType()); 9785 return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask, 9786 EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr, 9787 IsExpanding); 9788 } 9789 9790 SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, 9791 EVT VT, SDValue Chain, SDValue Ptr, 9792 SDValue Mask, SDValue EVL, EVT MemVT, 9793 MachineMemOperand *MMO, bool IsExpanding) { 9794 SDValue Undef = getUNDEF(Ptr.getValueType()); 9795 return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask, 9796 EVL, MemVT, MMO, IsExpanding); 9797 } 9798 9799 SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl, 9800 SDValue Base, SDValue Offset, 9801 ISD::MemIndexedMode AM) { 9802 auto *LD = cast<VPLoadSDNode>(OrigLoad); 9803 assert(LD->getOffset().isUndef() && "Load is already a indexed load!"); 9804 // Don't propagate the invariant or dereferenceable flags. 
9805 auto MMOFlags = 9806 LD->getMemOperand()->getFlags() & 9807 ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); 9808 return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, 9809 LD->getChain(), Base, Offset, LD->getMask(), 9810 LD->getVectorLength(), LD->getPointerInfo(), 9811 LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(), 9812 nullptr, LD->isExpandingLoad()); 9813 } 9814 9815 SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, 9816 SDValue Ptr, SDValue Offset, SDValue Mask, 9817 SDValue EVL, EVT MemVT, MachineMemOperand *MMO, 9818 ISD::MemIndexedMode AM, bool IsTruncating, 9819 bool IsCompressing) { 9820 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9821 bool Indexed = AM != ISD::UNINDEXED; 9822 assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!"); 9823 SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other) 9824 : getVTList(MVT::Other); 9825 SDValue Ops[] = {Chain, Val, Ptr, Offset, Mask, EVL}; 9826 FoldingSetNodeID ID; 9827 AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); 9828 ID.AddInteger(MemVT.getRawBits()); 9829 ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( 9830 dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); 9831 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9832 ID.AddInteger(MMO->getFlags()); 9833 void *IP = nullptr; 9834 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9835 cast<VPStoreSDNode>(E)->refineAlignment(MMO); 9836 return SDValue(E, 0); 9837 } 9838 auto *N = newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, 9839 IsTruncating, IsCompressing, MemVT, MMO); 9840 createOperands(N, Ops); 9841 9842 CSEMap.InsertNode(N, IP); 9843 InsertNode(N); 9844 SDValue V(N, 0); 9845 NewSDValueDbgMsg(V, "Creating new node: ", this); 9846 return V; 9847 } 9848 9849 SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, 9850 SDValue Val, SDValue Ptr, 
SDValue Mask, 9851 SDValue EVL, MachinePointerInfo PtrInfo, 9852 EVT SVT, Align Alignment, 9853 MachineMemOperand::Flags MMOFlags, 9854 const AAMDNodes &AAInfo, 9855 bool IsCompressing) { 9856 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9857 9858 MMOFlags |= MachineMemOperand::MOStore; 9859 assert((MMOFlags & MachineMemOperand::MOLoad) == 0); 9860 9861 if (PtrInfo.V.isNull()) 9862 PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); 9863 9864 MachineFunction &MF = getMachineFunction(); 9865 MachineMemOperand *MMO = MF.getMachineMemOperand( 9866 PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo); 9867 return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO, 9868 IsCompressing); 9869 } 9870 9871 SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, 9872 SDValue Val, SDValue Ptr, SDValue Mask, 9873 SDValue EVL, EVT SVT, 9874 MachineMemOperand *MMO, 9875 bool IsCompressing) { 9876 EVT VT = Val.getValueType(); 9877 9878 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9879 if (VT == SVT) 9880 return getStoreVP(Chain, dl, Val, Ptr, getUNDEF(Ptr.getValueType()), Mask, 9881 EVL, VT, MMO, ISD::UNINDEXED, 9882 /*IsTruncating*/ false, IsCompressing); 9883 9884 assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && 9885 "Should only be a truncating store, not extending!"); 9886 assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); 9887 assert(VT.isVector() == SVT.isVector() && 9888 "Cannot use trunc store to convert to or from a vector!"); 9889 assert((!VT.isVector() || 9890 VT.getVectorElementCount() == SVT.getVectorElementCount()) && 9891 "Cannot use trunc store to change the number of vector elements!"); 9892 9893 SDVTList VTs = getVTList(MVT::Other); 9894 SDValue Undef = getUNDEF(Ptr.getValueType()); 9895 SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL}; 9896 FoldingSetNodeID ID; 9897 AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); 9898 ID.AddInteger(SVT.getRawBits()); 9899 
ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( 9900 dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO)); 9901 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9902 ID.AddInteger(MMO->getFlags()); 9903 void *IP = nullptr; 9904 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9905 cast<VPStoreSDNode>(E)->refineAlignment(MMO); 9906 return SDValue(E, 0); 9907 } 9908 auto *N = 9909 newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, 9910 ISD::UNINDEXED, true, IsCompressing, SVT, MMO); 9911 createOperands(N, Ops); 9912 9913 CSEMap.InsertNode(N, IP); 9914 InsertNode(N); 9915 SDValue V(N, 0); 9916 NewSDValueDbgMsg(V, "Creating new node: ", this); 9917 return V; 9918 } 9919 9920 SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, 9921 SDValue Base, SDValue Offset, 9922 ISD::MemIndexedMode AM) { 9923 auto *ST = cast<VPStoreSDNode>(OrigStore); 9924 assert(ST->getOffset().isUndef() && "Store is already an indexed store!"); 9925 SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); 9926 SDValue Ops[] = {ST->getChain(), ST->getValue(), Base, 9927 Offset, ST->getMask(), ST->getVectorLength()}; 9928 FoldingSetNodeID ID; 9929 AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); 9930 ID.AddInteger(ST->getMemoryVT().getRawBits()); 9931 ID.AddInteger(ST->getRawSubclassData()); 9932 ID.AddInteger(ST->getPointerInfo().getAddrSpace()); 9933 ID.AddInteger(ST->getMemOperand()->getFlags()); 9934 void *IP = nullptr; 9935 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 9936 return SDValue(E, 0); 9937 9938 auto *N = newSDNode<VPStoreSDNode>( 9939 dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(), 9940 ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand()); 9941 createOperands(N, Ops); 9942 9943 CSEMap.InsertNode(N, IP); 9944 InsertNode(N); 9945 SDValue V(N, 0); 9946 NewSDValueDbgMsg(V, "Creating new node: ", this); 9947 return V; 9948 } 9949 9950 SDValue SelectionDAG::getStridedLoadVP( 9951 
ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, 9952 SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, 9953 SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) { 9954 bool Indexed = AM != ISD::UNINDEXED; 9955 assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); 9956 9957 SDValue Ops[] = {Chain, Ptr, Offset, Stride, Mask, EVL}; 9958 SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other) 9959 : getVTList(VT, MVT::Other); 9960 FoldingSetNodeID ID; 9961 AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VTs, Ops); 9962 ID.AddInteger(VT.getRawBits()); 9963 ID.AddInteger(getSyntheticNodeSubclassData<VPStridedLoadSDNode>( 9964 DL.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO)); 9965 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9966 9967 void *IP = nullptr; 9968 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 9969 cast<VPStridedLoadSDNode>(E)->refineAlignment(MMO); 9970 return SDValue(E, 0); 9971 } 9972 9973 auto *N = 9974 newSDNode<VPStridedLoadSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, AM, 9975 ExtType, IsExpanding, MemVT, MMO); 9976 createOperands(N, Ops); 9977 CSEMap.InsertNode(N, IP); 9978 InsertNode(N); 9979 SDValue V(N, 0); 9980 NewSDValueDbgMsg(V, "Creating new node: ", this); 9981 return V; 9982 } 9983 9984 SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, 9985 SDValue Ptr, SDValue Stride, 9986 SDValue Mask, SDValue EVL, 9987 MachineMemOperand *MMO, 9988 bool IsExpanding) { 9989 SDValue Undef = getUNDEF(Ptr.getValueType()); 9990 return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr, 9991 Undef, Stride, Mask, EVL, VT, MMO, IsExpanding); 9992 } 9993 9994 SDValue SelectionDAG::getExtStridedLoadVP( 9995 ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain, 9996 SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, 9997 MachineMemOperand *MMO, bool IsExpanding) { 
9998 SDValue Undef = getUNDEF(Ptr.getValueType()); 9999 return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef, 10000 Stride, Mask, EVL, MemVT, MMO, IsExpanding); 10001 } 10002 10003 SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL, 10004 SDValue Val, SDValue Ptr, 10005 SDValue Offset, SDValue Stride, 10006 SDValue Mask, SDValue EVL, EVT MemVT, 10007 MachineMemOperand *MMO, 10008 ISD::MemIndexedMode AM, 10009 bool IsTruncating, bool IsCompressing) { 10010 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 10011 bool Indexed = AM != ISD::UNINDEXED; 10012 assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!"); 10013 SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other) 10014 : getVTList(MVT::Other); 10015 SDValue Ops[] = {Chain, Val, Ptr, Offset, Stride, Mask, EVL}; 10016 FoldingSetNodeID ID; 10017 AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops); 10018 ID.AddInteger(MemVT.getRawBits()); 10019 ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>( 10020 DL.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); 10021 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10022 void *IP = nullptr; 10023 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 10024 cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO); 10025 return SDValue(E, 0); 10026 } 10027 auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(), 10028 VTs, AM, IsTruncating, 10029 IsCompressing, MemVT, MMO); 10030 createOperands(N, Ops); 10031 10032 CSEMap.InsertNode(N, IP); 10033 InsertNode(N); 10034 SDValue V(N, 0); 10035 NewSDValueDbgMsg(V, "Creating new node: ", this); 10036 return V; 10037 } 10038 10039 SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, 10040 SDValue Val, SDValue Ptr, 10041 SDValue Stride, SDValue Mask, 10042 SDValue EVL, EVT SVT, 10043 MachineMemOperand *MMO, 10044 bool IsCompressing) { 10045 EVT VT = 
Val.getValueType(); 10046 10047 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 10048 if (VT == SVT) 10049 return getStridedStoreVP(Chain, DL, Val, Ptr, getUNDEF(Ptr.getValueType()), 10050 Stride, Mask, EVL, VT, MMO, ISD::UNINDEXED, 10051 /*IsTruncating*/ false, IsCompressing); 10052 10053 assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && 10054 "Should only be a truncating store, not extending!"); 10055 assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); 10056 assert(VT.isVector() == SVT.isVector() && 10057 "Cannot use trunc store to convert to or from a vector!"); 10058 assert((!VT.isVector() || 10059 VT.getVectorElementCount() == SVT.getVectorElementCount()) && 10060 "Cannot use trunc store to change the number of vector elements!"); 10061 10062 SDVTList VTs = getVTList(MVT::Other); 10063 SDValue Undef = getUNDEF(Ptr.getValueType()); 10064 SDValue Ops[] = {Chain, Val, Ptr, Undef, Stride, Mask, EVL}; 10065 FoldingSetNodeID ID; 10066 AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops); 10067 ID.AddInteger(SVT.getRawBits()); 10068 ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>( 10069 DL.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO)); 10070 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10071 void *IP = nullptr; 10072 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 10073 cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO); 10074 return SDValue(E, 0); 10075 } 10076 auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(), 10077 VTs, ISD::UNINDEXED, true, 10078 IsCompressing, SVT, MMO); 10079 createOperands(N, Ops); 10080 10081 CSEMap.InsertNode(N, IP); 10082 InsertNode(N); 10083 SDValue V(N, 0); 10084 NewSDValueDbgMsg(V, "Creating new node: ", this); 10085 return V; 10086 } 10087 10088 SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, 10089 ArrayRef<SDValue> Ops, MachineMemOperand *MMO, 10090 ISD::MemIndexType 
IndexType) { 10091 assert(Ops.size() == 6 && "Incompatible number of operands"); 10092 10093 FoldingSetNodeID ID; 10094 AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops); 10095 ID.AddInteger(VT.getRawBits()); 10096 ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>( 10097 dl.getIROrder(), VTs, VT, MMO, IndexType)); 10098 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10099 ID.AddInteger(MMO->getFlags()); 10100 void *IP = nullptr; 10101 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 10102 cast<VPGatherSDNode>(E)->refineAlignment(MMO); 10103 return SDValue(E, 0); 10104 } 10105 10106 auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, 10107 VT, MMO, IndexType); 10108 createOperands(N, Ops); 10109 10110 assert(N->getMask().getValueType().getVectorElementCount() == 10111 N->getValueType(0).getVectorElementCount() && 10112 "Vector width mismatch between mask and data"); 10113 assert(N->getIndex().getValueType().getVectorElementCount().isScalable() == 10114 N->getValueType(0).getVectorElementCount().isScalable() && 10115 "Scalable flags of index and data do not match"); 10116 assert(ElementCount::isKnownGE( 10117 N->getIndex().getValueType().getVectorElementCount(), 10118 N->getValueType(0).getVectorElementCount()) && 10119 "Vector width mismatch between index and data"); 10120 assert(isa<ConstantSDNode>(N->getScale()) && 10121 N->getScale()->getAsAPIntVal().isPowerOf2() && 10122 "Scale should be a constant power of 2"); 10123 10124 CSEMap.InsertNode(N, IP); 10125 InsertNode(N); 10126 SDValue V(N, 0); 10127 NewSDValueDbgMsg(V, "Creating new node: ", this); 10128 return V; 10129 } 10130 10131 SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, 10132 ArrayRef<SDValue> Ops, 10133 MachineMemOperand *MMO, 10134 ISD::MemIndexType IndexType) { 10135 assert(Ops.size() == 7 && "Incompatible number of operands"); 10136 10137 FoldingSetNodeID ID; 10138 AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops); 10139 
ID.AddInteger(VT.getRawBits()); 10140 ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>( 10141 dl.getIROrder(), VTs, VT, MMO, IndexType)); 10142 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10143 ID.AddInteger(MMO->getFlags()); 10144 void *IP = nullptr; 10145 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 10146 cast<VPScatterSDNode>(E)->refineAlignment(MMO); 10147 return SDValue(E, 0); 10148 } 10149 auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, 10150 VT, MMO, IndexType); 10151 createOperands(N, Ops); 10152 10153 assert(N->getMask().getValueType().getVectorElementCount() == 10154 N->getValue().getValueType().getVectorElementCount() && 10155 "Vector width mismatch between mask and data"); 10156 assert( 10157 N->getIndex().getValueType().getVectorElementCount().isScalable() == 10158 N->getValue().getValueType().getVectorElementCount().isScalable() && 10159 "Scalable flags of index and data do not match"); 10160 assert(ElementCount::isKnownGE( 10161 N->getIndex().getValueType().getVectorElementCount(), 10162 N->getValue().getValueType().getVectorElementCount()) && 10163 "Vector width mismatch between index and data"); 10164 assert(isa<ConstantSDNode>(N->getScale()) && 10165 N->getScale()->getAsAPIntVal().isPowerOf2() && 10166 "Scale should be a constant power of 2"); 10167 10168 CSEMap.InsertNode(N, IP); 10169 InsertNode(N); 10170 SDValue V(N, 0); 10171 NewSDValueDbgMsg(V, "Creating new node: ", this); 10172 return V; 10173 } 10174 10175 SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, 10176 SDValue Base, SDValue Offset, SDValue Mask, 10177 SDValue PassThru, EVT MemVT, 10178 MachineMemOperand *MMO, 10179 ISD::MemIndexedMode AM, 10180 ISD::LoadExtType ExtTy, bool isExpanding) { 10181 bool Indexed = AM != ISD::UNINDEXED; 10182 assert((Indexed || Offset.isUndef()) && 10183 "Unindexed masked load with an offset!"); 10184 SDVTList VTs = Indexed ? 
getVTList(VT, Base.getValueType(), MVT::Other) 10185 : getVTList(VT, MVT::Other); 10186 SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru}; 10187 FoldingSetNodeID ID; 10188 AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); 10189 ID.AddInteger(MemVT.getRawBits()); 10190 ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( 10191 dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO)); 10192 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10193 ID.AddInteger(MMO->getFlags()); 10194 void *IP = nullptr; 10195 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 10196 cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); 10197 return SDValue(E, 0); 10198 } 10199 auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, 10200 AM, ExtTy, isExpanding, MemVT, MMO); 10201 createOperands(N, Ops); 10202 10203 CSEMap.InsertNode(N, IP); 10204 InsertNode(N); 10205 SDValue V(N, 0); 10206 NewSDValueDbgMsg(V, "Creating new node: ", this); 10207 return V; 10208 } 10209 10210 SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, 10211 SDValue Base, SDValue Offset, 10212 ISD::MemIndexedMode AM) { 10213 MaskedLoadSDNode *LD = cast<MaskedLoadSDNode>(OrigLoad); 10214 assert(LD->getOffset().isUndef() && "Masked load is already a indexed load!"); 10215 return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base, 10216 Offset, LD->getMask(), LD->getPassThru(), 10217 LD->getMemoryVT(), LD->getMemOperand(), AM, 10218 LD->getExtensionType(), LD->isExpandingLoad()); 10219 } 10220 10221 SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, 10222 SDValue Val, SDValue Base, SDValue Offset, 10223 SDValue Mask, EVT MemVT, 10224 MachineMemOperand *MMO, 10225 ISD::MemIndexedMode AM, bool IsTruncating, 10226 bool IsCompressing) { 10227 assert(Chain.getValueType() == MVT::Other && 10228 "Invalid chain type"); 10229 bool Indexed = AM != ISD::UNINDEXED; 10230 assert((Indexed || Offset.isUndef()) && 10231 "Unindexed masked store with 
an offset!"); 10232 SDVTList VTs = Indexed ? getVTList(Base.getValueType(), MVT::Other) 10233 : getVTList(MVT::Other); 10234 SDValue Ops[] = {Chain, Val, Base, Offset, Mask}; 10235 FoldingSetNodeID ID; 10236 AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); 10237 ID.AddInteger(MemVT.getRawBits()); 10238 ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( 10239 dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); 10240 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10241 ID.AddInteger(MMO->getFlags()); 10242 void *IP = nullptr; 10243 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 10244 cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); 10245 return SDValue(E, 0); 10246 } 10247 auto *N = 10248 newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, 10249 IsTruncating, IsCompressing, MemVT, MMO); 10250 createOperands(N, Ops); 10251 10252 CSEMap.InsertNode(N, IP); 10253 InsertNode(N); 10254 SDValue V(N, 0); 10255 NewSDValueDbgMsg(V, "Creating new node: ", this); 10256 return V; 10257 } 10258 10259 SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, 10260 SDValue Base, SDValue Offset, 10261 ISD::MemIndexedMode AM) { 10262 MaskedStoreSDNode *ST = cast<MaskedStoreSDNode>(OrigStore); 10263 assert(ST->getOffset().isUndef() && 10264 "Masked store is already a indexed store!"); 10265 return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset, 10266 ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(), 10267 AM, ST->isTruncatingStore(), ST->isCompressingStore()); 10268 } 10269 10270 SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, 10271 ArrayRef<SDValue> Ops, 10272 MachineMemOperand *MMO, 10273 ISD::MemIndexType IndexType, 10274 ISD::LoadExtType ExtTy) { 10275 assert(Ops.size() == 6 && "Incompatible number of operands"); 10276 10277 FoldingSetNodeID ID; 10278 AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); 10279 ID.AddInteger(MemVT.getRawBits()); 10280 
ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( 10281 dl.getIROrder(), VTs, MemVT, MMO, IndexType, ExtTy)); 10282 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10283 ID.AddInteger(MMO->getFlags()); 10284 void *IP = nullptr; 10285 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 10286 cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); 10287 return SDValue(E, 0); 10288 } 10289 10290 auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), 10291 VTs, MemVT, MMO, IndexType, ExtTy); 10292 createOperands(N, Ops); 10293 10294 assert(N->getPassThru().getValueType() == N->getValueType(0) && 10295 "Incompatible type of the PassThru value in MaskedGatherSDNode"); 10296 assert(N->getMask().getValueType().getVectorElementCount() == 10297 N->getValueType(0).getVectorElementCount() && 10298 "Vector width mismatch between mask and data"); 10299 assert(N->getIndex().getValueType().getVectorElementCount().isScalable() == 10300 N->getValueType(0).getVectorElementCount().isScalable() && 10301 "Scalable flags of index and data do not match"); 10302 assert(ElementCount::isKnownGE( 10303 N->getIndex().getValueType().getVectorElementCount(), 10304 N->getValueType(0).getVectorElementCount()) && 10305 "Vector width mismatch between index and data"); 10306 assert(isa<ConstantSDNode>(N->getScale()) && 10307 N->getScale()->getAsAPIntVal().isPowerOf2() && 10308 "Scale should be a constant power of 2"); 10309 10310 CSEMap.InsertNode(N, IP); 10311 InsertNode(N); 10312 SDValue V(N, 0); 10313 NewSDValueDbgMsg(V, "Creating new node: ", this); 10314 return V; 10315 } 10316 10317 SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, 10318 ArrayRef<SDValue> Ops, 10319 MachineMemOperand *MMO, 10320 ISD::MemIndexType IndexType, 10321 bool IsTrunc) { 10322 assert(Ops.size() == 6 && "Incompatible number of operands"); 10323 10324 FoldingSetNodeID ID; 10325 AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); 10326 
ID.AddInteger(MemVT.getRawBits()); 10327 ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( 10328 dl.getIROrder(), VTs, MemVT, MMO, IndexType, IsTrunc)); 10329 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10330 ID.AddInteger(MMO->getFlags()); 10331 void *IP = nullptr; 10332 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 10333 cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); 10334 return SDValue(E, 0); 10335 } 10336 10337 auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), 10338 VTs, MemVT, MMO, IndexType, IsTrunc); 10339 createOperands(N, Ops); 10340 10341 assert(N->getMask().getValueType().getVectorElementCount() == 10342 N->getValue().getValueType().getVectorElementCount() && 10343 "Vector width mismatch between mask and data"); 10344 assert( 10345 N->getIndex().getValueType().getVectorElementCount().isScalable() == 10346 N->getValue().getValueType().getVectorElementCount().isScalable() && 10347 "Scalable flags of index and data do not match"); 10348 assert(ElementCount::isKnownGE( 10349 N->getIndex().getValueType().getVectorElementCount(), 10350 N->getValue().getValueType().getVectorElementCount()) && 10351 "Vector width mismatch between index and data"); 10352 assert(isa<ConstantSDNode>(N->getScale()) && 10353 N->getScale()->getAsAPIntVal().isPowerOf2() && 10354 "Scale should be a constant power of 2"); 10355 10356 CSEMap.InsertNode(N, IP); 10357 InsertNode(N); 10358 SDValue V(N, 0); 10359 NewSDValueDbgMsg(V, "Creating new node: ", this); 10360 return V; 10361 } 10362 10363 SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT, 10364 const SDLoc &dl, ArrayRef<SDValue> Ops, 10365 MachineMemOperand *MMO, 10366 ISD::MemIndexType IndexType) { 10367 assert(Ops.size() == 7 && "Incompatible number of operands"); 10368 10369 FoldingSetNodeID ID; 10370 AddNodeIDNode(ID, ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, VTs, Ops); 10371 ID.AddInteger(MemVT.getRawBits()); 10372 
ID.AddInteger(getSyntheticNodeSubclassData<MaskedHistogramSDNode>( 10373 dl.getIROrder(), VTs, MemVT, MMO, IndexType)); 10374 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10375 ID.AddInteger(MMO->getFlags()); 10376 void *IP = nullptr; 10377 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 10378 cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); 10379 return SDValue(E, 0); 10380 } 10381 10382 auto *N = newSDNode<MaskedHistogramSDNode>(dl.getIROrder(), dl.getDebugLoc(), 10383 VTs, MemVT, MMO, IndexType); 10384 createOperands(N, Ops); 10385 10386 assert(N->getMask().getValueType().getVectorElementCount() == 10387 N->getIndex().getValueType().getVectorElementCount() && 10388 "Vector width mismatch between mask and data"); 10389 assert(isa<ConstantSDNode>(N->getScale()) && 10390 N->getScale()->getAsAPIntVal().isPowerOf2() && 10391 "Scale should be a constant power of 2"); 10392 assert(N->getInc().getValueType().isInteger() && "Non integer update value"); 10393 10394 CSEMap.InsertNode(N, IP); 10395 InsertNode(N); 10396 SDValue V(N, 0); 10397 NewSDValueDbgMsg(V, "Creating new node: ", this); 10398 return V; 10399 } 10400 10401 SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, 10402 EVT MemVT, MachineMemOperand *MMO) { 10403 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 10404 SDVTList VTs = getVTList(MVT::Other); 10405 SDValue Ops[] = {Chain, Ptr}; 10406 FoldingSetNodeID ID; 10407 AddNodeIDNode(ID, ISD::GET_FPENV_MEM, VTs, Ops); 10408 ID.AddInteger(MemVT.getRawBits()); 10409 ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>( 10410 ISD::GET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO)); 10411 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10412 ID.AddInteger(MMO->getFlags()); 10413 void *IP = nullptr; 10414 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 10415 return SDValue(E, 0); 10416 10417 auto *N = newSDNode<FPStateAccessSDNode>(ISD::GET_FPENV_MEM, dl.getIROrder(), 10418 dl.getDebugLoc(), 
VTs, MemVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

/// Create (or reuse via CSE) an ISD::SET_FPENV_MEM node.
///
/// The node takes \p Chain and \p Ptr as operands, produces a single
/// MVT::Other result, and is allocated as an FPStateAccessSDNode carrying
/// \p MemVT and \p MMO. \p Chain must have type MVT::Other.
SDValue SelectionDAG::getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
                                  EVT MemVT, MachineMemOperand *MMO) {
  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
  SDVTList VTs = getVTList(MVT::Other);
  SDValue Ops[] = {Chain, Ptr};
  // Besides opcode/VTs/operands, the CSE key also covers the memory VT, the
  // synthetic subclass data, and the MMO's address space and flags.
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::SET_FPENV_MEM, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>(
      ISD::SET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  // An identical node already exists: hand it back instead of creating one.
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
    return SDValue(E, 0);

  auto *N = newSDNode<FPStateAccessSDNode>(ISD::SET_FPENV_MEM, dl.getIROrder(),
                                           dl.getDebugLoc(), VTs, MemVT, MMO);
  createOperands(N, Ops);

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

/// Try to simplify a select with condition \p Cond, true value \p T and false
/// value \p F. Returns \p T or \p F when one of the patterns below matches,
/// and an empty SDValue otherwise.
SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
  // select undef, T, F --> T (if T is a constant), otherwise F
  // select ?, undef, F --> F
  // select ?, T, undef --> T
  if (Cond.isUndef())
    return isConstantValueOfAnyType(T) ? T : F;
  if (T.isUndef())
    return F;
  if (F.isUndef())
    return T;

  // select true, T, F --> T
  // select false, T, F --> F
  if (auto C = isBoolConstant(Cond))
    return *C ? T : F;

  // select ?, T, T --> T
  if (T == F)
    return T;

  return SDValue();
}

/// Try to simplify a shift of \p X by the amount \p Y. Returns a zero
/// constant, undef, or \p X when one of the patterns below matches, and an
/// empty SDValue otherwise. The result (when non-empty) has the type of \p X.
SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
  // shift undef, Y --> 0 (can always assume that the undef value is 0)
  if (X.isUndef())
    return getConstant(0, SDLoc(X.getNode()), X.getValueType());
  // shift X, undef --> undef (because it may shift by the bitwidth)
  if (Y.isUndef())
    return getUNDEF(X.getValueType());

  // shift 0, Y --> 0
  // shift X, 0 --> X
  // (In both cases the value to return is X itself.)
  if (isNullOrNullSplat(X) || isNullOrNullSplat(Y))
    return X;

  // shift X, C >= bitwidth(X) --> undef
  // All vector elements must be too big (or undef) to avoid partial undefs.
  // A null Val is treated as "too big" by the predicate.
  auto isShiftTooBig = [X](ConstantSDNode *Val) {
    return !Val || Val->getAPIntValue().uge(X.getScalarValueSizeInBits());
  };
  if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true))
    return getUNDEF(X.getValueType());

  // shift i1/vXi1 X, Y --> X (any non-zero shift amount is undefined).
  if (X.getValueType().getScalarType() == MVT::i1)
    return X;

  return SDValue();
}

/// Try to simplify the floating-point binary operation \p Opcode applied to
/// \p X and \p Y under fast-math flags \p Flags. Returns undef, \p X, or a
/// zero FP constant when one of the patterns below matches, and an empty
/// SDValue otherwise. Apart from the nnan/ninf checks, only patterns with a
/// constant (or constant-splat) \p Y are handled.
SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y,
                                      SDNodeFlags Flags) {
  // If this operation has 'nnan' or 'ninf' and at least 1 disallowed operand
  // (an undef operand can be chosen to be Nan/Inf), then the result of this
  // operation is poison. That result can be relaxed to undef.
  ConstantFPSDNode *XC = isConstOrConstSplatFP(X, /* AllowUndefs */ true);
  ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true);
  bool HasNan = (XC && XC->getValueAPF().isNaN()) ||
                (YC && YC->getValueAPF().isNaN());
  bool HasInf = (XC && XC->getValueAPF().isInfinity()) ||
                (YC && YC->getValueAPF().isInfinity());

  if (Flags.hasNoNaNs() && (HasNan || X.isUndef() || Y.isUndef()))
    return getUNDEF(X.getValueType());

  if (Flags.hasNoInfs() && (HasInf || X.isUndef() || Y.isUndef()))
    return getUNDEF(X.getValueType());

  // Every remaining fold inspects the constant on the right-hand side.
  if (!YC)
    return SDValue();

  // X + -0.0 --> X
  if (Opcode == ISD::FADD)
    if (YC->getValueAPF().isNegZero())
      return X;

  // X - +0.0 --> X
  if (Opcode == ISD::FSUB)
    if (YC->getValueAPF().isPosZero())
      return X;

  // X * 1.0 --> X
  // X / 1.0 --> X
  if (Opcode == ISD::FMUL || Opcode == ISD::FDIV)
    if (YC->getValueAPF().isExactlyValue(1.0))
      return X;

  // X * 0.0 --> 0.0
  // Only done when both 'nnan' and 'nsz' are set.
  if (Opcode == ISD::FMUL && Flags.hasNoNaNs() && Flags.hasNoSignedZeros())
    if (YC->getValueAPF().isZero())
      return getConstantFP(0.0, SDLoc(Y), Y.getValueType());

  return SDValue();
}

/// Create an ISD::VAARG node with results {\p VT, MVT::Other} and operands
/// {\p Chain, \p Ptr, \p SV, \p Align}; \p Align is wrapped in an i32 target
/// constant.
SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
                               SDValue Ptr, SDValue SV, unsigned Align) {
  SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
  return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops);
}

/// getNode overload taking the operands as an array of SDUse. Zero to three
/// operands are forwarded to the fixed-arity overloads; longer lists are
/// copied into an SDValue vector first.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              ArrayRef<SDUse> Ops) {
  switch (Ops.size()) {
  case 0: return getNode(Opcode, DL, VT);
  case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0]));
  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
  default: break;
  }

  // Copy from an SDUse array into an SDValue array for use with
  // the regular getNode logic.
  SmallVector<SDValue, 8> NewOps(Ops);
  return getNode(Opcode, DL, VT, NewOps);
}

/// getNode overload without explicit flags: uses the flags reported by
/// Inserter when one is set, and default-constructed flags otherwise.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              ArrayRef<SDValue> Ops) {
  SDNodeFlags Flags;
  if (Inserter)
    Flags = Inserter->getFlags();
  return getNode(Opcode, DL, VT, Ops, Flags);
}

/// Create (or reuse via CSE) a single-result node of type \p VT with an
/// arbitrary number of operands.
///
/// Zero to three operands are forwarded to the fixed-arity overloads. For
/// longer operand lists this applies the opcode-specific folds, assertions
/// and i1 opcode rewrites below, then memoizes the node unless \p VT is
/// MVT::Glue.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                              ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
  unsigned NumOps = Ops.size();
  switch (NumOps) {
  case 0: return getNode(Opcode, DL, VT);
  case 1: return getNode(Opcode, DL, VT, Ops[0], Flags);
  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2], Flags);
  default: break;
  }

#ifndef NDEBUG
  for (const auto &Op : Ops)
    assert(Op.getOpcode() != ISD::DELETED_NODE &&
           "Operand is DELETED_NODE!");
#endif

  switch (Opcode) {
  default: break;
  case ISD::BUILD_VECTOR:
    // Attempt to simplify BUILD_VECTOR.
    if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
      return V;
    break;
  case ISD::CONCAT_VECTORS:
    // Attempt to simplify CONCAT_VECTORS.
    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
      return V;
    break;
  case ISD::SELECT_CC:
    // Sanity checks only; no folding is done for SELECT_CC here.
    assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
    assert(Ops[0].getValueType() == Ops[1].getValueType() &&
           "LHS and RHS of condition must have same type!");
    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
           "True and False arms of SelectCC must have same type!");
    assert(Ops[2].getValueType() == VT &&
           "select_cc node must be of same type as true and false value!");
    assert((!Ops[0].getValueType().isVector() ||
            Ops[0].getValueType().getVectorElementCount() ==
                VT.getVectorElementCount()) &&
           "Expected select_cc with vector result to have the same sized "
           "comparison type!");
    break;
  case ISD::BR_CC:
    // Sanity checks only.
    assert(NumOps == 5 && "BR_CC takes 5 operands!");
    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
           "LHS/RHS of comparison should match types!");
    break;
  case ISD::VP_ADD:
  case ISD::VP_SUB:
    // If it is VP_ADD/VP_SUB mask operation then turn it to VP_XOR
    if (VT.getScalarType() == MVT::i1)
      Opcode = ISD::VP_XOR;
    break;
  case ISD::VP_MUL:
    // If it is VP_MUL mask operation then turn it to VP_AND
    if (VT.getScalarType() == MVT::i1)
      Opcode = ISD::VP_AND;
    break;
  case ISD::VP_REDUCE_MUL:
    // If it is VP_REDUCE_MUL mask operation then turn it to VP_REDUCE_AND
    if (VT == MVT::i1)
      Opcode = ISD::VP_REDUCE_AND;
    break;
  case ISD::VP_REDUCE_ADD:
    // If it is VP_REDUCE_ADD mask operation then turn it to VP_REDUCE_XOR
    if (VT == MVT::i1)
      Opcode = ISD::VP_REDUCE_XOR;
    break;
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_UMIN:
    // If it is VP_REDUCE_SMAX/VP_REDUCE_UMIN mask operation then turn it to
    // VP_REDUCE_AND.
    if (VT == MVT::i1)
      Opcode = ISD::VP_REDUCE_AND;
    break;
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_UMAX:
    // If it is VP_REDUCE_SMIN/VP_REDUCE_UMAX mask operation then turn it to
    // VP_REDUCE_OR.
    if (VT == MVT::i1)
      Opcode = ISD::VP_REDUCE_OR;
    break;
  }

  // Memoize nodes.
  SDNode *N;
  SDVTList VTs = getVTList(VT);

  // Nodes producing MVT::Glue are never entered into the CSE map.
  if (VT != MVT::Glue) {
    FoldingSetNodeID ID;
    AddNodeIDNode(ID, Opcode, VTs, Ops);
    void *IP = nullptr;

    // Reuse an existing identical node; its flags are intersected with the
    // requested ones.
    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
      E->intersectFlagsWith(Flags);
      return SDValue(E, 0);
    }

    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
    createOperands(N, Ops);

    CSEMap.InsertNode(N, IP);
  } else {
    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
    createOperands(N, Ops);
  }

  N->setFlags(Flags);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

/// getNode overload taking the result types as an array; the types are
/// converted to an SDVTList via getVTList.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
                              ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) {
  return getNode(Opcode, DL, getVTList(ResultTys), Ops);
}

/// getNode overload without explicit flags: uses the flags reported by
/// Inserter when one is set, and default-constructed flags otherwise.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
                              ArrayRef<SDValue> Ops) {
  SDNodeFlags Flags;
  if (Inserter)
    Flags = Inserter->getFlags();
  return getNode(Opcode, DL, VTList, Ops, Flags);
}

/// Create (or reuse via CSE) a node with the result types in \p VTList.
///
/// A single-entry \p VTList is forwarded to the EVT overload. Otherwise the
/// opcode-specific assertions and folds below are applied (overflow
/// arithmetic, MUL_LOHI and FFREXP constant folding, strict FP cast checks),
/// and the node is memoized unless its last result type is MVT::Glue. Folded
/// multi-result values are returned as ISD::MERGE_VALUES nodes.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
                              ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
  if (VTList.NumVTs == 1)
    return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags);

#ifndef NDEBUG
  for (const auto &Op : Ops)
    assert(Op.getOpcode() != ISD::DELETED_NODE &&
           "Operand is DELETED_NODE!");
#endif

  switch (Opcode) {
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO: {
    assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
           "Invalid add/sub overflow op!");
    assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() &&
           Ops[0].getValueType() == Ops[1].getValueType() &&
           Ops[0].getValueType() == VTList.VTs[0] &&
           "Binary operator types must match!");
    SDValue N1 = Ops[0], N2 = Ops[1];
    canonicalizeCommutativeBinop(Opcode, N1, N2);

    // (X +- 0) -> X with zero-overflow.
    ConstantSDNode *N2CV = isConstOrConstSplat(N2, /*AllowUndefs*/ false,
                                               /*AllowTruncation*/ true);
    if (N2CV && N2CV->isZero()) {
      SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]);
      return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags);
    }

    // i1 (or vXi1) value with i1 (or vXi1) overflow result: expand into
    // logic ops on frozen copies of the operands.
    if (VTList.VTs[0].getScalarType() == MVT::i1 &&
        VTList.VTs[1].getScalarType() == MVT::i1) {
      SDValue F1 = getFreeze(N1);
      SDValue F2 = getFreeze(N2);
      // {vXi1,vXi1} (u/s)addo(vXi1 x, vXi1y) -> {xor(x,y),and(x,y)}
      if (Opcode == ISD::UADDO || Opcode == ISD::SADDO)
        return getNode(ISD::MERGE_VALUES, DL, VTList,
                       {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2),
                        getNode(ISD::AND, DL, VTList.VTs[1], F1, F2)},
                       Flags);
      // {vXi1,vXi1} (u/s)subo(vXi1 x, vXi1y) -> {xor(x,y),and(~x,y)}
      if (Opcode == ISD::USUBO || Opcode == ISD::SSUBO) {
        SDValue NotF1 = getNOT(DL, F1, VTList.VTs[0]);
        return getNode(ISD::MERGE_VALUES, DL, VTList,
                       {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2),
                        getNode(ISD::AND, DL, VTList.VTs[1], NotF1, F2)},
                       Flags);
      }
    }
    break;
  }
  case ISD::SADDO_CARRY:
  case ISD::UADDO_CARRY:
  case ISD::SSUBO_CARRY:
  case ISD::USUBO_CARRY:
    // Sanity checks only: two results, three operands, the third operand
    // having the type of the second result.
    assert(VTList.NumVTs == 2 && Ops.size() == 3 &&
           "Invalid add/sub overflow op!");
    assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() &&
           Ops[0].getValueType() == Ops[1].getValueType() &&
           Ops[0].getValueType() == VTList.VTs[0] &&
           Ops[2].getValueType() == VTList.VTs[1] &&
           "Binary operator types must match!");
    break;
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI: {
    assert(VTList.NumVTs == 2 && Ops.size() == 2 && "Invalid mul lo/hi op!");
    assert(VTList.VTs[0].isInteger() && VTList.VTs[0] == VTList.VTs[1] &&
           VTList.VTs[0] == Ops[0].getValueType() &&
           VTList.VTs[0] == Ops[1].getValueType() &&
           "Binary operator types must match!");
    // Constant fold.
    ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(Ops[0]);
    ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ops[1]);
    if (LHS && RHS) {
      // Multiply at twice the width (sign- or zero-extended according to the
      // opcode) so that both halves of the product are available.
      unsigned Width = VTList.VTs[0].getScalarSizeInBits();
      unsigned OutWidth = Width * 2;
      APInt Val = LHS->getAPIntValue();
      APInt Mul = RHS->getAPIntValue();
      if (Opcode == ISD::SMUL_LOHI) {
        Val = Val.sext(OutWidth);
        Mul = Mul.sext(OutWidth);
      } else {
        Val = Val.zext(OutWidth);
        Mul = Mul.zext(OutWidth);
      }
      Val *= Mul;

      // Hi is the upper Width bits of the product, Lo the lower Width bits.
      SDValue Hi =
          getConstant(Val.extractBits(Width, Width), DL, VTList.VTs[0]);
      SDValue Lo = getConstant(Val.trunc(Width), DL, VTList.VTs[0]);
      return getNode(ISD::MERGE_VALUES, DL, VTList, {Lo, Hi}, Flags);
    }
    break;
  }
  case ISD::FFREXP: {
    assert(VTList.NumVTs == 2 && Ops.size() == 1 && "Invalid ffrexp op!");
    assert(VTList.VTs[0].isFloatingPoint() && VTList.VTs[1].isInteger() &&
           VTList.VTs[0] == Ops[0].getValueType() && "frexp type mismatch");

    // Constant fold a scalar FP constant operand.
    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Ops[0])) {
      int FrexpExp;
      APFloat FrexpMant =
          frexp(C->getValueAPF(), FrexpExp, APFloat::rmNearestTiesToEven);
      SDValue Result0 = getConstantFP(FrexpMant, DL, VTList.VTs[0]);
      // The exponent result is forced to 0 when the mantissa is not finite.
      SDValue Result1 =
          getConstant(FrexpMant.isFinite() ? FrexpExp : 0, DL, VTList.VTs[1]);
      return getNode(ISD::MERGE_VALUES, DL, VTList, {Result0, Result1}, Flags);
    }

    break;
  }
  case ISD::STRICT_FP_EXTEND:
    // Sanity checks only. Ops[1] is the value being extended.
    assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
           "Invalid STRICT_FP_EXTEND!");
    assert(VTList.VTs[0].isFloatingPoint() &&
           Ops[1].getValueType().isFloatingPoint() && "Invalid FP cast!");
    assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() &&
           "STRICT_FP_EXTEND result type should be vector iff the operand "
           "type is vector!");
    assert((!VTList.VTs[0].isVector() ||
            VTList.VTs[0].getVectorElementCount() ==
                Ops[1].getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) &&
           "Invalid fpext node, dst <= src!");
    break;
  case ISD::STRICT_FP_ROUND:
    // Sanity checks only. Ops[1] is the value being rounded and Ops[2] must
    // be a target constant equal to 0 or 1.
    assert(VTList.NumVTs == 2 && Ops.size() == 3 && "Invalid STRICT_FP_ROUND!");
    assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() &&
           "STRICT_FP_ROUND result type should be vector iff the operand "
           "type is vector!");
    assert((!VTList.VTs[0].isVector() ||
            VTList.VTs[0].getVectorElementCount() ==
                Ops[1].getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(VTList.VTs[0].isFloatingPoint() &&
           Ops[1].getValueType().isFloatingPoint() &&
           VTList.VTs[0].bitsLT(Ops[1].getValueType()) &&
           Ops[2].getOpcode() == ISD::TargetConstant &&
           (Ops[2]->getAsZExtVal() == 0 || Ops[2]->getAsZExtVal() == 1) &&
           "Invalid STRICT_FP_ROUND!");
    break;
#if 0
  // FIXME: figure out how to safely handle things like
  // int foo(int x) { return 1 << (x & 255); }
  // int bar() { return foo(256); }
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:
  case ISD::SHL_PARTS:
    if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
        cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
      return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
    else if (N3.getOpcode() == ISD::AND)
      if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
        // If the and is only masking out bits that cannot affect the shift,
        // eliminate the and.
        unsigned NumBits = VT.getScalarSizeInBits()*2;
        if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
          return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
      }
    break;
#endif
  }

  // Memoize the node unless it returns a glue result.
  SDNode *N;
  if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
    FoldingSetNodeID ID;
    AddNodeIDNode(ID, Opcode, VTList, Ops);
    void *IP = nullptr;
    // Reuse an existing identical node; its flags are intersected with the
    // requested ones.
    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
      E->intersectFlagsWith(Flags);
      return SDValue(E, 0);
    }

    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
    createOperands(N, Ops);
    CSEMap.InsertNode(N, IP);
  } else {
    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
    createOperands(N, Ops);
  }

  N->setFlags(Flags);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}

/// getNode overload for a node with result list \p VTList and no operands.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
                              SDVTList VTList) {
  return getNode(Opcode, DL, VTList, ArrayRef<SDValue>());
}

/// getNode overload for a node with result list \p VTList and one operand.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
                              SDValue N1) {
  SDValue Ops[] = { N1 };
  return getNode(Opcode, DL, VTList, Ops);
}

/// getNode overload for a node with result list \p VTList and two operands.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
                              SDValue N1, SDValue N2) {
  SDValue Ops[] = { N1, N2 };
  return getNode(Opcode, DL, VTList, Ops);
}

/// getNode overload for a node with result list \p VTList and three operands.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
                              SDValue N1, SDValue N2, SDValue N3) {
  SDValue Ops[] = { N1, N2, N3 };
  return getNode(Opcode, DL, VTList, Ops);
}

/// getNode overload for a node with result list \p VTList and four operands.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
                              SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
  SDValue Ops[] = { N1, N2, N3, N4 };
  return getNode(Opcode, DL, VTList, Ops);
}

/// getNode overload for a node with result list \p VTList and five operands.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
                              SDValue N1, SDValue N2, SDValue N3, SDValue N4,
                              SDValue N5) {
  SDValue Ops[] = { N1, N2, N3, N4, N5 };
  return getNode(Opcode, DL, VTList, Ops);
}

/// Return a one-element SDVTList for \p VT. Non-extended types use the
/// storage returned by SDNode::getValueTypeList; extended types are inserted
/// into EVTs and the list points at the stored element.
SDVTList SelectionDAG::getVTList(EVT VT) {
  if (!VT.isExtended())
    return makeVTList(SDNode::getValueTypeList(VT.getSimpleVT()), 1);

  return makeVTList(&(*EVTs.insert(VT).first), 1);
}

/// Return the uniqued two-element SDVTList {\p VT1, \p VT2}. Lists are looked
/// up in VTListMap, keyed on the element count and the raw bits of each type;
/// on a miss the array and list node are allocated from Allocator.
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
  FoldingSetNodeID ID;
  ID.AddInteger(2U);
  ID.AddInteger(VT1.getRawBits());
  ID.AddInteger(VT2.getRawBits());

  void *IP = nullptr;
  SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
  if (!Result) {
    EVT *Array = Allocator.Allocate<EVT>(2);
    Array[0] = VT1;
    Array[1] = VT2;
    Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2);
    VTListMap.InsertNode(Result, IP);
  }
  return Result->getSDVTList();
}

/// Return the uniqued three-element SDVTList {\p VT1, \p VT2, \p VT3}; see
/// the two-element overload for how lists are uniqued.
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
  FoldingSetNodeID ID;
  ID.AddInteger(3U);
  ID.AddInteger(VT1.getRawBits());
  ID.AddInteger(VT2.getRawBits());
  ID.AddInteger(VT3.getRawBits());

  void *IP = nullptr;
  SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
  if (!Result) {
    EVT *Array = Allocator.Allocate<EVT>(3);
    Array[0] = VT1;
    Array[1] = VT2;
    Array[2] = VT3;
    Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3);
    VTListMap.InsertNode(Result, IP);
  }
  return Result->getSDVTList();
}

/// Return the uniqued four-element SDVTList {\p VT1, \p VT2, \p VT3, \p VT4};
/// see the two-element overload for how lists are uniqued.
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
  FoldingSetNodeID ID;
  ID.AddInteger(4U);
  ID.AddInteger(VT1.getRawBits());
  ID.AddInteger(VT2.getRawBits());
  ID.AddInteger(VT3.getRawBits());
  ID.AddInteger(VT4.getRawBits());

  void *IP = nullptr;
  SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
  if (!Result) {
    EVT *Array = Allocator.Allocate<EVT>(4);
    Array[0] = VT1;
    Array[1] = VT2;
    Array[2] = VT3;
    Array[3] = VT4;
    Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4);
    VTListMap.InsertNode(Result, IP);
  }
  return Result->getSDVTList();
}

/// Return the uniqued SDVTList holding the types in \p VTs, in order. The key
/// is the element count followed by the raw bits of every type; on a miss the
/// types are copied into an array allocated from Allocator.
SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) {
  unsigned NumVTs = VTs.size();
  FoldingSetNodeID ID;
  ID.AddInteger(NumVTs);
  for (unsigned index = 0; index < NumVTs; index++) {
    ID.AddInteger(VTs[index].getRawBits());
  }

  void *IP = nullptr;
  SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
  if (!Result) {
    EVT *Array = Allocator.Allocate<EVT>(NumVTs);
    llvm::copy(VTs, Array);
    Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
    VTListMap.InsertNode(Result, IP);
  }
  return Result->getSDVTList();
}


/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
/// specified operands.  If the resultant node already exists in the DAG,
/// this does not modify the specified node, instead it returns the node that
/// already exists.  If the resultant node does not exist in the DAG, the
/// input node is returned.
As a degenerate case, if you specify the same 11029 /// input operands as the node already has, the input node is returned. 11030 SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { 11031 assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); 11032 11033 // Check to see if there is no change. 11034 if (Op == N->getOperand(0)) return N; 11035 11036 // See if the modified node already exists. 11037 void *InsertPos = nullptr; 11038 if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) 11039 return Existing; 11040 11041 // Nope it doesn't. Remove the node from its current place in the maps. 11042 if (InsertPos) 11043 if (!RemoveNodeFromCSEMaps(N)) 11044 InsertPos = nullptr; 11045 11046 // Now we update the operands. 11047 N->OperandList[0].set(Op); 11048 11049 updateDivergence(N); 11050 // If this gets put into a CSE map, add it. 11051 if (InsertPos) CSEMap.InsertNode(N, InsertPos); 11052 return N; 11053 } 11054 11055 SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { 11056 assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); 11057 11058 // Check to see if there is no change. 11059 if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)) 11060 return N; // No operands changed, just return the input node. 11061 11062 // See if the modified node already exists. 11063 void *InsertPos = nullptr; 11064 if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) 11065 return Existing; 11066 11067 // Nope it doesn't. Remove the node from its current place in the maps. 11068 if (InsertPos) 11069 if (!RemoveNodeFromCSEMaps(N)) 11070 InsertPos = nullptr; 11071 11072 // Now we update the operands. 11073 if (N->OperandList[0] != Op1) 11074 N->OperandList[0].set(Op1); 11075 if (N->OperandList[1] != Op2) 11076 N->OperandList[1].set(Op2); 11077 11078 updateDivergence(N); 11079 // If this gets put into a CSE map, add it. 
11080 if (InsertPos) CSEMap.InsertNode(N, InsertPos); 11081 return N; 11082 } 11083 11084 SDNode *SelectionDAG:: 11085 UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { 11086 SDValue Ops[] = { Op1, Op2, Op3 }; 11087 return UpdateNodeOperands(N, Ops); 11088 } 11089 11090 SDNode *SelectionDAG:: 11091 UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, 11092 SDValue Op3, SDValue Op4) { 11093 SDValue Ops[] = { Op1, Op2, Op3, Op4 }; 11094 return UpdateNodeOperands(N, Ops); 11095 } 11096 11097 SDNode *SelectionDAG:: 11098 UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, 11099 SDValue Op3, SDValue Op4, SDValue Op5) { 11100 SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; 11101 return UpdateNodeOperands(N, Ops); 11102 } 11103 11104 SDNode *SelectionDAG:: 11105 UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { 11106 unsigned NumOps = Ops.size(); 11107 assert(N->getNumOperands() == NumOps && 11108 "Update with wrong number of operands"); 11109 11110 // If no operands changed just return the input node. 11111 if (std::equal(Ops.begin(), Ops.end(), N->op_begin())) 11112 return N; 11113 11114 // See if the modified node already exists. 11115 void *InsertPos = nullptr; 11116 if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos)) 11117 return Existing; 11118 11119 // Nope it doesn't. Remove the node from its current place in the maps. 11120 if (InsertPos) 11121 if (!RemoveNodeFromCSEMaps(N)) 11122 InsertPos = nullptr; 11123 11124 // Now we update the operands. 11125 for (unsigned i = 0; i != NumOps; ++i) 11126 if (N->OperandList[i] != Ops[i]) 11127 N->OperandList[i].set(Ops[i]); 11128 11129 updateDivergence(N); 11130 // If this gets put into a CSE map, add it. 11131 if (InsertPos) CSEMap.InsertNode(N, InsertPos); 11132 return N; 11133 } 11134 11135 /// DropOperands - Release the operands and set this node to have 11136 /// zero operands. 
11137 void SDNode::DropOperands() { 11138 // Unlike the code in MorphNodeTo that does this, we don't need to 11139 // watch for dead nodes here. 11140 for (op_iterator I = op_begin(), E = op_end(); I != E; ) { 11141 SDUse &Use = *I++; 11142 Use.set(SDValue()); 11143 } 11144 } 11145 11146 void SelectionDAG::setNodeMemRefs(MachineSDNode *N, 11147 ArrayRef<MachineMemOperand *> NewMemRefs) { 11148 if (NewMemRefs.empty()) { 11149 N->clearMemRefs(); 11150 return; 11151 } 11152 11153 // Check if we can avoid allocating by storing a single reference directly. 11154 if (NewMemRefs.size() == 1) { 11155 N->MemRefs = NewMemRefs[0]; 11156 N->NumMemRefs = 1; 11157 return; 11158 } 11159 11160 MachineMemOperand **MemRefsBuffer = 11161 Allocator.template Allocate<MachineMemOperand *>(NewMemRefs.size()); 11162 llvm::copy(NewMemRefs, MemRefsBuffer); 11163 N->MemRefs = MemRefsBuffer; 11164 N->NumMemRefs = static_cast<int>(NewMemRefs.size()); 11165 } 11166 11167 /// SelectNodeTo - These are wrappers around MorphNodeTo that accept a 11168 /// machine opcode. 
11169 /// 11170 SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, 11171 EVT VT) { 11172 SDVTList VTs = getVTList(VT); 11173 return SelectNodeTo(N, MachineOpc, VTs, {}); 11174 } 11175 11176 SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, 11177 EVT VT, SDValue Op1) { 11178 SDVTList VTs = getVTList(VT); 11179 SDValue Ops[] = { Op1 }; 11180 return SelectNodeTo(N, MachineOpc, VTs, Ops); 11181 } 11182 11183 SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, 11184 EVT VT, SDValue Op1, 11185 SDValue Op2) { 11186 SDVTList VTs = getVTList(VT); 11187 SDValue Ops[] = { Op1, Op2 }; 11188 return SelectNodeTo(N, MachineOpc, VTs, Ops); 11189 } 11190 11191 SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, 11192 EVT VT, SDValue Op1, 11193 SDValue Op2, SDValue Op3) { 11194 SDVTList VTs = getVTList(VT); 11195 SDValue Ops[] = { Op1, Op2, Op3 }; 11196 return SelectNodeTo(N, MachineOpc, VTs, Ops); 11197 } 11198 11199 SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, 11200 EVT VT, ArrayRef<SDValue> Ops) { 11201 SDVTList VTs = getVTList(VT); 11202 return SelectNodeTo(N, MachineOpc, VTs, Ops); 11203 } 11204 11205 SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, 11206 EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) { 11207 SDVTList VTs = getVTList(VT1, VT2); 11208 return SelectNodeTo(N, MachineOpc, VTs, Ops); 11209 } 11210 11211 SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, 11212 EVT VT1, EVT VT2) { 11213 SDVTList VTs = getVTList(VT1, VT2); 11214 return SelectNodeTo(N, MachineOpc, VTs, {}); 11215 } 11216 11217 SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, 11218 EVT VT1, EVT VT2, EVT VT3, 11219 ArrayRef<SDValue> Ops) { 11220 SDVTList VTs = getVTList(VT1, VT2, VT3); 11221 return SelectNodeTo(N, MachineOpc, VTs, Ops); 11222 } 11223 11224 SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, 11225 EVT VT1, EVT VT2, 11226 SDValue Op1, SDValue Op2) 
{ 11227 SDVTList VTs = getVTList(VT1, VT2); 11228 SDValue Ops[] = { Op1, Op2 }; 11229 return SelectNodeTo(N, MachineOpc, VTs, Ops); 11230 } 11231 11232 SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, 11233 SDVTList VTs,ArrayRef<SDValue> Ops) { 11234 SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops); 11235 // Reset the NodeID to -1. 11236 New->setNodeId(-1); 11237 if (New != N) { 11238 ReplaceAllUsesWith(N, New); 11239 RemoveDeadNode(N); 11240 } 11241 return New; 11242 } 11243 11244 /// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away 11245 /// the line number information on the merged node since it is not possible to 11246 /// preserve the information that operation is associated with multiple lines. 11247 /// This will make the debugger working better at -O0, were there is a higher 11248 /// probability having other instructions associated with that line. 11249 /// 11250 /// For IROrder, we keep the smaller of the two 11251 SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) { 11252 DebugLoc NLoc = N->getDebugLoc(); 11253 if (NLoc && OptLevel == CodeGenOptLevel::None && OLoc.getDebugLoc() != NLoc) { 11254 N->setDebugLoc(DebugLoc()); 11255 } 11256 unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); 11257 N->setIROrder(Order); 11258 return N; 11259 } 11260 11261 /// MorphNodeTo - This *mutates* the specified node to have the specified 11262 /// return type, opcode, and operands. 11263 /// 11264 /// Note that MorphNodeTo returns the resultant node. If there is already a 11265 /// node of the specified opcode and operands, it returns that node instead of 11266 /// the current one. Note that the SDLoc need not be the same. 
11267 /// 11268 /// Using MorphNodeTo is faster than creating a new node and swapping it in 11269 /// with ReplaceAllUsesWith both because it often avoids allocating a new 11270 /// node, and because it doesn't require CSE recalculation for any of 11271 /// the node's users. 11272 /// 11273 /// However, note that MorphNodeTo recursively deletes dead nodes from the DAG. 11274 /// As a consequence it isn't appropriate to use from within the DAG combiner or 11275 /// the legalizer which maintain worklists that would need to be updated when 11276 /// deleting things. 11277 SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, 11278 SDVTList VTs, ArrayRef<SDValue> Ops) { 11279 // If an identical node already exists, use it. 11280 void *IP = nullptr; 11281 if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { 11282 FoldingSetNodeID ID; 11283 AddNodeIDNode(ID, Opc, VTs, Ops); 11284 if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP)) 11285 return UpdateSDLocOnMergeSDNode(ON, SDLoc(N)); 11286 } 11287 11288 if (!RemoveNodeFromCSEMaps(N)) 11289 IP = nullptr; 11290 11291 // Start the morphing. 11292 N->NodeType = Opc; 11293 N->ValueList = VTs.VTs; 11294 N->NumValues = VTs.NumVTs; 11295 11296 // Clear the operands list, updating used nodes to remove this from their 11297 // use list. Keep track of any operands that become dead as a result. 11298 SmallPtrSet<SDNode*, 16> DeadNodeSet; 11299 for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { 11300 SDUse &Use = *I++; 11301 SDNode *Used = Use.getNode(); 11302 Use.set(SDValue()); 11303 if (Used->use_empty()) 11304 DeadNodeSet.insert(Used); 11305 } 11306 11307 // For MachineNode, initialize the memory references information. 11308 if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) 11309 MN->clearMemRefs(); 11310 11311 // Swap for an appropriately sized array from the recycler. 
removeOperands(N);
  createOperands(N, Ops);

  // Delete any nodes that are still dead after adding the uses for the
  // new operands.
  if (!DeadNodeSet.empty()) {
    SmallVector<SDNode *, 16> DeadNodes;
    for (SDNode *N : DeadNodeSet)
      if (N->use_empty())
        DeadNodes.push_back(N);
    RemoveDeadNodes(DeadNodes);
  }

  if (IP)
    CSEMap.InsertNode(N, IP);   // Memoize the new node.
  return N;
}

/// mutateStrictFPToFP - Turn the strict floating-point node \p Node into its
/// non-strict form.
///
/// The STRICT_* opcode is mapped to the matching non-strict opcode (strict
/// compares map to ISD::SETCC) through ConstrainedOps.def. \p Node must
/// produce exactly two values: uses of its second value (the output chain) are
/// redirected to its first operand (the input chain), and the node is then
/// morphed to the new opcode with the remaining operands and only its first
/// value type.
///
/// \returns the node produced by MorphNodeTo. When that is not \p Node itself,
/// all uses of \p Node have been replaced with it and \p Node has been
/// removed.
SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
  unsigned OrigOpc = Node->getOpcode();
  unsigned NewOpc;
  // Every case label below is generated from ConstrainedOps.def; any opcode
  // that is not listed there is a caller error.
  switch (OrigOpc) {
  default:
    llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break;
#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break;
#include "llvm/IR/ConstrainedOps.def"
  }

  assert(Node->getNumValues() == 2 && "Unexpected number of results!");

  // We're taking this node out of the chain, so we need to re-link things.
  SDValue InputChain = Node->getOperand(0);
  SDValue OutputChain = SDValue(Node, 1);
  ReplaceAllUsesOfValueWith(OutputChain, InputChain);

  // Collect every operand except the chain (operand 0).
  SmallVector<SDValue, 3> Ops;
  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
    Ops.push_back(Node->getOperand(i));

  // The morphed node keeps only the first result type; the chain result is
  // dropped.
  SDVTList VTs = getVTList(Node->getValueType(0));
  SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops);

  // MorphNodeTo can operate in two ways: if an existing node with the
  // specified operands exists, it can just return it.  Otherwise, it
  // updates the node in place to have the requested operands.
  if (Res == Node) {
    // If we updated the node in place, reset the node ID.  To the isel,
    // this should be just like a newly allocated machine node.
    Res->setNodeId(-1);
  } else {
    ReplaceAllUsesWith(Node, Res);
    RemoveDeadNode(Node);
  }

  return Res;
}

/// getMachineNode - These are used for target selectors to create a new node
/// with specified return type(s), MachineInstr opcode, and operands.
///
/// Note that getMachineNode returns the resultant node.  If there is already a
/// node of the specified opcode and operands, it returns that node instead of
/// the current one.
///
/// Every overload below except the last only packs its result types into an
/// SDVTList (and its loose operands into an array) and forwards to the
/// (Opcode, DL, SDVTList, ArrayRef<SDValue>) overload.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            EVT VT) {
  SDVTList VTs = getVTList(VT);
  return getMachineNode(Opcode, dl, VTs, {});
}

/// One result type, one operand.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            EVT VT, SDValue Op1) {
  SDVTList VTs = getVTList(VT);
  SDValue Ops[] = { Op1 };
  return getMachineNode(Opcode, dl, VTs, Ops);
}

/// One result type, two operands.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            EVT VT, SDValue Op1, SDValue Op2) {
  SDVTList VTs = getVTList(VT);
  SDValue Ops[] = { Op1, Op2 };
  return getMachineNode(Opcode, dl, VTs, Ops);
}

/// One result type, three operands.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            EVT VT, SDValue Op1, SDValue Op2,
                                            SDValue Op3) {
  SDVTList VTs = getVTList(VT);
  SDValue Ops[] = { Op1, Op2, Op3 };
  return getMachineNode(Opcode, dl, VTs, Ops);
}

/// One result type, arbitrary operand list.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            EVT VT, ArrayRef<SDValue> Ops) {
  SDVTList VTs = getVTList(VT);
  return getMachineNode(Opcode, dl, VTs, Ops);
}

/// Two result types, two operands.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            EVT VT1, EVT VT2, SDValue Op1,
                                            SDValue Op2) {
  SDVTList VTs = getVTList(VT1, VT2);
  SDValue Ops[] = { Op1, Op2 };
  return getMachineNode(Opcode, dl, VTs, Ops);
}

/// Two result types, three operands.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            EVT VT1, EVT VT2, SDValue Op1,
                                            SDValue Op2, SDValue Op3) {
  SDVTList VTs = getVTList(VT1, VT2);
  SDValue Ops[] = { Op1, Op2, Op3 };
  return getMachineNode(Opcode, dl, VTs, Ops);
}

/// Two result types, arbitrary operand list.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            EVT VT1, EVT VT2,
                                            ArrayRef<SDValue> Ops) {
  SDVTList VTs = getVTList(VT1, VT2);
  return getMachineNode(Opcode, dl, VTs, Ops);
}

/// Three result types, two operands.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            EVT VT1, EVT VT2, EVT VT3,
                                            SDValue Op1, SDValue Op2) {
  SDVTList VTs = getVTList(VT1, VT2, VT3);
  SDValue Ops[] = { Op1, Op2 };
  return getMachineNode(Opcode, dl, VTs, Ops);
}

/// Three result types, three operands.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            EVT VT1, EVT VT2, EVT VT3,
                                            SDValue Op1, SDValue Op2,
                                            SDValue Op3) {
  SDVTList VTs = getVTList(VT1, VT2, VT3);
  SDValue Ops[] = { Op1, Op2, Op3 };
  return getMachineNode(Opcode, dl, VTs, Ops);
}

/// Three result types, arbitrary operand list.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            EVT VT1, EVT VT2, EVT VT3,
                                            ArrayRef<SDValue> Ops) {
  SDVTList VTs = getVTList(VT1, VT2, VT3);
  return getMachineNode(Opcode, dl, VTs, Ops);
}

/// Arbitrary result type list, arbitrary operand list.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
                                            ArrayRef<EVT> ResultTys,
                                            ArrayRef<SDValue> Ops) {
  SDVTList VTs = getVTList(ResultTys);
  return getMachineNode(Opcode, dl, VTs, Ops);
}

/// The overload all other getMachineNode variants forward to.
///
/// Unless the last result type is MVT::Glue, the CSE map is consulted first
/// and an existing matching node is returned (after UpdateSDLocOnMergeSDNode
/// has been applied to it with \p DL). Otherwise a new MachineSDNode is
/// allocated, given \p Ops as operands, memoized in the CSE map when CSE
/// applies, and inserted into the DAG. Note that the opcode is passed on in
/// complemented form (~Opcode), both to the CSE ID and to the new node.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
                                            SDVTList VTs,
                                            ArrayRef<SDValue> Ops) {
  // Nodes whose last result is glue are never looked up or memoized.
  bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
  MachineSDNode *N;
  void *IP = nullptr;

  if (DoCSE) {
    FoldingSetNodeID ID;
    AddNodeIDNode(ID, ~Opcode, VTs, Ops);
    IP = nullptr;
    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
      return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL));
    }
  }

  // Allocate a new MachineSDNode.
  N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
  createOperands(N, Ops);

  // IP was filled in by the failed lookup above.
  if (DoCSE)
    CSEMap.InsertNode(N, IP);

  InsertNode(N);
  NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this);
  return N;
}

/// getTargetExtractSubreg - A convenience function for creating
/// TargetOpcode::EXTRACT_SUBREG nodes.
///
/// \p SRIdx is wrapped in an i32 target constant and passed as the second
/// operand after \p Operand; result 0 of the new machine node is returned.
SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT,
                                             SDValue Operand) {
  SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
  SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                  VT, Operand, SRIdxVal);
  return SDValue(Subreg, 0);
}

/// getTargetInsertSubreg - A convenience function for creating
/// TargetOpcode::INSERT_SUBREG nodes.
///
/// The operands are, in order, \p Operand, \p Subreg and \p SRIdx wrapped in
/// an i32 target constant; result 0 of the new machine node is returned.
SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
                                            SDValue Operand, SDValue Subreg) {
  SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
  SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                  VT, Operand, Subreg, SRIdxVal);
  return SDValue(Result, 0);
}

/// getNodeIfExists - Get the specified node if it's already available, or
/// else return NULL.
11516 SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, 11517 ArrayRef<SDValue> Ops) { 11518 SDNodeFlags Flags; 11519 if (Inserter) 11520 Flags = Inserter->getFlags(); 11521 return getNodeIfExists(Opcode, VTList, Ops, Flags); 11522 } 11523 11524 SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, 11525 ArrayRef<SDValue> Ops, 11526 const SDNodeFlags Flags) { 11527 if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { 11528 FoldingSetNodeID ID; 11529 AddNodeIDNode(ID, Opcode, VTList, Ops); 11530 void *IP = nullptr; 11531 if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { 11532 E->intersectFlagsWith(Flags); 11533 return E; 11534 } 11535 } 11536 return nullptr; 11537 } 11538 11539 /// doesNodeExist - Check if a node exists without modifying its flags. 11540 bool SelectionDAG::doesNodeExist(unsigned Opcode, SDVTList VTList, 11541 ArrayRef<SDValue> Ops) { 11542 if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { 11543 FoldingSetNodeID ID; 11544 AddNodeIDNode(ID, Opcode, VTList, Ops); 11545 void *IP = nullptr; 11546 if (FindNodeOrInsertPos(ID, SDLoc(), IP)) 11547 return true; 11548 } 11549 return false; 11550 } 11551 11552 /// getDbgValue - Creates a SDDbgValue node. 
11553 /// 11554 /// SDNode 11555 SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr, 11556 SDNode *N, unsigned R, bool IsIndirect, 11557 const DebugLoc &DL, unsigned O) { 11558 assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && 11559 "Expected inlined-at fields to agree"); 11560 return new (DbgInfo->getAlloc()) 11561 SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromNode(N, R), 11562 {}, IsIndirect, DL, O, 11563 /*IsVariadic=*/false); 11564 } 11565 11566 /// Constant 11567 SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var, 11568 DIExpression *Expr, 11569 const Value *C, 11570 const DebugLoc &DL, unsigned O) { 11571 assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && 11572 "Expected inlined-at fields to agree"); 11573 return new (DbgInfo->getAlloc()) 11574 SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromConst(C), {}, 11575 /*IsIndirect=*/false, DL, O, 11576 /*IsVariadic=*/false); 11577 } 11578 11579 /// FrameIndex 11580 SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, 11581 DIExpression *Expr, unsigned FI, 11582 bool IsIndirect, 11583 const DebugLoc &DL, 11584 unsigned O) { 11585 assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && 11586 "Expected inlined-at fields to agree"); 11587 return getFrameIndexDbgValue(Var, Expr, FI, {}, IsIndirect, DL, O); 11588 } 11589 11590 /// FrameIndex with dependencies 11591 SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, 11592 DIExpression *Expr, unsigned FI, 11593 ArrayRef<SDNode *> Dependencies, 11594 bool IsIndirect, 11595 const DebugLoc &DL, 11596 unsigned O) { 11597 assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && 11598 "Expected inlined-at fields to agree"); 11599 return new (DbgInfo->getAlloc()) 11600 SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromFrameIdx(FI), 11601 Dependencies, IsIndirect, DL, O, 11602 /*IsVariadic=*/false); 11603 } 11604 11605 
/// VReg: the location is the register \p VReg (wrapped with
/// SDDbgOperand::fromVReg); no extra dependencies are recorded.
SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, DIExpression *Expr,
                                          Register VReg, bool IsIndirect,
                                          const DebugLoc &DL, unsigned O) {
  assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
         "Expected inlined-at fields to agree");
  return new (DbgInfo->getAlloc())
      SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromVReg(VReg),
                 {}, IsIndirect, DL, O,
                 /*IsVariadic=*/false);
}

/// Creates an SDDbgValue from an explicit list of location operands \p Locs
/// and dependency nodes \p Dependencies. All arguments are handed to the
/// SDDbgValue constructor unchanged.
SDDbgValue *SelectionDAG::getDbgValueList(DIVariable *Var, DIExpression *Expr,
                                          ArrayRef<SDDbgOperand> Locs,
                                          ArrayRef<SDNode *> Dependencies,
                                          bool IsIndirect, const DebugLoc &DL,
                                          unsigned O, bool IsVariadic) {
  assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
         "Expected inlined-at fields to agree");
  return new (DbgInfo->getAlloc())
      SDDbgValue(DbgInfo->getAlloc(), Var, Expr, Locs, Dependencies, IsIndirect,
                 DL, O, IsVariadic);
}

/// Clone the debug values that describe \p From so that they describe \p To.
///
/// Every non-invalidated SDDbgValue attached to From's node that has \p From
/// among its location operands is cloned with those operands replaced by
/// \p To, and the clone is registered with AddDbgValue. The clone's order is
/// the larger of To's IR order and the original's order.
///
/// If \p SizeInBits is non-zero, the clone's expression is narrowed to the
/// fragment starting at \p OffsetInBits; debug values for which that fragment
/// cannot be formed are skipped. If \p InvalidateDbg is set, each original
/// debug value that was cloned is marked invalidated and emitted.
///
/// Nothing happens when \p From and \p To are the same value or belong to the
/// same node, or when From's node carries no debug values.
void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
                                     unsigned OffsetInBits, unsigned SizeInBits,
                                     bool InvalidateDbg) {
  SDNode *FromNode = From.getNode();
  SDNode *ToNode = To.getNode();
  assert(FromNode && ToNode && "Can't modify dbg values");

  // PR35338
  // TODO: assert(From != To && "Redundant dbg value transfer");
  // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer");
  if (From == To || FromNode == ToNode)
    return;

  if (!FromNode->getHasDebugValue())
    return;

  SDDbgOperand FromLocOp =
      SDDbgOperand::fromNode(From.getNode(), From.getResNo());
  SDDbgOperand ToLocOp = SDDbgOperand::fromNode(To.getNode(), To.getResNo());

  // Clones are collected first and only registered after the loop over the
  // existing debug values has finished.
  SmallVector<SDDbgValue *, 2> ClonedDVs;
  for (SDDbgValue *Dbg : GetDbgValues(FromNode)) {
    if (Dbg->isInvalidated())
      continue;

    // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value");

    // Create a new location ops vector that is equal to the old vector, but
    // with each instance of FromLocOp replaced with ToLocOp.
    bool Changed = false;
    auto NewLocOps = Dbg->copyLocationOps();
    std::replace_if(
        NewLocOps.begin(), NewLocOps.end(),
        [&Changed, FromLocOp](const SDDbgOperand &Op) {
          bool Match = Op == FromLocOp;
          Changed |= Match;
          return Match;
        },
        ToLocOp);
    // Ignore this SDDbgValue if we didn't find a matching location.
    if (!Changed)
      continue;

    DIVariable *Var = Dbg->getVariable();
    auto *Expr = Dbg->getExpression();
    // If a fragment is requested, update the expression.
    if (SizeInBits) {
      // When splitting a larger (e.g., sign-extended) value whose
      // lower bits are described with an SDDbgValue, do not attempt
      // to transfer the SDDbgValue to the upper bits.
      if (auto FI = Expr->getFragmentInfo())
        if (OffsetInBits + SizeInBits > FI->SizeInBits)
          continue;
      auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits,
                                                             SizeInBits);
      // No fragment expression could be created: skip this debug value.
      if (!Fragment)
        continue;
      Expr = *Fragment;
    }

    auto AdditionalDependencies = Dbg->getAdditionalDependencies();
    // Clone the SDDbgValue and move it to To.
    SDDbgValue *Clone = getDbgValueList(
        Var, Expr, NewLocOps, AdditionalDependencies, Dbg->isIndirect(),
        Dbg->getDebugLoc(), std::max(ToNode->getIROrder(), Dbg->getOrder()),
        Dbg->isVariadic());
    ClonedDVs.push_back(Clone);

    if (InvalidateDbg) {
      // Invalidate value and indicate the SDDbgValue should not be emitted.
      Dbg->setIsInvalidated();
      Dbg->setIsEmitted();
    }
  }

  for (SDDbgValue *Dbg : ClonedDVs) {
    assert(is_contained(Dbg->getSDNodes(), ToNode) &&
           "Transferred DbgValues should depend on the new SDNode");
    AddDbgValue(Dbg, false);
  }
}

/// Re-express the debug values attached to \p N in terms of N's operands.
///
/// Only two opcodes are handled; debug values on any other node are left
/// untouched:
///  - ISD::ADD whose first operand is not a constant: the location becomes
///    the first operand and the addition is folded into the DIExpression,
///    either as a constant offset or, for a non-constant second operand, as
///    an additional location operand combined with DW_OP_plus.
///  - ISD::TRUNCATE: the location becomes the truncated operand and the ops
///    returned by DIExpression::getExtOps are appended to the expression.
///
/// Each rewritten debug value is cloned; the original is marked invalidated
/// and emitted, and the clone is registered with AddDbgValue.
void SelectionDAG::salvageDebugInfo(SDNode &N) {
  if (!N.getHasDebugValue())
    return;

  // Build the location operand for (Node, ResNo): a frame-index operand when
  // Node is a FrameIndexSDNode, a plain node operand otherwise.
  auto GetLocationOperand = [](SDNode *Node, unsigned ResNo) {
    if (auto *FISDN = dyn_cast<FrameIndexSDNode>(Node))
      return SDDbgOperand::fromFrameIdx(FISDN->getIndex());
    return SDDbgOperand::fromNode(Node, ResNo);
  };

  SmallVector<SDDbgValue *, 2> ClonedDVs;
  for (auto *DV : GetDbgValues(&N)) {
    if (DV->isInvalidated())
      continue;
    switch (N.getOpcode()) {
    default:
      break;
    case ISD::ADD: {
      SDValue N0 = N.getOperand(0);
      SDValue N1 = N.getOperand(1);
      if (!isa<ConstantSDNode>(N0)) {
        bool RHSConstant = isa<ConstantSDNode>(N1);
        // Offset is only assigned, and only read, when RHSConstant is true.
        uint64_t Offset;
        if (RHSConstant)
          Offset = N.getConstantOperandVal(1);
        // We are not allowed to turn indirect debug values variadic, so
        // don't salvage those.
        // (This 'continue' advances the enclosing loop over debug values.)
        if (!RHSConstant && DV->isIndirect())
          continue;

        // Rewrite an ADD constant node into a DIExpression. Since we are
        // performing arithmetic to compute the variable's *value* in the
        // DIExpression, we need to mark the expression with a
        // DW_OP_stack_value.
        auto *DIExpr = DV->getExpression();
        auto NewLocOps = DV->copyLocationOps();
        bool Changed = false;
        // Only the original operands are scanned; operands appended inside
        // the loop are not revisited.
        size_t OrigLocOpsSize = NewLocOps.size();
        for (size_t i = 0; i < OrigLocOpsSize; ++i) {
          // We're not given a ResNo to compare against because the whole
          // node is going away. We know that any ISD::ADD only has one
          // result, so we can assume any node match is using the result.
          if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE ||
              NewLocOps[i].getSDNode() != &N)
            continue;
          NewLocOps[i] = GetLocationOperand(N0.getNode(), N0.getResNo());
          if (RHSConstant) {
            SmallVector<uint64_t, 3> ExprOps;
            DIExpression::appendOffset(ExprOps, Offset);
            DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true);
          } else {
            // Convert to a variadic expression (if not already).
            // convertToVariadicExpression() returns a const pointer, so we use
            // a temporary const variable here.
            const auto *TmpDIExpr =
                DIExpression::convertToVariadicExpression(DIExpr);
            SmallVector<uint64_t, 3> ExprOps;
            ExprOps.push_back(dwarf::DW_OP_LLVM_arg);
            // The index is taken before the push_back below, so it is the
            // position the right-hand operand is about to occupy.
            ExprOps.push_back(NewLocOps.size());
            ExprOps.push_back(dwarf::DW_OP_plus);
            SDDbgOperand RHS =
                SDDbgOperand::fromNode(N1.getNode(), N1.getResNo());
            NewLocOps.push_back(RHS);
            DIExpr = DIExpression::appendOpsToArg(TmpDIExpr, ExprOps, i, true);
          }
          Changed = true;
        }
        (void)Changed;
        assert(Changed && "Salvage target doesn't use N");

        // The clone is variadic if the original was, or if an operand was
        // appended above.
        bool IsVariadic =
            DV->isVariadic() || OrigLocOpsSize != NewLocOps.size();

        auto AdditionalDependencies = DV->getAdditionalDependencies();
        SDDbgValue *Clone = getDbgValueList(
            DV->getVariable(), DIExpr, NewLocOps, AdditionalDependencies,
            DV->isIndirect(), DV->getDebugLoc(), DV->getOrder(), IsVariadic);
        ClonedDVs.push_back(Clone);
        DV->setIsInvalidated();
        DV->setIsEmitted();
        LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting";
                   N0.getNode()->dumprFull(this);
                   dbgs() << " into " << *DIExpr << '\n');
      }
      break;
    }
    case ISD::TRUNCATE: {
      SDValue N0 = N.getOperand(0);
      TypeSize FromSize = N0.getValueSizeInBits();
      TypeSize ToSize = N.getValueSizeInBits(0);

      DIExpression *DbgExpression = DV->getExpression();
      auto ExtOps = DIExpression::getExtOps(FromSize, ToSize, false);
      auto NewLocOps = DV->copyLocationOps();
      bool Changed = false;
      for (size_t i = 0; i < NewLocOps.size(); ++i) {
        if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE ||
            NewLocOps[i].getSDNode() != &N)
          continue;

        NewLocOps[i] = GetLocationOperand(N0.getNode(), N0.getResNo());
        DbgExpression = DIExpression::appendOpsToArg(DbgExpression, ExtOps, i);
        Changed = true;
      }
      assert(Changed && "Salvage target doesn't use N");
      (void)Changed;

      SDDbgValue *Clone =
          getDbgValueList(DV->getVariable(), DbgExpression, NewLocOps,
                          DV->getAdditionalDependencies(), DV->isIndirect(),
                          DV->getDebugLoc(), DV->getOrder(), DV->isVariadic());

      ClonedDVs.push_back(Clone);
      DV->setIsInvalidated();
      DV->setIsEmitted();
      LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
                 dbgs() << " into " << *DbgExpression << '\n');
      break;
    }
    }
  }

  for (SDDbgValue *Dbg : ClonedDVs) {
    assert((!Dbg->getSDNodes().empty() ||
            llvm::any_of(Dbg->getLocationOps(),
                         [&](const SDDbgOperand &Op) {
                           return Op.getKind() == SDDbgOperand::FRAMEIX;
                         })) &&
           "Salvaged DbgValue should depend on a new SDNode");
    AddDbgValue(Dbg, false);
  }
}

/// Creates a SDDbgLabel node.
SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label,
                                      const DebugLoc &DL, unsigned O) {
  assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) &&
         "Expected inlined-at fields to agree");
  return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O);
}

namespace {

/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
/// pointed to by a use iterator is deleted, increment the use iterator
/// so that it doesn't dangle.
///
class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
  // Both iterators are held by reference, so advancing UI here is visible to
  // the loop that owns them.
  SDNode::use_iterator &UI;
  SDNode::use_iterator &UE;

  void NodeDeleted(SDNode *N, SDNode *E) override {
    // Increment the iterator as needed.
    while (UI != UE && N == UI->getUser())
      ++UI;
  }

public:
  RAUWUpdateListener(SelectionDAG &d,
                     SDNode::use_iterator &ui,
                     SDNode::use_iterator &ue)
    : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
};

} // end anonymous namespace

/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
///
/// This version assumes From has a single result value.
///
void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
  SDNode *From = FromN.getNode();
  assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
         "Cannot replace with this method!");
  assert(From != To.getNode() && "Cannot replace uses of with self");

  // Preserve Debug Values
  transferDbgValues(FromN, To);
  // Preserve extra info.
  copyExtraInfo(From, To.getNode());

  // Iterate over all the existing uses of From. New uses will be added
  // to the beginning of the use list, which we avoid visiting.
  // This specifically avoids visiting uses of From that arise while the
  // replacement is happening, because any such uses would be the result
  // of CSE: If an existing node looks like From after one of its operands
  // is replaced by To, we don't want to replace of all its users with To
  // too. See PR3018 for more info.
  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
  // While this listener is alive it advances UI past uses owned by any node
  // that gets deleted.
  RAUWUpdateListener Listener(*this, UI, UE);
  while (UI != UE) {
    SDNode *User = UI->getUser();

    // This node is about to morph, remove its old self from the CSE maps.
    RemoveNodeFromCSEMaps(User);

    // A user can appear in a use list multiple times, and when this
    // happens the uses are usually next to each other in the list.
    // To help reduce the number of CSE recomputations, process all
    // the uses of this user that we can find this way.
    do {
      SDUse &Use = *UI;
      // UI is advanced before the use is rewritten.
      // NOTE(review): presumably because Use.set() unlinks the use from
      // From's use list - confirm against SDUse::set.
      ++UI;
      Use.set(To);
      if (To->isDivergent() != From->isDivergent())
        updateDivergence(User);
    } while (UI != UE && UI->getUser() == User);
    // Now that we have modified User, add it back to the CSE maps.  If it
    // already exists there, recursively merge the results together.
    AddModifiedNodeToCSEMaps(User);
  }

  // If we just RAUW'd the root, take note.
  if (FromN == getRoot())
    setRoot(To);
}

/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
///
/// This version assumes that for each value of From, there is a
/// corresponding value in To in the same position with the same type.
///
void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
#ifndef NDEBUG
  // Every result of From that is actually used must have the same type as the
  // result of To at the same position.
  for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
    assert((!From->hasAnyUseOfValue(i) ||
            From->getValueType(i) == To->getValueType(i)) &&
           "Cannot use this version of ReplaceAllUsesWith!");
#endif

  // Handle the trivial case.
  if (From == To)
    return;

  // Preserve Debug Info. Only do this if there's a use.
  for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
    if (From->hasAnyUseOfValue(i)) {
      assert((i < To->getNumValues()) && "Invalid To location");
      transferDbgValues(SDValue(From, i), SDValue(To, i));
    }
  // Preserve extra info.
  copyExtraInfo(From, To);

  // Iterate over just the existing users of From. See the comments in
  // the ReplaceAllUsesWith above.
  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
  RAUWUpdateListener Listener(*this, UI, UE);
  while (UI != UE) {
    SDNode *User = UI->getUser();

    // This node is about to morph, remove its old self from the CSE maps.
    RemoveNodeFromCSEMaps(User);

    // A user can appear in a use list multiple times, and when this
    // happens the uses are usually next to each other in the list.
    // To help reduce the number of CSE recomputations, process all
    // the uses of this user that we can find this way.
    do {
      SDUse &Use = *UI;
      ++UI;
      // Only the node is swapped here; Use.setNode() is given no result
      // number.
      Use.setNode(To);
      if (To->isDivergent() != From->isDivergent())
        updateDivergence(User);
    } while (UI != UE && UI->getUser() == User);

    // Now that we have modified User, add it back to the CSE maps.  If it
    // already exists there, recursively merge the results together.
    AddModifiedNodeToCSEMaps(User);
  }

  // If we just RAUW'd the root, take note.
  if (From == getRoot().getNode())
    setRoot(SDValue(To, getRoot().getResNo()));
}

/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
///
/// This version can replace From with any result values.  To must match the
/// number and types of values returned by From.
///
/// \p To is indexed by result number: a use of result R of \p From is
/// rewritten to To[R].
void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
  if (From->getNumValues() == 1)  // Handle the simple case efficiently.
    return ReplaceAllUsesWith(SDValue(From, 0), To[0]);

  for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) {
    // Preserve Debug Info.
    transferDbgValues(SDValue(From, i), To[i]);
    // Preserve extra info.
    copyExtraInfo(From, To[i].getNode());
  }

  // Iterate over just the existing users of From. See the comments in
  // the ReplaceAllUsesWith above.
  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
  RAUWUpdateListener Listener(*this, UI, UE);
  while (UI != UE) {
    SDNode *User = UI->getUser();

    // This node is about to morph, remove its old self from the CSE maps.
    RemoveNodeFromCSEMaps(User);

    // A user can appear in a use list multiple times, and when this happens the
    // uses are usually next to each other in the list.  To help reduce the
    // number of CSE and divergence recomputations, process all the uses of this
    // user that we can find this way.
    bool To_IsDivergent = false;
    do {
      SDUse &Use = *UI;
      // Pick the replacement that corresponds to the result this use reads.
      const SDValue &ToOp = To[Use.getResNo()];
      ++UI;
      Use.set(ToOp);
      To_IsDivergent |= ToOp->isDivergent();
    } while (UI != UE && UI->getUser() == User);

    // Divergence is recomputed once per user, after all of its uses have been
    // rewritten.
    if (To_IsDivergent != From->isDivergent())
      updateDivergence(User);

    // Now that we have modified User, add it back to the CSE maps.  If it
    // already exists there, recursively merge the results together.
    AddModifiedNodeToCSEMaps(User);
  }

  // If we just RAUW'd the root, take note.
  if (From == getRoot().getNode())
    setRoot(SDValue(To[getRoot().getResNo()]));
}

/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone.  The Deleted
/// vector is handled the same way as for ReplaceAllUsesWith.
///
/// NOTE(review): no "Deleted" vector appears in this signature; the last
/// sentence above looks stale.
void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
  // Handle the really simple, really trivial case efficiently.
  if (From == To) return;

  // Handle the simple, trivial, case efficiently.
  if (From.getNode()->getNumValues() == 1) {
    ReplaceAllUsesWith(From, To);
    return;
  }

  // Preserve Debug Info.
  transferDbgValues(From, To);
  copyExtraInfo(From.getNode(), To.getNode());

  // Iterate over just the existing users of From. See the comments in
  // the ReplaceAllUsesWith above.
  SDNode::use_iterator UI = From.getNode()->use_begin(),
                       UE = From.getNode()->use_end();
  RAUWUpdateListener Listener(*this, UI, UE);
  while (UI != UE) {
    SDNode *User = UI->getUser();
    bool UserRemovedFromCSEMaps = false;

    // A user can appear in a use list multiple times, and when this
    // happens the uses are usually next to each other in the list.
    // To help reduce the number of CSE recomputations, process all
    // the uses of this user that we can find this way.
    do {
      SDUse &Use = *UI;

      // Skip uses of different values from the same node.
      if (Use.getResNo() != From.getResNo()) {
        ++UI;
        continue;
      }

      // If this node hasn't been modified yet, it's still in the CSE maps,
      // so remove its old self from the CSE maps.
      if (!UserRemovedFromCSEMaps) {
        RemoveNodeFromCSEMaps(User);
        UserRemovedFromCSEMaps = true;
      }

      ++UI;
      Use.set(To);
      if (To->isDivergent() != From->isDivergent())
        updateDivergence(User);
    } while (UI != UE && UI->getUser() == User);
    // We are iterating over all uses of the From node, so if a use
    // doesn't use the specific value, no changes are made.
    if (!UserRemovedFromCSEMaps)
      continue;

    // Now that we have modified User, add it back to the CSE maps.  If it
    // already exists there, recursively merge the results together.
    AddModifiedNodeToCSEMaps(User);
  }

  // If we just RAUW'd the root, take note.
  if (From == getRoot())
    setRoot(To);
}

namespace {

/// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
/// to record information about a use.
12110 struct UseMemo { 12111 SDNode *User; 12112 unsigned Index; 12113 SDUse *Use; 12114 }; 12115 12116 /// operator< - Sort Memos by User. 12117 bool operator<(const UseMemo &L, const UseMemo &R) { 12118 return (intptr_t)L.User < (intptr_t)R.User; 12119 } 12120 12121 /// RAUOVWUpdateListener - Helper for ReplaceAllUsesOfValuesWith - When the node 12122 /// pointed to by a UseMemo is deleted, set the User to nullptr to indicate that 12123 /// the node already has been taken care of recursively. 12124 class RAUOVWUpdateListener : public SelectionDAG::DAGUpdateListener { 12125 SmallVectorImpl<UseMemo> &Uses; 12126 12127 void NodeDeleted(SDNode *N, SDNode *E) override { 12128 for (UseMemo &Memo : Uses) 12129 if (Memo.User == N) 12130 Memo.User = nullptr; 12131 } 12132 12133 public: 12134 RAUOVWUpdateListener(SelectionDAG &d, SmallVectorImpl<UseMemo> &uses) 12135 : SelectionDAG::DAGUpdateListener(d), Uses(uses) {} 12136 }; 12137 12138 } // end anonymous namespace 12139 12140 /// Return true if a glue output should propagate divergence information. 12141 static bool gluePropagatesDivergence(const SDNode *Node) { 12142 switch (Node->getOpcode()) { 12143 case ISD::CopyFromReg: 12144 case ISD::CopyToReg: 12145 return false; 12146 default: 12147 return true; 12148 } 12149 12150 llvm_unreachable("covered opcode switch"); 12151 } 12152 12153 bool SelectionDAG::calculateDivergence(SDNode *N) { 12154 if (!DivergentTarget) 12155 return false; 12156 if (TLI->isSDNodeAlwaysUniform(N)) { 12157 assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, UA) && 12158 "Conflicting divergence information!"); 12159 return false; 12160 } 12161 if (TLI->isSDNodeSourceOfDivergence(N, FLI, UA)) 12162 return true; 12163 for (const auto &Op : N->ops()) { 12164 EVT VT = Op.getValueType(); 12165 12166 // Skip Chain. It does not carry divergence. 
12167 if (VT != MVT::Other && Op.getNode()->isDivergent() && 12168 (VT != MVT::Glue || gluePropagatesDivergence(Op.getNode()))) 12169 return true; 12170 } 12171 return false; 12172 } 12173 12174 void SelectionDAG::updateDivergence(SDNode *N) { 12175 if (!DivergentTarget) 12176 return; 12177 SmallVector<SDNode *, 16> Worklist(1, N); 12178 do { 12179 N = Worklist.pop_back_val(); 12180 bool IsDivergent = calculateDivergence(N); 12181 if (N->SDNodeBits.IsDivergent != IsDivergent) { 12182 N->SDNodeBits.IsDivergent = IsDivergent; 12183 llvm::append_range(Worklist, N->users()); 12184 } 12185 } while (!Worklist.empty()); 12186 } 12187 12188 void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { 12189 DenseMap<SDNode *, unsigned> Degree; 12190 Order.reserve(AllNodes.size()); 12191 for (auto &N : allnodes()) { 12192 unsigned NOps = N.getNumOperands(); 12193 Degree[&N] = NOps; 12194 if (0 == NOps) 12195 Order.push_back(&N); 12196 } 12197 for (size_t I = 0; I != Order.size(); ++I) { 12198 SDNode *N = Order[I]; 12199 for (auto *U : N->users()) { 12200 unsigned &UnsortedOps = Degree[U]; 12201 if (0 == --UnsortedOps) 12202 Order.push_back(U); 12203 } 12204 } 12205 } 12206 12207 #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS 12208 void SelectionDAG::VerifyDAGDivergence() { 12209 std::vector<SDNode *> TopoOrder; 12210 CreateTopologicalOrder(TopoOrder); 12211 for (auto *N : TopoOrder) { 12212 assert(calculateDivergence(N) == N->isDivergent() && 12213 "Divergence bit inconsistency detected"); 12214 } 12215 } 12216 #endif 12217 12218 /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving 12219 /// uses of other values produced by From.getNode() alone. The same value 12220 /// may appear in both the From and To list. The Deleted vector is 12221 /// handled the same way as for ReplaceAllUsesWith. 
void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
                                              const SDValue *To,
                                              unsigned Num){
  // From and To are parallel arrays of Num values: uses of From[i] are
  // rewritten to To[i].

  // Handle the simple, trivial case efficiently.
  if (Num == 1)
    return ReplaceAllUsesOfValueWith(*From, *To);

  // NOTE(review): only the first From/To pair has its debug values and extra
  // info transferred here.
  transferDbgValues(*From, *To);
  copyExtraInfo(From->getNode(), To->getNode());

  // Read up all the uses and make records of them. This helps
  // processing new uses that are introduced during the
  // replacement process.
  SmallVector<UseMemo, 4> Uses;
  for (unsigned i = 0; i != Num; ++i) {
    unsigned FromResNo = From[i].getResNo();
    SDNode *FromNode = From[i].getNode();
    for (SDUse &Use : FromNode->uses()) {
      if (Use.getResNo() == FromResNo) {
        UseMemo Memo = {Use.getUser(), i, &Use};
        Uses.push_back(Memo);
      }
    }
  }

  // Sort the uses, so that all the uses from a given User are together.
  llvm::sort(Uses);
  // While this listener is alive, memos whose User gets deleted have their
  // User reset to nullptr.
  RAUOVWUpdateListener Listener(*this, Uses);

  for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
       UseIndex != UseIndexEnd; ) {
    // We know that this user uses some value of From.  If it is the right
    // value, update it.
    SDNode *User = Uses[UseIndex].User;
    // If the node has been deleted by recursive CSE updates when updating
    // another node, then just skip this entry.
    if (User == nullptr) {
      ++UseIndex;
      continue;
    }

    // This node is about to morph, remove its old self from the CSE maps.
    RemoveNodeFromCSEMaps(User);

    // The Uses array is sorted, so all the uses for a given User
    // are next to each other in the list.
    // To help reduce the number of CSE recomputations, process all
    // the uses of this user that we can find this way.
    do {
      unsigned i = Uses[UseIndex].Index;
      SDUse &Use = *Uses[UseIndex].Use;
      ++UseIndex;

      Use.set(To[i]);
    } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);

    // Now that we have modified User, add it back to the CSE maps.  If it
    // already exists there, recursively merge the results together.
    AddModifiedNodeToCSEMaps(User);
  }
}

/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
/// based on their topological order. The AllNodes list itself is reordered
/// into that order, and the number of nodes (one more than the largest id
/// assigned) is returned.
unsigned SelectionDAG::AssignTopologicalOrder() {
  unsigned DAGSize = 0;

  // SortedPos tracks the progress of the algorithm. Nodes before it are
  // sorted, nodes after it are unsorted. When the algorithm completes
  // it is at the end of the list.
  allnodes_iterator SortedPos = allnodes_begin();

  // Visit all the nodes. Move nodes with no operands to the front of
  // the list immediately. Annotate nodes that do have operands with their
  // operand count. Before we do this, the Node Id fields of the nodes
  // may contain arbitrary values. After, the Node Id fields for nodes
  // before SortedPos will contain the topological sort index, and the
  // Node Id fields for nodes At SortedPos and after will contain the
  // count of outstanding operands.
  // (The early-increment range is used because nodes are moved within
  // AllNodes while it is being walked.)
  for (SDNode &N : llvm::make_early_inc_range(allnodes())) {
    checkForCycles(&N, this);
    unsigned Degree = N.getNumOperands();
    if (Degree == 0) {
      // A node with no operands, add it to the result array immediately.
      N.setNodeId(DAGSize++);
      allnodes_iterator Q(&N);
      if (Q != SortedPos)
        SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
      assert(SortedPos != AllNodes.end() && "Overran node list");
      ++SortedPos;
    } else {
      // Temporarily use the Node Id as scratch space for the degree count.
      N.setNodeId(Degree);
    }
  }

  // Visit all the nodes. As we iterate, move nodes into sorted order,
  // such that by the time the end is reached all nodes will be sorted.
  for (SDNode &Node : allnodes()) {
    SDNode *N = &Node;
    checkForCycles(N, this);
    // N is in sorted position, so all its users have one less operand
    // that needs to be sorted.
    for (SDNode *P : N->users()) {
      unsigned Degree = P->getNodeId();
      assert(Degree != 0 && "Invalid node degree");
      --Degree;
      if (Degree == 0) {
        // All of P's operands are sorted, so P may be sorted now.
        P->setNodeId(DAGSize++);
        if (P->getIterator() != SortedPos)
          SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
        assert(SortedPos != AllNodes.end() && "Overran node list");
        ++SortedPos;
      } else {
        // Update P's outstanding operand count.
        P->setNodeId(Degree);
      }
    }
    // The walk has caught up with the unsorted region: the node being visited
    // was never placed. Dump diagnostics (debug builds only) and abort.
    if (Node.getIterator() == SortedPos) {
#ifndef NDEBUG
      allnodes_iterator I(N);
      SDNode *S = &*++I;
      dbgs() << "Overran sorted position:\n";
      S->dumprFull(this); dbgs() << "\n";
      dbgs() << "Checking if this is due to cycles\n";
      checkForCycles(this, true);
#endif
      llvm_unreachable(nullptr);
    }
  }

  assert(SortedPos == AllNodes.end() &&
         "Topological sort incomplete!");
  assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
         "First node in topological sort is not the entry token!");
  assert(AllNodes.front().getNodeId() == 0 &&
         "First node in topological sort has non-zero id!");
  assert(AllNodes.front().getNumOperands() == 0 &&
         "First node in topological sort has operands!");
  assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
         "Last node in topologic sort has unexpected id!");
  assert(AllNodes.back().use_empty() &&
         "Last node in topologic sort has users!");
  assert(DAGSize == allnodes_size() && "Node count mismatch!");
  return DAGSize;
}

/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
12373 void SelectionDAG::AddDbgValue(SDDbgValue *DB, bool isParameter) { 12374 for (SDNode *SD : DB->getSDNodes()) { 12375 if (!SD) 12376 continue; 12377 assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue()); 12378 SD->setHasDebugValue(true); 12379 } 12380 DbgInfo->add(DB, isParameter); 12381 } 12382 12383 void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { DbgInfo->add(DB); } 12384 12385 SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, 12386 SDValue NewMemOpChain) { 12387 assert(isa<MemSDNode>(NewMemOpChain) && "Expected a memop node"); 12388 assert(NewMemOpChain.getValueType() == MVT::Other && "Expected a token VT"); 12389 // The new memory operation must have the same position as the old load in 12390 // terms of memory dependency. Create a TokenFactor for the old load and new 12391 // memory operation and update uses of the old load's output chain to use that 12392 // TokenFactor. 12393 if (OldChain == NewMemOpChain || OldChain.use_empty()) 12394 return NewMemOpChain; 12395 12396 SDValue TokenFactor = getNode(ISD::TokenFactor, SDLoc(OldChain), MVT::Other, 12397 OldChain, NewMemOpChain); 12398 ReplaceAllUsesOfValueWith(OldChain, TokenFactor); 12399 UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewMemOpChain); 12400 return TokenFactor; 12401 } 12402 12403 SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, 12404 SDValue NewMemOp) { 12405 assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); 12406 SDValue OldChain = SDValue(OldLoad, 1); 12407 SDValue NewMemOpChain = NewMemOp.getValue(1); 12408 return makeEquivalentMemoryOrdering(OldChain, NewMemOpChain); 12409 } 12410 12411 SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, 12412 Function **OutFunction) { 12413 assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol"); 12414 12415 auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 12416 auto *Module = MF->getFunction().getParent(); 12417 auto 
*Function = Module->getFunction(Symbol); 12418 12419 if (OutFunction != nullptr) 12420 *OutFunction = Function; 12421 12422 if (Function != nullptr) { 12423 auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace()); 12424 return getGlobalAddress(Function, SDLoc(Op), PtrTy); 12425 } 12426 12427 std::string ErrorStr; 12428 raw_string_ostream ErrorFormatter(ErrorStr); 12429 ErrorFormatter << "Undefined external symbol "; 12430 ErrorFormatter << '"' << Symbol << '"'; 12431 report_fatal_error(Twine(ErrorStr)); 12432 } 12433 12434 //===----------------------------------------------------------------------===// 12435 // SDNode Class 12436 //===----------------------------------------------------------------------===// 12437 12438 bool llvm::isNullConstant(SDValue V) { 12439 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 12440 return Const != nullptr && Const->isZero(); 12441 } 12442 12443 bool llvm::isNullConstantOrUndef(SDValue V) { 12444 return V.isUndef() || isNullConstant(V); 12445 } 12446 12447 bool llvm::isNullFPConstant(SDValue V) { 12448 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); 12449 return Const != nullptr && Const->isZero() && !Const->isNegative(); 12450 } 12451 12452 bool llvm::isAllOnesConstant(SDValue V) { 12453 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 12454 return Const != nullptr && Const->isAllOnes(); 12455 } 12456 12457 bool llvm::isOneConstant(SDValue V) { 12458 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 12459 return Const != nullptr && Const->isOne(); 12460 } 12461 12462 bool llvm::isMinSignedConstant(SDValue V) { 12463 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 12464 return Const != nullptr && Const->isMinSignedValue(); 12465 } 12466 12467 bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V, 12468 unsigned OperandNo) { 12469 // NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity(). 12470 // TODO: Target-specific opcodes could be added. 
12471 if (auto *ConstV = isConstOrConstSplat(V, /*AllowUndefs*/ false, 12472 /*AllowTruncation*/ true)) { 12473 APInt Const = ConstV->getAPIntValue().trunc(V.getScalarValueSizeInBits()); 12474 switch (Opcode) { 12475 case ISD::ADD: 12476 case ISD::OR: 12477 case ISD::XOR: 12478 case ISD::UMAX: 12479 return Const.isZero(); 12480 case ISD::MUL: 12481 return Const.isOne(); 12482 case ISD::AND: 12483 case ISD::UMIN: 12484 return Const.isAllOnes(); 12485 case ISD::SMAX: 12486 return Const.isMinSignedValue(); 12487 case ISD::SMIN: 12488 return Const.isMaxSignedValue(); 12489 case ISD::SUB: 12490 case ISD::SHL: 12491 case ISD::SRA: 12492 case ISD::SRL: 12493 return OperandNo == 1 && Const.isZero(); 12494 case ISD::UDIV: 12495 case ISD::SDIV: 12496 return OperandNo == 1 && Const.isOne(); 12497 } 12498 } else if (auto *ConstFP = isConstOrConstSplatFP(V)) { 12499 switch (Opcode) { 12500 case ISD::FADD: 12501 return ConstFP->isZero() && 12502 (Flags.hasNoSignedZeros() || ConstFP->isNegative()); 12503 case ISD::FSUB: 12504 return OperandNo == 1 && ConstFP->isZero() && 12505 (Flags.hasNoSignedZeros() || !ConstFP->isNegative()); 12506 case ISD::FMUL: 12507 return ConstFP->isExactlyValue(1.0); 12508 case ISD::FDIV: 12509 return OperandNo == 1 && ConstFP->isExactlyValue(1.0); 12510 case ISD::FMINNUM: 12511 case ISD::FMAXNUM: { 12512 // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF. 12513 EVT VT = V.getValueType(); 12514 const fltSemantics &Semantics = VT.getFltSemantics(); 12515 APFloat NeutralAF = !Flags.hasNoNaNs() 12516 ? APFloat::getQNaN(Semantics) 12517 : !Flags.hasNoInfs() 12518 ? 
APFloat::getInf(Semantics) 12519 : APFloat::getLargest(Semantics); 12520 if (Opcode == ISD::FMAXNUM) 12521 NeutralAF.changeSign(); 12522 12523 return ConstFP->isExactlyValue(NeutralAF); 12524 } 12525 } 12526 } 12527 return false; 12528 } 12529 12530 SDValue llvm::peekThroughBitcasts(SDValue V) { 12531 while (V.getOpcode() == ISD::BITCAST) 12532 V = V.getOperand(0); 12533 return V; 12534 } 12535 12536 SDValue llvm::peekThroughOneUseBitcasts(SDValue V) { 12537 while (V.getOpcode() == ISD::BITCAST && V.getOperand(0).hasOneUse()) 12538 V = V.getOperand(0); 12539 return V; 12540 } 12541 12542 SDValue llvm::peekThroughExtractSubvectors(SDValue V) { 12543 while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) 12544 V = V.getOperand(0); 12545 return V; 12546 } 12547 12548 SDValue llvm::peekThroughTruncates(SDValue V) { 12549 while (V.getOpcode() == ISD::TRUNCATE) 12550 V = V.getOperand(0); 12551 return V; 12552 } 12553 12554 bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) { 12555 if (V.getOpcode() != ISD::XOR) 12556 return false; 12557 V = peekThroughBitcasts(V.getOperand(1)); 12558 unsigned NumBits = V.getScalarValueSizeInBits(); 12559 ConstantSDNode *C = 12560 isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true); 12561 return C && (C->getAPIntValue().countr_one() >= NumBits); 12562 } 12563 12564 ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, 12565 bool AllowTruncation) { 12566 EVT VT = N.getValueType(); 12567 APInt DemandedElts = VT.isFixedLengthVector() 12568 ? APInt::getAllOnes(VT.getVectorMinNumElements()) 12569 : APInt(1, 1); 12570 return isConstOrConstSplat(N, DemandedElts, AllowUndefs, AllowTruncation); 12571 } 12572 12573 ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, 12574 bool AllowUndefs, 12575 bool AllowTruncation) { 12576 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) 12577 return CN; 12578 12579 // SplatVectors can truncate their operands. 
Ignore that case here unless 12580 // AllowTruncation is set. 12581 if (N->getOpcode() == ISD::SPLAT_VECTOR) { 12582 EVT VecEltVT = N->getValueType(0).getVectorElementType(); 12583 if (auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 12584 EVT CVT = CN->getValueType(0); 12585 assert(CVT.bitsGE(VecEltVT) && "Illegal splat_vector element extension"); 12586 if (AllowTruncation || CVT == VecEltVT) 12587 return CN; 12588 } 12589 } 12590 12591 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 12592 BitVector UndefElements; 12593 ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements); 12594 12595 // BuildVectors can truncate their operands. Ignore that case here unless 12596 // AllowTruncation is set. 12597 // TODO: Look into whether we should allow UndefElements in non-DemandedElts 12598 if (CN && (UndefElements.none() || AllowUndefs)) { 12599 EVT CVT = CN->getValueType(0); 12600 EVT NSVT = N.getValueType().getScalarType(); 12601 assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); 12602 if (AllowTruncation || (CVT == NSVT)) 12603 return CN; 12604 } 12605 } 12606 12607 return nullptr; 12608 } 12609 12610 ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { 12611 EVT VT = N.getValueType(); 12612 APInt DemandedElts = VT.isFixedLengthVector() 12613 ? 
APInt::getAllOnes(VT.getVectorMinNumElements()) 12614 : APInt(1, 1); 12615 return isConstOrConstSplatFP(N, DemandedElts, AllowUndefs); 12616 } 12617 12618 ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, 12619 const APInt &DemandedElts, 12620 bool AllowUndefs) { 12621 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 12622 return CN; 12623 12624 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 12625 BitVector UndefElements; 12626 ConstantFPSDNode *CN = 12627 BV->getConstantFPSplatNode(DemandedElts, &UndefElements); 12628 // TODO: Look into whether we should allow UndefElements in non-DemandedElts 12629 if (CN && (UndefElements.none() || AllowUndefs)) 12630 return CN; 12631 } 12632 12633 if (N.getOpcode() == ISD::SPLAT_VECTOR) 12634 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0))) 12635 return CN; 12636 12637 return nullptr; 12638 } 12639 12640 bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) { 12641 // TODO: may want to use peekThroughBitcast() here. 
12642 ConstantSDNode *C = 12643 isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true); 12644 return C && C->isZero(); 12645 } 12646 12647 bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) { 12648 ConstantSDNode *C = 12649 isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation*/ true); 12650 return C && C->isOne(); 12651 } 12652 12653 bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) { 12654 N = peekThroughBitcasts(N); 12655 unsigned BitWidth = N.getScalarValueSizeInBits(); 12656 ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); 12657 return C && C->isAllOnes() && C->getValueSizeInBits(0) == BitWidth; 12658 } 12659 12660 bool llvm::isOnesOrOnesSplat(SDValue N, bool AllowUndefs) { 12661 ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); 12662 return C && APInt::isSameValue(C->getAPIntValue(), 12663 APInt(C->getAPIntValue().getBitWidth(), 1)); 12664 } 12665 12666 bool llvm::isZeroOrZeroSplat(SDValue N, bool AllowUndefs) { 12667 N = peekThroughBitcasts(N); 12668 ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs, true); 12669 return C && C->isZero(); 12670 } 12671 12672 HandleSDNode::~HandleSDNode() { 12673 DropOperands(); 12674 } 12675 12676 MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, 12677 SDVTList VTs, EVT memvt, MachineMemOperand *mmo) 12678 : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { 12679 MemSDNodeBits.IsVolatile = MMO->isVolatile(); 12680 MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal(); 12681 MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable(); 12682 MemSDNodeBits.IsInvariant = MMO->isInvariant(); 12683 12684 // We check here that the size of the memory operand fits within the size of 12685 // the MMO. This is because the MMO might indicate only a possible address 12686 // range instead of specifying the affected memory addresses precisely. 
12687 assert( 12688 (!MMO->getType().isValid() || 12689 TypeSize::isKnownLE(memvt.getStoreSize(), MMO->getSize().getValue())) && 12690 "Size mismatch!"); 12691 } 12692 12693 /// Profile - Gather unique data for the node. 12694 /// 12695 void SDNode::Profile(FoldingSetNodeID &ID) const { 12696 AddNodeIDNode(ID, this); 12697 } 12698 12699 namespace { 12700 12701 struct EVTArray { 12702 std::vector<EVT> VTs; 12703 12704 EVTArray() { 12705 VTs.reserve(MVT::VALUETYPE_SIZE); 12706 for (unsigned i = 0; i < MVT::VALUETYPE_SIZE; ++i) 12707 VTs.push_back(MVT((MVT::SimpleValueType)i)); 12708 } 12709 }; 12710 12711 } // end anonymous namespace 12712 12713 /// getValueTypeList - Return a pointer to the specified value type. 12714 /// 12715 const EVT *SDNode::getValueTypeList(MVT VT) { 12716 static EVTArray SimpleVTArray; 12717 12718 assert(VT < MVT::VALUETYPE_SIZE && "Value type out of range!"); 12719 return &SimpleVTArray.VTs[VT.SimpleTy]; 12720 } 12721 12722 /// hasAnyUseOfValue - Return true if there are any use of the indicated 12723 /// value. This method ignores uses of other values defined by this operation. 12724 bool SDNode::hasAnyUseOfValue(unsigned Value) const { 12725 assert(Value < getNumValues() && "Bad value!"); 12726 12727 for (SDUse &U : uses()) 12728 if (U.getResNo() == Value) 12729 return true; 12730 12731 return false; 12732 } 12733 12734 /// isOnlyUserOf - Return true if this node is the only use of N. 12735 bool SDNode::isOnlyUserOf(const SDNode *N) const { 12736 bool Seen = false; 12737 for (const SDNode *User : N->users()) { 12738 if (User == this) 12739 Seen = true; 12740 else 12741 return false; 12742 } 12743 12744 return Seen; 12745 } 12746 12747 /// Return true if the only users of N are contained in Nodes. 
12748 bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { 12749 bool Seen = false; 12750 for (const SDNode *User : N->users()) { 12751 if (llvm::is_contained(Nodes, User)) 12752 Seen = true; 12753 else 12754 return false; 12755 } 12756 12757 return Seen; 12758 } 12759 12760 /// isOperand - Return true if this node is an operand of N. 12761 bool SDValue::isOperandOf(const SDNode *N) const { 12762 return is_contained(N->op_values(), *this); 12763 } 12764 12765 bool SDNode::isOperandOf(const SDNode *N) const { 12766 return any_of(N->op_values(), 12767 [this](SDValue Op) { return this == Op.getNode(); }); 12768 } 12769 12770 /// reachesChainWithoutSideEffects - Return true if this operand (which must 12771 /// be a chain) reaches the specified operand without crossing any 12772 /// side-effecting instructions on any chain path. In practice, this looks 12773 /// through token factors and non-volatile loads. In order to remain efficient, 12774 /// this only looks a couple of nodes in, it does not do an exhaustive search. 12775 /// 12776 /// Note that we only need to examine chains when we're searching for 12777 /// side-effects; SelectionDAG requires that all side-effects are represented 12778 /// by chains, even if another operand would force a specific ordering. This 12779 /// constraint is necessary to allow transformations like splitting loads. 12780 bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, 12781 unsigned Depth) const { 12782 if (*this == Dest) return true; 12783 12784 // Don't search too deeply, we just want to be able to see through 12785 // TokenFactor's etc. 12786 if (Depth == 0) return false; 12787 12788 // If this is a token factor, all inputs to the TF happen in parallel. 12789 if (getOpcode() == ISD::TokenFactor) { 12790 // First, try a shallow search. 12791 if (is_contained((*this)->ops(), Dest)) { 12792 // We found the chain we want as an operand of this TokenFactor. 
12793 // Essentially, we reach the chain without side-effects if we could 12794 // serialize the TokenFactor into a simple chain of operations with 12795 // Dest as the last operation. This is automatically true if the 12796 // chain has one use: there are no other ordering constraints. 12797 // If the chain has more than one use, we give up: some other 12798 // use of Dest might force a side-effect between Dest and the current 12799 // node. 12800 if (Dest.hasOneUse()) 12801 return true; 12802 } 12803 // Next, try a deep search: check whether every operand of the TokenFactor 12804 // reaches Dest. 12805 return llvm::all_of((*this)->ops(), [=](SDValue Op) { 12806 return Op.reachesChainWithoutSideEffects(Dest, Depth - 1); 12807 }); 12808 } 12809 12810 // Loads don't have side effects, look through them. 12811 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { 12812 if (Ld->isUnordered()) 12813 return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); 12814 } 12815 return false; 12816 } 12817 12818 bool SDNode::hasPredecessor(const SDNode *N) const { 12819 SmallPtrSet<const SDNode *, 32> Visited; 12820 SmallVector<const SDNode *, 16> Worklist; 12821 Worklist.push_back(this); 12822 return hasPredecessorHelper(N, Visited, Worklist); 12823 } 12824 12825 void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { 12826 this->Flags &= Flags; 12827 } 12828 12829 SDValue 12830 SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, 12831 ArrayRef<ISD::NodeType> CandidateBinOps, 12832 bool AllowPartials) { 12833 // The pattern must end in an extract from index 0. 12834 if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || 12835 !isNullConstant(Extract->getOperand(1))) 12836 return SDValue(); 12837 12838 // Match against one of the candidate binary ops. 
12839 SDValue Op = Extract->getOperand(0); 12840 if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) { 12841 return Op.getOpcode() == unsigned(BinOp); 12842 })) 12843 return SDValue(); 12844 12845 // Floating-point reductions may require relaxed constraints on the final step 12846 // of the reduction because they may reorder intermediate operations. 12847 unsigned CandidateBinOp = Op.getOpcode(); 12848 if (Op.getValueType().isFloatingPoint()) { 12849 SDNodeFlags Flags = Op->getFlags(); 12850 switch (CandidateBinOp) { 12851 case ISD::FADD: 12852 if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation()) 12853 return SDValue(); 12854 break; 12855 default: 12856 llvm_unreachable("Unhandled FP opcode for binop reduction"); 12857 } 12858 } 12859 12860 // Matching failed - attempt to see if we did enough stages that a partial 12861 // reduction from a subvector is possible. 12862 auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) { 12863 if (!AllowPartials || !Op) 12864 return SDValue(); 12865 EVT OpVT = Op.getValueType(); 12866 EVT OpSVT = OpVT.getScalarType(); 12867 EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts); 12868 if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) 12869 return SDValue(); 12870 BinOp = (ISD::NodeType)CandidateBinOp; 12871 return getExtractSubvector(SDLoc(Op), SubVT, Op, 0); 12872 }; 12873 12874 // At each stage, we're looking for something that looks like: 12875 // %s = shufflevector <8 x i32> %op, <8 x i32> undef, 12876 // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, 12877 // i32 undef, i32 undef, i32 undef, i32 undef> 12878 // %a = binop <8 x i32> %op, %s 12879 // Where the mask changes according to the stage. E.g. 
for a 3-stage pyramid, 12880 // we expect something like: 12881 // <4,5,6,7,u,u,u,u> 12882 // <2,3,u,u,u,u,u,u> 12883 // <1,u,u,u,u,u,u,u> 12884 // While a partial reduction match would be: 12885 // <2,3,u,u,u,u,u,u> 12886 // <1,u,u,u,u,u,u,u> 12887 unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); 12888 SDValue PrevOp; 12889 for (unsigned i = 0; i < Stages; ++i) { 12890 unsigned MaskEnd = (1 << i); 12891 12892 if (Op.getOpcode() != CandidateBinOp) 12893 return PartialReduction(PrevOp, MaskEnd); 12894 12895 SDValue Op0 = Op.getOperand(0); 12896 SDValue Op1 = Op.getOperand(1); 12897 12898 ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(Op0); 12899 if (Shuffle) { 12900 Op = Op1; 12901 } else { 12902 Shuffle = dyn_cast<ShuffleVectorSDNode>(Op1); 12903 Op = Op0; 12904 } 12905 12906 // The first operand of the shuffle should be the same as the other operand 12907 // of the binop. 12908 if (!Shuffle || Shuffle->getOperand(0) != Op) 12909 return PartialReduction(PrevOp, MaskEnd); 12910 12911 // Verify the shuffle has the expected (at this stage of the pyramid) mask. 12912 for (int Index = 0; Index < (int)MaskEnd; ++Index) 12913 if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index)) 12914 return PartialReduction(PrevOp, MaskEnd); 12915 12916 PrevOp = Op; 12917 } 12918 12919 // Handle subvector reductions, which tend to appear after the shuffle 12920 // reduction stages. 
12921 while (Op.getOpcode() == CandidateBinOp) { 12922 unsigned NumElts = Op.getValueType().getVectorNumElements(); 12923 SDValue Op0 = Op.getOperand(0); 12924 SDValue Op1 = Op.getOperand(1); 12925 if (Op0.getOpcode() != ISD::EXTRACT_SUBVECTOR || 12926 Op1.getOpcode() != ISD::EXTRACT_SUBVECTOR || 12927 Op0.getOperand(0) != Op1.getOperand(0)) 12928 break; 12929 SDValue Src = Op0.getOperand(0); 12930 unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); 12931 if (NumSrcElts != (2 * NumElts)) 12932 break; 12933 if (!(Op0.getConstantOperandAPInt(1) == 0 && 12934 Op1.getConstantOperandAPInt(1) == NumElts) && 12935 !(Op1.getConstantOperandAPInt(1) == 0 && 12936 Op0.getConstantOperandAPInt(1) == NumElts)) 12937 break; 12938 Op = Src; 12939 } 12940 12941 BinOp = (ISD::NodeType)CandidateBinOp; 12942 return Op; 12943 } 12944 12945 SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { 12946 EVT VT = N->getValueType(0); 12947 EVT EltVT = VT.getVectorElementType(); 12948 unsigned NE = VT.getVectorNumElements(); 12949 12950 SDLoc dl(N); 12951 12952 // If ResNE is 0, fully unroll the vector op. 12953 if (ResNE == 0) 12954 ResNE = NE; 12955 else if (NE > ResNE) 12956 NE = ResNE; 12957 12958 if (N->getNumValues() == 2) { 12959 SmallVector<SDValue, 8> Scalars0, Scalars1; 12960 SmallVector<SDValue, 4> Operands(N->getNumOperands()); 12961 EVT VT1 = N->getValueType(1); 12962 EVT EltVT1 = VT1.getVectorElementType(); 12963 12964 unsigned i; 12965 for (i = 0; i != NE; ++i) { 12966 for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { 12967 SDValue Operand = N->getOperand(j); 12968 EVT OperandVT = Operand.getValueType(); 12969 12970 // A vector operand; extract a single element. 
12971 EVT OperandEltVT = OperandVT.getVectorElementType(); 12972 Operands[j] = getExtractVectorElt(dl, OperandEltVT, Operand, i); 12973 } 12974 12975 SDValue EltOp = getNode(N->getOpcode(), dl, {EltVT, EltVT1}, Operands); 12976 Scalars0.push_back(EltOp); 12977 Scalars1.push_back(EltOp.getValue(1)); 12978 } 12979 12980 for (; i < ResNE; ++i) { 12981 Scalars0.push_back(getUNDEF(EltVT)); 12982 Scalars1.push_back(getUNDEF(EltVT1)); 12983 } 12984 12985 EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE); 12986 EVT VecVT1 = EVT::getVectorVT(*getContext(), EltVT1, ResNE); 12987 SDValue Vec0 = getBuildVector(VecVT, dl, Scalars0); 12988 SDValue Vec1 = getBuildVector(VecVT1, dl, Scalars1); 12989 return getMergeValues({Vec0, Vec1}, dl); 12990 } 12991 12992 assert(N->getNumValues() == 1 && 12993 "Can't unroll a vector with multiple results!"); 12994 12995 SmallVector<SDValue, 8> Scalars; 12996 SmallVector<SDValue, 4> Operands(N->getNumOperands()); 12997 12998 unsigned i; 12999 for (i= 0; i != NE; ++i) { 13000 for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { 13001 SDValue Operand = N->getOperand(j); 13002 EVT OperandVT = Operand.getValueType(); 13003 if (OperandVT.isVector()) { 13004 // A vector operand; extract a single element. 13005 EVT OperandEltVT = OperandVT.getVectorElementType(); 13006 Operands[j] = getExtractVectorElt(dl, OperandEltVT, Operand, i); 13007 } else { 13008 // A scalar operand; just use it as is. 
13009 Operands[j] = Operand; 13010 } 13011 } 13012 13013 switch (N->getOpcode()) { 13014 default: { 13015 Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands, 13016 N->getFlags())); 13017 break; 13018 } 13019 case ISD::VSELECT: 13020 Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); 13021 break; 13022 case ISD::SHL: 13023 case ISD::SRA: 13024 case ISD::SRL: 13025 case ISD::ROTL: 13026 case ISD::ROTR: 13027 Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], 13028 getShiftAmountOperand(Operands[0].getValueType(), 13029 Operands[1]))); 13030 break; 13031 case ISD::SIGN_EXTEND_INREG: { 13032 EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); 13033 Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, 13034 Operands[0], 13035 getValueType(ExtVT))); 13036 break; 13037 } 13038 case ISD::ADDRSPACECAST: { 13039 const auto *ASC = cast<AddrSpaceCastSDNode>(N); 13040 Scalars.push_back(getAddrSpaceCast(dl, EltVT, Operands[0], 13041 ASC->getSrcAddressSpace(), 13042 ASC->getDestAddressSpace())); 13043 break; 13044 } 13045 } 13046 } 13047 13048 for (; i < ResNE; ++i) 13049 Scalars.push_back(getUNDEF(EltVT)); 13050 13051 EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE); 13052 return getBuildVector(VecVT, dl, Scalars); 13053 } 13054 13055 std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp( 13056 SDNode *N, unsigned ResNE) { 13057 unsigned Opcode = N->getOpcode(); 13058 assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO || 13059 Opcode == ISD::USUBO || Opcode == ISD::SSUBO || 13060 Opcode == ISD::UMULO || Opcode == ISD::SMULO) && 13061 "Expected an overflow opcode"); 13062 13063 EVT ResVT = N->getValueType(0); 13064 EVT OvVT = N->getValueType(1); 13065 EVT ResEltVT = ResVT.getVectorElementType(); 13066 EVT OvEltVT = OvVT.getVectorElementType(); 13067 SDLoc dl(N); 13068 13069 // If ResNE is 0, fully unroll the vector op. 
13070 unsigned NE = ResVT.getVectorNumElements(); 13071 if (ResNE == 0) 13072 ResNE = NE; 13073 else if (NE > ResNE) 13074 NE = ResNE; 13075 13076 SmallVector<SDValue, 8> LHSScalars; 13077 SmallVector<SDValue, 8> RHSScalars; 13078 ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE); 13079 ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE); 13080 13081 EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT); 13082 SDVTList VTs = getVTList(ResEltVT, SVT); 13083 SmallVector<SDValue, 8> ResScalars; 13084 SmallVector<SDValue, 8> OvScalars; 13085 for (unsigned i = 0; i < NE; ++i) { 13086 SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]); 13087 SDValue Ov = 13088 getSelect(dl, OvEltVT, Res.getValue(1), 13089 getBoolConstant(true, dl, OvEltVT, ResVT), 13090 getConstant(0, dl, OvEltVT)); 13091 13092 ResScalars.push_back(Res); 13093 OvScalars.push_back(Ov); 13094 } 13095 13096 ResScalars.append(ResNE - NE, getUNDEF(ResEltVT)); 13097 OvScalars.append(ResNE - NE, getUNDEF(OvEltVT)); 13098 13099 EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE); 13100 EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE); 13101 return std::make_pair(getBuildVector(NewResVT, dl, ResScalars), 13102 getBuildVector(NewOvVT, dl, OvScalars)); 13103 } 13104 13105 bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, 13106 LoadSDNode *Base, 13107 unsigned Bytes, 13108 int Dist) const { 13109 if (LD->isVolatile() || Base->isVolatile()) 13110 return false; 13111 // TODO: probably too restrictive for atomics, revisit 13112 if (!LD->isSimple()) 13113 return false; 13114 if (LD->isIndexed() || Base->isIndexed()) 13115 return false; 13116 if (LD->getChain() != Base->getChain()) 13117 return false; 13118 EVT VT = LD->getMemoryVT(); 13119 if (VT.getSizeInBits() / 8 != Bytes) 13120 return false; 13121 13122 auto BaseLocDecomp = BaseIndexOffset::match(Base, *this); 13123 auto LocDecomp = BaseIndexOffset::match(LD, *this); 
13124 13125 int64_t Offset = 0; 13126 if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) 13127 return (Dist * (int64_t)Bytes == Offset); 13128 return false; 13129 } 13130 13131 /// InferPtrAlignment - Infer alignment of a load / store address. Return 13132 /// std::nullopt if it cannot be inferred. 13133 MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { 13134 // If this is a GlobalAddress + cst, return the alignment. 13135 const GlobalValue *GV = nullptr; 13136 int64_t GVOffset = 0; 13137 if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { 13138 unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); 13139 KnownBits Known(PtrWidth); 13140 llvm::computeKnownBits(GV, Known, getDataLayout()); 13141 unsigned AlignBits = Known.countMinTrailingZeros(); 13142 if (AlignBits) 13143 return commonAlignment(Align(1ull << std::min(31U, AlignBits)), GVOffset); 13144 } 13145 13146 // If this is a direct reference to a stack slot, use information about the 13147 // stack slot's alignment. 13148 int FrameIdx = INT_MIN; 13149 int64_t FrameOffset = 0; 13150 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { 13151 FrameIdx = FI->getIndex(); 13152 } else if (isBaseWithConstantOffset(Ptr) && 13153 isa<FrameIndexSDNode>(Ptr.getOperand(0))) { 13154 // Handle FI+Cst 13155 FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); 13156 FrameOffset = Ptr.getConstantOperandVal(1); 13157 } 13158 13159 if (FrameIdx != INT_MIN) { 13160 const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); 13161 return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset); 13162 } 13163 13164 return std::nullopt; 13165 } 13166 13167 /// Split the scalar node with EXTRACT_ELEMENT using the provided 13168 /// VTs and return the low/high part. 
13169 std::pair<SDValue, SDValue> SelectionDAG::SplitScalar(const SDValue &N, 13170 const SDLoc &DL, 13171 const EVT &LoVT, 13172 const EVT &HiVT) { 13173 assert(!LoVT.isVector() && !HiVT.isVector() && !N.getValueType().isVector() && 13174 "Split node must be a scalar type"); 13175 SDValue Lo = 13176 getNode(ISD::EXTRACT_ELEMENT, DL, LoVT, N, getIntPtrConstant(0, DL)); 13177 SDValue Hi = 13178 getNode(ISD::EXTRACT_ELEMENT, DL, HiVT, N, getIntPtrConstant(1, DL)); 13179 return std::make_pair(Lo, Hi); 13180 } 13181 13182 /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type 13183 /// which is split (or expanded) into two not necessarily identical pieces. 13184 std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { 13185 // Currently all types are split in half. 13186 EVT LoVT, HiVT; 13187 if (!VT.isVector()) 13188 LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); 13189 else 13190 LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext()); 13191 13192 return std::make_pair(LoVT, HiVT); 13193 } 13194 13195 /// GetDependentSplitDestVTs - Compute the VTs needed for the low/hi parts of a 13196 /// type, dependent on an enveloping VT that has been split into two identical 13197 /// pieces. Sets the HiIsEmpty flag when hi type has zero storage size. 13198 std::pair<EVT, EVT> 13199 SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT, 13200 bool *HiIsEmpty) const { 13201 EVT EltTp = VT.getVectorElementType(); 13202 // Examples: 13203 // custom VL=8 with enveloping VL=8/8 yields 8/0 (hi empty) 13204 // custom VL=9 with enveloping VL=8/8 yields 8/1 13205 // custom VL=10 with enveloping VL=8/8 yields 8/2 13206 // etc. 
13207 ElementCount VTNumElts = VT.getVectorElementCount(); 13208 ElementCount EnvNumElts = EnvVT.getVectorElementCount(); 13209 assert(VTNumElts.isScalable() == EnvNumElts.isScalable() && 13210 "Mixing fixed width and scalable vectors when enveloping a type"); 13211 EVT LoVT, HiVT; 13212 if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) { 13213 LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts); 13214 HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts); 13215 *HiIsEmpty = false; 13216 } else { 13217 // Flag that hi type has zero storage size, but return split envelop type 13218 // (this would be easier if vector types with zero elements were allowed). 13219 LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts); 13220 HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts); 13221 *HiIsEmpty = true; 13222 } 13223 return std::make_pair(LoVT, HiVT); 13224 } 13225 13226 /// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the 13227 /// low/high part. 13228 std::pair<SDValue, SDValue> 13229 SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, 13230 const EVT &HiVT) { 13231 assert(LoVT.isScalableVector() == HiVT.isScalableVector() && 13232 LoVT.isScalableVector() == N.getValueType().isScalableVector() && 13233 "Splitting vector with an invalid mixture of fixed and scalable " 13234 "vector types"); 13235 assert(LoVT.getVectorMinNumElements() + HiVT.getVectorMinNumElements() <= 13236 N.getValueType().getVectorMinNumElements() && 13237 "More vector elements requested than available!"); 13238 SDValue Lo, Hi; 13239 Lo = getExtractSubvector(DL, LoVT, N, 0); 13240 // For scalable vectors it is safe to use LoVT.getVectorMinNumElements() 13241 // (rather than having to use ElementCount), because EXTRACT_SUBVECTOR scales 13242 // IDX with the runtime scaling factor of the result vector type. For 13243 // fixed-width result vectors, that runtime scaling factor is 1. 
13244 Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, 13245 getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); 13246 return std::make_pair(Lo, Hi); 13247 } 13248 13249 std::pair<SDValue, SDValue> SelectionDAG::SplitEVL(SDValue N, EVT VecVT, 13250 const SDLoc &DL) { 13251 // Split the vector length parameter. 13252 // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts). 13253 EVT VT = N.getValueType(); 13254 assert(VecVT.getVectorElementCount().isKnownEven() && 13255 "Expecting the mask to be an evenly-sized vector"); 13256 unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2; 13257 SDValue HalfNumElts = 13258 VecVT.isFixedLengthVector() 13259 ? getConstant(HalfMinNumElts, DL, VT) 13260 : getVScale(DL, VT, APInt(VT.getScalarSizeInBits(), HalfMinNumElts)); 13261 SDValue Lo = getNode(ISD::UMIN, DL, VT, N, HalfNumElts); 13262 SDValue Hi = getNode(ISD::USUBSAT, DL, VT, N, HalfNumElts); 13263 return std::make_pair(Lo, Hi); 13264 } 13265 13266 /// Widen the vector up to the next power of two using INSERT_SUBVECTOR. 13267 SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) { 13268 EVT VT = N.getValueType(); 13269 EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), 13270 NextPowerOf2(VT.getVectorNumElements())); 13271 return getInsertSubvector(DL, getUNDEF(WideVT), N, 0); 13272 } 13273 13274 void SelectionDAG::ExtractVectorElements(SDValue Op, 13275 SmallVectorImpl<SDValue> &Args, 13276 unsigned Start, unsigned Count, 13277 EVT EltVT) { 13278 EVT VT = Op.getValueType(); 13279 if (Count == 0) 13280 Count = VT.getVectorNumElements(); 13281 if (EltVT == EVT()) 13282 EltVT = VT.getVectorElementType(); 13283 SDLoc SL(Op); 13284 for (unsigned i = Start, e = Start + Count; i != e; ++i) { 13285 Args.push_back(getExtractVectorElt(SL, EltVT, Op, i)); 13286 } 13287 } 13288 13289 // getAddressSpace - Return the address space this GlobalAddress belongs to. 
13290 unsigned GlobalAddressSDNode::getAddressSpace() const { 13291 return getGlobal()->getType()->getAddressSpace(); 13292 } 13293 13294 Type *ConstantPoolSDNode::getType() const { 13295 if (isMachineConstantPoolEntry()) 13296 return Val.MachineCPVal->getType(); 13297 return Val.ConstVal->getType(); 13298 } 13299 13300 bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, 13301 unsigned &SplatBitSize, 13302 bool &HasAnyUndefs, 13303 unsigned MinSplatBits, 13304 bool IsBigEndian) const { 13305 EVT VT = getValueType(0); 13306 assert(VT.isVector() && "Expected a vector type"); 13307 unsigned VecWidth = VT.getSizeInBits(); 13308 if (MinSplatBits > VecWidth) 13309 return false; 13310 13311 // FIXME: The widths are based on this node's type, but build vectors can 13312 // truncate their operands. 13313 SplatValue = APInt(VecWidth, 0); 13314 SplatUndef = APInt(VecWidth, 0); 13315 13316 // Get the bits. Bits with undefined values (when the corresponding element 13317 // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared 13318 // in SplatValue. If any of the values are not constant, give up and return 13319 // false. 13320 unsigned int NumOps = getNumOperands(); 13321 assert(NumOps > 0 && "isConstantSplat has 0-size build vector"); 13322 unsigned EltWidth = VT.getScalarSizeInBits(); 13323 13324 for (unsigned j = 0; j < NumOps; ++j) { 13325 unsigned i = IsBigEndian ? NumOps - 1 - j : j; 13326 SDValue OpVal = getOperand(i); 13327 unsigned BitPos = j * EltWidth; 13328 13329 if (OpVal.isUndef()) 13330 SplatUndef.setBits(BitPos, BitPos + EltWidth); 13331 else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal)) 13332 SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos); 13333 else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal)) 13334 SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos); 13335 else 13336 return false; 13337 } 13338 13339 // The build_vector is all constants or undefs. 
Find the smallest element 13340 // size that splats the vector. 13341 HasAnyUndefs = (SplatUndef != 0); 13342 13343 // FIXME: This does not work for vectors with elements less than 8 bits. 13344 while (VecWidth > 8) { 13345 // If we can't split in half, stop here. 13346 if (VecWidth & 1) 13347 break; 13348 13349 unsigned HalfSize = VecWidth / 2; 13350 APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize); 13351 APInt LowValue = SplatValue.extractBits(HalfSize, 0); 13352 APInt HighUndef = SplatUndef.extractBits(HalfSize, HalfSize); 13353 APInt LowUndef = SplatUndef.extractBits(HalfSize, 0); 13354 13355 // If the two halves do not match (ignoring undef bits), stop here. 13356 if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || 13357 MinSplatBits > HalfSize) 13358 break; 13359 13360 SplatValue = HighValue | LowValue; 13361 SplatUndef = HighUndef & LowUndef; 13362 13363 VecWidth = HalfSize; 13364 } 13365 13366 // FIXME: The loop above only tries to split in halves. But if the input 13367 // vector for example is <3 x i16> it wouldn't be able to detect a 13368 // SplatBitSize of 16. No idea if that is a design flaw currently limiting 13369 // optimizations. I guess that back in the days when this helper was created 13370 // vectors normally was power-of-2 sized. 
13371 13372 SplatBitSize = VecWidth; 13373 return true; 13374 } 13375 13376 SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts, 13377 BitVector *UndefElements) const { 13378 unsigned NumOps = getNumOperands(); 13379 if (UndefElements) { 13380 UndefElements->clear(); 13381 UndefElements->resize(NumOps); 13382 } 13383 assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size"); 13384 if (!DemandedElts) 13385 return SDValue(); 13386 SDValue Splatted; 13387 for (unsigned i = 0; i != NumOps; ++i) { 13388 if (!DemandedElts[i]) 13389 continue; 13390 SDValue Op = getOperand(i); 13391 if (Op.isUndef()) { 13392 if (UndefElements) 13393 (*UndefElements)[i] = true; 13394 } else if (!Splatted) { 13395 Splatted = Op; 13396 } else if (Splatted != Op) { 13397 return SDValue(); 13398 } 13399 } 13400 13401 if (!Splatted) { 13402 unsigned FirstDemandedIdx = DemandedElts.countr_zero(); 13403 assert(getOperand(FirstDemandedIdx).isUndef() && 13404 "Can only have a splat without a constant for all undefs."); 13405 return getOperand(FirstDemandedIdx); 13406 } 13407 13408 return Splatted; 13409 } 13410 13411 SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { 13412 APInt DemandedElts = APInt::getAllOnes(getNumOperands()); 13413 return getSplatValue(DemandedElts, UndefElements); 13414 } 13415 13416 bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts, 13417 SmallVectorImpl<SDValue> &Sequence, 13418 BitVector *UndefElements) const { 13419 unsigned NumOps = getNumOperands(); 13420 Sequence.clear(); 13421 if (UndefElements) { 13422 UndefElements->clear(); 13423 UndefElements->resize(NumOps); 13424 } 13425 assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size"); 13426 if (!DemandedElts || NumOps < 2 || !isPowerOf2_32(NumOps)) 13427 return false; 13428 13429 // Set the undefs even if we don't find a sequence (like getSplatValue). 
13430 if (UndefElements) 13431 for (unsigned I = 0; I != NumOps; ++I) 13432 if (DemandedElts[I] && getOperand(I).isUndef()) 13433 (*UndefElements)[I] = true; 13434 13435 // Iteratively widen the sequence length looking for repetitions. 13436 for (unsigned SeqLen = 1; SeqLen < NumOps; SeqLen *= 2) { 13437 Sequence.append(SeqLen, SDValue()); 13438 for (unsigned I = 0; I != NumOps; ++I) { 13439 if (!DemandedElts[I]) 13440 continue; 13441 SDValue &SeqOp = Sequence[I % SeqLen]; 13442 SDValue Op = getOperand(I); 13443 if (Op.isUndef()) { 13444 if (!SeqOp) 13445 SeqOp = Op; 13446 continue; 13447 } 13448 if (SeqOp && !SeqOp.isUndef() && SeqOp != Op) { 13449 Sequence.clear(); 13450 break; 13451 } 13452 SeqOp = Op; 13453 } 13454 if (!Sequence.empty()) 13455 return true; 13456 } 13457 13458 assert(Sequence.empty() && "Failed to empty non-repeating sequence pattern"); 13459 return false; 13460 } 13461 13462 bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence, 13463 BitVector *UndefElements) const { 13464 APInt DemandedElts = APInt::getAllOnes(getNumOperands()); 13465 return getRepeatedSequence(DemandedElts, Sequence, UndefElements); 13466 } 13467 13468 ConstantSDNode * 13469 BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts, 13470 BitVector *UndefElements) const { 13471 return dyn_cast_or_null<ConstantSDNode>( 13472 getSplatValue(DemandedElts, UndefElements)); 13473 } 13474 13475 ConstantSDNode * 13476 BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { 13477 return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements)); 13478 } 13479 13480 ConstantFPSDNode * 13481 BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts, 13482 BitVector *UndefElements) const { 13483 return dyn_cast_or_null<ConstantFPSDNode>( 13484 getSplatValue(DemandedElts, UndefElements)); 13485 } 13486 13487 ConstantFPSDNode * 13488 BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { 13489 return 
dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); 13490 } 13491 13492 int32_t 13493 BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, 13494 uint32_t BitWidth) const { 13495 if (ConstantFPSDNode *CN = 13496 dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) { 13497 bool IsExact; 13498 APSInt IntVal(BitWidth); 13499 const APFloat &APF = CN->getValueAPF(); 13500 if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) != 13501 APFloat::opOK || 13502 !IsExact) 13503 return -1; 13504 13505 return IntVal.exactLogBase2(); 13506 } 13507 return -1; 13508 } 13509 13510 bool BuildVectorSDNode::getConstantRawBits( 13511 bool IsLittleEndian, unsigned DstEltSizeInBits, 13512 SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const { 13513 // Early-out if this contains anything but Undef/Constant/ConstantFP. 13514 if (!isConstant()) 13515 return false; 13516 13517 unsigned NumSrcOps = getNumOperands(); 13518 unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits(); 13519 assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 && 13520 "Invalid bitcast scale"); 13521 13522 // Extract raw src bits. 13523 SmallVector<APInt> SrcBitElements(NumSrcOps, 13524 APInt::getZero(SrcEltSizeInBits)); 13525 BitVector SrcUndeElements(NumSrcOps, false); 13526 13527 for (unsigned I = 0; I != NumSrcOps; ++I) { 13528 SDValue Op = getOperand(I); 13529 if (Op.isUndef()) { 13530 SrcUndeElements.set(I); 13531 continue; 13532 } 13533 auto *CInt = dyn_cast<ConstantSDNode>(Op); 13534 auto *CFP = dyn_cast<ConstantFPSDNode>(Op); 13535 assert((CInt || CFP) && "Unknown constant"); 13536 SrcBitElements[I] = CInt ? CInt->getAPIntValue().trunc(SrcEltSizeInBits) 13537 : CFP->getValueAPF().bitcastToAPInt(); 13538 } 13539 13540 // Recast to dst width. 
13541 recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements, 13542 SrcBitElements, UndefElements, SrcUndeElements); 13543 return true; 13544 } 13545 13546 void BuildVectorSDNode::recastRawBits(bool IsLittleEndian, 13547 unsigned DstEltSizeInBits, 13548 SmallVectorImpl<APInt> &DstBitElements, 13549 ArrayRef<APInt> SrcBitElements, 13550 BitVector &DstUndefElements, 13551 const BitVector &SrcUndefElements) { 13552 unsigned NumSrcOps = SrcBitElements.size(); 13553 unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth(); 13554 assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 && 13555 "Invalid bitcast scale"); 13556 assert(NumSrcOps == SrcUndefElements.size() && 13557 "Vector size mismatch"); 13558 13559 unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits; 13560 DstUndefElements.clear(); 13561 DstUndefElements.resize(NumDstOps, false); 13562 DstBitElements.assign(NumDstOps, APInt::getZero(DstEltSizeInBits)); 13563 13564 // Concatenate src elements constant bits together into dst element. 13565 if (SrcEltSizeInBits <= DstEltSizeInBits) { 13566 unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits; 13567 for (unsigned I = 0; I != NumDstOps; ++I) { 13568 DstUndefElements.set(I); 13569 APInt &DstBits = DstBitElements[I]; 13570 for (unsigned J = 0; J != Scale; ++J) { 13571 unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1)); 13572 if (SrcUndefElements[Idx]) 13573 continue; 13574 DstUndefElements.reset(I); 13575 const APInt &SrcBits = SrcBitElements[Idx]; 13576 assert(SrcBits.getBitWidth() == SrcEltSizeInBits && 13577 "Illegal constant bitwidths"); 13578 DstBits.insertBits(SrcBits, J * SrcEltSizeInBits); 13579 } 13580 } 13581 return; 13582 } 13583 13584 // Split src element constant bits into dst elements. 
13585 unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits; 13586 for (unsigned I = 0; I != NumSrcOps; ++I) { 13587 if (SrcUndefElements[I]) { 13588 DstUndefElements.set(I * Scale, (I + 1) * Scale); 13589 continue; 13590 } 13591 const APInt &SrcBits = SrcBitElements[I]; 13592 for (unsigned J = 0; J != Scale; ++J) { 13593 unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1)); 13594 APInt &DstBits = DstBitElements[Idx]; 13595 DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits); 13596 } 13597 } 13598 } 13599 13600 bool BuildVectorSDNode::isConstant() const { 13601 for (const SDValue &Op : op_values()) { 13602 unsigned Opc = Op.getOpcode(); 13603 if (!Op.isUndef() && Opc != ISD::Constant && Opc != ISD::ConstantFP) 13604 return false; 13605 } 13606 return true; 13607 } 13608 13609 std::optional<std::pair<APInt, APInt>> 13610 BuildVectorSDNode::isConstantSequence() const { 13611 unsigned NumOps = getNumOperands(); 13612 if (NumOps < 2) 13613 return std::nullopt; 13614 13615 if (!isa<ConstantSDNode>(getOperand(0)) || 13616 !isa<ConstantSDNode>(getOperand(1))) 13617 return std::nullopt; 13618 13619 unsigned EltSize = getValueType(0).getScalarSizeInBits(); 13620 APInt Start = getConstantOperandAPInt(0).trunc(EltSize); 13621 APInt Stride = getConstantOperandAPInt(1).trunc(EltSize) - Start; 13622 13623 if (Stride.isZero()) 13624 return std::nullopt; 13625 13626 for (unsigned i = 2; i < NumOps; ++i) { 13627 if (!isa<ConstantSDNode>(getOperand(i))) 13628 return std::nullopt; 13629 13630 APInt Val = getConstantOperandAPInt(i).trunc(EltSize); 13631 if (Val != (Start + (Stride * i))) 13632 return std::nullopt; 13633 } 13634 13635 return std::make_pair(Start, Stride); 13636 } 13637 13638 bool ShuffleVectorSDNode::isSplatMask(ArrayRef<int> Mask) { 13639 // Find the first non-undef value in the shuffle mask. 
13640 unsigned i, e; 13641 for (i = 0, e = Mask.size(); i != e && Mask[i] < 0; ++i) 13642 /* search */; 13643 13644 // If all elements are undefined, this shuffle can be considered a splat 13645 // (although it should eventually get simplified away completely). 13646 if (i == e) 13647 return true; 13648 13649 // Make sure all remaining elements are either undef or the same as the first 13650 // non-undef value. 13651 for (int Idx = Mask[i]; i != e; ++i) 13652 if (Mask[i] >= 0 && Mask[i] != Idx) 13653 return false; 13654 return true; 13655 } 13656 13657 // Returns true if it is a constant integer BuildVector or constant integer, 13658 // possibly hidden by a bitcast. 13659 bool SelectionDAG::isConstantIntBuildVectorOrConstantInt( 13660 SDValue N, bool AllowOpaques) const { 13661 N = peekThroughBitcasts(N); 13662 13663 if (auto *C = dyn_cast<ConstantSDNode>(N)) 13664 return AllowOpaques || !C->isOpaque(); 13665 13666 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) 13667 return true; 13668 13669 // Treat a GlobalAddress supporting constant offset folding as a 13670 // constant integer. 13671 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N)) 13672 if (GA->getOpcode() == ISD::GlobalAddress && 13673 TLI->isOffsetFoldingLegal(GA)) 13674 return true; 13675 13676 if ((N.getOpcode() == ISD::SPLAT_VECTOR) && 13677 isa<ConstantSDNode>(N.getOperand(0))) 13678 return true; 13679 return false; 13680 } 13681 13682 // Returns true if it is a constant float BuildVector or constant float. 
13683 bool SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const { 13684 if (isa<ConstantFPSDNode>(N)) 13685 return true; 13686 13687 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) 13688 return true; 13689 13690 if ((N.getOpcode() == ISD::SPLAT_VECTOR) && 13691 isa<ConstantFPSDNode>(N.getOperand(0))) 13692 return true; 13693 13694 return false; 13695 } 13696 13697 std::optional<bool> SelectionDAG::isBoolConstant(SDValue N) const { 13698 ConstantSDNode *Const = 13699 isConstOrConstSplat(N, false, /*AllowTruncation=*/true); 13700 if (!Const) 13701 return std::nullopt; 13702 13703 EVT VT = N->getValueType(0); 13704 const APInt CVal = Const->getAPIntValue().trunc(VT.getScalarSizeInBits()); 13705 switch (TLI->getBooleanContents(N.getValueType())) { 13706 case TargetLowering::ZeroOrOneBooleanContent: 13707 if (CVal.isOne()) 13708 return true; 13709 if (CVal.isZero()) 13710 return false; 13711 return std::nullopt; 13712 case TargetLowering::ZeroOrNegativeOneBooleanContent: 13713 if (CVal.isAllOnes()) 13714 return true; 13715 if (CVal.isZero()) 13716 return false; 13717 return std::nullopt; 13718 case TargetLowering::UndefinedBooleanContent: 13719 return CVal[0]; 13720 } 13721 llvm_unreachable("Unknown BooleanContent enum"); 13722 } 13723 13724 void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { 13725 assert(!Node->OperandList && "Node already has operands"); 13726 assert(SDNode::getMaxNumOperands() >= Vals.size() && 13727 "too many operands to fit into SDNode"); 13728 SDUse *Ops = OperandRecycler.allocate( 13729 ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); 13730 13731 bool IsDivergent = false; 13732 for (unsigned I = 0; I != Vals.size(); ++I) { 13733 Ops[I].setUser(Node); 13734 Ops[I].setInitial(Vals[I]); 13735 EVT VT = Ops[I].getValueType(); 13736 13737 // Take care of the Node's operands iff target has divergence 13738 // Skip Chain. It does not carry divergence. 
13739 if (DivergentTarget && VT != MVT::Other && 13740 (VT != MVT::Glue || gluePropagatesDivergence(Ops[I].getNode())) && 13741 Ops[I].getNode()->isDivergent()) { 13742 // Node is going to be divergent if at least one of its operand is 13743 // divergent, unless it belongs to the "AlwaysUniform" exemptions. 13744 IsDivergent = true; 13745 } 13746 } 13747 Node->NumOperands = Vals.size(); 13748 Node->OperandList = Ops; 13749 // Check the divergence of the Node itself. 13750 if (DivergentTarget && !TLI->isSDNodeAlwaysUniform(Node)) { 13751 IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, UA); 13752 Node->SDNodeBits.IsDivergent = IsDivergent; 13753 } 13754 checkForCycles(Node); 13755 } 13756 13757 SDValue SelectionDAG::getTokenFactor(const SDLoc &DL, 13758 SmallVectorImpl<SDValue> &Vals) { 13759 size_t Limit = SDNode::getMaxNumOperands(); 13760 while (Vals.size() > Limit) { 13761 unsigned SliceIdx = Vals.size() - Limit; 13762 auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit); 13763 SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs); 13764 Vals.erase(Vals.begin() + SliceIdx, Vals.end()); 13765 Vals.emplace_back(NewTF); 13766 } 13767 return getNode(ISD::TokenFactor, DL, MVT::Other, Vals); 13768 } 13769 13770 SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL, 13771 EVT VT, SDNodeFlags Flags) { 13772 switch (Opcode) { 13773 default: 13774 return SDValue(); 13775 case ISD::ADD: 13776 case ISD::OR: 13777 case ISD::XOR: 13778 case ISD::UMAX: 13779 return getConstant(0, DL, VT); 13780 case ISD::MUL: 13781 return getConstant(1, DL, VT); 13782 case ISD::AND: 13783 case ISD::UMIN: 13784 return getAllOnesConstant(DL, VT); 13785 case ISD::SMAX: 13786 return getConstant(APInt::getSignedMinValue(VT.getSizeInBits()), DL, VT); 13787 case ISD::SMIN: 13788 return getConstant(APInt::getSignedMaxValue(VT.getSizeInBits()), DL, VT); 13789 case ISD::FADD: 13790 // If flags allow, prefer positive zero since it's generally 
cheaper 13791 // to materialize on most targets. 13792 return getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, VT); 13793 case ISD::FMUL: 13794 return getConstantFP(1.0, DL, VT); 13795 case ISD::FMINNUM: 13796 case ISD::FMAXNUM: { 13797 // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF. 13798 const fltSemantics &Semantics = VT.getFltSemantics(); 13799 APFloat NeutralAF = !Flags.hasNoNaNs() ? APFloat::getQNaN(Semantics) : 13800 !Flags.hasNoInfs() ? APFloat::getInf(Semantics) : 13801 APFloat::getLargest(Semantics); 13802 if (Opcode == ISD::FMAXNUM) 13803 NeutralAF.changeSign(); 13804 13805 return getConstantFP(NeutralAF, DL, VT); 13806 } 13807 case ISD::FMINIMUM: 13808 case ISD::FMAXIMUM: { 13809 // Neutral element for fminimum is Inf or FLT_MAX, depending on FMF. 13810 const fltSemantics &Semantics = VT.getFltSemantics(); 13811 APFloat NeutralAF = !Flags.hasNoInfs() ? APFloat::getInf(Semantics) 13812 : APFloat::getLargest(Semantics); 13813 if (Opcode == ISD::FMAXIMUM) 13814 NeutralAF.changeSign(); 13815 13816 return getConstantFP(NeutralAF, DL, VT); 13817 } 13818 13819 } 13820 } 13821 13822 /// Helper used to make a call to a library function that has one argument of 13823 /// pointer type. 13824 /// 13825 /// Such functions include 'fegetmode', 'fesetenv' and some others, which are 13826 /// used to get or set floating-point state. They have one argument of pointer 13827 /// type, which points to the memory region containing bits of the 13828 /// floating-point state. The value returned by such function is ignored in the 13829 /// created call. 13830 /// 13831 /// \param LibFunc Reference to library function (value of RTLIB::Libcall). 13832 /// \param Ptr Pointer used to save/load state. 13833 /// \param InChain Ingoing token chain. 13834 /// \returns Outgoing chain token. 
13835 SDValue SelectionDAG::makeStateFunctionCall(unsigned LibFunc, SDValue Ptr, 13836 SDValue InChain, 13837 const SDLoc &DLoc) { 13838 assert(InChain.getValueType() == MVT::Other && "Expected token chain"); 13839 TargetLowering::ArgListTy Args; 13840 TargetLowering::ArgListEntry Entry; 13841 Entry.Node = Ptr; 13842 Entry.Ty = Ptr.getValueType().getTypeForEVT(*getContext()); 13843 Args.push_back(Entry); 13844 RTLIB::Libcall LC = static_cast<RTLIB::Libcall>(LibFunc); 13845 SDValue Callee = getExternalSymbol(TLI->getLibcallName(LC), 13846 TLI->getPointerTy(getDataLayout())); 13847 TargetLowering::CallLoweringInfo CLI(*this); 13848 CLI.setDebugLoc(DLoc).setChain(InChain).setLibCallee( 13849 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*getContext()), Callee, 13850 std::move(Args)); 13851 return TLI->LowerCallTo(CLI).second; 13852 } 13853 13854 void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) { 13855 assert(From && To && "Invalid SDNode; empty source SDValue?"); 13856 auto I = SDEI.find(From); 13857 if (I == SDEI.end()) 13858 return; 13859 13860 // Use of operator[] on the DenseMap may cause an insertion, which invalidates 13861 // the iterator, hence the need to make a copy to prevent a use-after-free. 13862 NodeExtraInfo NEI = I->second; 13863 if (LLVM_LIKELY(!NEI.PCSections)) { 13864 // No deep copy required for the types of extra info set. 13865 // 13866 // FIXME: Investigate if other types of extra info also need deep copy. This 13867 // depends on the types of nodes they can be attached to: if some extra info 13868 // is only ever attached to nodes where a replacement To node is always the 13869 // node where later use and propagation of the extra info has the intended 13870 // semantics, no deep copy is required. 13871 SDEI[To] = std::move(NEI); 13872 return; 13873 } 13874 13875 // We need to copy NodeExtraInfo to all _new_ nodes that are being introduced 13876 // through the replacement of From with To. 
Otherwise, replacements of a node 13877 // (From) with more complex nodes (To and its operands) may result in lost 13878 // extra info where the root node (To) is insignificant in further propagating 13879 // and using extra info when further lowering to MIR. 13880 // 13881 // In the first step pre-populate the visited set with the nodes reachable 13882 // from the old From node. This avoids copying NodeExtraInfo to parts of the 13883 // DAG that is not new and should be left untouched. 13884 SmallVector<const SDNode *> Leafs{From}; // Leafs reachable with VisitFrom. 13885 DenseSet<const SDNode *> FromReach; // The set of nodes reachable from From. 13886 auto VisitFrom = [&](auto &&Self, const SDNode *N, int MaxDepth) { 13887 if (MaxDepth == 0) { 13888 // Remember this node in case we need to increase MaxDepth and continue 13889 // populating FromReach from this node. 13890 Leafs.emplace_back(N); 13891 return; 13892 } 13893 if (!FromReach.insert(N).second) 13894 return; 13895 for (const SDValue &Op : N->op_values()) 13896 Self(Self, Op.getNode(), MaxDepth - 1); 13897 }; 13898 13899 // Copy extra info to To and all its transitive operands (that are new). 13900 SmallPtrSet<const SDNode *, 8> Visited; 13901 auto DeepCopyTo = [&](auto &&Self, const SDNode *N) { 13902 if (FromReach.contains(N)) 13903 return true; 13904 if (!Visited.insert(N).second) 13905 return true; 13906 if (getEntryNode().getNode() == N) 13907 return false; 13908 for (const SDValue &Op : N->op_values()) { 13909 if (!Self(Self, Op.getNode())) 13910 return false; 13911 } 13912 // Copy only if entry node was not reached. 13913 SDEI[N] = NEI; 13914 return true; 13915 }; 13916 13917 // We first try with a lower MaxDepth, assuming that the path to common 13918 // operands between From and To is relatively short. This significantly 13919 // improves performance in the common case. 
The initial MaxDepth is big 13920 // enough to avoid retry in the common case; the last MaxDepth is large 13921 // enough to avoid having to use the fallback below (and protects from 13922 // potential stack exhaustion from recursion). 13923 for (int PrevDepth = 0, MaxDepth = 16; MaxDepth <= 1024; 13924 PrevDepth = MaxDepth, MaxDepth *= 2, Visited.clear()) { 13925 // StartFrom is the previous (or initial) set of leafs reachable at the 13926 // previous maximum depth. 13927 SmallVector<const SDNode *> StartFrom; 13928 std::swap(StartFrom, Leafs); 13929 for (const SDNode *N : StartFrom) 13930 VisitFrom(VisitFrom, N, MaxDepth - PrevDepth); 13931 if (LLVM_LIKELY(DeepCopyTo(DeepCopyTo, To))) 13932 return; 13933 // This should happen very rarely (reached the entry node). 13934 LLVM_DEBUG(dbgs() << __func__ << ": MaxDepth=" << MaxDepth << " too low\n"); 13935 assert(!Leafs.empty()); 13936 } 13937 13938 // This should not happen - but if it did, that means the subgraph reachable 13939 // from From has depth greater or equal to maximum MaxDepth, and VisitFrom() 13940 // could not visit all reachable common operands. Consequently, we were able 13941 // to reach the entry node. 13942 errs() << "warning: incomplete propagation of SelectionDAG::NodeExtraInfo\n"; 13943 assert(false && "From subgraph too complex - increase max. MaxDepth?"); 13944 // Best-effort fallback if assertions disabled. 13945 SDEI[To] = std::move(NEI); 13946 } 13947 13948 #ifndef NDEBUG 13949 static void checkForCyclesHelper(const SDNode *N, 13950 SmallPtrSetImpl<const SDNode*> &Visited, 13951 SmallPtrSetImpl<const SDNode*> &Checked, 13952 const llvm::SelectionDAG *DAG) { 13953 // If this node has already been checked, don't check it again. 13954 if (Checked.count(N)) 13955 return; 13956 13957 // If a node has already been visited on this depth-first walk, reject it as 13958 // a cycle. 
13959 if (!Visited.insert(N).second) { 13960 errs() << "Detected cycle in SelectionDAG\n"; 13961 dbgs() << "Offending node:\n"; 13962 N->dumprFull(DAG); dbgs() << "\n"; 13963 abort(); 13964 } 13965 13966 for (const SDValue &Op : N->op_values()) 13967 checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG); 13968 13969 Checked.insert(N); 13970 Visited.erase(N); 13971 } 13972 #endif 13973 13974 void llvm::checkForCycles(const llvm::SDNode *N, 13975 const llvm::SelectionDAG *DAG, 13976 bool force) { 13977 #ifndef NDEBUG 13978 bool check = force; 13979 #ifdef EXPENSIVE_CHECKS 13980 check = true; 13981 #endif // EXPENSIVE_CHECKS 13982 if (check) { 13983 assert(N && "Checking nonexistent SDNode"); 13984 SmallPtrSet<const SDNode*, 32> visited; 13985 SmallPtrSet<const SDNode*, 32> checked; 13986 checkForCyclesHelper(N, visited, checked, DAG); 13987 } 13988 #endif // !NDEBUG 13989 } 13990 13991 void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) { 13992 checkForCycles(DAG->getRoot().getNode(), DAG, force); 13993 } 13994