//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SelectionDAG::LegalizeVectors method.
//
// The vector legalizer looks for vector operations which might need to be
// scalarized and legalizes them. This is a separate step from Legalize because
// scalarizing can introduce illegal types. For example, suppose we have an
// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
// operation, which introduces nodes with the illegal type i64 which must be
// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
// the operation must be unrolled, which introduces nodes with the illegal
// type i8 which must be promoted.
//
// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
// or operations that happen to take a vector which are custom-lowered;
// the legalization for such operations never produces nodes
// with illegal types, so it's okay to put off legalizing them until
// SelectionDAG::Legalize runs.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "legalizevectorops"

namespace {

/// Walks a SelectionDAG and legalizes the vector operations in it.
///
/// For every node that produces or consumes a vector value, LegalizeOp asks
/// the target (via TargetLowering) which LegalizeAction applies and then
/// promotes, custom-lowers or expands the node accordingly. Replacement
/// values are recorded in LegalizedNodes so each value is processed once.
class VectorLegalizer {
  SelectionDAG& DAG;
  const TargetLowering &TLI;
  bool Changed = false; // Keep track of whether anything changed

  /// For nodes that are of legal width, and that have more than one use, this
  /// map indicates what regularized operand to use. This allows us to avoid
  /// legalizing the same thing more than once.
  SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;

  /// Adds a node to the translation cache.
  void AddLegalizedOperand(SDValue From, SDValue To) {
    LegalizedNodes.insert(std::make_pair(From, To));
    // If someone requests legalization of the new node, return itself.
    if (From != To)
      LegalizedNodes.insert(std::make_pair(To, To));
  }

  /// Legalizes the given node.
  SDValue LegalizeOp(SDValue Op);

  /// Assuming the node is legal, "legalize" the results.
  SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);

  /// Make sure Results are legal and update the translation cache.
  SDValue RecursivelyLegalizeResults(SDValue Op,
                                     MutableArrayRef<SDValue> Results);

  /// Wrapper to interface LowerOperation with a vector of Results.
  /// Returns false if the target wants to use default expansion. Otherwise
  /// returns true. If return is true and the Results are empty, then the
  /// target wants to keep the input node as is.
  bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);

  /// Implements unrolling a VSETCC.
  SDValue UnrollVSETCC(SDNode *Node);

  /// Implement expand-based legalization of vector operations.
  ///
  /// This is just a high-level routine to dispatch to specific code paths for
  /// operations to legalize them.
  void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
  /// FP_TO_SINT isn't legal.
  void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
  /// SINT_TO_FLOAT and SHR on vectors isn't legal.
  void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
  SDValue ExpandSEXTINREG(SDNode *Node);

  /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place and bitcasts to the proper
  /// type. The contents of the bits in the extended part of each element are
  /// undef.
  SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);

  /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place, bitcasts to the proper
  /// type, then shifts left and arithmetic shifts right to introduce a sign
  /// extension.
  SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);

  /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place and blends zeros into
  /// the remaining lanes, finally bitcasting to the proper type.
  SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);

  /// Expand bswap of vectors into a shuffle if legal.
  SDValue ExpandBSWAP(SDNode *Node);

  /// Implement vselect in terms of XOR, AND, OR when blend is not
  /// supported by the target.
  SDValue ExpandVSELECT(SDNode *Node);
  SDValue ExpandVP_SELECT(SDNode *Node);
  SDValue ExpandVP_MERGE(SDNode *Node);
  SDValue ExpandVP_REM(SDNode *Node);
  SDValue ExpandSELECT(SDNode *Node);
  std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
  SDValue ExpandStore(SDNode *N);
  SDValue ExpandFNEG(SDNode *Node);
  void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
                            SmallVectorImpl<SDValue> &Results);
  bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32,
                            RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
                            RTLIB::Libcall Call_F128,
                            RTLIB::Libcall Call_PPCF128,
                            SmallVectorImpl<SDValue> &Results);

  void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements vector promotion.
  ///
  /// This is essentially just bitcasting the operands to a different type and
  /// bitcasting the result back to the original type.
  void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements [SU]INT_TO_FP vector promotion.
  ///
  /// This is a [zs]ext of the input operand to a larger integer type.
  void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements FP_TO_[SU]INT vector promotion of the result type.
  ///
  /// It is promoted to a larger integer type. The result is then
  /// truncated back to the original type.
  void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements vector setcc operation promotion.
  ///
  /// All vector operands are promoted to a vector type with larger element
  /// type.
  void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements promotion of strict floating-point vector operations.
  ///
  /// The vector operands are extended with STRICT_FP_EXTEND, the operation is
  /// performed in the promoted type, and the result is rounded back with
  /// STRICT_FP_ROUND.
  void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

public:
  VectorLegalizer(SelectionDAG& dag) :
      DAG(dag), TLI(dag.getTargetLoweringInfo()) {}

  /// Begin legalizing the vector operations in the DAG.
  bool Run();
};

} // end anonymous namespace

/// Legalizes every node of the DAG and installs the legalized root.
///
/// Returns false without touching the DAG if no node produces a vector value;
/// otherwise returns whether any node was replaced during legalization.
bool VectorLegalizer::Run() {
  // Before we start legalizing vector nodes, check if there are any vectors.
  bool HasVectors = false;
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
    // Check if the values of the nodes contain vectors. We don't need to check
    // the operands because we are going to check their values at some point.
    HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });

    // If we found a vector node we can start the legalization.
    if (HasVectors)
      break;
  }

  // If this basic block has no vectors then no need to legalize vectors.
  if (!HasVectors)
    return false;

  // The legalize process is inherently a bottom-up recursive process (users
  // legalize their uses before themselves). Given infinite stack space, we
  // could just start legalizing on the root and traverse the whole graph. In
  // practice however, this causes us to run out of stack space on large basic
  // blocks. To avoid this problem, compute an ordering of the nodes where each
  // node is only legalized after all of its operands are legalized.
  DAG.AssignTopologicalOrder();
  // The end bound is captured once, before the loop, as the node that was
  // last in the list at that point; the loop stops right after visiting it.
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
    LegalizeOp(SDValue(&*I, 0));

  // Finally, it's possible the root changed. Get the new root.
  SDValue OldRoot = DAG.getRoot();
  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
  DAG.setRoot(LegalizedNodes[OldRoot]);

  LegalizedNodes.clear();

  // Remove dead nodes now.
  DAG.RemoveDeadNodes();

  return Changed;
}

/// Records that each result of \p Op maps to the same-numbered result of
/// \p Result, and returns the value of \p Result matching Op's result number.
SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
  assert(Op->getNumValues() == Result->getNumValues() &&
         "Unexpected number of results");
  // Generic legalization: just pass the operand through.
  for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
    AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
  return SDValue(Result, Op.getResNo());
}

/// Legalizes each replacement value in \p Results (in place), caches it as
/// the translation of the corresponding result of \p Op, and returns the
/// replacement for Op's own result number.
SDValue
VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
                                            MutableArrayRef<SDValue> Results) {
  assert(Results.size() == Op->getNumValues() &&
         "Unexpected number of results");
  // Make sure that the generated code is itself legal.
  for (unsigned i = 0, e = Results.size(); i != e; ++i) {
    Results[i] = LegalizeOp(Results[i]);
    AddLegalizedOperand(Op.getValue(i), Results[i]);
  }

  return Results[Op.getResNo()];
}

/// Legalizes \p Op and returns the value that replaces it.
///
/// Operands are legalized first and the node is updated to use them. Nodes
/// with no vector value or operand, and opcodes not listed in the switch, are
/// passed through unchanged. For the remaining nodes the target is queried
/// for a LegalizeAction, which selects promotion, custom lowering (falling
/// back to expansion if the target declines) or expansion.
SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
  // Note that LegalizeOp may be reentered even from single-use nodes, which
  // means that we always must cache transformed nodes.
  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
  if (I != LegalizedNodes.end()) return I->second;

  // Legalize the operands
  SmallVector<SDValue, 8> Ops;
  for (const SDValue &Oper : Op->op_values())
    Ops.push_back(LegalizeOp(Oper));

  SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);

  // Nodes that neither produce nor consume a vector are not handled here.
  bool HasVectorValueOrOp =
      llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
      llvm::any_of(Node->op_values(),
                   [](SDValue O) { return O.getValueType().isVector(); });
  if (!HasVectorValueOrOp)
    return TranslateLegalizeResults(Op, Node);

  // Determine the action for this node. The type used for the query depends
  // on the opcode: result type, an operand type, or the memory type.
  TargetLowering::LegalizeAction Action = TargetLowering::Legal;
  EVT ValVT;
  switch (Op.getOpcode()) {
  default:
    return TranslateLegalizeResults(Op, Node);
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Node);
    ISD::LoadExtType ExtType = LD->getExtensionType();
    EVT LoadedVT = LD->getMemoryVT();
    // Only extending vector loads are queried; other loads stay Legal here.
    if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
      Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
    break;
  }
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(Node);
    EVT StVT = ST->getMemoryVT();
    MVT ValVT = ST->getValue().getSimpleValueType();
    // Only truncating vector stores are queried; other stores stay Legal here.
    if (StVT.isVector() && ST->isTruncatingStore())
      Action = TLI.getTruncStoreAction(ValVT, StVT);
    break;
  }
  case ISD::MERGE_VALUES:
    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    // This operation lies about being legal: when it claims to be legal,
    // it should actually be expanded.
    if (Action == TargetLowering::Legal)
      Action = TargetLowering::Expand;
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ValVT = Node->getValueType(0);
    // Operand 0 of a strict node is the chain, so the converted value of
    // STRICT_[SU]INT_TO_FP is operand 1.
    if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
        Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
      ValVT = Node->getOperand(1).getValueType();
    if (Op.getOpcode() == ISD::STRICT_FSETCC ||
        Op.getOpcode() == ISD::STRICT_FSETCCS) {
      MVT OpVT = Node->getOperand(1).getSimpleValueType();
      ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get();
      Action = TLI.getCondCodeAction(CCCode, OpVT);
      if (Action == TargetLowering::Legal)
        Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
    } else {
      Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
    }
    // If we're asked to expand a strict vector floating-point operation,
    // by default we're going to simply unroll it. That is usually the
    // best approach, except in the case where the resulting strict (scalar)
    // operations would themselves use the fallback mutation to non-strict.
    // In that specific case, just do the fallback on the vector op.
    if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
        TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
            TargetLowering::Legal) {
      EVT EltVT = ValVT.getVectorElementType();
      if (TLI.getOperationAction(Node->getOpcode(), EltVT)
          == TargetLowering::Expand &&
          TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
          == TargetLowering::Legal)
        Action = TargetLowering::Legal;
    }
    break;
  // Operations whose action is keyed on the type of result 0.
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::MULHS:
  case ISD::MULHU:
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
  case ISD::SDIVREM:
  case ISD::UDIVREM:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::FSHL:
  case ISD::FSHR:
  case ISD::ROTL:
  case ISD::ROTR:
  case ISD::ABS:
  case ISD::ABDS:
  case ISD::ABDU:
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::BSWAP:
  case ISD::BITREVERSE:
  case ISD::CTLZ:
  case ISD::CTTZ:
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::CTPOP:
  case ISD::SELECT:
  case ISD::VSELECT:
  case ISD::SELECT_CC:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::TRUNCATE:
  case ISD::SIGN_EXTEND:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FNEG:
  case ISD::FABS:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMINNUM_IEEE:
  case ISD::FMAXNUM_IEEE:
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
  case ISD::FCOPYSIGN:
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FTAN:
  case ISD::FASIN:
  case ISD::FACOS:
  case ISD::FATAN:
  case ISD::FSINH:
  case ISD::FCOSH:
  case ISD::FTANH:
  case ISD::FLDEXP:
  case ISD::FPOWI:
  case ISD::FPOW:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FEXP10:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::FFLOOR:
  case ISD::FP_ROUND:
  case ISD::FP_EXTEND:
  case ISD::FPTRUNC_ROUND:
  case ISD::FMA:
  case ISD::SIGN_EXTEND_INREG:
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:
  case ISD::FCANONICALIZE:
  case ISD::FFREXP:
  case ISD::SADDSAT:
  case ISD::UADDSAT:
  case ISD::SSUBSAT:
  case ISD::USUBSAT:
  case ISD::SSHLSAT:
  case ISD::USHLSAT:
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::MGATHER:
  case ISD::VECTOR_COMPRESS:
  case ISD::SCMP:
  case ISD::UCMP:
    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    break;
  // Fixed-point operations: the action additionally depends on the scale,
  // which is read from constant operand 2.
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:
  case ISD::SDIVFIX:
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIX:
  case ISD::UDIVFIXSAT: {
    unsigned Scale = Node->getConstantOperandVal(2);
    Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
                                              Node->getValueType(0), Scale);
    break;
  }
  // Operations whose action is keyed on the type of operand 0.
  case ISD::LRINT:
  case ISD::LLRINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VECREDUCE_FMINIMUM:
    Action = TLI.getOperationAction(Node->getOpcode(),
                                    Node->getOperand(0).getValueType());
    break;
  // Sequential reductions: the action is keyed on the type of operand 1.
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Action = TLI.getOperationAction(Node->getOpcode(),
                                    Node->getOperand(1).getValueType());
    break;
  case ISD::SETCC: {
    // Check the condition code first; only if it is legal is the operation
    // itself queried, both on the type of the compared operands.
    MVT OpVT = Node->getOperand(0).getSimpleValueType();
    ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
    Action = TLI.getCondCodeAction(CCCode, OpVT);
    if (Action == TargetLowering::Legal)
      Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
    break;
  }

#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...)                          \
  case ISD::VPID: {                                                            \
    EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS))        \
                                  : Node->getOperand(LEGALPOS).getValueType(); \
    if (ISD::VPID == ISD::VP_SETCC) {                                          \
      ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
      Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT());        \
      if (Action != TargetLowering::Legal)                                     \
        break;                                                                 \
    }                                                                          \
    /* Defer non-vector results to LegalizeDAG. */                             \
    if (!Node->getValueType(0).isVector() &&                                   \
        Node->getValueType(0) != MVT::Other) {                                 \
      Action = TargetLowering::Legal;                                          \
      break;                                                                   \
    }                                                                          \
    Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT);            \
  } break;
#include "llvm/IR/VPIntrinsics.def"
  }

  LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));

  // Apply the chosen action. ResultVals stays empty when the node is kept.
  SmallVector<SDValue, 8> ResultVals;
  switch (Action) {
  default: llvm_unreachable("This action is not supported yet!");
  case TargetLowering::Promote:
    assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
           "This action is not supported yet!");
    LLVM_DEBUG(dbgs() << "Promoting\n");
    Promote(Node, ResultVals);
    assert(!ResultVals.empty() && "No results for promotion?");
    break;
  case TargetLowering::Legal:
    LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
    break;
  case TargetLowering::Custom:
    LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
    if (LowerOperationWrapper(Node, ResultVals))
      break;
    LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
    [[fallthrough]];
  case TargetLowering::Expand:
    LLVM_DEBUG(dbgs() << "Expanding\n");
    Expand(Node, ResultVals);
    break;
  }

  // No replacement values: the (operand-updated) node itself is the result.
  if (ResultVals.empty())
    return TranslateLegalizeResults(Op, Node);

  Changed = true;
  return RecursivelyLegalizeResults(Op, ResultVals);
}

/// Calls the target's LowerOperation hook on result 0 of \p Node and converts
/// its return value into a list of replacement values.
///
/// Returns false if the hook returned a null value. Returns true with
/// \p Results left untouched if the hook returned the node itself. Otherwise
/// returns true after appending one replacement value per result of \p Node.
// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
// merge them somehow?
bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
                                            SmallVectorImpl<SDValue> &Results) {
  SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);

  if (!Res.getNode())
    return false;

  if (Res == SDValue(Node, 0))
    return true;

  // If the original node has one result, take the return value from
  // LowerOperation as is. It might not be result number 0.
  if (Node->getNumValues() == 1) {
    Results.push_back(Res);
    return true;
  }

  // If the original node has multiple results, then the return node should
  // have the same number of results.
  assert((Node->getNumValues() == Res->getNumValues()) &&
         "Lowering returned the wrong number of results!");

  // Place the new result values in the order of Node's result numbers.
  for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
    Results.push_back(Res.getValue(I));

  return true;
}

/// Promotes a SETCC or VP_SETCC by extending both compared operands to the
/// type the target asks for (FP_EXTEND for floating point, ANY_EXTEND
/// otherwise) and re-emitting the comparison with the original result type.
void VectorLegalizer::PromoteSETCC(SDNode *Node,
                                   SmallVectorImpl<SDValue> &Results) {
  MVT VecVT = Node->getOperand(0).getSimpleValueType();
  MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);

  unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;

  SDLoc DL(Node);
  SmallVector<SDValue, 5> Operands(Node->getNumOperands());

  Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0));
  Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1));
  Operands[2] = Node->getOperand(2);

  if (Node->getOpcode() == ISD::VP_SETCC) {
    Operands[3] = Node->getOperand(3); // mask
    Operands[4] = Node->getOperand(4); // evl
  }

  SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0),
                            Operands, Node->getFlags());

  Results.push_back(Res);
}

/// Promotes a strict floating-point vector operation.
///
/// Each vector operand is extended with STRICT_FP_EXTEND on the incoming
/// chain (operand 0); the chains of those extends are joined by a TokenFactor
/// that becomes the chain of the promoted operation. The promoted result is
/// rounded back to the original type with STRICT_FP_ROUND, whose value and
/// chain are appended to \p Results.
void VectorLegalizer::PromoteSTRICT(SDNode *Node,
                                    SmallVectorImpl<SDValue> &Results) {
  MVT VecVT = Node->getOperand(1).getSimpleValueType();
  MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);

  assert(VecVT.isFloatingPoint());

  SDLoc DL(Node);
  SmallVector<SDValue, 5> Operands(Node->getNumOperands());
  SmallVector<SDValue, 2> Chains;

  // Operand 0 is the chain and is filled in after the loop.
  for (unsigned j = 1; j != Node->getNumOperands(); ++j)
    if (Node->getOperand(j).getValueType().isVector() &&
        !(ISD::isVPOpcode(Node->getOpcode()) &&
          ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
    {
      // Promote the vector operand.
      SDValue Ext =
          DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other},
                      {Node->getOperand(0), Node->getOperand(j)});
      Operands[j] = Ext.getValue(0);
      Chains.push_back(Ext.getValue(1));
    } else
      Operands[j] = Node->getOperand(j); // Pass other operands through as is.

  SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1));

  Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);

  SDValue Res =
      DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags());

  SDValue Round =
      DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other},
                  {Res.getValue(1), Res.getValue(0),
                   DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});

  Results.push_back(Round.getValue(0));
  Results.push_back(Round.getValue(1));
}

/// Promotes \p Node to the type returned by getTypeToPromoteTo.
///
/// Opcodes with a dedicated promotion scheme are dispatched to their helper.
/// Every other opcode must have a single result: its vector operands (except
/// a VP mask) are converted to the promoted type with FP_EXTEND or BITCAST,
/// the operation is re-emitted in that type, and the result is converted back
/// with FP_ROUND or BITCAST.
void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  // For a few operations there is a specific concept for promotion based on
  // the operand's type.
  switch (Node->getOpcode()) {
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::STRICT_SINT_TO_FP:
  case ISD::STRICT_UINT_TO_FP:
    // "Promote" the operation by extending the operand.
    PromoteINT_TO_FP(Node, Results);
    return;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::STRICT_FP_TO_SINT:
    // Promote the operation by widening the result type.
    PromoteFP_TO_INT(Node, Results);
    return;
  case ISD::VP_SETCC:
  case ISD::SETCC:
    // Promote the operation by extending the operand.
    PromoteSETCC(Node, Results);
    return;
  case ISD::STRICT_FADD:
  case ISD::STRICT_FSUB:
  case ISD::STRICT_FMUL:
  case ISD::STRICT_FDIV:
  case ISD::STRICT_FSQRT:
  case ISD::STRICT_FMA:
    PromoteSTRICT(Node, Results);
    return;
  case ISD::FP_ROUND:
  case ISD::FP_EXTEND:
    // These operations are used to do promotion so they can't be promoted
    // themselves.
    llvm_unreachable("Don't know how to promote this operation!");
  }

  // There are currently two cases of vector promotion:
  // 1) Bitcasting a vector of integers to a different type to a vector of the
  //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
  // 2) Extending a vector of floats to a vector of the same number of larger
  //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
  assert(Node->getNumValues() == 1 &&
         "Can't promote a vector with multiple results!");
  MVT VT = Node->getSimpleValueType(0);
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  SDLoc dl(Node);
  SmallVector<SDValue, 4> Operands(Node->getNumOperands());

  for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
    // Do not promote the mask operand of a VP OP.
    bool SkipPromote = ISD::isVPOpcode(Node->getOpcode()) &&
                       ISD::getVPMaskIdx(Node->getOpcode()) == j;
    if (Node->getOperand(j).getValueType().isVector() && !SkipPromote)
      if (Node->getOperand(j)
              .getValueType()
              .getVectorElementType()
              .isFloatingPoint() &&
          NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
        Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
      else
        Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
    else
      Operands[j] = Node->getOperand(j);
  }

  SDValue Res =
      DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());

  // Convert the promoted result back to the original type: round when both
  // types are floating point, bitcast otherwise.
  if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
      (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
       NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
    Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
                      DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
  else
    Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);

  Results.push_back(Res);
}

/// Promotes [STRICT_][SU]INT_TO_FP by extending every vector operand to the
/// promoted integer type (zero-extend for the unsigned opcodes, sign-extend
/// otherwise) and re-emitting the conversion with the original result type.
/// For strict opcodes the new node's chain is appended as a second result.
void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  // INT_TO_FP operations may require the input operand be promoted even
  // when the type is otherwise legal.
  bool IsStrict = Node->isStrictFPOpcode();
  MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");

  SDLoc dl(Node);
  SmallVector<SDValue, 4> Operands(Node->getNumOperands());

  unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
                  Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
                     ? ISD::ZERO_EXTEND
                     : ISD::SIGN_EXTEND;
  for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
    if (Node->getOperand(j).getValueType().isVector())
      Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
    else
      Operands[j] = Node->getOperand(j);
  }

  if (IsStrict) {
    SDValue Res = DAG.getNode(Node->getOpcode(), dl,
                              {Node->getValueType(0), MVT::Other}, Operands);
    Results.push_back(Res);
    Results.push_back(Res.getValue(1));
    return;
  }

  SDValue Res =
      DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
  Results.push_back(Res);
}

// For FP_TO_INT we promote the result type to a vector type with wider
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote thus assuming that the
// promoted vector type has the same overall size.
void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  MVT VT = Node->getSimpleValueType(0);
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  bool IsStrict = Node->isStrictFPOpcode();
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");

  unsigned NewOpc = Node->getOpcode();
  // Change FP_TO_UINT to FP_TO_SINT if possible.
  // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
  if (NewOpc == ISD::FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
    NewOpc = ISD::FP_TO_SINT;

  if (NewOpc == ISD::STRICT_FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
    NewOpc = ISD::STRICT_FP_TO_SINT;

  SDLoc dl(Node);
  SDValue Promoted, Chain;
  if (IsStrict) {
    // Strict nodes carry the chain as operand 0 and produce it as result 1.
    Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
                           {Node->getOperand(0), Node->getOperand(1)});
    Chain = Promoted.getValue(1);
  } else
    Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));

  // Assert that the converted value fits in the original type. If it doesn't
  // (eg: because the value being converted is too big), then the result of the
  // original operation was undefined anyway, so the assert is still correct.
  // Note the choice is based on the original opcode, not the possibly
  // signed replacement selected above.
  if (Node->getOpcode() == ISD::FP_TO_UINT ||
      Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
    NewOpc = ISD::AssertZext;
  else
    NewOpc = ISD::AssertSext;

  Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
                         DAG.getValueType(VT.getScalarType()));
  Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
  Results.push_back(Promoted);
  if (IsStrict)
    Results.push_back(Chain);
}

/// Expands a vector load by delegating to TargetLowering::scalarizeVectorLoad
/// and returns the pair it produces.
std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  return TLI.scalarizeVectorLoad(LD, DAG);
}

/// Expands a vector store by delegating to
/// TargetLowering::scalarizeVectorStore and returns the value it produces.
SDValue VectorLegalizer::ExpandStore(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
  return TF;
}

void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  switch (Node->getOpcode()) {
  case ISD::LOAD: {
    std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
    Results.push_back(Tmp.first);
    Results.push_back(Tmp.second);
    return;
  }
  case ISD::STORE:
    Results.push_back(ExpandStore(Node));
    return;
  case ISD::MERGE_VALUES:
    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
      Results.push_back(Node->getOperand(i));
    return;
  case ISD::SIGN_EXTEND_INREG:
    Results.push_back(ExpandSEXTINREG(Node));
    return;
  case ISD::ANY_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::BSWAP:
    Results.push_back(ExpandBSWAP(Node));
    return;
  case ISD::VP_BSWAP:
    Results.push_back(TLI.expandVPBSWAP(Node, DAG));
    return;
  case ISD::VSELECT:
    Results.push_back(ExpandVSELECT(Node));
    return;
  case ISD::VP_SELECT:
    Results.push_back(ExpandVP_SELECT(Node));
    return;
  case ISD::VP_SREM:
  case ISD::VP_UREM:
    if (SDValue Expanded = ExpandVP_REM(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SELECT:
    Results.push_back(ExpandSELECT(Node));
    return;
  case ISD::SELECT_CC: {
    if (Node->getValueType(0).isScalableVector()) {
      EVT CondVT = TLI.getSetCCResultType(
          DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
      SDValue SetCC =
          DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
                      Node->getOperand(1), Node->getOperand(4));
      Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
                                      Node->getOperand(2),
                                      Node->getOperand(3)));
      return;
    }
    break;
  }
  case ISD::FP_TO_UINT:
    ExpandFP_TO_UINT(Node, Results);
    return;
  case ISD::UINT_TO_FP:
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  case ISD::FNEG:
    Results.push_back(ExpandFNEG(Node));
    return;
  case ISD::FSUB:
    ExpandFSUB(Node, Results);
    return;
  case ISD::SETCC:
  case ISD::VP_SETCC:
    ExpandSETCC(Node, Results);
    return;
  case ISD::ABS:
    if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::ABDS:
  case ISD::ABDU:
    if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
    if (SDValue Expanded = TLI.expandAVG(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::BITREVERSE:
    ExpandBITREVERSE(Node, Results);
    return;
  case ISD::VP_BITREVERSE:
    if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTPOP:
    if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTPOP:
    if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTLZ:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTTZ:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FSHL:
  case ISD::VP_FSHL:
  case ISD::FSHR:
  case ISD::VP_FSHR:
    if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
    if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
    Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
    return;
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
    if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::UADDO:
  case ISD::USUBO:
    ExpandUADDSUBO(Node, Results);
    return;
  case ISD::SADDO:
  case ISD::SSUBO:
    ExpandSADDSUBO(Node, Results);
    return;
  case ISD::UMULO:
  case ISD::SMULO:
    ExpandMULO(Node, Results);
    return;
  case ISD::USUBSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::SADDSAT:
    if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::USHLSAT:
  case ISD::SSHLSAT:
    if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    // Expand the FP_TO_[SU]INT_SAT if it is scalable to prevent it from
    // unrolling below.
    if (Node->getValueType(0).isScalableVector()) {
      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
        Results.push_back(Expanded);
        return;
      }
    }
    break;
  case ISD::SMULFIX:
  case ISD::UMULFIX:
    if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
    // why.
Maybe it results in worse codegen compared to the unroll for some 1077 // targets? This should probably be investigated. And if we still prefer to 1078 // unroll an explanation could be helpful. 1079 break; 1080 case ISD::SDIVFIX: 1081 case ISD::UDIVFIX: 1082 ExpandFixedPointDiv(Node, Results); 1083 return; 1084 case ISD::SDIVFIXSAT: 1085 case ISD::UDIVFIXSAT: 1086 break; 1087 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ 1088 case ISD::STRICT_##DAGN: 1089 #include "llvm/IR/ConstrainedOps.def" 1090 ExpandStrictFPOp(Node, Results); 1091 return; 1092 case ISD::VECREDUCE_ADD: 1093 case ISD::VECREDUCE_MUL: 1094 case ISD::VECREDUCE_AND: 1095 case ISD::VECREDUCE_OR: 1096 case ISD::VECREDUCE_XOR: 1097 case ISD::VECREDUCE_SMAX: 1098 case ISD::VECREDUCE_SMIN: 1099 case ISD::VECREDUCE_UMAX: 1100 case ISD::VECREDUCE_UMIN: 1101 case ISD::VECREDUCE_FADD: 1102 case ISD::VECREDUCE_FMUL: 1103 case ISD::VECREDUCE_FMAX: 1104 case ISD::VECREDUCE_FMIN: 1105 case ISD::VECREDUCE_FMAXIMUM: 1106 case ISD::VECREDUCE_FMINIMUM: 1107 Results.push_back(TLI.expandVecReduce(Node, DAG)); 1108 return; 1109 case ISD::VECREDUCE_SEQ_FADD: 1110 case ISD::VECREDUCE_SEQ_FMUL: 1111 Results.push_back(TLI.expandVecReduceSeq(Node, DAG)); 1112 return; 1113 case ISD::SREM: 1114 case ISD::UREM: 1115 ExpandREM(Node, Results); 1116 return; 1117 case ISD::VP_MERGE: 1118 Results.push_back(ExpandVP_MERGE(Node)); 1119 return; 1120 case ISD::FREM: 1121 if (tryExpandVecMathCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, 1122 RTLIB::REM_F80, RTLIB::REM_F128, 1123 RTLIB::REM_PPCF128, Results)) 1124 return; 1125 1126 break; 1127 case ISD::VECTOR_COMPRESS: 1128 Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG)); 1129 return; 1130 } 1131 1132 SDValue Unrolled = DAG.UnrollVectorOp(Node); 1133 if (Node->getNumValues() == 1) { 1134 Results.push_back(Unrolled); 1135 } else { 1136 assert(Node->getNumValues() == Unrolled->getNumValues() && 1137 "VectorLegalizer Expand returned wrong number of results!"); 
1138 for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I) 1139 Results.push_back(Unrolled.getValue(I)); 1140 } 1141 } 1142 1143 SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { 1144 // Lower a select instruction where the condition is a scalar and the 1145 // operands are vectors. Lower this select to VSELECT and implement it 1146 // using XOR AND OR. The selector bit is broadcasted. 1147 EVT VT = Node->getValueType(0); 1148 SDLoc DL(Node); 1149 1150 SDValue Mask = Node->getOperand(0); 1151 SDValue Op1 = Node->getOperand(1); 1152 SDValue Op2 = Node->getOperand(2); 1153 1154 assert(VT.isVector() && !Mask.getValueType().isVector() 1155 && Op1.getValueType() == Op2.getValueType() && "Invalid type"); 1156 1157 // If we can't even use the basic vector operations of 1158 // AND,OR,XOR, we will have to scalarize the op. 1159 // Notice that the operation may be 'promoted' which means that it is 1160 // 'bitcasted' to another type which is handled. 1161 // Also, we need to be able to construct a splat vector using either 1162 // BUILD_VECTOR or SPLAT_VECTOR. 1163 // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to 1164 // BUILD_VECTOR? 1165 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || 1166 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || 1167 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || 1168 TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR 1169 : ISD::SPLAT_VECTOR, 1170 VT) == TargetLowering::Expand) 1171 return DAG.UnrollVectorOp(Node); 1172 1173 // Generate a mask operand. 1174 EVT MaskTy = VT.changeVectorElementTypeToInteger(); 1175 1176 // What is the size of each element in the vector mask. 1177 EVT BitTy = MaskTy.getScalarType(); 1178 1179 Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy), 1180 DAG.getConstant(0, DL, BitTy)); 1181 1182 // Broadcast the mask so that the entire vector is all one or all zero. 
1183 Mask = DAG.getSplat(MaskTy, DL, Mask); 1184 1185 // Bitcast the operands to be the same type as the mask. 1186 // This is needed when we select between FP types because 1187 // the mask is a vector of integers. 1188 Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); 1189 Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); 1190 1191 SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy); 1192 1193 Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); 1194 Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); 1195 SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); 1196 return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); 1197 } 1198 1199 SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) { 1200 EVT VT = Node->getValueType(0); 1201 1202 // Make sure that the SRA and SHL instructions are available. 1203 if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || 1204 TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) 1205 return DAG.UnrollVectorOp(Node); 1206 1207 SDLoc DL(Node); 1208 EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT(); 1209 1210 unsigned BW = VT.getScalarSizeInBits(); 1211 unsigned OrigBW = OrigTy.getScalarSizeInBits(); 1212 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); 1213 1214 SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz); 1215 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); 1216 } 1217 1218 // Generically expand a vector anyext in register to a shuffle of the relevant 1219 // lanes into the appropriate locations, with other lanes left undef. 
1220 SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) { 1221 SDLoc DL(Node); 1222 EVT VT = Node->getValueType(0); 1223 int NumElements = VT.getVectorNumElements(); 1224 SDValue Src = Node->getOperand(0); 1225 EVT SrcVT = Src.getValueType(); 1226 int NumSrcElements = SrcVT.getVectorNumElements(); 1227 1228 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector 1229 // into a larger vector type. 1230 if (SrcVT.bitsLE(VT)) { 1231 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && 1232 "ANY_EXTEND_VECTOR_INREG vector size mismatch"); 1233 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); 1234 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), 1235 NumSrcElements); 1236 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), 1237 Src, DAG.getVectorIdxConstant(0, DL)); 1238 } 1239 1240 // Build a base mask of undef shuffles. 1241 SmallVector<int, 16> ShuffleMask; 1242 ShuffleMask.resize(NumSrcElements, -1); 1243 1244 // Place the extended lanes into the correct locations. 1245 int ExtLaneScale = NumSrcElements / NumElements; 1246 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; 1247 for (int i = 0; i < NumElements; ++i) 1248 ShuffleMask[i * ExtLaneScale + EndianOffset] = i; 1249 1250 return DAG.getNode( 1251 ISD::BITCAST, DL, VT, 1252 DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); 1253 } 1254 1255 SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) { 1256 SDLoc DL(Node); 1257 EVT VT = Node->getValueType(0); 1258 SDValue Src = Node->getOperand(0); 1259 EVT SrcVT = Src.getValueType(); 1260 1261 // First build an any-extend node which can be legalized above when we 1262 // recurse through it. 1263 SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src); 1264 1265 // Now we need sign extend. Do this by shifting the elements. 
Even if these 1266 // aren't legal operations, they have a better chance of being legalized 1267 // without full scalarization than the sign extension does. 1268 unsigned EltWidth = VT.getScalarSizeInBits(); 1269 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); 1270 SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); 1271 return DAG.getNode(ISD::SRA, DL, VT, 1272 DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), 1273 ShiftAmount); 1274 } 1275 1276 // Generically expand a vector zext in register to a shuffle of the relevant 1277 // lanes into the appropriate locations, a blend of zero into the high bits, 1278 // and a bitcast to the wider element type. 1279 SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { 1280 SDLoc DL(Node); 1281 EVT VT = Node->getValueType(0); 1282 int NumElements = VT.getVectorNumElements(); 1283 SDValue Src = Node->getOperand(0); 1284 EVT SrcVT = Src.getValueType(); 1285 int NumSrcElements = SrcVT.getVectorNumElements(); 1286 1287 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector 1288 // into a larger vector type. 1289 if (SrcVT.bitsLE(VT)) { 1290 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && 1291 "ZERO_EXTEND_VECTOR_INREG vector size mismatch"); 1292 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); 1293 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), 1294 NumSrcElements); 1295 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), 1296 Src, DAG.getVectorIdxConstant(0, DL)); 1297 } 1298 1299 // Build up a zero vector to blend into this one. 1300 SDValue Zero = DAG.getConstant(0, DL, SrcVT); 1301 1302 // Shuffle the incoming lanes into the correct position, and pull all other 1303 // lanes from the zero vector. 
1304 auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements)); 1305 1306 int ExtLaneScale = NumSrcElements / NumElements; 1307 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; 1308 for (int i = 0; i < NumElements; ++i) 1309 ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; 1310 1311 return DAG.getNode(ISD::BITCAST, DL, VT, 1312 DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); 1313 } 1314 1315 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { 1316 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; 1317 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) 1318 for (int J = ScalarSizeInBytes - 1; J >= 0; --J) 1319 ShuffleMask.push_back((I * ScalarSizeInBytes) + J); 1320 } 1321 1322 SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) { 1323 EVT VT = Node->getValueType(0); 1324 1325 // Scalable vectors can't use shuffle expansion. 1326 if (VT.isScalableVector()) 1327 return TLI.expandBSWAP(Node, DAG); 1328 1329 // Generate a byte wise shuffle mask for the BSWAP. 1330 SmallVector<int, 16> ShuffleMask; 1331 createBSWAPShuffleMask(VT, ShuffleMask); 1332 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); 1333 1334 // Only emit a shuffle if the mask is legal. 1335 if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) { 1336 SDLoc DL(Node); 1337 SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); 1338 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); 1339 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 1340 } 1341 1342 // If we have the appropriate vector bit operations, it is better to use them 1343 // than unrolling and expanding each component. 
1344 if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && 1345 TLI.isOperationLegalOrCustom(ISD::SRL, VT) && 1346 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && 1347 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) 1348 return TLI.expandBSWAP(Node, DAG); 1349 1350 // Otherwise unroll. 1351 return DAG.UnrollVectorOp(Node); 1352 } 1353 1354 void VectorLegalizer::ExpandBITREVERSE(SDNode *Node, 1355 SmallVectorImpl<SDValue> &Results) { 1356 EVT VT = Node->getValueType(0); 1357 1358 // We can't unroll or use shuffles for scalable vectors. 1359 if (VT.isScalableVector()) { 1360 Results.push_back(TLI.expandBITREVERSE(Node, DAG)); 1361 return; 1362 } 1363 1364 // If we have the scalar operation, it's probably cheaper to unroll it. 1365 if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) { 1366 SDValue Tmp = DAG.UnrollVectorOp(Node); 1367 Results.push_back(Tmp); 1368 return; 1369 } 1370 1371 // If the vector element width is a whole number of bytes, test if its legal 1372 // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte 1373 // vector. This greatly reduces the number of bit shifts necessary. 
1374 unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); 1375 if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) { 1376 SmallVector<int, 16> BSWAPMask; 1377 createBSWAPShuffleMask(VT, BSWAPMask); 1378 1379 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size()); 1380 if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) && 1381 (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) || 1382 (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) && 1383 TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) && 1384 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) && 1385 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) { 1386 SDLoc DL(Node); 1387 SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); 1388 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), 1389 BSWAPMask); 1390 Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op); 1391 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 1392 Results.push_back(Op); 1393 return; 1394 } 1395 } 1396 1397 // If we have the appropriate vector bit operations, it is better to use them 1398 // than unrolling and expanding each component. 1399 if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && 1400 TLI.isOperationLegalOrCustom(ISD::SRL, VT) && 1401 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && 1402 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) { 1403 Results.push_back(TLI.expandBITREVERSE(Node, DAG)); 1404 return; 1405 } 1406 1407 // Otherwise unroll. 1408 SDValue Tmp = DAG.UnrollVectorOp(Node); 1409 Results.push_back(Tmp); 1410 } 1411 1412 SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { 1413 // Implement VSELECT in terms of XOR, AND, OR 1414 // on platforms which do not support blend natively. 
1415 SDLoc DL(Node); 1416 1417 SDValue Mask = Node->getOperand(0); 1418 SDValue Op1 = Node->getOperand(1); 1419 SDValue Op2 = Node->getOperand(2); 1420 1421 EVT VT = Mask.getValueType(); 1422 1423 // If we can't even use the basic vector operations of 1424 // AND,OR,XOR, we will have to scalarize the op. 1425 // Notice that the operation may be 'promoted' which means that it is 1426 // 'bitcasted' to another type which is handled. 1427 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || 1428 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || 1429 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) 1430 return DAG.UnrollVectorOp(Node); 1431 1432 // This operation also isn't safe with AND, OR, XOR when the boolean type is 1433 // 0/1 and the select operands aren't also booleans, as we need an all-ones 1434 // vector constant to mask with. 1435 // FIXME: Sign extend 1 to all ones if that's legal on the target. 1436 auto BoolContents = TLI.getBooleanContents(Op1.getValueType()); 1437 if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent && 1438 !(BoolContents == TargetLowering::ZeroOrOneBooleanContent && 1439 Op1.getValueType().getVectorElementType() == MVT::i1)) 1440 return DAG.UnrollVectorOp(Node); 1441 1442 // If the mask and the type are different sizes, unroll the vector op. This 1443 // can occur when getSetCCResultType returns something that is different in 1444 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. 1445 if (VT.getSizeInBits() != Op1.getValueSizeInBits()) 1446 return DAG.UnrollVectorOp(Node); 1447 1448 // Bitcast the operands to be the same type as the mask. 1449 // This is needed when we select between FP types because 1450 // the mask is a vector of integers. 
1451 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); 1452 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); 1453 1454 SDValue NotMask = DAG.getNOT(DL, Mask, VT); 1455 1456 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); 1457 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); 1458 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); 1459 return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); 1460 } 1461 1462 SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) { 1463 // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which 1464 // do not support it natively. 1465 SDLoc DL(Node); 1466 1467 SDValue Mask = Node->getOperand(0); 1468 SDValue Op1 = Node->getOperand(1); 1469 SDValue Op2 = Node->getOperand(2); 1470 SDValue EVL = Node->getOperand(3); 1471 1472 EVT VT = Mask.getValueType(); 1473 1474 // If we can't even use the basic vector operations of 1475 // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op. 1476 if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand || 1477 TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand || 1478 TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand) 1479 return DAG.UnrollVectorOp(Node); 1480 1481 // This operation also isn't safe when the operands aren't also booleans. 1482 if (Op1.getValueType().getVectorElementType() != MVT::i1) 1483 return DAG.UnrollVectorOp(Node); 1484 1485 SDValue Ones = DAG.getAllOnesConstant(DL, VT); 1486 SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL); 1487 1488 Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL); 1489 Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL); 1490 return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL); 1491 } 1492 1493 SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) { 1494 // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector 1495 // indices less than the EVL/pivot are true. 
Combine that with the original 1496 // mask for a full-length mask. Use a full-length VSELECT to select between 1497 // the true and false values. 1498 SDLoc DL(Node); 1499 1500 SDValue Mask = Node->getOperand(0); 1501 SDValue Op1 = Node->getOperand(1); 1502 SDValue Op2 = Node->getOperand(2); 1503 SDValue EVL = Node->getOperand(3); 1504 1505 EVT MaskVT = Mask.getValueType(); 1506 bool IsFixedLen = MaskVT.isFixedLengthVector(); 1507 1508 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(), 1509 MaskVT.getVectorElementCount()); 1510 1511 // If we can't construct the EVL mask efficiently, it's better to unroll. 1512 if ((IsFixedLen && 1513 !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) || 1514 (!IsFixedLen && 1515 (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) || 1516 !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT)))) 1517 return DAG.UnrollVectorOp(Node); 1518 1519 // If using a SETCC would result in a different type than the mask type, 1520 // unroll. 1521 if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), 1522 EVLVecVT) != MaskVT) 1523 return DAG.UnrollVectorOp(Node); 1524 1525 SDValue StepVec = DAG.getStepVector(DL, EVLVecVT); 1526 SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL); 1527 SDValue EVLMask = 1528 DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT); 1529 1530 SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask); 1531 return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2); 1532 } 1533 1534 SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) { 1535 // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB. 1536 EVT VT = Node->getValueType(0); 1537 1538 unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? 
ISD::VP_SDIV : ISD::VP_UDIV; 1539 1540 if (!TLI.isOperationLegalOrCustom(DivOpc, VT) || 1541 !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) || 1542 !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT)) 1543 return SDValue(); 1544 1545 SDLoc DL(Node); 1546 1547 SDValue Dividend = Node->getOperand(0); 1548 SDValue Divisor = Node->getOperand(1); 1549 SDValue Mask = Node->getOperand(2); 1550 SDValue EVL = Node->getOperand(3); 1551 1552 // X % Y -> X-X/Y*Y 1553 SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL); 1554 SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL); 1555 return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL); 1556 } 1557 1558 void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, 1559 SmallVectorImpl<SDValue> &Results) { 1560 // Attempt to expand using TargetLowering. 1561 SDValue Result, Chain; 1562 if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) { 1563 Results.push_back(Result); 1564 if (Node->isStrictFPOpcode()) 1565 Results.push_back(Chain); 1566 return; 1567 } 1568 1569 // Otherwise go ahead and unroll. 1570 if (Node->isStrictFPOpcode()) { 1571 UnrollStrictFPOp(Node, Results); 1572 return; 1573 } 1574 1575 Results.push_back(DAG.UnrollVectorOp(Node)); 1576 } 1577 1578 void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, 1579 SmallVectorImpl<SDValue> &Results) { 1580 bool IsStrict = Node->isStrictFPOpcode(); 1581 unsigned OpNo = IsStrict ? 1 : 0; 1582 SDValue Src = Node->getOperand(OpNo); 1583 EVT VT = Src.getValueType(); 1584 SDLoc DL(Node); 1585 1586 // Attempt to expand using TargetLowering. 1587 SDValue Result; 1588 SDValue Chain; 1589 if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) { 1590 Results.push_back(Result); 1591 if (IsStrict) 1592 Results.push_back(Chain); 1593 return; 1594 } 1595 1596 // Make sure that the SINT_TO_FP and SRL instructions are available. 
1597 if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) == 1598 TargetLowering::Expand) || 1599 (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) == 1600 TargetLowering::Expand)) || 1601 TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) { 1602 if (IsStrict) { 1603 UnrollStrictFPOp(Node, Results); 1604 return; 1605 } 1606 1607 Results.push_back(DAG.UnrollVectorOp(Node)); 1608 return; 1609 } 1610 1611 unsigned BW = VT.getScalarSizeInBits(); 1612 assert((BW == 64 || BW == 32) && 1613 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); 1614 1615 SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT); 1616 1617 // Constants to clear the upper part of the word. 1618 // Notice that we can also use SHL+SHR, but using a constant is slightly 1619 // faster on x86. 1620 uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; 1621 SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); 1622 1623 // Two to the power of half-word-size. 1624 SDValue TWOHW = 1625 DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0)); 1626 1627 // Clear upper part of LO, lower HI 1628 SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord); 1629 SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask); 1630 1631 if (IsStrict) { 1632 // Convert hi and lo to floats 1633 // Convert the hi part back to the upper values 1634 // TODO: Can any fast-math-flags be set on these nodes? 
1635 SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, 1636 {Node->getValueType(0), MVT::Other}, 1637 {Node->getOperand(0), HI}); 1638 fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other}, 1639 {fHI.getValue(1), fHI, TWOHW}); 1640 SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, 1641 {Node->getValueType(0), MVT::Other}, 1642 {Node->getOperand(0), LO}); 1643 1644 SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1), 1645 fLO.getValue(1)); 1646 1647 // Add the two halves 1648 SDValue Result = 1649 DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other}, 1650 {TF, fHI, fLO}); 1651 1652 Results.push_back(Result); 1653 Results.push_back(Result.getValue(1)); 1654 return; 1655 } 1656 1657 // Convert hi and lo to floats 1658 // Convert the hi part back to the upper values 1659 // TODO: Can any fast-math-flags be set on these nodes? 1660 SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI); 1661 fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW); 1662 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO); 1663 1664 // Add the two halves 1665 Results.push_back( 1666 DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO)); 1667 } 1668 1669 SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) { 1670 if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) { 1671 SDLoc DL(Node); 1672 SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0)); 1673 // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. 1674 return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero, 1675 Node->getOperand(0)); 1676 } 1677 return DAG.UnrollVectorOp(Node); 1678 } 1679 1680 void VectorLegalizer::ExpandFSUB(SDNode *Node, 1681 SmallVectorImpl<SDValue> &Results) { 1682 // For floating-point values, (a-b) is the same as a+(-b). 
If FNEG is legal, 1683 // we can defer this to operation legalization where it will be lowered as 1684 // a+(-b). 1685 EVT VT = Node->getValueType(0); 1686 if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && 1687 TLI.isOperationLegalOrCustom(ISD::FADD, VT)) 1688 return; // Defer to LegalizeDAG 1689 1690 SDValue Tmp = DAG.UnrollVectorOp(Node); 1691 Results.push_back(Tmp); 1692 } 1693 1694 void VectorLegalizer::ExpandSETCC(SDNode *Node, 1695 SmallVectorImpl<SDValue> &Results) { 1696 bool NeedInvert = false; 1697 bool IsVP = Node->getOpcode() == ISD::VP_SETCC; 1698 bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC || 1699 Node->getOpcode() == ISD::STRICT_FSETCCS; 1700 bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS; 1701 unsigned Offset = IsStrict ? 1 : 0; 1702 1703 SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); 1704 SDValue LHS = Node->getOperand(0 + Offset); 1705 SDValue RHS = Node->getOperand(1 + Offset); 1706 SDValue CC = Node->getOperand(2 + Offset); 1707 1708 MVT OpVT = LHS.getSimpleValueType(); 1709 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); 1710 1711 if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) { 1712 if (IsStrict) { 1713 UnrollStrictFPOp(Node, Results); 1714 return; 1715 } 1716 Results.push_back(UnrollVSETCC(Node)); 1717 return; 1718 } 1719 1720 SDValue Mask, EVL; 1721 if (IsVP) { 1722 Mask = Node->getOperand(3 + Offset); 1723 EVL = Node->getOperand(4 + Offset); 1724 } 1725 1726 SDLoc dl(Node); 1727 bool Legalized = 1728 TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask, 1729 EVL, NeedInvert, dl, Chain, IsSignaling); 1730 1731 if (Legalized) { 1732 // If we expanded the SETCC by swapping LHS and RHS, or by inverting the 1733 // condition code, create a new SETCC node. 
1734 if (CC.getNode()) { 1735 if (IsStrict) { 1736 LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(), 1737 {Chain, LHS, RHS, CC}, Node->getFlags()); 1738 Chain = LHS.getValue(1); 1739 } else if (IsVP) { 1740 LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0), 1741 {LHS, RHS, CC, Mask, EVL}, Node->getFlags()); 1742 } else { 1743 LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC, 1744 Node->getFlags()); 1745 } 1746 } 1747 1748 // If we expanded the SETCC by inverting the condition code, then wrap 1749 // the existing SETCC in a NOT to restore the intended condition. 1750 if (NeedInvert) { 1751 if (!IsVP) 1752 LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0)); 1753 else 1754 LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0)); 1755 } 1756 } else { 1757 assert(!IsStrict && "Don't know how to expand for strict nodes."); 1758 1759 // Otherwise, SETCC for the given comparison type must be completely 1760 // illegal; expand it into a SELECT_CC. 
1761 EVT VT = Node->getValueType(0); 1762 LHS = 1763 DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS, 1764 DAG.getBoolConstant(true, dl, VT, LHS.getValueType()), 1765 DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC); 1766 LHS->setFlags(Node->getFlags()); 1767 } 1768 1769 Results.push_back(LHS); 1770 if (IsStrict) 1771 Results.push_back(Chain); 1772 } 1773 1774 void VectorLegalizer::ExpandUADDSUBO(SDNode *Node, 1775 SmallVectorImpl<SDValue> &Results) { 1776 SDValue Result, Overflow; 1777 TLI.expandUADDSUBO(Node, Result, Overflow, DAG); 1778 Results.push_back(Result); 1779 Results.push_back(Overflow); 1780 } 1781 1782 void VectorLegalizer::ExpandSADDSUBO(SDNode *Node, 1783 SmallVectorImpl<SDValue> &Results) { 1784 SDValue Result, Overflow; 1785 TLI.expandSADDSUBO(Node, Result, Overflow, DAG); 1786 Results.push_back(Result); 1787 Results.push_back(Overflow); 1788 } 1789 1790 void VectorLegalizer::ExpandMULO(SDNode *Node, 1791 SmallVectorImpl<SDValue> &Results) { 1792 SDValue Result, Overflow; 1793 if (!TLI.expandMULO(Node, Result, Overflow, DAG)) 1794 std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node); 1795 1796 Results.push_back(Result); 1797 Results.push_back(Overflow); 1798 } 1799 1800 void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node, 1801 SmallVectorImpl<SDValue> &Results) { 1802 SDNode *N = Node; 1803 if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N), 1804 N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG)) 1805 Results.push_back(Expanded); 1806 } 1807 1808 void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, 1809 SmallVectorImpl<SDValue> &Results) { 1810 if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) { 1811 ExpandUINT_TO_FLOAT(Node, Results); 1812 return; 1813 } 1814 if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) { 1815 ExpandFP_TO_UINT(Node, Results); 1816 return; 1817 } 1818 1819 if (Node->getOpcode() == ISD::STRICT_FSETCC || 1820 Node->getOpcode() == ISD::STRICT_FSETCCS) { 1821 
ExpandSETCC(Node, Results); 1822 return; 1823 } 1824 1825 UnrollStrictFPOp(Node, Results); 1826 } 1827 1828 void VectorLegalizer::ExpandREM(SDNode *Node, 1829 SmallVectorImpl<SDValue> &Results) { 1830 assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) && 1831 "Expected REM node"); 1832 1833 SDValue Result; 1834 if (!TLI.expandREM(Node, Result, DAG)) 1835 Result = DAG.UnrollVectorOp(Node); 1836 Results.push_back(Result); 1837 } 1838 1839 // Try to expand libm nodes into vector math routine calls. Callers provide the 1840 // LibFunc equivalent of the passed in Node, which is used to lookup mappings 1841 // within TargetLibraryInfo. The only mappings considered are those where the 1842 // result and all operands are the same vector type. While predicated nodes are 1843 // not supported, we will emit calls to masked routines by passing in an all 1844 // true mask. 1845 bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC, 1846 SmallVectorImpl<SDValue> &Results) { 1847 // Chain must be propagated but currently strict fp operations are down 1848 // converted to their none strict counterpart. 1849 assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!"); 1850 1851 const char *LCName = TLI.getLibcallName(LC); 1852 if (!LCName) 1853 return false; 1854 LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n"); 1855 1856 EVT VT = Node->getValueType(0); 1857 ElementCount VL = VT.getVectorElementCount(); 1858 1859 // Lookup a vector function equivalent to the specified libcall. Prefer 1860 // unmasked variants but we will generate a mask if need be. 
1861 const TargetLibraryInfo &TLibInfo = DAG.getLibInfo(); 1862 const VecDesc *VD = TLibInfo.getVectorMappingInfo(LCName, VL, false); 1863 if (!VD) 1864 VD = TLibInfo.getVectorMappingInfo(LCName, VL, /*Masked=*/true); 1865 if (!VD) 1866 return false; 1867 1868 LLVMContext *Ctx = DAG.getContext(); 1869 Type *Ty = VT.getTypeForEVT(*Ctx); 1870 Type *ScalarTy = Ty->getScalarType(); 1871 1872 // Construct a scalar function type based on Node's operands. 1873 SmallVector<Type *, 8> ArgTys; 1874 for (unsigned i = 0; i < Node->getNumOperands(); ++i) { 1875 assert(Node->getOperand(i).getValueType() == VT && 1876 "Expected matching vector types!"); 1877 ArgTys.push_back(ScalarTy); 1878 } 1879 FunctionType *ScalarFTy = FunctionType::get(ScalarTy, ArgTys, false); 1880 1881 // Generate call information for the vector function. 1882 const std::string MangledName = VD->getVectorFunctionABIVariantString(); 1883 auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy); 1884 if (!OptVFInfo) 1885 return false; 1886 1887 LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName() 1888 << "\n"); 1889 1890 // Sanity check just in case OptVFInfo has unexpected parameters. 1891 if (OptVFInfo->Shape.Parameters.size() != 1892 Node->getNumOperands() + VD->isMasked()) 1893 return false; 1894 1895 // Collect vector call operands. 1896 1897 SDLoc DL(Node); 1898 TargetLowering::ArgListTy Args; 1899 TargetLowering::ArgListEntry Entry; 1900 Entry.IsSExt = false; 1901 Entry.IsZExt = false; 1902 1903 unsigned OpNum = 0; 1904 for (auto &VFParam : OptVFInfo->Shape.Parameters) { 1905 if (VFParam.ParamKind == VFParamKind::GlobalPredicate) { 1906 EVT MaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *Ctx, VT); 1907 Entry.Node = DAG.getBoolConstant(true, DL, MaskVT, VT); 1908 Entry.Ty = MaskVT.getTypeForEVT(*Ctx); 1909 Args.push_back(Entry); 1910 continue; 1911 } 1912 1913 // Only vector operands are supported. 
1914 if (VFParam.ParamKind != VFParamKind::Vector) 1915 return false; 1916 1917 Entry.Node = Node->getOperand(OpNum++); 1918 Entry.Ty = Ty; 1919 Args.push_back(Entry); 1920 } 1921 1922 // Emit a call to the vector function. 1923 SDValue Callee = DAG.getExternalSymbol(VD->getVectorFnName().data(), 1924 TLI.getPointerTy(DAG.getDataLayout())); 1925 TargetLowering::CallLoweringInfo CLI(DAG); 1926 CLI.setDebugLoc(DL) 1927 .setChain(DAG.getEntryNode()) 1928 .setLibCallee(CallingConv::C, Ty, Callee, std::move(Args)); 1929 1930 std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); 1931 Results.push_back(CallResult.first); 1932 return true; 1933 } 1934 1935 /// Try to expand the node to a vector libcall based on the result type. 1936 bool VectorLegalizer::tryExpandVecMathCall( 1937 SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, 1938 RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, 1939 RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) { 1940 RTLIB::Libcall LC = RTLIB::getFPLibCall( 1941 Node->getValueType(0).getVectorElementType(), Call_F32, Call_F64, 1942 Call_F80, Call_F128, Call_PPCF128); 1943 1944 if (LC == RTLIB::UNKNOWN_LIBCALL) 1945 return false; 1946 1947 return tryExpandVecMathCall(Node, LC, Results); 1948 } 1949 1950 void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, 1951 SmallVectorImpl<SDValue> &Results) { 1952 EVT VT = Node->getValueType(0); 1953 EVT EltVT = VT.getVectorElementType(); 1954 unsigned NumElems = VT.getVectorNumElements(); 1955 unsigned NumOpers = Node->getNumOperands(); 1956 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1957 1958 EVT TmpEltVT = EltVT; 1959 if (Node->getOpcode() == ISD::STRICT_FSETCC || 1960 Node->getOpcode() == ISD::STRICT_FSETCCS) 1961 TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(), 1962 *DAG.getContext(), TmpEltVT); 1963 1964 EVT ValueVTs[] = {TmpEltVT, MVT::Other}; 1965 SDValue Chain = Node->getOperand(0); 1966 SDLoc dl(Node); 1967 1968 SmallVector<SDValue, 32> 
OpValues; 1969 SmallVector<SDValue, 32> OpChains; 1970 for (unsigned i = 0; i < NumElems; ++i) { 1971 SmallVector<SDValue, 4> Opers; 1972 SDValue Idx = DAG.getVectorIdxConstant(i, dl); 1973 1974 // The Chain is the first operand. 1975 Opers.push_back(Chain); 1976 1977 // Now process the remaining operands. 1978 for (unsigned j = 1; j < NumOpers; ++j) { 1979 SDValue Oper = Node->getOperand(j); 1980 EVT OperVT = Oper.getValueType(); 1981 1982 if (OperVT.isVector()) 1983 Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 1984 OperVT.getVectorElementType(), Oper, Idx); 1985 1986 Opers.push_back(Oper); 1987 } 1988 1989 SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers); 1990 SDValue ScalarResult = ScalarOp.getValue(0); 1991 SDValue ScalarChain = ScalarOp.getValue(1); 1992 1993 if (Node->getOpcode() == ISD::STRICT_FSETCC || 1994 Node->getOpcode() == ISD::STRICT_FSETCCS) 1995 ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult, 1996 DAG.getAllOnesConstant(dl, EltVT), 1997 DAG.getConstant(0, dl, EltVT)); 1998 1999 OpValues.push_back(ScalarResult); 2000 OpChains.push_back(ScalarChain); 2001 } 2002 2003 SDValue Result = DAG.getBuildVector(VT, dl, OpValues); 2004 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); 2005 2006 Results.push_back(Result); 2007 Results.push_back(NewChain); 2008 } 2009 2010 SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { 2011 EVT VT = Node->getValueType(0); 2012 unsigned NumElems = VT.getVectorNumElements(); 2013 EVT EltVT = VT.getVectorElementType(); 2014 SDValue LHS = Node->getOperand(0); 2015 SDValue RHS = Node->getOperand(1); 2016 SDValue CC = Node->getOperand(2); 2017 EVT TmpEltVT = LHS.getValueType().getVectorElementType(); 2018 SDLoc dl(Node); 2019 SmallVector<SDValue, 8> Ops(NumElems); 2020 for (unsigned i = 0; i < NumElems; ++i) { 2021 SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, 2022 DAG.getVectorIdxConstant(i, dl)); 2023 SDValue RHSElem = 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, 2024 DAG.getVectorIdxConstant(i, dl)); 2025 Ops[i] = DAG.getNode(ISD::SETCC, dl, 2026 TLI.getSetCCResultType(DAG.getDataLayout(), 2027 *DAG.getContext(), TmpEltVT), 2028 LHSElem, RHSElem, CC); 2029 Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT), 2030 DAG.getConstant(0, dl, EltVT)); 2031 } 2032 return DAG.getBuildVector(VT, dl, Ops); 2033 } 2034 2035 bool SelectionDAG::LegalizeVectors() { 2036 return VectorLegalizer(*this).Run(); 2037 } 2038