1 //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SelectionDAG::LegalizeVectors method. 10 // 11 // The vector legalizer looks for vector operations which might need to be 12 // scalarized and legalizes them. This is a separate step from Legalize because 13 // scalarizing can introduce illegal types. For example, suppose we have an 14 // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition 15 // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the 16 // operation, which introduces nodes with the illegal type i64 which must be 17 // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; 18 // the operation must be unrolled, which introduces nodes with the illegal 19 // type i8 which must be promoted. 20 // 21 // This does not legalize vector manipulations like ISD::BUILD_VECTOR, 22 // or operations that happen to take a vector which are custom-lowered; 23 // the legalization for such operations never produces nodes 24 // with illegal types, so it's okay to put off legalizing them until 25 // SelectionDAG::Legalize runs. 
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "legalizevectorops"

namespace {

/// Walks the SelectionDAG and rewrites vector operations that the target
/// cannot handle natively, dispatching each node to a Promote/Expand/Custom
/// path based on the target's reported LegalizeAction.
class VectorLegalizer {
  SelectionDAG& DAG;
  const TargetLowering &TLI;
  bool Changed = false; // Keep track of whether anything changed

  /// For nodes that are of legal width, and that have more than one use, this
  /// map indicates what regularized operand to use. This allows us to avoid
  /// legalizing the same thing more than once.
  SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;

  /// Adds a node to the translation cache.
  void AddLegalizedOperand(SDValue From, SDValue To) {
    LegalizedNodes.insert(std::make_pair(From, To));
    // If someone requests legalization of the new node, return itself.
    if (From != To)
      LegalizedNodes.insert(std::make_pair(To, To));
  }

  /// Legalizes the given node.
  SDValue LegalizeOp(SDValue Op);

  /// Assuming the node is legal, "legalize" the results.
  SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);

  /// Make sure Results are legal and update the translation cache.
  SDValue RecursivelyLegalizeResults(SDValue Op,
                                     MutableArrayRef<SDValue> Results);

  /// Wrapper to interface LowerOperation with a vector of Results.
  /// Returns false if the target wants to use default expansion. Otherwise
  /// returns true. If return is true and the Results are empty, then the
  /// target wants to keep the input node as is.
  bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);

  /// Implements unrolling a VSETCC.
  SDValue UnrollVSETCC(SDNode *Node);

  /// Implement expand-based legalization of vector operations.
  ///
  /// This is just a high-level routine to dispatch to specific code paths for
  /// operations to legalize them.
  void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
  /// FP_TO_SINT isn't legal.
  void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
  /// SINT_TO_FLOAT and SHR on vectors isn't legal.
  void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
  SDValue ExpandSEXTINREG(SDNode *Node);

  /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place and bitcasts to the proper
  /// type. The contents of the bits in the extended part of each element are
  /// undef.
  SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);

  /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place, bitcasts to the proper
  /// type, then shifts left and arithmetic shifts right to introduce a sign
  /// extension.
  SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);

  /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place and blends zeros into
  /// the remaining lanes, finally bitcasting to the proper type.
  SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);

  /// Expand bswap of vectors into a shuffle if legal.
  SDValue ExpandBSWAP(SDNode *Node);

  /// Implement vselect in terms of XOR, AND, OR when blend is not
  /// supported by the target.
  SDValue ExpandVSELECT(SDNode *Node);
  // Expansion helpers for vector-predicated (VP) nodes.
  SDValue ExpandVP_SELECT(SDNode *Node);
  SDValue ExpandVP_MERGE(SDNode *Node);
  SDValue ExpandVP_REM(SDNode *Node);
  SDValue ExpandVP_FNEG(SDNode *Node);
  SDValue ExpandVP_FABS(SDNode *Node);
  SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
  // Expansion helpers for non-predicated select / memory / FP nodes.
  SDValue ExpandSELECT(SDNode *Node);
  // Returns the scalarized {value, chain} pair for a vector load.
  std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
  // Returns the output chain of the scalarized vector store.
  SDValue ExpandStore(SDNode *N);
  SDValue ExpandFNEG(SDNode *Node);
  SDValue ExpandFABS(SDNode *Node);
  SDValue ExpandFCOPYSIGN(SDNode *Node);
  void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  SDValue ExpandBITREVERSE(SDNode *Node);
  // Expansion helpers producing multiple results (value + overflow/chain).
  void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Try to lower the node to a vector-math library call (first overload takes
  /// the libcall directly; second selects it by the FP element type).
  bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
                            SmallVectorImpl<SDValue> &Results);
  bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32,
                            RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
                            RTLIB::Libcall Call_F128,
                            RTLIB::Libcall Call_PPCF128,
                            SmallVectorImpl<SDValue> &Results);

  /// Unroll a strict FP operation element by element.
  void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue>
                        &Results);

  /// Implements vector promotion.
  ///
  /// This is essentially just bitcasting the operands to a different type and
  /// bitcasting the result back to the original type.
  void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements [SU]INT_TO_FP vector promotion.
  ///
  /// This is a [zs]ext of the input operand to a larger integer type.
  void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements FP_TO_[SU]INT vector promotion of the result type.
  ///
  /// It is promoted to a larger integer type. The result is then
  /// truncated back to the original type.
  void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements vector setcc operation promotion.
  ///
  /// All vector operands are promoted to a vector type with larger element
  /// type.
  void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Promote a strict FP operation via STRICT_FP_EXTEND / STRICT_FP_ROUND,
  /// threading the chain through the extended operands.
  void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Calculate the reduction using a type of higher precision and round the
  /// result to match the original type. Setting NonArithmetic signifies the
  /// rounding of the result does not affect its value.
  void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
                             bool NonArithmetic);

public:
  VectorLegalizer(SelectionDAG& dag) :
      DAG(dag), TLI(dag.getTargetLoweringInfo()) {}

  /// Begin legalizing the vector operations in the DAG.
  bool Run();
};

} // end anonymous namespace

bool VectorLegalizer::Run() {
  // Before we start legalizing vector nodes, check if there are any vectors.
  bool HasVectors = false;
  // NOTE(review): `E` is pinned to the last node present on entry, and the
  // loop compares against std::next(E) rather than allnodes_end(); since new
  // nodes are appended to the all-nodes list, this walk deliberately does not
  // visit nodes created while it runs. Do not "simplify" the bound to
  // `I != DAG.allnodes_end()` without confirming that is equivalent here.
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
    // Check if the values of the nodes contain vectors. We don't need to check
    // the operands because we are going to check their values at some point.
    HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });

    // If we found a vector node we can start the legalization.
    if (HasVectors)
      break;
  }

  // If this basic block has no vectors then no need to legalize vectors.
  if (!HasVectors)
    return false;

  // The legalize process is inherently a bottom-up recursive process (users
  // legalize their uses before themselves). Given infinite stack space, we
  // could just start legalizing on the root and traverse the whole graph. In
  // practice however, this causes us to run out of stack space on large basic
  // blocks. To avoid this problem, compute an ordering of the nodes where each
  // node is only legalized after all of its operands are legalized.
  DAG.AssignTopologicalOrder();
  // Same pinned bound as above: only legalize nodes that existed on entry;
  // nodes created during legalization are legalized recursively via
  // RecursivelyLegalizeResults.
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
    LegalizeOp(SDValue(&*I, 0));

  // Finally, it's possible the root changed. Get the new root.
  SDValue OldRoot = DAG.getRoot();
  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
  DAG.setRoot(LegalizedNodes[OldRoot]);

  LegalizedNodes.clear();

  // Remove dead nodes now.
  DAG.RemoveDeadNodes();

  return Changed;
}

SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
  assert(Op->getNumValues() == Result->getNumValues() &&
         "Unexpected number of results");
  // Generic legalization: just pass the operand through.
  // Record every result of Op as already legalized (mapped to Result's
  // corresponding result) so later lookups hit the cache.
  for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
    AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
  return SDValue(Result, Op.getResNo());
}

SDValue
VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
                                            MutableArrayRef<SDValue> Results) {
  assert(Results.size() == Op->getNumValues() &&
         "Unexpected number of results");
  // Make sure that the generated code is itself legal.
  for (unsigned i = 0, e = Results.size(); i != e; ++i) {
    Results[i] = LegalizeOp(Results[i]);
    AddLegalizedOperand(Op.getValue(i), Results[i]);
  }

  return Results[Op.getResNo()];
}

SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
  // Note that LegalizeOp may be reentered even from single-use nodes, which
  // means that we always must cache transformed nodes.
  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
  if (I != LegalizedNodes.end()) return I->second;

  // Legalize the operands
  SmallVector<SDValue, 8> Ops;
  for (const SDValue &Oper : Op->op_values())
    Ops.push_back(LegalizeOp(Oper));

  // Re-point the node at its legalized operands; this may CSE into an
  // existing node, so use the returned node from here on.
  SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);

  // Nodes with neither vector results nor vector operands are not ours to
  // legalize; pass them through untouched.
  bool HasVectorValueOrOp =
      llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
      llvm::any_of(Node->op_values(),
                   [](SDValue O) { return O.getValueType().isVector(); });
  if (!HasVectorValueOrOp)
    return TranslateLegalizeResults(Op, Node);

  TargetLowering::LegalizeAction Action = TargetLowering::Legal;
  EVT ValVT; // Only used by the strict-FP cases below.
  switch (Op.getOpcode()) {
  default:
    return TranslateLegalizeResults(Op, Node);
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Node);
    ISD::LoadExtType ExtType = LD->getExtensionType();
    EVT LoadedVT = LD->getMemoryVT();
    // Only extending vector loads need an action query here.
    if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
      Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
    break;
  }
  case ISD::STORE: {
    StoreSDNode *ST =
cast<StoreSDNode>(Node);
    EVT StVT = ST->getMemoryVT();
    // NOTE: this shadows the outer `ValVT`; that one is only used by the
    // strict-FP cases.
    MVT ValVT = ST->getValue().getSimpleValueType();
    // Only truncating vector stores need an action query here.
    if (StVT.isVector() && ST->isTruncatingStore())
      Action = TLI.getTruncStoreAction(ValVT, StVT);
    break;
  }
  case ISD::MERGE_VALUES:
    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    // This operation lies about being legal: when it claims to be legal,
    // it should actually be expanded.
    if (Action == TargetLowering::Legal)
      Action = TargetLowering::Expand;
    break;
  // Handle every constrained (strict) FP opcode with one shared body.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ValVT = Node->getValueType(0);
    // For int-to-fp conversions the legalization-relevant type is the input
    // vector (operand 1; operand 0 is the chain).
    if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
        Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
      ValVT = Node->getOperand(1).getValueType();
    if (Op.getOpcode() == ISD::STRICT_FSETCC ||
        Op.getOpcode() == ISD::STRICT_FSETCCS) {
      // Compares also need the condition code to be legal for the operand
      // type before the operation itself is considered.
      MVT OpVT = Node->getOperand(1).getSimpleValueType();
      ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get();
      Action = TLI.getCondCodeAction(CCCode, OpVT);
      if (Action == TargetLowering::Legal)
        Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
    } else {
      Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
    }
    // If we're asked to expand a strict vector floating-point operation,
    // by default we're going to simply unroll it. That is usually the
    // best approach, except in the case where the resulting strict (scalar)
    // operations would themselves use the fallback mutation to non-strict.
    // In that specific case, just do the fallback on the vector op.
    if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
        TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
            TargetLowering::Legal) {
      EVT EltVT = ValVT.getVectorElementType();
      if (TLI.getOperationAction(Node->getOpcode(), EltVT)
          == TargetLowering::Expand &&
          TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
          == TargetLowering::Legal)
        Action = TargetLowering::Legal;
    }
    break;
  // For all of these the action is keyed on the result type (value 0).
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::MULHS:
  case ISD::MULHU:
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
  case ISD::SDIVREM:
  case ISD::UDIVREM:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::FSHL:
  case ISD::FSHR:
  case ISD::ROTL:
  case ISD::ROTR:
  case ISD::ABS:
  case ISD::ABDS:
  case ISD::ABDU:
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::BSWAP:
  case ISD::BITREVERSE:
  case ISD::CTLZ:
  case ISD::CTTZ:
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::CTPOP:
  case ISD::SELECT:
  case ISD::VSELECT:
  case ISD::SELECT_CC:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::TRUNCATE:
  case ISD::SIGN_EXTEND:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FNEG:
  case ISD::FABS:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMINNUM_IEEE:
  case ISD::FMAXNUM_IEEE:
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
  case ISD::FCOPYSIGN:
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FTAN:
  case ISD::FASIN:
  case ISD::FACOS:
  case ISD::FATAN:
  case ISD::FATAN2:
  case ISD::FSINH:
  case ISD::FCOSH:
  case ISD::FTANH:
  case ISD::FLDEXP:
  case ISD::FPOWI:
  case ISD::FPOW:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FEXP10:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::FFLOOR:
  case ISD::FP_ROUND:
  case ISD::FP_EXTEND:
  case ISD::FPTRUNC_ROUND:
  case ISD::FMA:
  case ISD::SIGN_EXTEND_INREG:
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:
  case ISD::FCANONICALIZE:
  case ISD::FFREXP:
  case ISD::FMODF:
  case ISD::FSINCOS:
  case ISD::FSINCOSPI:
  case ISD::SADDSAT:
  case ISD::UADDSAT:
  case ISD::SSUBSAT:
  case ISD::USUBSAT:
  case ISD::SSHLSAT:
  case ISD::USHLSAT:
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::MGATHER:
  case ISD::VECTOR_COMPRESS:
  case ISD::SCMP:
  case ISD::UCMP:
    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    break;
  // Fixed-point ops additionally key the action on the scale operand.
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:
  case ISD::SDIVFIX:
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIX:
  case ISD::UDIVFIXSAT: {
    unsigned Scale = Node->getConstantOperandVal(2);
    Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
                                              Node->getValueType(0), Scale);
    break;
  }
  // For these the action is keyed on the (vector) input type, operand 0.
  case ISD::LROUND:
  case ISD::LLROUND:
  case ISD::LRINT:
  case ISD::LLRINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMINIMUM:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECTOR_FIND_LAST_ACTIVE:
    Action = TLI.getOperationAction(Node->getOpcode(),
                                    Node->getOperand(0).getValueType());
    break;
  // Sequential reductions take the start value as operand 0; the vector to
  // reduce is operand 1.
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Action = TLI.getOperationAction(Node->getOpcode(),
                                    Node->getOperand(1).getValueType());
    break;
  case ISD::SETCC: {
    // Condition code legality is checked first, then the operation itself.
    MVT OpVT = Node->getOperand(0).getSimpleValueType();
    ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
    Action = TLI.getCondCodeAction(CCCode, OpVT);
    if (Action == TargetLowering::Legal)
      Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
    break;
  }
  case ISD::PARTIAL_REDUCE_UMLA:
  case ISD::PARTIAL_REDUCE_SMLA:
  case ISD::PARTIAL_REDUCE_SUMLA:
    Action =
        TLI.getPartialReduceMLAAction(Op.getOpcode(), Node->getValueType(0),
                                      Node->getOperand(1).getValueType());
    break;

  // One case per VP intrinsic; LEGALPOS selects which value/operand type the
  // legalization action is keyed on (negative = result, non-negative =
  // operand index).
#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...)                          \
  case ISD::VPID: {                                                            \
    EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS))        \
                                  : Node->getOperand(LEGALPOS).getValueType(); \
    if (ISD::VPID == ISD::VP_SETCC) {                                          \
      ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
      Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT());        \
      if (Action != TargetLowering::Legal)                                     \
        break;                                                                 \
    }                                                                          \
    /* Defer non-vector results to LegalizeDAG. */                             \
    if (!Node->getValueType(0).isVector() &&                                   \
        Node->getValueType(0) != MVT::Other) {                                 \
      Action = TargetLowering::Legal;                                          \
      break;                                                                   \
    }                                                                          \
    Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT);            \
  } break;
#include "llvm/IR/VPIntrinsics.def"
  }

  LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));

  // Dispatch on the chosen action; Custom falls through to Expand when the
  // target declines to lower the node.
  SmallVector<SDValue, 8> ResultVals;
  switch (Action) {
  default: llvm_unreachable("This action is not supported yet!");
  case TargetLowering::Promote:
    assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
           "This action is not supported yet!");
    LLVM_DEBUG(dbgs() << "Promoting\n");
    Promote(Node, ResultVals);
    assert(!ResultVals.empty() && "No results for promotion?");
    break;
  case TargetLowering::Legal:
    LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
    break;
  case TargetLowering::Custom:
    LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
    if (LowerOperationWrapper(Node, ResultVals))
      break;
    LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
    [[fallthrough]];
  case TargetLowering::Expand:
    LLVM_DEBUG(dbgs() << "Expanding\n");
    Expand(Node, ResultVals);
    break;
  }

  // Empty ResultVals means "keep the node as-is".
  if (ResultVals.empty())
    return TranslateLegalizeResults(Op, Node);

  Changed = true;
  return RecursivelyLegalizeResults(Op, ResultVals);
}

// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
// merge them somehow?
bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
                                            SmallVectorImpl<SDValue> &Results) {
  SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);

  // No lowering: fall back to default expansion.
  if (!Res.getNode())
    return false;

  // Target returned the node itself: keep it as-is (Results stays empty).
  if (Res == SDValue(Node, 0))
    return true;

  // If the original node has one result, take the return value from
  // LowerOperation as is. It might not be result number 0.
  if (Node->getNumValues() == 1) {
    Results.push_back(Res);
    return true;
  }

  // If the original node has multiple results, then the return node should
  // have the same number of results.
  assert((Node->getNumValues() == Res->getNumValues()) &&
         "Lowering returned the wrong number of results!");

  // Places new result values based on N result number.
  for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
    Results.push_back(Res.getValue(I));

  return true;
}

void VectorLegalizer::PromoteSETCC(SDNode *Node,
                                   SmallVectorImpl<SDValue> &Results) {
  // Promote the compared operands to a wider element type; the boolean result
  // type itself is unchanged.
  MVT VecVT = Node->getOperand(0).getSimpleValueType();
  MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);

  // FP operands must be value-preservingly extended; integer operands may be
  // extended arbitrarily since both sides get the same treatment.
  unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;

  SDLoc DL(Node);
  SmallVector<SDValue, 5> Operands(Node->getNumOperands());

  Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0));
  Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1));
  Operands[2] = Node->getOperand(2);

  if (Node->getOpcode() == ISD::VP_SETCC) {
    Operands[3] = Node->getOperand(3); // mask
    Operands[4] = Node->getOperand(4); // evl
  }

  SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0),
                            Operands, Node->getFlags());

  Results.push_back(Res);
}

void VectorLegalizer::PromoteSTRICT(SDNode *Node,
                                    SmallVectorImpl<SDValue> &Results) {
  // Operand 0 is the chain; operand 1 is the first real (vector) operand.
  MVT VecVT = Node->getOperand(1).getSimpleValueType();
  MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);

  assert(VecVT.isFloatingPoint());

  SDLoc DL(Node);
  SmallVector<SDValue, 5> Operands(Node->getNumOperands());
  SmallVector<SDValue, 2> Chains;

  for (unsigned j = 1; j != Node->getNumOperands(); ++j)
    if (Node->getOperand(j).getValueType().isVector() &&
        !(ISD::isVPOpcode(Node->getOpcode()) &&
          ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
    {
      // promote the vector operand.
      SDValue Ext =
          DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other},
                      {Node->getOperand(0), Node->getOperand(j)});
      Operands[j] = Ext.getValue(0);
      Chains.push_back(Ext.getValue(1));
    } else
      Operands[j] = Node->getOperand(j); // Skip no vector operand.

  SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1));

  // Merge the per-operand extend chains back into the op's input chain.
  Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);

  SDValue Res =
      DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags());

  // Round the promoted result back down to the original type, keeping the
  // chain threaded through.
  SDValue Round =
      DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other},
                  {Res.getValue(1), Res.getValue(0),
                   DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});

  Results.push_back(Round.getValue(0));
  Results.push_back(Round.getValue(1));
}

void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
                                            SmallVectorImpl<SDValue> &Results,
                                            bool NonArithmetic) {
  MVT OpVT = Node->getOperand(0).getSimpleValueType();
  assert(OpVT.isFloatingPoint() && "Expected floating point reduction!");
  MVT NewOpVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OpVT);

  SDLoc DL(Node);
  // Extend the input vector, reduce in the wider element type, then round the
  // scalar result back. NonArithmetic is forwarded as the FP_ROUND "trunc is
  // exact" flag.
  SDValue NewOp = DAG.getNode(ISD::FP_EXTEND, DL, NewOpVT, Node->getOperand(0));
  SDValue Rdx =
      DAG.getNode(Node->getOpcode(), DL, NewOpVT.getVectorElementType(), NewOp,
                  Node->getFlags());
  SDValue Res =
      DAG.getNode(ISD::FP_ROUND, DL, Node->getValueType(0), Rdx,
                  DAG.getIntPtrConstant(NonArithmetic, DL, /*isTarget=*/true));
  Results.push_back(Res);
}

void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  // For a few operations there is a specific concept for promotion based on
  // the operand's type.
  switch (Node->getOpcode()) {
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::STRICT_SINT_TO_FP:
  case ISD::STRICT_UINT_TO_FP:
    // "Promote" the operation by extending the operand.
    PromoteINT_TO_FP(Node, Results);
    return;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::STRICT_FP_TO_SINT:
    // Promote the operation by extending the operand.
    PromoteFP_TO_INT(Node, Results);
    return;
  case ISD::VP_SETCC:
  case ISD::SETCC:
    // Promote the operation by extending the operand.
    PromoteSETCC(Node, Results);
    return;
  case ISD::STRICT_FADD:
  case ISD::STRICT_FSUB:
  case ISD::STRICT_FMUL:
  case ISD::STRICT_FDIV:
  case ISD::STRICT_FSQRT:
  case ISD::STRICT_FMA:
    PromoteSTRICT(Node, Results);
    return;
  case ISD::VECREDUCE_FADD:
    PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/false);
    return;
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMINIMUM:
    PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
    return;
  case ISD::FP_ROUND:
  case ISD::FP_EXTEND:
    // These operations are used to do promotion so they can't be promoted
    // themselves.
    llvm_unreachable("Don't know how to promote this operation!");
  case ISD::VP_FABS:
  case ISD::VP_FCOPYSIGN:
  case ISD::VP_FNEG:
    // Promoting fabs, fneg, and fcopysign changes their semantics.
    llvm_unreachable("These operations should not be promoted");
  }

  // There are currently two cases of vector promotion:
  // 1) Bitcasting a vector of integers to a different type to a vector of the
  //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
  // 2) Extending a vector of floats to a vector of the same number of larger
  //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
  assert(Node->getNumValues() == 1 &&
         "Can't promote a vector with multiple results!");
  MVT VT = Node->getSimpleValueType(0);
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  SDLoc dl(Node);
  SmallVector<SDValue, 4> Operands(Node->getNumOperands());

  for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
    // Do not promote the mask operand of a VP OP.
    bool SkipPromote = ISD::isVPOpcode(Node->getOpcode()) &&
                       ISD::getVPMaskIdx(Node->getOpcode()) == j;
    if (Node->getOperand(j).getValueType().isVector() && !SkipPromote)
      // FP element -> FP element promotion uses (VP_)FP_EXTEND (case 2
      // above); everything else is a plain bitcast (case 1).
      if (Node->getOperand(j)
              .getValueType()
              .getVectorElementType()
              .isFloatingPoint() &&
          NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
        if (ISD::isVPOpcode(Node->getOpcode())) {
          unsigned EVLIdx =
              *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
          unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
          Operands[j] =
              DAG.getNode(ISD::VP_FP_EXTEND, dl, NVT, Node->getOperand(j),
                          Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
        } else {
          Operands[j] =
              DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
        }
      else
        Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
    else
      // Scalar operands (and VP masks) are passed through untouched.
      Operands[j] = Node->getOperand(j);
  }

  SDValue Res =
      DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());

  // Convert the promoted result back: FP_ROUND for the float-extension case,
  // bitcast for the integer case.
  if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
      (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
       NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
    if (ISD::isVPOpcode(Node->getOpcode())) {
      unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
      unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
      Res = DAG.getNode(ISD::VP_FP_ROUND, dl, VT, Res,
                        Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
    } else {
      Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
                        DAG.getIntPtrConstant(0, dl,
                                              /*isTarget=*/true));
    }
  else
    Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);

  Results.push_back(Res);
}

void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  // INT_TO_FP operations may require the input operand be promoted even
  // when the type is otherwise legal.
  bool IsStrict = Node->isStrictFPOpcode();
  // For strict ops operand 0 is the chain; the integer input is operand 1.
  MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");

  SDLoc dl(Node);
  SmallVector<SDValue, 4> Operands(Node->getNumOperands());

  // Unsigned inputs must be zero-extended to preserve their value; signed
  // inputs are sign-extended.
  unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
                  Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
                     ? ISD::ZERO_EXTEND
                     : ISD::SIGN_EXTEND;
  for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
    if (Node->getOperand(j).getValueType().isVector())
      Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
    else
      Operands[j] = Node->getOperand(j);
  }

  if (IsStrict) {
    SDValue Res = DAG.getNode(Node->getOpcode(), dl,
                              {Node->getValueType(0), MVT::Other}, Operands);
    Results.push_back(Res);
    Results.push_back(Res.getValue(1));
    return;
  }

  SDValue Res =
      DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
  Results.push_back(Res);
}

// For FP_TO_INT we promote the result type to a vector type with wider
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote thus assuming that the
// promoted vector type has the same overall size.
/// Promote an FP_TO_*INT (or strict variant) node: perform the conversion at
/// the wider promoted integer type, assert the value fits in the original
/// narrow type, then truncate back to the requested result type.
void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  MVT VT = Node->getSimpleValueType(0);
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  bool IsStrict = Node->isStrictFPOpcode();
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");

  unsigned NewOpc = Node->getOpcode();
  // Change FP_TO_UINT to FP_TO_SINT if possible.
  // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
  if (NewOpc == ISD::FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
    NewOpc = ISD::FP_TO_SINT;

  if (NewOpc == ISD::STRICT_FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
    NewOpc = ISD::STRICT_FP_TO_SINT;

  SDLoc dl(Node);
  SDValue Promoted, Chain;
  if (IsStrict) {
    // Strict nodes carry a chain (operand 0) that must be threaded through.
    Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
                           {Node->getOperand(0), Node->getOperand(1)});
    Chain = Promoted.getValue(1);
  } else
    Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));

  // Assert that the converted value fits in the original type. If it doesn't
  // (eg: because the value being converted is too big), then the result of the
  // original operation was undefined anyway, so the assert is still correct.
  // Note: NewOpc is reused here for the assert opcode; the unsigned/signed
  // choice follows the ORIGINAL opcode, not the possibly-rewritten one.
  if (Node->getOpcode() == ISD::FP_TO_UINT ||
      Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
    NewOpc = ISD::AssertZext;
  else
    NewOpc = ISD::AssertSext;

  Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
                         DAG.getValueType(VT.getScalarType()));
  Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
  Results.push_back(Promoted);
  if (IsStrict)
    Results.push_back(Chain);
}

/// Scalarize a vector load; returns {value, chain}.
std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  return TLI.scalarizeVectorLoad(LD, DAG);
}

/// Scalarize a vector store; returns the token factor chaining the scalar
/// stores together.
SDValue VectorLegalizer::ExpandStore(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
  return TF;
}

/// Expand a vector operation into equivalent legal(izable) operations. Each
/// case either pushes its results and returns, or breaks to fall through to
/// the generic unroll-to-scalars fallback at the bottom of the switch.
void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  switch (Node->getOpcode()) {
  case ISD::LOAD: {
    std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
    Results.push_back(Tmp.first);
    Results.push_back(Tmp.second);
    return;
  }
  case ISD::STORE:
    Results.push_back(ExpandStore(Node));
    return;
  case ISD::MERGE_VALUES:
    // MERGE_VALUES maps each result directly to the corresponding operand.
    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
      Results.push_back(Node->getOperand(i));
    return;
  case ISD::SIGN_EXTEND_INREG:
    if (SDValue Expanded = ExpandSEXTINREG(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::ANY_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::BSWAP:
    if (SDValue Expanded = ExpandBSWAP(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_BSWAP:
    Results.push_back(TLI.expandVPBSWAP(Node, DAG));
    return;
  case ISD::VSELECT:
    if (SDValue Expanded = ExpandVSELECT(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_SELECT:
    if (SDValue Expanded = ExpandVP_SELECT(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_SREM:
  case ISD::VP_UREM:
    if (SDValue Expanded = ExpandVP_REM(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_FNEG:
    if (SDValue Expanded = ExpandVP_FNEG(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_FABS:
    if (SDValue Expanded = ExpandVP_FABS(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_FCOPYSIGN:
    if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SELECT:
    if (SDValue Expanded = ExpandSELECT(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SELECT_CC: {
    // For scalable vectors, lower SELECT_CC to a SETCC + SELECT pair, since
    // scalable SELECT_CC cannot be unrolled below.
    if (Node->getValueType(0).isScalableVector()) {
      EVT CondVT = TLI.getSetCCResultType(
          DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
      SDValue SetCC =
          DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
                      Node->getOperand(1), Node->getOperand(4));
      Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
                                      Node->getOperand(2),
                                      Node->getOperand(3)));
      return;
    }
    break;
  }
  case ISD::FP_TO_UINT:
    ExpandFP_TO_UINT(Node, Results);
    return;
  case ISD::UINT_TO_FP:
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  case ISD::FNEG:
    if (SDValue Expanded = ExpandFNEG(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FABS:
    if (SDValue Expanded = ExpandFABS(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FCOPYSIGN:
    if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FSUB:
    ExpandFSUB(Node, Results);
    return;
  case ISD::SETCC:
  case ISD::VP_SETCC:
    ExpandSETCC(Node, Results);
    return;
  case ISD::ABS:
    if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::ABDS:
  case ISD::ABDU:
    if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
    if (SDValue Expanded = TLI.expandAVG(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::BITREVERSE:
    if (SDValue Expanded = ExpandBITREVERSE(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_BITREVERSE:
    if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTPOP:
    if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTPOP:
    if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTLZ:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTTZ:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FSHL:
  case ISD::VP_FSHL:
  case ISD::FSHR:
  case ISD::VP_FSHR:
    if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
    if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
    Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
    return;
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
    Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG));
    return;
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
    if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::UADDO:
  case ISD::USUBO:
    ExpandUADDSUBO(Node, Results);
    return;
  case ISD::SADDO:
  case ISD::SSUBO:
    ExpandSADDSUBO(Node, Results);
    return;
  case ISD::UMULO:
  case ISD::SMULO:
    ExpandMULO(Node, Results);
    return;
  case ISD::USUBSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::SADDSAT:
    if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::USHLSAT:
  case ISD::SSHLSAT:
    if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    // Expand the FP_TO_*INT_SAT if it is scalable to prevent it from
    // unrolling below (scalable vectors cannot be unrolled).
    if (Node->getValueType(0).isScalableVector()) {
      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
        Results.push_back(Expanded);
        return;
      }
    }
    break;
  case ISD::SMULFIX:
  case ISD::UMULFIX:
    if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
    // why. Maybe it results in worse codegen compared to the unroll for some
    // targets? This should probably be investigated. And if we still prefer to
    // unroll an explanation could be helpful.
    break;
  case ISD::SDIVFIX:
  case ISD::UDIVFIX:
    ExpandFixedPointDiv(Node, Results);
    return;
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIXSAT:
    // Fall through to the generic unroll below.
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ExpandStrictFPOp(Node, Results);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VECREDUCE_FMINIMUM:
    Results.push_back(TLI.expandVecReduce(Node, DAG));
    return;
  case ISD::PARTIAL_REDUCE_UMLA:
  case ISD::PARTIAL_REDUCE_SMLA:
  case ISD::PARTIAL_REDUCE_SUMLA:
    Results.push_back(TLI.expandPartialReduceMLA(Node, DAG));
    return;
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
    return;
  case ISD::SREM:
  case ISD::UREM:
    ExpandREM(Node, Results);
    return;
  case ISD::VP_MERGE:
    if (SDValue Expanded = ExpandVP_MERGE(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FREM:
    // Try lowering to a vector-math library call before unrolling.
    if (tryExpandVecMathCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
                             RTLIB::REM_F80, RTLIB::REM_F128,
                             RTLIB::REM_PPCF128, Results))
      return;

    break;
  case ISD::FSINCOS:
  case ISD::FSINCOSPI: {
    // Multi-result FP ops: pick the libcall by scalar element type.
    EVT VT = Node->getValueType(0).getVectorElementType();
    RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
                            ? RTLIB::getSINCOS(VT)
                            : RTLIB::getSINCOSPI(VT);
    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
      return;
    break;
  }
  case ISD::FMODF: {
    RTLIB::Libcall LC =
        RTLIB::getMODF(Node->getValueType(0).getVectorElementType());
    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results,
                                          /*CallRetResNo=*/0))
      return;
    break;
  }
  case ISD::VECTOR_COMPRESS:
    Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG));
    return;
  case ISD::VECTOR_FIND_LAST_ACTIVE:
    Results.push_back(TLI.expandVectorFindLastActive(Node, DAG));
    return;
  case ISD::SCMP:
  case ISD::UCMP:
    Results.push_back(TLI.expandCMP(Node, DAG));
    return;

  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FMA:
  case ISD::FDIV:
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FNEARBYINT:
  case ISD::FRINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::FTRUNC:
  case ISD::FSQRT:
    if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  }

  // Generic fallback: unroll the vector operation into scalar operations.
  SDValue Unrolled = DAG.UnrollVectorOp(Node);
  if (Node->getNumValues() == 1) {
    Results.push_back(Unrolled);
  } else {
    assert(Node->getNumValues() == Unrolled->getNumValues() &&
           "VectorLegalizer Expand returned wrong number of results!");
    for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
      Results.push_back(Unrolled.getValue(I));
  }
}

/// Expand a SELECT whose condition is a scalar but whose operands are vectors,
/// by splatting the condition and combining with XOR/AND/OR. Returns a null
/// SDValue if the required operations are unavailable (caller unrolls).
SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
  // Lower a select instruction where the condition is a scalar and the
  // operands are vectors. Lower this select to VSELECT and implement it
  // using XOR AND OR. The selector bit is broadcasted.
  EVT VT = Node->getValueType(0);
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);

  assert(VT.isVector() && !Mask.getValueType().isVector()
         && Op1.getValueType() == Op2.getValueType() && "Invalid type");

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  // Also, we need to be able to construct a splat vector using either
  // BUILD_VECTOR or SPLAT_VECTOR.
  // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
  // BUILD_VECTOR?
  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
                                                      : ISD::SPLAT_VECTOR,
                             VT) == TargetLowering::Expand)
    return SDValue();

  // Generate a mask operand.
  EVT MaskTy = VT.changeVectorElementTypeToInteger();

  // What is the size of each element in the vector mask.
  EVT BitTy = MaskTy.getScalarType();

  // Materialize the scalar condition as all-ones / all-zeros of the mask
  // element type so the bitwise blend below selects whole lanes.
  Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
                       DAG.getConstant(0, DL, BitTy));

  // Broadcast the mask so that the entire vector is all one or all zero.
  Mask = DAG.getSplat(MaskTy, DL, Mask);

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
  Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);

  SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);

  // Blend: (Op1 & Mask) | (Op2 & ~Mask).
  Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
  Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
  SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
  return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}

/// Expand SIGN_EXTEND_INREG as a shift-left followed by an arithmetic
/// shift-right. Returns a null SDValue if SRA/SHL are unavailable.
SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // Make sure that the SRA and SHL instructions are available.
  if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
    return SDValue();

  SDLoc DL(Node);
  // Operand 1 is a VTSDNode describing the type sign-extended from.
  EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();

  unsigned BW = VT.getScalarSizeInBits();
  unsigned OrigBW = OrigTy.getScalarSizeInBits();
  SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);

  // (x << (BW-OrigBW)) >>s (BW-OrigBW) replicates the sign bit of the
  // original narrow value across the high bits.
  SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
  return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
}

// Generically expand a vector anyext in register to a shuffle of the relevant
// lanes into the appropriate locations, with other lanes left undef.
SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  int NumElements = VT.getVectorNumElements();
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  int NumSrcElements = SrcVT.getVectorNumElements();

  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  // into a larger vector type.
  if (SrcVT.bitsLE(VT)) {
    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
           "ANY_EXTEND_VECTOR_INREG vector size mismatch");
    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
                             NumSrcElements);
    Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(SrcVT), Src, 0);
  }

  // Build a base mask of undef shuffles.
  SmallVector<int, 16> ShuffleMask;
  ShuffleMask.resize(NumSrcElements, -1);

  // Place the extended lanes into the correct locations.
  // On big-endian targets the narrow source lane lands in the highest-indexed
  // sub-lane of the widened element, hence the endian offset.
  int ExtLaneScale = NumSrcElements / NumElements;
  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
  for (int i = 0; i < NumElements; ++i)
    ShuffleMask[i * ExtLaneScale + EndianOffset] = i;

  return DAG.getNode(
      ISD::BITCAST, DL, VT,
      DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
}

/// Expand SIGN_EXTEND_VECTOR_INREG as an any-extend followed by a SHL/SRA
/// pair that replicates the sign bit.
SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();

  // First build an any-extend node which can be legalized above when we
  // recurse through it.
  SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);

  // Now we need sign extend. Do this by shifting the elements. Even if these
  // aren't legal operations, they have a better chance of being legalized
  // without full scalarization than the sign extension does.
  unsigned EltWidth = VT.getScalarSizeInBits();
  unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
  SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
  return DAG.getNode(ISD::SRA, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
                     ShiftAmount);
}

// Generically expand a vector zext in register to a shuffle of the relevant
// lanes into the appropriate locations, a blend of zero into the high bits,
// and a bitcast to the wider element type.
SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  int NumElements = VT.getVectorNumElements();
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  int NumSrcElements = SrcVT.getVectorNumElements();

  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  // into a larger vector type.
  if (SrcVT.bitsLE(VT)) {
    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
           "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
                             NumSrcElements);
    Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(SrcVT), Src, 0);
  }

  // Build up a zero vector to blend into this one.
  SDValue Zero = DAG.getConstant(0, DL, SrcVT);

  // Shuffle the incoming lanes into the correct position, and pull all other
  // lanes from the zero vector.
  auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));

  int ExtLaneScale = NumSrcElements / NumElements;
  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
  for (int i = 0; i < NumElements; ++i)
    ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;

  return DAG.getNode(ISD::BITCAST, DL, VT,
                     DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
}

/// Build a byte-level shuffle mask that reverses the bytes within each
/// scalar element of VT (i.e. a per-element byte swap).
static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
  int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
  for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
    for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
      ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
}

/// Expand a vector BSWAP, preferring a byte shuffle, then a shift/mask
/// expansion; returns a null SDValue to request scalar unrolling.
SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // Scalable vectors can't use shuffle expansion.
  if (VT.isScalableVector())
    return TLI.expandBSWAP(Node, DAG);

  // Generate a byte wise shuffle mask for the BSWAP.
  SmallVector<int, 16> ShuffleMask;
  createBSWAPShuffleMask(VT, ShuffleMask);
  EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());

  // Only emit a shuffle if the mask is legal.
  if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
    SDLoc DL(Node);
    SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
    Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
      TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
    return TLI.expandBSWAP(Node, DAG);

  // Otherwise let the caller unroll.
  return SDValue();
}

/// Expand a vector BITREVERSE: try a BSWAP-style byte shuffle plus a byte
/// BITREVERSE first, then a shift/mask expansion; returns a null SDValue to
/// request scalar unrolling.
SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // We can't unroll or use shuffles for scalable vectors.
  if (VT.isScalableVector())
    return TLI.expandBITREVERSE(Node, DAG);

  // If we have the scalar operation, it's probably cheaper to unroll it.
  if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
    return SDValue();

  // If the vector element width is a whole number of bytes, test if its legal
  // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
  // vector. This greatly reduces the number of bit shifts necessary.
  unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
  if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
    SmallVector<int, 16> BSWAPMask;
    createBSWAPShuffleMask(VT, BSWAPMask);

    EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
    if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
        (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
         (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
          TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
      SDLoc DL(Node);
      SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
      Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
                                BSWAPMask);
      Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      return Op;
    }
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
      TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
    return TLI.expandBITREVERSE(Node, DAG);

  // Otherwise unroll.
  return SDValue();
}

/// Expand VSELECT as a bitwise blend: (Op1 & Mask) | (Op2 & ~Mask).
/// Returns a null SDValue if the expansion is unavailable or unsafe, in
/// which case the caller unrolls.
SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
  // Implement VSELECT in terms of XOR, AND, OR
  // on platforms which do not support blend natively.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);

  EVT VT = Mask.getValueType();

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
    return SDValue();

  // This operation also isn't safe with AND, OR, XOR when the boolean type is
  // 0/1 and the select operands aren't also booleans, as we need an all-ones
  // vector constant to mask with.
  // FIXME: Sign extend 1 to all ones if that's legal on the target.
  auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
  if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
      !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
        Op1.getValueType().getVectorElementType() == MVT::i1))
    return SDValue();

  // If the mask and the type are different sizes, unroll the vector op. This
  // can occur when getSetCCResultType returns something that is different in
  // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
  if (VT.getSizeInBits() != Op1.getValueSizeInBits())
    return SDValue();

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
  Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);

  SDValue NotMask = DAG.getNOT(DL, Mask, VT);

  // Blend: (Op1 & Mask) | (Op2 & ~Mask).
  Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
  Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
  SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
  return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}

/// Expand VP_SELECT as a masked bitwise blend using VP_XOR/VP_AND/VP_OR.
/// Returns a null SDValue if those ops are unavailable or the operands are
/// not i1 vectors (caller unrolls).
SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
  // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
  // do not support it natively.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);
  SDValue EVL = Node->getOperand(3);

  EVT VT = Mask.getValueType();

  // If we can't even use the basic vector operations of
  // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
  if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
    return SDValue();

  // This operation also isn't safe when the operands aren't also booleans.
  if (Op1.getValueType().getVectorElementType() != MVT::i1)
    return SDValue();

  // ~Mask computed as Mask XOR all-ones; the all-ones vector also serves as
  // the (always-true) mask operand of each VP node.
  SDValue Ones = DAG.getAllOnesConstant(DL, VT);
  SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);

  Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
  Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
  return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
}

/// Expand VP_MERGE as a full-length VSELECT whose condition is the original
/// mask ANDed with a "lane index < EVL" mask. Returns a null SDValue if the
/// EVL mask cannot be built efficiently (caller unrolls).
SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
  // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
  // indices less than the EVL/pivot are true. Combine that with the original
  // mask for a full-length mask. Use a full-length VSELECT to select between
  // the true and false values.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);
  SDValue EVL = Node->getOperand(3);

  EVT MaskVT = Mask.getValueType();
  bool IsFixedLen = MaskVT.isFixedLengthVector();

  EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
                                  MaskVT.getVectorElementCount());

  // If we can't construct the EVL mask efficiently, it's better to unroll.
  if ((IsFixedLen &&
       !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) ||
      (!IsFixedLen &&
       (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
        !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT))))
    return SDValue();

  // If using a SETCC would result in a different type than the mask type,
  // unroll.
  if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                             EVLVecVT) != MaskVT)
    return SDValue();

  // EVLMask[i] = (i < EVL), computed as step_vector < splat(EVL).
  SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
  SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
  SDValue EVLMask =
      DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);

  SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
  return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
}

/// Expand VP_SREM/VP_UREM using the identity X % Y == X - (X/Y)*Y.
/// Returns a null SDValue if VP_[SU]DIV/VP_MUL/VP_SUB are unavailable.
SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
  // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
  EVT VT = Node->getValueType(0);

  unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;

  if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
      !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
      !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
    return SDValue();

  SDLoc DL(Node);

  SDValue Dividend = Node->getOperand(0);
  SDValue Divisor = Node->getOperand(1);
  SDValue Mask = Node->getOperand(2);
  SDValue EVL = Node->getOperand(3);

  // X % Y -> X-X/Y*Y
  SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
  SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
  return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
}

/// Expand VP_FNEG by flipping the sign bit with an integer VP_XOR.
/// Returns a null SDValue if VP_XOR is unavailable on the integer type.
SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
  EVT VT = Node->getValueType(0);
  EVT IntVT = VT.changeVectorElementTypeToInteger();

  if (!TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
    return SDValue();

  SDValue Mask = Node->getOperand(1);
  SDValue EVL = Node->getOperand(2);

  SDLoc DL(Node);
  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
  SDValue SignMask = DAG.getConstant(
      APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
  SDValue Xor = DAG.getNode(ISD::VP_XOR, DL, IntVT, Cast, SignMask, Mask, EVL);
  return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
}

/// Expand VP_FABS by clearing the sign bit with an integer VP_AND.
/// Returns a null SDValue if VP_AND is unavailable on the integer type.
SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
  EVT VT = Node->getValueType(0);
  EVT IntVT = VT.changeVectorElementTypeToInteger();

  if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT))
    return SDValue();

  SDValue Mask = Node->getOperand(1);
  SDValue EVL = Node->getOperand(2);

  SDLoc DL(Node);
  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
  // Signed-max constant = all bits set except the sign bit.
  SDValue ClearSignMask = DAG.getConstant(
      APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
  SDValue ClearSign =
      DAG.getNode(ISD::VP_AND, DL, IntVT, Cast, ClearSignMask, Mask, EVL);
  return DAG.getNode(ISD::BITCAST, DL, VT, ClearSign);
}

/// Expand VP_FCOPYSIGN with integer bit operations: take the sign bit of the
/// sign operand and OR it into the magnitude operand with its sign cleared.
/// Returns a null SDValue if the types mismatch or VP_AND/VP_XOR are
/// unavailable.
SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // Only handle the case where magnitude and sign have the same type.
  if (VT != Node->getOperand(1).getValueType())
    return SDValue();

  EVT IntVT = VT.changeVectorElementTypeToInteger();
  if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT) ||
      !TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
    return SDValue();

  SDValue Mask = Node->getOperand(2);
  SDValue EVL = Node->getOperand(3);

  SDLoc DL(Node);
  SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
  SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));

  SDValue SignMask = DAG.getConstant(
      APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
  SDValue SignBit =
      DAG.getNode(ISD::VP_AND, DL, IntVT, Sign, SignMask, Mask, EVL);

  SDValue ClearSignMask = DAG.getConstant(
      APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
  SDValue ClearedSign =
      DAG.getNode(ISD::VP_AND, DL, IntVT, Mag, ClearSignMask, Mask, EVL);

  // The two operands have no set bits in common (Disjoint), so OR acts as an
  // addition-free bit merge.
  SDValue CopiedSign = DAG.getNode(ISD::VP_OR, DL, IntVT, ClearedSign, SignBit,
                                   Mask, EVL, SDNodeFlags::Disjoint);

  return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
}

/// Expand FP_TO_UINT (or its strict variant) via TargetLowering, falling
/// back to scalar unrolling.
void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  // Attempt to expand using TargetLowering.
  SDValue Result, Chain;
  if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
    Results.push_back(Result);
    if (Node->isStrictFPOpcode())
      Results.push_back(Chain);
    return;
  }

  // Otherwise go ahead and unroll.
  if (Node->isStrictFPOpcode()) {
    // Strict ops need chain-aware unrolling.
    UnrollStrictFPOp(Node, Results);
    return;
  }

  Results.push_back(DAG.UnrollVectorOp(Node));
}

/// Expand UINT_TO_FP (or its strict variant), trying the TargetLowering
/// expansion first, then a two-part signed-conversion scheme.
void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  bool IsStrict = Node->isStrictFPOpcode();
  // For strict opcodes operand 0 is the chain; the source follows it.
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc DL(Node);

  // Attempt to expand using TargetLowering.
  SDValue Result;
  SDValue Chain;
  if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    return;
  }

  // Make sure that the SINT_TO_FP and SRL instructions are available.
  // If either the (strict) signed conversion or the shift would itself be
  // expanded, the half-word trick below cannot be built; unroll instead.
  if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) ==
                         TargetLowering::Expand) ||
       (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, SrcVT) ==
                        TargetLowering::Expand)) ||
      TLI.getOperationAction(ISD::SRL, SrcVT) == TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }

    Results.push_back(DAG.UnrollVectorOp(Node));
    return;
  }

  unsigned BW = SrcVT.getScalarSizeInBits();
  assert((BW == 64 || BW == 32) &&
         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");

  // If STRICT_/FMUL is not supported by the target (in case of f16) replace the
  // UINT_TO_FP with a larger float and round to the smaller type
  if ((!IsStrict && !TLI.isOperationLegalOrCustom(ISD::FMUL, DstVT)) ||
      (IsStrict && !TLI.isOperationLegalOrCustom(ISD::STRICT_FMUL, DstVT))) {
    EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64;
    SDValue UIToFP;
    SDValue Result;
    // FP_ROUND's second operand: 0 means the value may change (not exact).
    SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true);
    EVT FloatVecVT = SrcVT.changeVectorElementType(FPVT);
    if (IsStrict) {
      UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other},
                           {Node->getOperand(0), Src});
      Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other},
                           {Node->getOperand(0), UIToFP, TargetZero});
      Results.push_back(Result);
      Results.push_back(Result.getValue(1));
    } else {
      UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src);
      Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero);
      Results.push_back(Result);
    }

    return;
  }

  SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT);

  // Constants to clear the upper part of the word.
  // Notice that we can also use SHL+SHR, but using a constant is slightly
  // faster on x86.
  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
  SDValue HalfWordMask = DAG.getConstant(HWMask, DL, SrcVT);

  // Two to the power of half-word-size.
  SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, DstVT);

  // Clear upper part of LO, lower HI
  SDValue HI = DAG.getNode(ISD::SRL, DL, SrcVT, Src, HalfWord);
  SDValue LO = DAG.getNode(ISD::AND, DL, SrcVT, Src, HalfWordMask);

  if (IsStrict) {
    // Convert hi and lo to floats
    // Convert the hi part back to the upper values
    // TODO: Can any fast-math-flags be set on these nodes?
    SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
                              {Node->getOperand(0), HI});
    fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {DstVT, MVT::Other},
                      {fHI.getValue(1), fHI, TWOHW});
    SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
                              {Node->getOperand(0), LO});

    // Merge the chains of the two independent conversions.
    SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
                             fLO.getValue(1));

    // Add the two halves
    SDValue Result =
        DAG.getNode(ISD::STRICT_FADD, DL, {DstVT, MVT::Other}, {TF, fHI, fLO});

    Results.push_back(Result);
    Results.push_back(Result.getValue(1));
    return;
  }

  // Convert hi and lo to floats
  // Convert the hi part back to the upper values
  // TODO: Can any fast-math-flags be set on these nodes?
  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, HI);
  fHI = DAG.getNode(ISD::FMUL, DL, DstVT, fHI, TWOHW);
  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, LO);

  // Add the two halves
  Results.push_back(DAG.getNode(ISD::FADD, DL, DstVT, fHI, fLO));
}

/// Expand FNEG by XORing the sign bit on the integer bitcast of the operand.
/// Returns a null SDValue when the integer XOR is unsupported (caller falls
/// back to another strategy).
SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
  EVT VT = Node->getValueType(0);
  EVT IntVT = VT.changeVectorElementTypeToInteger();

  if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT))
    return SDValue();

  // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
  if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
      !VT.isScalableVector())
    return SDValue();

  SDLoc DL(Node);
  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
  SDValue SignMask = DAG.getConstant(
      APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
  SDValue Xor = DAG.getNode(ISD::XOR, DL, IntVT, Cast, SignMask);
  return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
}

/// Expand FABS by ANDing the integer bitcast of the operand with ~sign-bit.
SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
  EVT VT = Node->getValueType(0);
  EVT IntVT = VT.changeVectorElementTypeToInteger();

  if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
    return SDValue();

  // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
  if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
      !VT.isScalableVector())
    return SDValue();

  SDLoc DL(Node);
  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
  SDValue ClearSignMask = DAG.getConstant(
      APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
  SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
  return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
}

/// Expand FCOPYSIGN with integer bit operations: (Mag & ~sign) | (Sign & sign).
/// Requires matching operand types and legal-or-custom integer AND/OR.
SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
  EVT VT = Node->getValueType(0);
  EVT IntVT = VT.changeVectorElementTypeToInteger();

  if (VT != Node->getOperand(1).getValueType() ||
      !TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
      !TLI.isOperationLegalOrCustom(ISD::OR, IntVT))
    return SDValue();

  // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
  if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
      !VT.isScalableVector())
    return SDValue();

  SDLoc DL(Node);
  SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
  SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));

  SDValue SignMask = DAG.getConstant(
      APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
  SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);

  SDValue ClearSignMask = DAG.getConstant(
      APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
  SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);

  // The two halves have disjoint bits, so the OR can be marked Disjoint.
  SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit,
                                   SDNodeFlags::Disjoint);

  return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
}

void VectorLegalizer::ExpandFSUB(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results) {
  // For floating-point values, (a-b) is the same as a+(-b).
  // If FNEG is legal, we can defer this to operation legalization where it
  // will be lowered as a+(-b).
  EVT VT = Node->getValueType(0);
  if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
      TLI.isOperationLegalOrCustom(ISD::FADD, VT))
    return; // Defer to LegalizeDAG

  // Otherwise try splitting the vector, and fall back to full unrolling.
  if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
    Results.push_back(Expanded);
    return;
  }

  SDValue Tmp = DAG.UnrollVectorOp(Node);
  Results.push_back(Tmp);
}

/// Expand a (VP_/STRICT_F)SETCC whose condition code is not supported on the
/// operand type: legalize the condition code (possibly swapping operands or
/// inverting), or, failing that, emit a SELECT_CC. Unrolls when the condition
/// code does not need expansion.
void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  bool NeedInvert = false;
  bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
  bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
                  Node->getOpcode() == ISD::STRICT_FSETCCS;
  bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
  // Strict nodes carry a chain as operand 0; shift operand indices past it.
  unsigned Offset = IsStrict ? 1 : 0;

  SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
  SDValue LHS = Node->getOperand(0 + Offset);
  SDValue RHS = Node->getOperand(1 + Offset);
  SDValue CC = Node->getOperand(2 + Offset);

  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();

  // If the condition code itself doesn't need expansion, scalarize instead.
  if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }
    Results.push_back(UnrollVSETCC(Node));
    return;
  }

  SDValue Mask, EVL;
  if (IsVP) {
    Mask = Node->getOperand(3 + Offset);
    EVL = Node->getOperand(4 + Offset);
  }

  SDLoc dl(Node);
  bool Legalized =
      TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
                                EVL, NeedInvert, dl, Chain, IsSignaling);

  if (Legalized) {
    // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
    // condition code, create a new SETCC node.
    if (CC.getNode()) {
      if (IsStrict) {
        LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
                          {Chain, LHS, RHS, CC}, Node->getFlags());
        Chain = LHS.getValue(1);
      } else if (IsVP) {
        LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
                          {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
      } else {
        LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
                          Node->getFlags());
      }
    }

    // If we expanded the SETCC by inverting the condition code, then wrap
    // the existing SETCC in a NOT to restore the intended condition.
    if (NeedInvert) {
      if (!IsVP)
        LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
      else
        LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
    }
  } else {
    assert(!IsStrict && "Don't know how to expand for strict nodes.");

    // Otherwise, SETCC for the given comparison type must be completely
    // illegal; expand it into a SELECT_CC.
    EVT VT = Node->getValueType(0);
    LHS =
        DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
                    DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
                    DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
    LHS->setFlags(Node->getFlags());
  }

  Results.push_back(LHS);
  if (IsStrict)
    Results.push_back(Chain);
}

/// Expand UADDO/USUBO via TargetLowering; pushes the result and overflow flag.
void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
  Results.push_back(Result);
  Results.push_back(Overflow);
}

/// Expand SADDO/SSUBO via TargetLowering; pushes the result and overflow flag.
void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
  Results.push_back(Result);
  Results.push_back(Overflow);
}

/// Expand [SU]MULO via TargetLowering, unrolling if that fails; pushes the
/// result and overflow flag.
void VectorLegalizer::ExpandMULO(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  if (!TLI.expandMULO(Node, Result, Overflow, DAG))
    std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);

  Results.push_back(Result);
  Results.push_back(Overflow);
}

/// Expand a fixed-point divide via TargetLowering; operand 2 is the scale.
void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  SDNode *N = Node;
  if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
          N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
    Results.push_back(Expanded);
}

/// Dispatch expansion of a strict FP node to the dedicated expander where one
/// exists, falling back to chain-preserving unrolling.
void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  }
  if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
    ExpandFP_TO_UINT(Node, Results);
    return;
  }

  if (Node->getOpcode() == ISD::STRICT_FSETCC ||
      Node->getOpcode() == ISD::STRICT_FSETCCS) {
    ExpandSETCC(Node, Results);
    return;
  }

  UnrollStrictFPOp(Node, Results);
}

/// Expand SREM/UREM via TargetLowering, unrolling if that fails.
void VectorLegalizer::ExpandREM(SDNode *Node,
                                SmallVectorImpl<SDValue> &Results) {
  assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
         "Expected REM node");

  SDValue Result;
  if (!TLI.expandREM(Node, Result, DAG))
    Result = DAG.UnrollVectorOp(Node);
  Results.push_back(Result);
}

// Try to expand libm nodes into vector math routine calls. Callers provide the
// LibFunc equivalent of the passed in Node, which is used to lookup mappings
// within TargetLibraryInfo. The only mappings considered are those where the
// result and all operands are the same vector type. While predicated nodes are
// not supported, we will emit calls to masked routines by passing in an all
// true mask.
bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
                                           SmallVectorImpl<SDValue> &Results) {
  // Chain must be propagated but currently strict fp operations are down
  // converted to their none strict counterpart.
  assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");

  const char *LCName = TLI.getLibcallName(LC);
  if (!LCName)
    return false;
  LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n");

  EVT VT = Node->getValueType(0);
  ElementCount VL = VT.getVectorElementCount();

  // Lookup a vector function equivalent to the specified libcall. Prefer
  // unmasked variants but we will generate a mask if need be.
  const TargetLibraryInfo &TLibInfo = DAG.getLibInfo();
  const VecDesc *VD = TLibInfo.getVectorMappingInfo(LCName, VL, false);
  if (!VD)
    VD = TLibInfo.getVectorMappingInfo(LCName, VL, /*Masked=*/true);
  if (!VD)
    return false;

  LLVMContext *Ctx = DAG.getContext();
  Type *Ty = VT.getTypeForEVT(*Ctx);
  Type *ScalarTy = Ty->getScalarType();

  // Construct a scalar function type based on Node's operands.
  SmallVector<Type *, 8> ArgTys;
  for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
    assert(Node->getOperand(i).getValueType() == VT &&
           "Expected matching vector types!");
    ArgTys.push_back(ScalarTy);
  }
  FunctionType *ScalarFTy = FunctionType::get(ScalarTy, ArgTys, false);

  // Generate call information for the vector function.
  const std::string MangledName = VD->getVectorFunctionABIVariantString();
  auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
  if (!OptVFInfo)
    return false;

  LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName()
                    << "\n");

  // Sanity check just in case OptVFInfo has unexpected parameters.
  if (OptVFInfo->Shape.Parameters.size() !=
      Node->getNumOperands() + VD->isMasked())
    return false;

  // Collect vector call operands.

  SDLoc DL(Node);
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.IsSExt = false;
  Entry.IsZExt = false;

  unsigned OpNum = 0;
  for (auto &VFParam : OptVFInfo->Shape.Parameters) {
    // A masked routine takes an extra all-true mask argument; synthesize it.
    if (VFParam.ParamKind == VFParamKind::GlobalPredicate) {
      EVT MaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *Ctx, VT);
      Entry.Node = DAG.getBoolConstant(true, DL, MaskVT, VT);
      Entry.Ty = MaskVT.getTypeForEVT(*Ctx);
      Args.push_back(Entry);
      continue;
    }

    // Only vector operands are supported.
    if (VFParam.ParamKind != VFParamKind::Vector)
      return false;

    Entry.Node = Node->getOperand(OpNum++);
    Entry.Ty = Ty;
    Args.push_back(Entry);
  }

  // Emit a call to the vector function.
  SDValue Callee = DAG.getExternalSymbol(VD->getVectorFnName().data(),
                                         TLI.getPointerTy(DAG.getDataLayout()));
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, Ty, Callee, std::move(Args));

  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
  Results.push_back(CallResult.first);
  return true;
}

/// Try to expand the node to a vector libcall based on the result type.
bool VectorLegalizer::tryExpandVecMathCall(
    SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
    RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
    RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) {
  // Select the libcall matching the element type, then defer to the LC-based
  // overload above.
  RTLIB::Libcall LC = RTLIB::getFPLibCall(
      Node->getValueType(0).getVectorElementType(), Call_F32, Call_F64,
      Call_F80, Call_F128, Call_PPCF128);

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    return false;

  return tryExpandVecMathCall(Node, LC, Results);
}

/// Scalarize a strict FP operation element by element, threading the incoming
/// chain through every scalar op and merging the per-element chains with a
/// TokenFactor. Pushes the rebuilt vector result and the merged chain.
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned NumOpers = Node->getNumOperands();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Strict FP compares produce the target's setcc result type per element.
  EVT TmpEltVT = EltVT;
  if (Node->getOpcode() == ISD::STRICT_FSETCC ||
      Node->getOpcode() == ISD::STRICT_FSETCCS)
    TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
                                      *DAG.getContext(), TmpEltVT);

  EVT ValueVTs[] = {TmpEltVT, MVT::Other};
  SDValue Chain = Node->getOperand(0);
  SDLoc dl(Node);

  SmallVector<SDValue, 32> OpValues;
  SmallVector<SDValue, 32> OpChains;
  for (unsigned i = 0; i < NumElems; ++i) {
    SmallVector<SDValue, 4> Opers;
    SDValue Idx = DAG.getVectorIdxConstant(i, dl);

    // The Chain is the first operand.
    Opers.push_back(Chain);

    // Now process the remaining operands.
    for (unsigned j = 1; j < NumOpers; ++j) {
      SDValue Oper = Node->getOperand(j);
      EVT OperVT = Oper.getValueType();

      // Scalar operands (e.g. condition codes) are passed through unchanged.
      if (OperVT.isVector())
        Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                           OperVT.getVectorElementType(), Oper, Idx);

      Opers.push_back(Oper);
    }

    SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
    SDValue ScalarResult = ScalarOp.getValue(0);
    SDValue ScalarChain = ScalarOp.getValue(1);

    // Widen the boolean compare result to the vector element type
    // (all-ones for true, zero for false).
    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
        Node->getOpcode() == ISD::STRICT_FSETCCS)
      ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
                                   DAG.getAllOnesConstant(dl, EltVT),
                                   DAG.getConstant(0, dl, EltVT));

    OpValues.push_back(ScalarResult);
    OpChains.push_back(ScalarChain);
  }

  SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);

  Results.push_back(Result);
  Results.push_back(NewChain);
}

/// Scalarize a vector SETCC: compare each element pair with a scalar SETCC and
/// select the boolean-true/zero constants into the result vector's elements.
SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
  EVT VT = Node->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  SDValue CC = Node->getOperand(2);
  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
  SDLoc dl(Node);
  SmallVector<SDValue, 8> Ops(NumElems);
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
                                  DAG.getVectorIdxConstant(i, dl));
    SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
                                  DAG.getVectorIdxConstant(i, dl));
    // FIXME: We should use i1 setcc + boolext here, but it causes regressions.
    Ops[i] = DAG.getNode(ISD::SETCC, dl,
                         TLI.getSetCCResultType(DAG.getDataLayout(),
                                                *DAG.getContext(), TmpEltVT),
                         LHSElem, RHSElem, CC);
    Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
                           DAG.getBoolConstant(true, dl, EltVT, VT),
                           DAG.getConstant(0, dl, EltVT));
  }
  return DAG.getBuildVector(VT, dl, Ops);
}

/// Public entry point: run the vector legalizer over this SelectionDAG.
bool SelectionDAG::LegalizeVectors() {
  return VectorLegalizer(*this).Run();
}