1 //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "HexagonISelLowering.h" 10 #include "HexagonRegisterInfo.h" 11 #include "HexagonSubtarget.h" 12 #include "llvm/IR/IntrinsicsHexagon.h" 13 #include "llvm/Support/CommandLine.h" 14 15 using namespace llvm; 16 17 static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen", 18 cl::Hidden, cl::init(16), 19 cl::desc("Lower threshold (in bytes) for widening to HVX vectors")); 20 21 static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 }; 22 static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; 23 static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; 24 static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 }; 25 26 27 void 28 HexagonTargetLowering::initializeHVXLowering() { 29 if (Subtarget.useHVX64BOps()) { 30 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass); 31 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass); 32 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass); 33 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass); 34 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass); 35 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass); 36 // These "short" boolean vector types should be legal because 37 // they will appear as results of vector compares. If they were 38 // not legal, type legalization would try to make them legal 39 // and that would require using operations that do not use or 40 // produce such types. That, in turn, would imply using custom 41 // nodes, which would be unoptimizable by the DAG combiner. 
42 // The idea is to rely on target-independent operations as much 43 // as possible. 44 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass); 45 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); 46 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); 47 } else if (Subtarget.useHVX128BOps()) { 48 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass); 49 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass); 50 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass); 51 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass); 52 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass); 53 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass); 54 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); 55 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); 56 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass); 57 } 58 59 // Set up operation actions. 60 61 bool Use64b = Subtarget.useHVX64BOps(); 62 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128; 63 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128; 64 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; 65 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8; 66 67 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) { 68 setOperationAction(Opc, FromTy, Promote); 69 AddPromotedToType(Opc, FromTy, ToTy); 70 }; 71 72 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32). 73 // Note: v16i1 -> i16 is handled in type legalization instead of op 74 // legalization. 
75 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 76 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 77 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 78 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); 79 setOperationAction(ISD::BITCAST, MVT::v128i1, Custom); 80 setOperationAction(ISD::BITCAST, MVT::i128, Custom); 81 setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); 82 setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); 83 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 84 85 for (MVT T : LegalV) { 86 setIndexedLoadAction(ISD::POST_INC, T, Legal); 87 setIndexedStoreAction(ISD::POST_INC, T, Legal); 88 89 setOperationAction(ISD::AND, T, Legal); 90 setOperationAction(ISD::OR, T, Legal); 91 setOperationAction(ISD::XOR, T, Legal); 92 setOperationAction(ISD::ADD, T, Legal); 93 setOperationAction(ISD::SUB, T, Legal); 94 setOperationAction(ISD::MUL, T, Legal); 95 setOperationAction(ISD::CTPOP, T, Legal); 96 setOperationAction(ISD::CTLZ, T, Legal); 97 setOperationAction(ISD::SELECT, T, Legal); 98 setOperationAction(ISD::SPLAT_VECTOR, T, Legal); 99 if (T != ByteV) { 100 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); 101 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); 102 setOperationAction(ISD::BSWAP, T, Legal); 103 } 104 105 setOperationAction(ISD::SMIN, T, Legal); 106 setOperationAction(ISD::SMAX, T, Legal); 107 if (T.getScalarType() != MVT::i32) { 108 setOperationAction(ISD::UMIN, T, Legal); 109 setOperationAction(ISD::UMAX, T, Legal); 110 } 111 112 setOperationAction(ISD::CTTZ, T, Custom); 113 setOperationAction(ISD::LOAD, T, Custom); 114 setOperationAction(ISD::MLOAD, T, Custom); 115 setOperationAction(ISD::MSTORE, T, Custom); 116 setOperationAction(ISD::MULHS, T, Custom); 117 setOperationAction(ISD::MULHU, T, Custom); 118 setOperationAction(ISD::BUILD_VECTOR, T, Custom); 119 // Make concat-vectors custom to handle concats of more than 2 vectors. 
120 setOperationAction(ISD::CONCAT_VECTORS, T, Custom); 121 setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); 122 setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); 123 setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); 124 setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); 125 setOperationAction(ISD::ANY_EXTEND, T, Custom); 126 setOperationAction(ISD::SIGN_EXTEND, T, Custom); 127 setOperationAction(ISD::ZERO_EXTEND, T, Custom); 128 if (T != ByteV) { 129 setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); 130 // HVX only has shifts of words and halfwords. 131 setOperationAction(ISD::SRA, T, Custom); 132 setOperationAction(ISD::SHL, T, Custom); 133 setOperationAction(ISD::SRL, T, Custom); 134 135 // Promote all shuffles to operate on vectors of bytes. 136 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); 137 } 138 139 setCondCodeAction(ISD::SETNE, T, Expand); 140 setCondCodeAction(ISD::SETLE, T, Expand); 141 setCondCodeAction(ISD::SETGE, T, Expand); 142 setCondCodeAction(ISD::SETLT, T, Expand); 143 setCondCodeAction(ISD::SETULE, T, Expand); 144 setCondCodeAction(ISD::SETUGE, T, Expand); 145 setCondCodeAction(ISD::SETULT, T, Expand); 146 } 147 148 for (MVT T : LegalW) { 149 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- 150 // independent) handling of it would convert it to a load, which is 151 // not always the optimal choice. 152 setOperationAction(ISD::BUILD_VECTOR, T, Custom); 153 // Make concat-vectors custom to handle concats of more than 2 vectors. 154 setOperationAction(ISD::CONCAT_VECTORS, T, Custom); 155 156 // Custom-lower these operations for pairs. Expand them into a concat 157 // of the corresponding operations on individual vectors. 
158 setOperationAction(ISD::ANY_EXTEND, T, Custom); 159 setOperationAction(ISD::SIGN_EXTEND, T, Custom); 160 setOperationAction(ISD::ZERO_EXTEND, T, Custom); 161 setOperationAction(ISD::SIGN_EXTEND_INREG, T, Custom); 162 setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); 163 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); 164 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); 165 setOperationAction(ISD::SPLAT_VECTOR, T, Custom); 166 167 setOperationAction(ISD::LOAD, T, Custom); 168 setOperationAction(ISD::STORE, T, Custom); 169 setOperationAction(ISD::MLOAD, T, Custom); 170 setOperationAction(ISD::MSTORE, T, Custom); 171 setOperationAction(ISD::CTLZ, T, Custom); 172 setOperationAction(ISD::CTTZ, T, Custom); 173 setOperationAction(ISD::CTPOP, T, Custom); 174 175 setOperationAction(ISD::ADD, T, Legal); 176 setOperationAction(ISD::SUB, T, Legal); 177 setOperationAction(ISD::MUL, T, Custom); 178 setOperationAction(ISD::MULHS, T, Custom); 179 setOperationAction(ISD::MULHU, T, Custom); 180 setOperationAction(ISD::AND, T, Custom); 181 setOperationAction(ISD::OR, T, Custom); 182 setOperationAction(ISD::XOR, T, Custom); 183 setOperationAction(ISD::SETCC, T, Custom); 184 setOperationAction(ISD::VSELECT, T, Custom); 185 if (T != ByteW) { 186 setOperationAction(ISD::SRA, T, Custom); 187 setOperationAction(ISD::SHL, T, Custom); 188 setOperationAction(ISD::SRL, T, Custom); 189 190 // Promote all shuffles to operate on vectors of bytes. 191 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); 192 } 193 194 setOperationAction(ISD::SMIN, T, Custom); 195 setOperationAction(ISD::SMAX, T, Custom); 196 if (T.getScalarType() != MVT::i32) { 197 setOperationAction(ISD::UMIN, T, Custom); 198 setOperationAction(ISD::UMAX, T, Custom); 199 } 200 } 201 202 // Boolean vectors. 203 204 for (MVT T : LegalW) { 205 // Boolean types for vector pairs will overlap with the boolean 206 // types for single vectors, e.g. 
207 // v64i8 -> v64i1 (single) 208 // v64i16 -> v64i1 (pair) 209 // Set these actions first, and allow the single actions to overwrite 210 // any duplicates. 211 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements()); 212 setOperationAction(ISD::SETCC, BoolW, Custom); 213 setOperationAction(ISD::AND, BoolW, Custom); 214 setOperationAction(ISD::OR, BoolW, Custom); 215 setOperationAction(ISD::XOR, BoolW, Custom); 216 // Masked load/store takes a mask that may need splitting. 217 setOperationAction(ISD::MLOAD, BoolW, Custom); 218 setOperationAction(ISD::MSTORE, BoolW, Custom); 219 } 220 221 for (MVT T : LegalV) { 222 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements()); 223 setOperationAction(ISD::BUILD_VECTOR, BoolV, Custom); 224 setOperationAction(ISD::CONCAT_VECTORS, BoolV, Custom); 225 setOperationAction(ISD::INSERT_SUBVECTOR, BoolV, Custom); 226 setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom); 227 setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom); 228 setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom); 229 setOperationAction(ISD::SELECT, BoolV, Custom); 230 setOperationAction(ISD::AND, BoolV, Legal); 231 setOperationAction(ISD::OR, BoolV, Legal); 232 setOperationAction(ISD::XOR, BoolV, Legal); 233 } 234 235 if (Use64b) { 236 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32}) 237 setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal); 238 } else { 239 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32}) 240 setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal); 241 } 242 243 // Handle store widening for short vectors. 
244 unsigned HwLen = Subtarget.getVectorLength(); 245 for (MVT ElemTy : Subtarget.getHVXElementTypes()) { 246 if (ElemTy == MVT::i1) 247 continue; 248 int ElemWidth = ElemTy.getFixedSizeInBits(); 249 int MaxElems = (8*HwLen) / ElemWidth; 250 for (int N = 2; N < MaxElems; N *= 2) { 251 MVT VecTy = MVT::getVectorVT(ElemTy, N); 252 auto Action = getPreferredVectorAction(VecTy); 253 if (Action == TargetLoweringBase::TypeWidenVector) { 254 setOperationAction(ISD::LOAD, VecTy, Custom); 255 setOperationAction(ISD::STORE, VecTy, Custom); 256 setOperationAction(ISD::SETCC, VecTy, Custom); 257 setOperationAction(ISD::TRUNCATE, VecTy, Custom); 258 setOperationAction(ISD::ANY_EXTEND, VecTy, Custom); 259 setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom); 260 setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom); 261 262 MVT BoolTy = MVT::getVectorVT(MVT::i1, N); 263 if (!isTypeLegal(BoolTy)) 264 setOperationAction(ISD::SETCC, BoolTy, Custom); 265 } 266 } 267 } 268 269 setTargetDAGCombine(ISD::SPLAT_VECTOR); 270 setTargetDAGCombine(ISD::VSELECT); 271 } 272 273 unsigned 274 HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const { 275 MVT ElemTy = VecTy.getVectorElementType(); 276 unsigned VecLen = VecTy.getVectorNumElements(); 277 unsigned HwLen = Subtarget.getVectorLength(); 278 279 // Split vectors of i1 that exceed byte vector length. 280 if (ElemTy == MVT::i1 && VecLen > HwLen) 281 return TargetLoweringBase::TypeSplitVector; 282 283 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes(); 284 // For shorter vectors of i1, widen them if any of the corresponding 285 // vectors of integers needs to be widened. 286 if (ElemTy == MVT::i1) { 287 for (MVT T : Tys) { 288 assert(T != MVT::i1); 289 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen)); 290 if (A != ~0u) 291 return A; 292 } 293 return ~0u; 294 } 295 296 // If the size of VecTy is at least half of the vector length, 297 // widen the vector. 
Note: the threshold was not selected in 298 // any scientific way. 299 if (llvm::is_contained(Tys, ElemTy)) { 300 unsigned VecWidth = VecTy.getSizeInBits(); 301 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0; 302 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth) 303 return TargetLoweringBase::TypeWidenVector; 304 unsigned HwWidth = 8*HwLen; 305 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth) 306 return TargetLoweringBase::TypeWidenVector; 307 } 308 309 // Defer to default. 310 return ~0u; 311 } 312 313 SDValue 314 HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops, 315 const SDLoc &dl, SelectionDAG &DAG) const { 316 SmallVector<SDValue,4> IntOps; 317 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32)); 318 append_range(IntOps, Ops); 319 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps); 320 } 321 322 MVT 323 HexagonTargetLowering::typeJoin(const TypePair &Tys) const { 324 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType()); 325 326 MVT ElemTy = Tys.first.getVectorElementType(); 327 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() + 328 Tys.second.getVectorNumElements()); 329 } 330 331 HexagonTargetLowering::TypePair 332 HexagonTargetLowering::typeSplit(MVT VecTy) const { 333 assert(VecTy.isVector()); 334 unsigned NumElem = VecTy.getVectorNumElements(); 335 assert((NumElem % 2) == 0 && "Expecting even-sized vector type"); 336 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2); 337 return { HalfTy, HalfTy }; 338 } 339 340 MVT 341 HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const { 342 MVT ElemTy = VecTy.getVectorElementType(); 343 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor); 344 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements()); 345 } 346 347 MVT 348 HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const { 349 MVT ElemTy = VecTy.getVectorElementType(); 350 MVT NewElemTy 
= MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor); 351 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements()); 352 } 353 354 SDValue 355 HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy, 356 SelectionDAG &DAG) const { 357 if (ty(Vec).getVectorElementType() == ElemTy) 358 return Vec; 359 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy); 360 return DAG.getBitcast(CastTy, Vec); 361 } 362 363 SDValue 364 HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl, 365 SelectionDAG &DAG) const { 366 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)), 367 Ops.second, Ops.first); 368 } 369 370 HexagonTargetLowering::VectorPair 371 HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl, 372 SelectionDAG &DAG) const { 373 TypePair Tys = typeSplit(ty(Vec)); 374 if (Vec.getOpcode() == HexagonISD::QCAT) 375 return VectorPair(Vec.getOperand(0), Vec.getOperand(1)); 376 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second); 377 } 378 379 bool 380 HexagonTargetLowering::isHvxSingleTy(MVT Ty) const { 381 return Subtarget.isHVXVectorType(Ty) && 382 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength(); 383 } 384 385 bool 386 HexagonTargetLowering::isHvxPairTy(MVT Ty) const { 387 return Subtarget.isHVXVectorType(Ty) && 388 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength(); 389 } 390 391 bool 392 HexagonTargetLowering::isHvxBoolTy(MVT Ty) const { 393 return Subtarget.isHVXVectorType(Ty, true) && 394 Ty.getVectorElementType() == MVT::i1; 395 } 396 397 bool HexagonTargetLowering::allowsHvxMemoryAccess( 398 MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const { 399 // Bool vectors are excluded by default, but make it explicit to 400 // emphasize that bool vectors cannot be loaded or stored. 401 // Also, disallow double vector stores (to prevent unnecessary 402 // store widening in DAG combiner). 
403 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength()) 404 return false; 405 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false)) 406 return false; 407 if (Fast) 408 *Fast = true; 409 return true; 410 } 411 412 bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses( 413 MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const { 414 if (!Subtarget.isHVXVectorType(VecTy)) 415 return false; 416 // XXX Should this be false? vmemu are a bit slower than vmem. 417 if (Fast) 418 *Fast = true; 419 return true; 420 } 421 422 SDValue 423 HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy, 424 SelectionDAG &DAG) const { 425 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32) 426 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx); 427 428 unsigned ElemWidth = ElemTy.getSizeInBits(); 429 if (ElemWidth == 8) 430 return ElemIdx; 431 432 unsigned L = Log2_32(ElemWidth/8); 433 const SDLoc &dl(ElemIdx); 434 return DAG.getNode(ISD::SHL, dl, MVT::i32, 435 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)}); 436 } 437 438 SDValue 439 HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy, 440 SelectionDAG &DAG) const { 441 unsigned ElemWidth = ElemTy.getSizeInBits(); 442 assert(ElemWidth >= 8 && ElemWidth <= 32); 443 if (ElemWidth == 32) 444 return Idx; 445 446 if (ty(Idx) != MVT::i32) 447 Idx = DAG.getBitcast(MVT::i32, Idx); 448 const SDLoc &dl(Idx); 449 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32); 450 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask}); 451 return SubIdx; 452 } 453 454 SDValue 455 HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0, 456 SDValue Op1, ArrayRef<int> Mask, 457 SelectionDAG &DAG) const { 458 MVT OpTy = ty(Op0); 459 assert(OpTy == ty(Op1)); 460 461 MVT ElemTy = OpTy.getVectorElementType(); 462 if (ElemTy == MVT::i8) 463 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask); 464 assert(ElemTy.getSizeInBits() >= 8); 465 466 MVT ResTy = tyVector(OpTy, MVT::i8); 467 
unsigned ElemSize = ElemTy.getSizeInBits() / 8; 468 469 SmallVector<int,128> ByteMask; 470 for (int M : Mask) { 471 if (M < 0) { 472 for (unsigned I = 0; I != ElemSize; ++I) 473 ByteMask.push_back(-1); 474 } else { 475 int NewM = M*ElemSize; 476 for (unsigned I = 0; I != ElemSize; ++I) 477 ByteMask.push_back(NewM+I); 478 } 479 } 480 assert(ResTy.getVectorNumElements() == ByteMask.size()); 481 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG), 482 opCastElem(Op1, MVT::i8, DAG), ByteMask); 483 } 484 485 SDValue 486 HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values, 487 const SDLoc &dl, MVT VecTy, 488 SelectionDAG &DAG) const { 489 unsigned VecLen = Values.size(); 490 MachineFunction &MF = DAG.getMachineFunction(); 491 MVT ElemTy = VecTy.getVectorElementType(); 492 unsigned ElemWidth = ElemTy.getSizeInBits(); 493 unsigned HwLen = Subtarget.getVectorLength(); 494 495 unsigned ElemSize = ElemWidth / 8; 496 assert(ElemSize*VecLen == HwLen); 497 SmallVector<SDValue,32> Words; 498 499 if (VecTy.getVectorElementType() != MVT::i32) { 500 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size"); 501 unsigned OpsPerWord = (ElemSize == 1) ? 
4 : 2; 502 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord); 503 for (unsigned i = 0; i != VecLen; i += OpsPerWord) { 504 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG); 505 Words.push_back(DAG.getBitcast(MVT::i32, W)); 506 } 507 } else { 508 Words.assign(Values.begin(), Values.end()); 509 } 510 511 unsigned NumWords = Words.size(); 512 bool IsSplat = true, IsUndef = true; 513 SDValue SplatV; 514 for (unsigned i = 0; i != NumWords && IsSplat; ++i) { 515 if (isUndef(Words[i])) 516 continue; 517 IsUndef = false; 518 if (!SplatV.getNode()) 519 SplatV = Words[i]; 520 else if (SplatV != Words[i]) 521 IsSplat = false; 522 } 523 if (IsUndef) 524 return DAG.getUNDEF(VecTy); 525 if (IsSplat) { 526 assert(SplatV.getNode()); 527 auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode()); 528 if (IdxN && IdxN->isNullValue()) 529 return getZero(dl, VecTy, DAG); 530 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4); 531 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV); 532 return DAG.getBitcast(VecTy, S); 533 } 534 535 // Delay recognizing constant vectors until here, so that we can generate 536 // a vsplat. 537 SmallVector<ConstantInt*, 128> Consts(VecLen); 538 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts); 539 if (AllConst) { 540 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(), 541 (Constant**)Consts.end()); 542 Constant *CV = ConstantVector::get(Tmp); 543 Align Alignment(HwLen); 544 SDValue CP = 545 LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG); 546 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP, 547 MachinePointerInfo::getConstantPool(MF), Alignment); 548 } 549 550 // A special case is a situation where the vector is built entirely from 551 // elements extracted from another vector. 
This could be done via a shuffle 552 // more efficiently, but typically, the size of the source vector will not 553 // match the size of the vector being built (which precludes the use of a 554 // shuffle directly). 555 // This only handles a single source vector, and the vector being built 556 // should be of a sub-vector type of the source vector type. 557 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec, 558 SmallVectorImpl<int> &SrcIdx) { 559 SDValue Vec; 560 for (SDValue V : Values) { 561 if (isUndef(V)) { 562 SrcIdx.push_back(-1); 563 continue; 564 } 565 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 566 return false; 567 // All extracts should come from the same vector. 568 SDValue T = V.getOperand(0); 569 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode()) 570 return false; 571 Vec = T; 572 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1)); 573 if (C == nullptr) 574 return false; 575 int I = C->getSExtValue(); 576 assert(I >= 0 && "Negative element index"); 577 SrcIdx.push_back(I); 578 } 579 SrcVec = Vec; 580 return true; 581 }; 582 583 SmallVector<int,128> ExtIdx; 584 SDValue ExtVec; 585 if (IsBuildFromExtracts(ExtVec, ExtIdx)) { 586 MVT ExtTy = ty(ExtVec); 587 unsigned ExtLen = ExtTy.getVectorNumElements(); 588 if (ExtLen == VecLen || ExtLen == 2*VecLen) { 589 // Construct a new shuffle mask that will produce a vector with the same 590 // number of elements as the input vector, and such that the vector we 591 // want will be the initial subvector of it. 592 SmallVector<int,128> Mask; 593 BitVector Used(ExtLen); 594 595 for (int M : ExtIdx) { 596 Mask.push_back(M); 597 if (M >= 0) 598 Used.set(M); 599 } 600 // Fill the rest of the mask with the unused elements of ExtVec in hopes 601 // that it will result in a permutation of ExtVec's elements. It's still 602 // fine if it doesn't (e.g. if undefs are present, or elements are 603 // repeated), but permutations can always be done efficiently via vdelta 604 // and vrdelta. 
605 for (unsigned I = 0; I != ExtLen; ++I) { 606 if (Mask.size() == ExtLen) 607 break; 608 if (!Used.test(I)) 609 Mask.push_back(I); 610 } 611 612 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec, 613 DAG.getUNDEF(ExtTy), Mask); 614 if (ExtLen == VecLen) 615 return S; 616 return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S); 617 } 618 } 619 620 // Construct two halves in parallel, then or them together. 621 assert(4*Words.size() == Subtarget.getVectorLength()); 622 SDValue HalfV0 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG); 623 SDValue HalfV1 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG); 624 SDValue S = DAG.getConstant(4, dl, MVT::i32); 625 for (unsigned i = 0; i != NumWords/2; ++i) { 626 SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, 627 {HalfV0, Words[i]}); 628 SDValue M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, 629 {HalfV1, Words[i+NumWords/2]}); 630 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, S}); 631 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, S}); 632 } 633 634 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, 635 {HalfV0, DAG.getConstant(HwLen/2, dl, MVT::i32)}); 636 SDValue DstV = DAG.getNode(ISD::OR, dl, VecTy, {HalfV0, HalfV1}); 637 return DstV; 638 } 639 640 SDValue 641 HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl, 642 unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const { 643 MVT PredTy = ty(PredV); 644 unsigned HwLen = Subtarget.getVectorLength(); 645 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); 646 647 if (Subtarget.isHVXVectorType(PredTy, true)) { 648 // Move the vector predicate SubV to a vector register, and scale it 649 // down to match the representation (bytes per type element) that VecV 650 // uses. The scaling down will pick every 2nd or 4th (every Scale-th 651 // in general) element and put them at the front of the resulting 652 // vector. This subvector will then be inserted into the Q2V of VecV. 
653 // To avoid having an operation that generates an illegal type (short 654 // vector), generate a full size vector. 655 // 656 SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV); 657 SmallVector<int,128> Mask(HwLen); 658 // Scale = BitBytes(PredV) / Given BitBytes. 659 unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes); 660 unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes; 661 662 for (unsigned i = 0; i != HwLen; ++i) { 663 unsigned Num = i % Scale; 664 unsigned Off = i / Scale; 665 Mask[BlockLen*Num + Off] = i; 666 } 667 SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask); 668 if (!ZeroFill) 669 return S; 670 // Fill the bytes beyond BlockLen with 0s. 671 // V6_pred_scalar2 cannot fill the entire predicate, so it only works 672 // when BlockLen < HwLen. 673 assert(BlockLen < HwLen && "vsetq(v1) prerequisite"); 674 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); 675 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, 676 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); 677 SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q); 678 return DAG.getNode(ISD::AND, dl, ByteTy, S, M); 679 } 680 681 // Make sure that this is a valid scalar predicate. 682 assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1); 683 684 unsigned Bytes = 8 / PredTy.getVectorNumElements(); 685 SmallVector<SDValue,4> Words[2]; 686 unsigned IdxW = 0; 687 688 auto Lo32 = [&DAG, &dl] (SDValue P) { 689 return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, P); 690 }; 691 auto Hi32 = [&DAG, &dl] (SDValue P) { 692 return DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, P); 693 }; 694 695 SDValue W0 = isUndef(PredV) 696 ? 
DAG.getUNDEF(MVT::i64) 697 : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV); 698 Words[IdxW].push_back(Hi32(W0)); 699 Words[IdxW].push_back(Lo32(W0)); 700 701 while (Bytes < BitBytes) { 702 IdxW ^= 1; 703 Words[IdxW].clear(); 704 705 if (Bytes < 4) { 706 for (const SDValue &W : Words[IdxW ^ 1]) { 707 SDValue T = expandPredicate(W, dl, DAG); 708 Words[IdxW].push_back(Hi32(T)); 709 Words[IdxW].push_back(Lo32(T)); 710 } 711 } else { 712 for (const SDValue &W : Words[IdxW ^ 1]) { 713 Words[IdxW].push_back(W); 714 Words[IdxW].push_back(W); 715 } 716 } 717 Bytes *= 2; 718 } 719 720 assert(Bytes == BitBytes); 721 722 SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy); 723 SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32); 724 for (const SDValue &W : Words[IdxW]) { 725 Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4); 726 Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W); 727 } 728 729 return Vec; 730 } 731 732 SDValue 733 HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values, 734 const SDLoc &dl, MVT VecTy, 735 SelectionDAG &DAG) const { 736 // Construct a vector V of bytes, such that a comparison V >u 0 would 737 // produce the required vector predicate. 738 unsigned VecLen = Values.size(); 739 unsigned HwLen = Subtarget.getVectorLength(); 740 assert(VecLen <= HwLen || VecLen == 8*HwLen); 741 SmallVector<SDValue,128> Bytes; 742 bool AllT = true, AllF = true; 743 744 auto IsTrue = [] (SDValue V) { 745 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode())) 746 return !N->isNullValue(); 747 return false; 748 }; 749 auto IsFalse = [] (SDValue V) { 750 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode())) 751 return N->isNullValue(); 752 return false; 753 }; 754 755 if (VecLen <= HwLen) { 756 // In the hardware, each bit of a vector predicate corresponds to a byte 757 // of a vector register. Calculate how many bytes does a bit of VecTy 758 // correspond to. 
759 assert(HwLen % VecLen == 0); 760 unsigned BitBytes = HwLen / VecLen; 761 for (SDValue V : Values) { 762 AllT &= IsTrue(V); 763 AllF &= IsFalse(V); 764 765 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8) 766 : DAG.getUNDEF(MVT::i8); 767 for (unsigned B = 0; B != BitBytes; ++B) 768 Bytes.push_back(Ext); 769 } 770 } else { 771 // There are as many i1 values, as there are bits in a vector register. 772 // Divide the values into groups of 8 and check that each group consists 773 // of the same value (ignoring undefs). 774 for (unsigned I = 0; I != VecLen; I += 8) { 775 unsigned B = 0; 776 // Find the first non-undef value in this group. 777 for (; B != 8; ++B) { 778 if (!Values[I+B].isUndef()) 779 break; 780 } 781 SDValue F = Values[I+B]; 782 AllT &= IsTrue(F); 783 AllF &= IsFalse(F); 784 785 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8) 786 : DAG.getUNDEF(MVT::i8); 787 Bytes.push_back(Ext); 788 // Verify that the rest of values in the group are the same as the 789 // first. 
for (; B != 8; ++B)
        assert(Values[I+B].isUndef() || Values[I+B] == F);
    }
  }

  if (AllT)
    return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
  if (AllF)
    return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}

// Extract the element at (element) index IdxV from the HVX vector VecV.
// The 32-bit word containing the element is fetched with VEXTRACTW, using
// the byte index computed from IdxV. For i32 elements that word is the
// result; for narrower elements the element is picked out of the word.
// The element width must be between 8 and 32 bits. ResTy is not referenced
// by this function.
SDValue
HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT ElemTy = ty(VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  // ElemWidth is only used by the assertion above.
  (void)ElemWidth;

  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
  SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                               {VecV, ByteIdx});
  if (ElemTy == MVT::i32)
    return ExWord;

  // Have an extracted word, need to extract the smaller element out of it.
  // 1. Extract the bits of (the original) IdxV that correspond to the index
  //    of the desired element in the 32-bit word.
  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
  // 2. Extract the element from the word.
  SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
  return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
}

// Extract the i1 element at index IdxV from the HVX vector predicate VecV.
// The predicate is converted to a vector of HwLen bytes with Q2V, the byte
// at the scaled index is extracted, and that byte is compared against 0
// with C2_cmpgtui to produce the i1 result. Only ResTy == MVT::i1 is
// supported.
SDValue
HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  // Implement other return types if necessary.
  assert(ResTy == MVT::i1);

  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);

  // Scale is the number of bytes of ByteVec per element of VecV; multiply
  // the element index by it to get the index into ByteVec.
  unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
  SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
  IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);

  SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
  SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
  return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
}

// Insert the scalar ValV into the HVX vector VecV at (element) index IdxV
// and return the updated vector. Insertion is done one 32-bit word at a
// time (see InsertWord below). For elements narrower than 32 bits, the word
// containing the element is extracted, the element is inserted into that
// word, and the word is written back. The element width must be between
// 8 and 32 bits.
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT ElemTy = ty(VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  // ElemWidth is only used by the assertion above.
  (void)ElemWidth;

  // Insert the 32-bit value ValV into the word of VecV that contains byte
  // ByteIdxV: rotate the vector by the word-aligned byte index
  // (ByteIdxV & -4), apply VINSERTW0, then rotate by HwLen minus that amount
  // so that the two rotations add up to a full turn.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32,
                                {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)});
    SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
    SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
    SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
  SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
                                {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
  SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
                                     dl, MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(ty(Ext), ElemTy);
  SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
                             ValV, SubIdx, dl, ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}

// Insert the i1 value ValV into the HVX vector predicate VecV at index IdxV
// and return the updated predicate. The predicate is converted to a vector
// of HwLen bytes with Q2V, the sign-extended value is inserted at the scaled
// byte index with insertHvxElementReg, and the result is converted back to
// a predicate with V2Q.
SDValue
HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);

  // Scale is the number of bytes of ByteVec per element of VecV.
  unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
  SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
  IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
  ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);

  SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
  return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
}

// Extract a subvector of type ResTy from the HVX vector or vector pair VecV,
// starting at element index IdxV. IdxV must be a constant (it is cast to
// ConstantSDNode unconditionally).
SDValue
HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // If the source vector is a vector pair, get the single vector containing
  // the subvector of interest. The subvector will never overlap two single
  // vectors.
921 if (isHvxPairTy(VecTy)) { 922 unsigned SubIdx; 923 if (Idx * ElemWidth >= 8*HwLen) { 924 SubIdx = Hexagon::vsub_hi; 925 Idx -= VecTy.getVectorNumElements() / 2; 926 } else { 927 SubIdx = Hexagon::vsub_lo; 928 } 929 VecTy = typeSplit(VecTy).first; 930 VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV); 931 if (VecTy == ResTy) 932 return VecV; 933 } 934 935 // The only meaningful subvectors of a single HVX vector are those that 936 // fit in a scalar register. 937 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64); 938 939 MVT WordTy = tyVector(VecTy, MVT::i32); 940 SDValue WordVec = DAG.getBitcast(WordTy, VecV); 941 unsigned WordIdx = (Idx*ElemWidth) / 32; 942 943 SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32); 944 SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG); 945 if (ResTy.getSizeInBits() == 32) 946 return DAG.getBitcast(ResTy, W0); 947 948 SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32); 949 SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG); 950 SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {W1, W0}); 951 return DAG.getBitcast(ResTy, WW); 952 } 953 954 SDValue 955 HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, 956 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { 957 MVT VecTy = ty(VecV); 958 unsigned HwLen = Subtarget.getVectorLength(); 959 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); 960 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); 961 // IdxV is required to be a constant. 962 unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); 963 964 unsigned ResLen = ResTy.getVectorNumElements(); 965 unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); 966 unsigned Offset = Idx * BitBytes; 967 SDValue Undef = DAG.getUNDEF(ByteTy); 968 SmallVector<int,128> Mask; 969 970 if (Subtarget.isHVXVectorType(ResTy, true)) { 971 // Converting between two vector predicates. 
Since the result is shorter 972 // than the source, it will correspond to a vector predicate with the 973 // relevant bits replicated. The replication count is the ratio of the 974 // source and target vector lengths. 975 unsigned Rep = VecTy.getVectorNumElements() / ResLen; 976 assert(isPowerOf2_32(Rep) && HwLen % Rep == 0); 977 for (unsigned i = 0; i != HwLen/Rep; ++i) { 978 for (unsigned j = 0; j != Rep; ++j) 979 Mask.push_back(i + Offset); 980 } 981 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); 982 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV); 983 } 984 985 // Converting between a vector predicate and a scalar predicate. In the 986 // vector predicate, a group of BitBytes bits will correspond to a single 987 // i1 element of the source vector type. Those bits will all have the same 988 // value. The same will be true for ByteVec, where each byte corresponds 989 // to a bit in the vector predicate. 990 // The algorithm is to traverse the ByteVec, going over the i1 values from 991 // the source vector, and generate the corresponding representation in an 992 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the 993 // elements so that the interesting 8 bytes will be in the low end of the 994 // vector. 995 unsigned Rep = 8 / ResLen; 996 // Make sure the output fill the entire vector register, so repeat the 997 // 8-byte groups as many times as necessary. 998 for (unsigned r = 0; r != HwLen/ResLen; ++r) { 999 // This will generate the indexes of the 8 interesting bytes. 1000 for (unsigned i = 0; i != ResLen; ++i) { 1001 for (unsigned j = 0; j != Rep; ++j) 1002 Mask.push_back(Offset + i*BitBytes); 1003 } 1004 } 1005 1006 SDValue Zero = getZero(dl, MVT::i32, DAG); 1007 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); 1008 // Combine the two low words from ShuffV into a v8i8, and byte-compare 1009 // them against 0. 
1010 SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero}); 1011 SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, 1012 {ShuffV, DAG.getConstant(4, dl, MVT::i32)}); 1013 SDValue Vec64 = DAG.getNode(HexagonISD::COMBINE, dl, MVT::v8i8, {W1, W0}); 1014 return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy, 1015 {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG); 1016 } 1017 1018 SDValue 1019 HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV, 1020 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { 1021 MVT VecTy = ty(VecV); 1022 MVT SubTy = ty(SubV); 1023 unsigned HwLen = Subtarget.getVectorLength(); 1024 MVT ElemTy = VecTy.getVectorElementType(); 1025 unsigned ElemWidth = ElemTy.getSizeInBits(); 1026 1027 bool IsPair = isHvxPairTy(VecTy); 1028 MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth); 1029 // The two single vectors that VecV consists of, if it's a pair. 1030 SDValue V0, V1; 1031 SDValue SingleV = VecV; 1032 SDValue PickHi; 1033 1034 if (IsPair) { 1035 V0 = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, VecV); 1036 V1 = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, VecV); 1037 1038 SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(), 1039 dl, MVT::i32); 1040 PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT); 1041 if (isHvxSingleTy(SubTy)) { 1042 if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) { 1043 unsigned Idx = CN->getZExtValue(); 1044 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2); 1045 unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi; 1046 return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV); 1047 } 1048 // If IdxV is not a constant, generate the two variants: with the 1049 // SubV as the high and as the low subregister, and select the right 1050 // pair based on the IdxV. 
1051 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1}); 1052 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV}); 1053 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); 1054 } 1055 // The subvector being inserted must be entirely contained in one of 1056 // the vectors V0 or V1. Set SingleV to the correct one, and update 1057 // IdxV to be the index relative to the beginning of that vector. 1058 SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV); 1059 IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV); 1060 SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0); 1061 } 1062 1063 // The only meaningful subvectors of a single HVX vector are those that 1064 // fit in a scalar register. 1065 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64); 1066 // Convert IdxV to be index in bytes. 1067 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode()); 1068 if (!IdxN || !IdxN->isNullValue()) { 1069 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, 1070 DAG.getConstant(ElemWidth/8, dl, MVT::i32)); 1071 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV); 1072 } 1073 // When inserting a single word, the rotation back to the original position 1074 // would be by HwLen-Idx, but if two words are inserted, it will need to be 1075 // by (HwLen-4)-Idx. 
1076 unsigned RolBase = HwLen; 1077 if (VecTy.getSizeInBits() == 32) { 1078 SDValue V = DAG.getBitcast(MVT::i32, SubV); 1079 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, V); 1080 } else { 1081 SDValue V = DAG.getBitcast(MVT::i64, SubV); 1082 SDValue R0 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, V); 1083 SDValue R1 = DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, V); 1084 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0); 1085 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, 1086 DAG.getConstant(4, dl, MVT::i32)); 1087 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1); 1088 RolBase = HwLen-4; 1089 } 1090 // If the vector wasn't ror'ed, don't ror it back. 1091 if (RolBase != 4 || !IdxN || !IdxN->isNullValue()) { 1092 SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32, 1093 DAG.getConstant(RolBase, dl, MVT::i32), IdxV); 1094 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV); 1095 } 1096 1097 if (IsPair) { 1098 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1}); 1099 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV}); 1100 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); 1101 } 1102 return SingleV; 1103 } 1104 1105 SDValue 1106 HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV, 1107 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { 1108 MVT VecTy = ty(VecV); 1109 MVT SubTy = ty(SubV); 1110 assert(Subtarget.isHVXVectorType(VecTy, true)); 1111 // VecV is an HVX vector predicate. SubV may be either an HVX vector 1112 // predicate as well, or it can be a scalar predicate. 
unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Scale: how many subvectors of SubTy fit in VecTy.
  // BitBytes: number of bytes in the byte-vector form per element of VecTy.
  // BlockLen: number of bytes in the byte-vector form covered by SubV.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  unsigned BitBytes = HwLen / VecLen;
  unsigned BlockLen = HwLen / Scale;

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  // The rotation is skipped only when the index is a constant zero.
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isNullValue()) {
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isNullValue()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}

// Extend the HVX vector predicate VecV to the HVX vector type ResTy.
// When ZeroExt is false the result is a Q2V of the predicate; when it is
// true the result is a select between a splat of 1 and a zero vector,
// controlled by VecV.
SDValue
HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
      MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
  // Sign- and any-extending of a vector predicate to a vector register is
  // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
  // a vector of 1s (where the 1s are of type matching the vector type).
  assert(Subtarget.isHVXVectorType(ResTy));
  if (!ZeroExt)
    return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);

  assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
  SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                             DAG.getConstant(1, dl, MVT::i32));
  SDValue False = getZero(dl, ResTy, DAG);
  return DAG.getSelect(dl, ResTy, VecV, True, False);
}

// Pack the bits of the HVX vector predicate VecQ into the low bits of a
// vector register, and return that register bitcast to ResTy.
SDValue
HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
      MVT ResTy, SelectionDAG &DAG) const {
  // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
  // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
  // vector register. The remaining bits of the vector register are
  // unspecified.

  MachineFunction &MF = DAG.getMachineFunction();
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT PredTy = ty(VecQ);
  unsigned PredLen = PredTy.getVectorNumElements();
  assert(HwLen % PredLen == 0);
  // Integer vector type with as many elements as the predicate, and with
  // the same total size as ByteTy (HwLen bytes).
  MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);

  Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
  SmallVector<Constant*, 128> Tmp;
  // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
  // These are bytes with the LSB rotated left with respect to their index.
  for (unsigned i = 0; i != HwLen/8; ++i) {
    for (unsigned j = 0; j != 8; ++j)
      Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
  }
  // Materialize the byte pattern through a constant-pool load, aligned to
  // the vector length.
  Constant *CV = ConstantVector::get(Tmp);
  Align Alignment(HwLen);
  SDValue CP =
      LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
  SDValue Bytes =
      DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
                  MachinePointerInfo::getConstantPool(MF), Alignment);

  // Select the bytes that correspond to true bits in the vector predicate.
  SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
      getZero(dl, VecTy, DAG));
  // Calculate the OR of all bytes in each group of 8. That will compress
  // all the individual bits into a single byte.
  // First, OR groups of 4, via vrmpy with 0x01010101.
  SDValue All1 =
      DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
  SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
  // Then rotate the accumulated vector by 4 bytes, and do the final OR.
  SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
      {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
  SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});

  // Pick every 8th byte and coalesce them at the beginning of the output.
  // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
  // byte and so on.
SmallVector<int,128> Mask;
  for (unsigned i = 0; i != HwLen; ++i)
    Mask.push_back((8*i) % HwLen + i/(HwLen/8));
  SDValue Collect =
      DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
  return DAG.getBitcast(ResTy, Collect);
}

// Lower a build-vector producing an HVX type. Boolean vectors are built by
// buildHvxVectorPred. Vectors that are twice the size of a vector register
// (16*HwLen bits) are built as two single vectors from the two halves of
// the operand list, and then concatenated. Everything else is built by
// buildHvxVectorReg directly.
SDValue
HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
      const {
  const SDLoc &dl(Op);
  MVT VecTy = ty(Op);

  // Collect the operands of the node into a contiguous array.
  unsigned Size = Op.getNumOperands();
  SmallVector<SDValue,128> Ops;
  for (unsigned i = 0; i != Size; ++i)
    Ops.push_back(Op.getOperand(i));

  if (VecTy.getVectorElementType() == MVT::i1)
    return buildHvxVectorPred(Ops, dl, VecTy, DAG);

  if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
    ArrayRef<SDValue> A(Ops);
    MVT SingleTy = typeSplit(VecTy).first;
    SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
    SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
  }

  return buildHvxVectorReg(Ops, dl, VecTy, DAG);
}

// Lower a vector concatenation producing an HVX type.
//  - Non-boolean vectors: a concatenation of two operands is returned
//    unchanged; any other operand count is expanded into a build-vector of
//    the individual elements.
//  - Boolean vectors whose operands are HVX predicates: emitted as QCAT
//    (for more than two operands, each half is concatenated first).
//  - Boolean vectors whose operands are scalar predicates: assembled in a
//    byte vector by rotating and OR-ing in the expanded operands, then
//    converted to a predicate with V2Q.
SDValue
HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
      const {
  // Vector concatenation of two integer (non-bool) vectors does not need
  // special lowering. Custom-lower concats of bool vectors and expand
  // concats of more than 2 vectors.
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  unsigned NumOp = Op.getNumOperands();
  if (VecTy.getVectorElementType() != MVT::i1) {
    if (NumOp == 2)
      return Op;
    // Expand the other cases into a build-vector.
    SmallVector<SDValue,8> Elems;
    for (SDValue V : Op.getNode()->ops())
      DAG.ExtractVectorElements(V, Elems);
    // A vector of i16 will be broken up into a build_vector of i16's.
    // This is a problem, since at the time of operation legalization,
    // all operations are expected to be type-legalized, and i16 is not
    // a legal type. If any of the extracted elements is not of a valid
    // type, sign-extend it to a valid one.
    for (unsigned i = 0, e = Elems.size(); i != e; ++i) {
      SDValue V = Elems[i];
      MVT Ty = ty(V);
      if (!isTypeLegal(Ty)) {
        EVT NTy = getTypeToTransformTo(*DAG.getContext(), Ty);
        if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
          // Redo the extract with the legal result type, and sign-extend
          // from the original element type within it.
          Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
                                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
                                             V.getOperand(0), V.getOperand(1)),
                                 DAG.getValueType(Ty));
          continue;
        }
        // A few less complicated cases.
        switch (V.getOpcode()) {
          case ISD::Constant:
            Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy);
            break;
          case ISD::UNDEF:
            Elems[i] = DAG.getUNDEF(NTy);
            break;
          case ISD::TRUNCATE:
            // NOTE(review): this uses the truncate's source operand as is,
            // which presumably already has type NTy -- confirm.
            Elems[i] = V.getOperand(0);
            break;
          default:
            llvm_unreachable("Unexpected vector element");
        }
      }
    }
    return DAG.getBuildVector(VecTy, dl, Elems);
  }

  assert(VecTy.getVectorElementType() == MVT::i1);
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);

  SDValue Op0 = Op.getOperand(0);

  // If the operands are HVX types (i.e. not scalar predicates), then
  // defer the concatenation, and create QCAT instead.
  if (Subtarget.isHVXVectorType(ty(Op0), true)) {
    if (NumOp == 2)
      return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));

    ArrayRef<SDUse> U(Op.getNode()->ops());
    SmallVector<SDValue,4> SV(U.begin(), U.end());
    ArrayRef<SDValue> Ops(SV);

    // Concatenate each half of the operands separately, then join the two
    // halves with a single QCAT.
    MVT HalfTy = typeSplit(VecTy).first;
    SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
                             Ops.take_front(NumOp/2));
    SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
                             Ops.take_back(NumOp/2));
    return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
  }

  // Count how many bytes (in a vector register) each bit in VecTy
  // corresponds to.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();

  SmallVector<SDValue,8> Prefixes;
  for (SDValue V : Op.getNode()->op_values()) {
    SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
    Prefixes.push_back(P);
  }

  // Accumulate the operands from the last to the first: before each OR the
  // partial result is rotated by the byte length of one operand
  // (InpLen*BitBytes).
  unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32);
  SDValue Res = getZero(dl, ByteTy, DAG);
  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
    Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
    Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
}

SDValue
HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
      const {
  // Change the type of the extracted element to i32.
1351 SDValue VecV = Op.getOperand(0); 1352 MVT ElemTy = ty(VecV).getVectorElementType(); 1353 const SDLoc &dl(Op); 1354 SDValue IdxV = Op.getOperand(1); 1355 if (ElemTy == MVT::i1) 1356 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG); 1357 1358 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG); 1359 } 1360 1361 SDValue 1362 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) 1363 const { 1364 const SDLoc &dl(Op); 1365 SDValue VecV = Op.getOperand(0); 1366 SDValue ValV = Op.getOperand(1); 1367 SDValue IdxV = Op.getOperand(2); 1368 MVT ElemTy = ty(VecV).getVectorElementType(); 1369 if (ElemTy == MVT::i1) 1370 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG); 1371 1372 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG); 1373 } 1374 1375 SDValue 1376 HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) 1377 const { 1378 SDValue SrcV = Op.getOperand(0); 1379 MVT SrcTy = ty(SrcV); 1380 MVT DstTy = ty(Op); 1381 SDValue IdxV = Op.getOperand(1); 1382 unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); 1383 assert(Idx % DstTy.getVectorNumElements() == 0); 1384 (void)Idx; 1385 const SDLoc &dl(Op); 1386 1387 MVT ElemTy = SrcTy.getVectorElementType(); 1388 if (ElemTy == MVT::i1) 1389 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG); 1390 1391 return extractHvxSubvectorReg(SrcV, IdxV, dl, DstTy, DAG); 1392 } 1393 1394 SDValue 1395 HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) 1396 const { 1397 // Idx does not need to be a constant. 
1398 SDValue VecV = Op.getOperand(0); 1399 SDValue ValV = Op.getOperand(1); 1400 SDValue IdxV = Op.getOperand(2); 1401 1402 const SDLoc &dl(Op); 1403 MVT VecTy = ty(VecV); 1404 MVT ElemTy = VecTy.getVectorElementType(); 1405 if (ElemTy == MVT::i1) 1406 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG); 1407 1408 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG); 1409 } 1410 1411 SDValue 1412 HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const { 1413 // Lower any-extends of boolean vectors to sign-extends, since they 1414 // translate directly to Q2V. Zero-extending could also be done equally 1415 // fast, but Q2V is used/recognized in more places. 1416 // For all other vectors, use zero-extend. 1417 MVT ResTy = ty(Op); 1418 SDValue InpV = Op.getOperand(0); 1419 MVT ElemTy = ty(InpV).getVectorElementType(); 1420 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) 1421 return LowerHvxSignExt(Op, DAG); 1422 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV); 1423 } 1424 1425 SDValue 1426 HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const { 1427 MVT ResTy = ty(Op); 1428 SDValue InpV = Op.getOperand(0); 1429 MVT ElemTy = ty(InpV).getVectorElementType(); 1430 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) 1431 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG); 1432 return Op; 1433 } 1434 1435 SDValue 1436 HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const { 1437 MVT ResTy = ty(Op); 1438 SDValue InpV = Op.getOperand(0); 1439 MVT ElemTy = ty(InpV).getVectorElementType(); 1440 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) 1441 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG); 1442 return Op; 1443 } 1444 1445 SDValue 1446 HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const { 1447 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight): 1448 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1)) 
const SDLoc &dl(Op);
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  assert(ResTy == ty(InpV));

  // Calculate the vectors of 1 and bitwidth(x).
  MVT ElemTy = ty(InpV).getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                             DAG.getConstant(1, dl, MVT::i32));
  SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                             DAG.getConstant(ElemWidth, dl, MVT::i32));
  // Splat of -1 (all bits set), used below to compute ~x as x ^ -1.
  SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                              DAG.getConstant(-1, dl, MVT::i32));

  // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
  // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
  // it separately in custom combine or selection).
  SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
                          {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
                           DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
  return DAG.getNode(ISD::SUB, dl, ResTy,
                     {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
}

// Lower a vector "multiply high" (the upper half of the double-width
// product of each pair of elements). The operation is treated as signed
// when the opcode is ISD::MULHS, and as unsigned otherwise.
//  - i8 and i16 elements: do a widening multiply and collect the high
//    halves of the products with a byte shuffle.
//  - i32 elements, signed: use the V62 sequence when the subtarget has
//    HVX v62 operations, and the V60 sequence otherwise.
//  - i32 elements, unsigned: compose the result from halfword multiplies.
SDValue
HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  assert(ResTy.isVector());
  const SDLoc &dl(Op);
  SmallVector<int,256> ShuffMask;

  MVT ElemTy = ResTy.getVectorElementType();
  unsigned VecLen = ResTy.getVectorNumElements();
  SDValue Vs = Op.getOperand(0);
  SDValue Vt = Op.getOperand(1);
  bool IsSigned = Op.getOpcode() == ISD::MULHS;

  if (ElemTy == MVT::i8 || ElemTy == MVT::i16) {
    // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
    // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
    // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
    // For i16, use V6_vmpyhv, which behaves in an analogous way to
    // V6_vmpybv: results Lo and Hi are products of even/odd elements
    // respectively.
    MVT ExtTy = typeExtElem(ResTy, 2);
    unsigned MpyOpc = ElemTy == MVT::i8
        ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv)
        : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv);
    SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);

    // Discard low halves of the resulting values, collect the high halves.
    for (unsigned I = 0; I < VecLen; I += 2) {
      ShuffMask.push_back(I+1);         // Pick even element.
      ShuffMask.push_back(I+VecLen+1);  // Pick odd element.
    }
    VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
    SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
    return DAG.getBitcast(ResTy, BS);
  }

  assert(ElemTy == MVT::i32);
  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // Signed 32-bit multiply-high without the v62 instructions.
  auto MulHS_V60 = [&](SDValue Vs, SDValue Vt) {
    // mulhs(Vs,Vt) =
    //   = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32
    //   = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16
    //      + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32
    //   = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16
    //      + Lo(Vs) *us Vt] >> 32
    // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to
    // anything, so it cannot produce any carry over to higher bits),
    // so everything in [] can be shifted by 16 without loss of precision.
    //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16
    //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16
    // Denote Hi(Vs) = Vs':
    //   = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16
    //   = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16
    SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG);
    // Get Vs':
    SDValue S0 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
                          {T0, S0, Vt}, DAG);
    // Shift by 16:
    SDValue S2 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG);
    // Get Vs'*Hi(Vt):
    SDValue T2 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG);
    // Add:
    SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2});
    return T3;
  };

  // Signed 32-bit multiply-high using the 64-bit multiply instructions:
  // the result is the second (high) vector of the resulting pair.
  auto MulHS_V62 = [&](SDValue Vs, SDValue Vt) {
    MVT PairTy = typeJoin({ResTy, ResTy});
    SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {Vs, Vt}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy,
                          {T0, Vs, Vt}, DAG);
    return opSplit(T1, dl, DAG).second;
  };

  if (IsSigned) {
    if (Subtarget.useHVXV62Ops())
      return MulHS_V62(Vs, Vt);
    return MulHS_V60(Vs, Vt);
  }

  // Unsigned mulhw. (Would expansion using signed mulhw be better?)

  // Helpers returning the low/high single vector of a vector pair.
  auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) {
    return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair);
  };
  auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) {
    return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair);
  };

  MVT PairTy = typeJoin({ResTy, ResTy});
  // Control vector for the vdelta below, which swaps the halfwords of Vt.
  SDValue P = getInstr(Hexagon::V6_lvsplatw, dl, ResTy,
                       {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  // Multiply-unsigned halfwords:
  //   LoVec = Vs.uh[2i] * Vt.uh[2i],
  //   HiVec = Vs.uh[2i+1] * Vt.uh[2i+1]
  SDValue T0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG);
  // The low halves in the LoVec of the pair can be discarded. They are
  // not added to anything (in the full-precision product), so they cannot
  // produce a carry into the higher bits.
  SDValue T1 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG);
  // Swap low and high halves in Vt, and do the halfword multiplication
  // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i].
  SDValue D0 = getInstr(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG);
  SDValue T2 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG);
  // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs).
  // These products are words, but cannot be added directly because the
  // sums could overflow. Add these products, by halfwords, where each sum
  // of a pair of halfwords gives a word.
  SDValue T3 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {LoVec(T2), HiVec(T2)}, DAG);
  // Add the high halfwords from the products of the low halfwords.
  SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)});
  SDValue T5 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG);
  SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)});
  SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6});
  return T7;
}

// Lower a bitcast with an HVX vector involved. A bitcast from an HVX
// boolean vector to a scalar integer is implemented by compressing the
// predicate into a vector register (compressHvxPred) and extracting as many
// 32-bit words from it as the result needs. Any other bitcast is returned
// unchanged.
SDValue
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
  SDValue ValQ = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT VecTy = ty(ValQ);
  const SDLoc &dl(Op);

  if (isHvxBoolTy(VecTy) && ResTy.isScalarInteger()) {
    unsigned HwLen = Subtarget.getVectorLength();
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue VQ = compressHvxPred(ValQ, dl, WordTy, DAG);
    unsigned BitWidth = ResTy.getSizeInBits();

    if (BitWidth < 64) {
      // The result fits in the first word of the compressed predicate.
      SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
          dl, MVT::i32, DAG);
      if (BitWidth == 32)
        return W0;
      assert(BitWidth < 32u);
      return DAG.getZExtOrTrunc(W0, dl, ResTy);
    }

    // The result is >= 64 bits. The only options are 64 or 128.
    assert(BitWidth == 64 || BitWidth == 128);
    SmallVector<SDValue,4> Words;
    for (unsigned i = 0; i != BitWidth/32; ++i) {
      SDValue W = extractHvxElementReg(
          VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
      Words.push_back(W);
    }
    // Join consecutive pairs of words into 64-bit values (the word with
    // the higher index is the first operand of COMBINE).
    SmallVector<SDValue,2> Combines;
    assert(Words.size() % 2 == 0);
    for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
      SDValue C = DAG.getNode(
          HexagonISD::COMBINE, dl, MVT::i64, {Words[i+1], Words[i]});
      Combines.push_back(C);
    }

    if (BitWidth == 64)
      return Combines[0];

    return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
  }

  return Op;
}

SDValue
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
  // Sign- and zero-extends are legal.
1645 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG); 1646 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op), 1647 Op.getOperand(0)); 1648 } 1649 1650 SDValue 1651 HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const { 1652 MVT ResTy = ty(Op); 1653 if (ResTy.getVectorElementType() != MVT::i1) 1654 return Op; 1655 1656 const SDLoc &dl(Op); 1657 unsigned HwLen = Subtarget.getVectorLength(); 1658 unsigned VecLen = ResTy.getVectorNumElements(); 1659 assert(HwLen % VecLen == 0); 1660 unsigned ElemSize = HwLen / VecLen; 1661 1662 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen); 1663 SDValue S = 1664 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0), 1665 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)), 1666 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2))); 1667 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S); 1668 } 1669 1670 SDValue 1671 HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const { 1672 if (SDValue S = getVectorShiftByInt(Op, DAG)) 1673 return S; 1674 return Op; 1675 } 1676 1677 SDValue 1678 HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const { 1679 const SDLoc &dl(Op); 1680 MVT ResTy = ty(Op); 1681 1682 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1683 bool Use64b = Subtarget.useHVX64BOps(); 1684 unsigned IntPredCast = Use64b ? 
Intrinsic::hexagon_V6_pred_typecast 1685 : Intrinsic::hexagon_V6_pred_typecast_128B; 1686 if (IntNo == IntPredCast) { 1687 SDValue Vs = Op.getOperand(1); 1688 MVT OpTy = ty(Vs); 1689 if (isHvxBoolTy(ResTy) && isHvxBoolTy(OpTy)) { 1690 if (ResTy == OpTy) 1691 return Vs; 1692 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Vs); 1693 } 1694 } 1695 1696 return Op; 1697 } 1698 1699 SDValue 1700 HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const { 1701 const SDLoc &dl(Op); 1702 unsigned HwLen = Subtarget.getVectorLength(); 1703 MachineFunction &MF = DAG.getMachineFunction(); 1704 auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode()); 1705 SDValue Mask = MaskN->getMask(); 1706 SDValue Chain = MaskN->getChain(); 1707 SDValue Base = MaskN->getBasePtr(); 1708 auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen); 1709 1710 unsigned Opc = Op->getOpcode(); 1711 assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE); 1712 1713 if (Opc == ISD::MLOAD) { 1714 MVT ValTy = ty(Op); 1715 SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp); 1716 SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru(); 1717 if (isUndef(Thru)) 1718 return Load; 1719 SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru); 1720 return DAG.getMergeValues({VSel, Load.getValue(1)}, dl); 1721 } 1722 1723 // MSTORE 1724 // HVX only has aligned masked stores. 1725 1726 // TODO: Fold negations of the mask into the store. 1727 unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai; 1728 SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue(); 1729 SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base)); 1730 1731 if (MaskN->getAlign().value() % HwLen == 0) { 1732 SDValue Store = getInstr(StoreOpc, dl, MVT::Other, 1733 {Mask, Base, Offset0, Value, Chain}, DAG); 1734 DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp}); 1735 return Store; 1736 } 1737 1738 // Unaligned case. 
1739 auto StoreAlign = [&](SDValue V, SDValue A) { 1740 SDValue Z = getZero(dl, ty(V), DAG); 1741 // TODO: use funnel shifts? 1742 // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the 1743 // upper half. 1744 SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG); 1745 SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG); 1746 return std::make_pair(LoV, HiV); 1747 }; 1748 1749 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); 1750 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); 1751 SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask); 1752 VectorPair Tmp = StoreAlign(MaskV, Base); 1753 VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first), 1754 DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)}; 1755 VectorPair ValueU = StoreAlign(Value, Base); 1756 1757 SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32); 1758 SDValue StoreLo = 1759 getInstr(StoreOpc, dl, MVT::Other, 1760 {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG); 1761 SDValue StoreHi = 1762 getInstr(StoreOpc, dl, MVT::Other, 1763 {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG); 1764 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp}); 1765 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp}); 1766 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi}); 1767 } 1768 1769 SDValue 1770 HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const { 1771 assert(!Op.isMachineOpcode()); 1772 SmallVector<SDValue,2> OpsL, OpsH; 1773 const SDLoc &dl(Op); 1774 1775 auto SplitVTNode = [&DAG,this] (const VTSDNode *N) { 1776 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first; 1777 SDValue TV = DAG.getValueType(Ty); 1778 return std::make_pair(TV, TV); 1779 }; 1780 1781 for (SDValue A : Op.getNode()->ops()) { 1782 VectorPair P = Subtarget.isHVXVectorType(ty(A), true) 1783 ? 
opSplit(A, dl, DAG) 1784 : std::make_pair(A, A); 1785 // Special case for type operand. 1786 if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { 1787 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode())) 1788 P = SplitVTNode(N); 1789 } 1790 OpsL.push_back(P.first); 1791 OpsH.push_back(P.second); 1792 } 1793 1794 MVT ResTy = ty(Op); 1795 MVT HalfTy = typeSplit(ResTy).first; 1796 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL); 1797 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH); 1798 SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H); 1799 return S; 1800 } 1801 1802 SDValue 1803 HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const { 1804 auto *MemN = cast<MemSDNode>(Op.getNode()); 1805 1806 MVT MemTy = MemN->getMemoryVT().getSimpleVT(); 1807 if (!isHvxPairTy(MemTy)) 1808 return Op; 1809 1810 const SDLoc &dl(Op); 1811 unsigned HwLen = Subtarget.getVectorLength(); 1812 MVT SingleTy = typeSplit(MemTy).first; 1813 SDValue Chain = MemN->getChain(); 1814 SDValue Base0 = MemN->getBasePtr(); 1815 SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl); 1816 1817 MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr; 1818 if (MachineMemOperand *MMO = MemN->getMemOperand()) { 1819 MachineFunction &MF = DAG.getMachineFunction(); 1820 MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen); 1821 MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen); 1822 } 1823 1824 unsigned MemOpc = MemN->getOpcode(); 1825 1826 if (MemOpc == ISD::LOAD) { 1827 assert(cast<LoadSDNode>(Op)->isUnindexed()); 1828 SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0); 1829 SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1); 1830 return DAG.getMergeValues( 1831 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1), 1832 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1833 Load0.getValue(1), Load1.getValue(1)) }, dl); 1834 } 1835 if (MemOpc == ISD::STORE) { 1836 assert(cast<StoreSDNode>(Op)->isUnindexed()); 1837 VectorPair 
Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG); 1838 SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0); 1839 SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1); 1840 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1); 1841 } 1842 1843 assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE); 1844 1845 auto MaskN = cast<MaskedLoadStoreSDNode>(Op); 1846 assert(MaskN->isUnindexed()); 1847 VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG); 1848 SDValue Offset = DAG.getUNDEF(MVT::i32); 1849 1850 if (MemOpc == ISD::MLOAD) { 1851 VectorPair Thru = 1852 opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG); 1853 SDValue MLoad0 = 1854 DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first, 1855 Thru.first, SingleTy, MOp0, ISD::UNINDEXED, 1856 ISD::NON_EXTLOAD, false); 1857 SDValue MLoad1 = 1858 DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second, 1859 Thru.second, SingleTy, MOp1, ISD::UNINDEXED, 1860 ISD::NON_EXTLOAD, false); 1861 return DAG.getMergeValues( 1862 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1), 1863 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1864 MLoad0.getValue(1), MLoad1.getValue(1)) }, dl); 1865 } 1866 if (MemOpc == ISD::MSTORE) { 1867 VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG); 1868 SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset, 1869 Masks.first, SingleTy, MOp0, 1870 ISD::UNINDEXED, false, false); 1871 SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset, 1872 Masks.second, SingleTy, MOp1, 1873 ISD::UNINDEXED, false, false); 1874 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1); 1875 } 1876 1877 std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG); 1878 llvm_unreachable(Name.c_str()); 1879 } 1880 1881 SDValue 1882 HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const { 1883 const SDLoc &dl(Op); 1884 
auto *LoadN = cast<LoadSDNode>(Op.getNode()); 1885 assert(LoadN->isUnindexed() && "Not widening indexed loads yet"); 1886 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 && 1887 "Not widening loads of i1 yet"); 1888 1889 SDValue Chain = LoadN->getChain(); 1890 SDValue Base = LoadN->getBasePtr(); 1891 SDValue Offset = DAG.getUNDEF(MVT::i32); 1892 1893 MVT ResTy = ty(Op); 1894 unsigned HwLen = Subtarget.getVectorLength(); 1895 unsigned ResLen = ResTy.getStoreSize(); 1896 assert(ResLen < HwLen && "vsetq(v1) prerequisite"); 1897 1898 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); 1899 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, 1900 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG); 1901 1902 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen); 1903 MachineFunction &MF = DAG.getMachineFunction(); 1904 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen); 1905 1906 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask, 1907 DAG.getUNDEF(LoadTy), LoadTy, MemOp, 1908 ISD::UNINDEXED, ISD::NON_EXTLOAD, false); 1909 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG); 1910 return DAG.getMergeValues({Value, Chain}, dl); 1911 } 1912 1913 SDValue 1914 HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const { 1915 const SDLoc &dl(Op); 1916 auto *StoreN = cast<StoreSDNode>(Op.getNode()); 1917 assert(StoreN->isUnindexed() && "Not widening indexed stores yet"); 1918 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 && 1919 "Not widening stores of i1 yet"); 1920 1921 SDValue Chain = StoreN->getChain(); 1922 SDValue Base = StoreN->getBasePtr(); 1923 SDValue Offset = DAG.getUNDEF(MVT::i32); 1924 1925 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG); 1926 MVT ValueTy = ty(Value); 1927 unsigned ValueLen = ValueTy.getVectorNumElements(); 1928 unsigned HwLen = Subtarget.getVectorLength(); 1929 assert(isPowerOf2_32(ValueLen)); 1930 1931 for (unsigned Len = ValueLen; Len < 
HwLen; ) { 1932 Value = opJoin({DAG.getUNDEF(ty(Value)), Value}, dl, DAG); 1933 Len = ty(Value).getVectorNumElements(); // This is Len *= 2 1934 } 1935 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia 1936 1937 assert(ValueLen < HwLen && "vsetq(v1) prerequisite"); 1938 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); 1939 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, 1940 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG); 1941 MachineFunction &MF = DAG.getMachineFunction(); 1942 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen); 1943 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value), 1944 MemOp, ISD::UNINDEXED, false, false); 1945 } 1946 1947 SDValue 1948 HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const { 1949 const SDLoc &dl(Op); 1950 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); 1951 MVT ElemTy = ty(Op0).getVectorElementType(); 1952 unsigned HwLen = Subtarget.getVectorLength(); 1953 1954 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits(); 1955 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen); 1956 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen); 1957 1958 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG); 1959 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG); 1960 EVT ResTy = 1961 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy); 1962 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy, 1963 {WideOp0, WideOp1, Op.getOperand(2)}); 1964 1965 EVT RetTy = getTypeToTransformTo(*DAG.getContext(), ty(Op)); 1966 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy, 1967 {SetCC, getZero(dl, MVT::i32, DAG)}); 1968 } 1969 1970 SDValue 1971 HexagonTargetLowering::WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const { 1972 const SDLoc &dl(Op); 1973 unsigned HwWidth = 8*Subtarget.getVectorLength(); 1974 1975 SDValue Op0 = Op.getOperand(0); 1976 MVT ResTy = ty(Op); 1977 MVT OpTy = ty(Op0); 1978 if (!Subtarget.isHVXElementType(OpTy) || 
!Subtarget.isHVXElementType(ResTy)) 1979 return SDValue(); 1980 1981 // .-res, op-> ScalarVec Illegal HVX 1982 // Scalar ok - - 1983 // Illegal widen(insert) widen - 1984 // HVX - widen ok 1985 1986 auto getFactor = [HwWidth](MVT Ty) { 1987 unsigned Width = Ty.getSizeInBits(); 1988 return HwWidth > Width ? HwWidth / Width : 1; 1989 }; 1990 1991 auto getWideTy = [getFactor](MVT Ty) { 1992 unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty); 1993 return MVT::getVectorVT(Ty.getVectorElementType(), WideLen); 1994 }; 1995 1996 unsigned Opcode = Op.getOpcode() == ISD::SIGN_EXTEND ? HexagonISD::VUNPACK 1997 : HexagonISD::VUNPACKU; 1998 SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG); 1999 SDValue WideRes = DAG.getNode(Opcode, dl, getWideTy(ResTy), WideOp); 2000 return WideRes; 2001 } 2002 2003 SDValue 2004 HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const { 2005 const SDLoc &dl(Op); 2006 unsigned HwWidth = 8*Subtarget.getVectorLength(); 2007 2008 SDValue Op0 = Op.getOperand(0); 2009 MVT ResTy = ty(Op); 2010 MVT OpTy = ty(Op0); 2011 if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy)) 2012 return SDValue(); 2013 2014 // .-res, op-> ScalarVec Illegal HVX 2015 // Scalar ok extract(widen) - 2016 // Illegal - widen widen 2017 // HVX - - ok 2018 2019 auto getFactor = [HwWidth](MVT Ty) { 2020 unsigned Width = Ty.getSizeInBits(); 2021 assert(HwWidth % Width == 0); 2022 return HwWidth / Width; 2023 }; 2024 2025 auto getWideTy = [getFactor](MVT Ty) { 2026 unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty); 2027 return MVT::getVectorVT(Ty.getVectorElementType(), WideLen); 2028 }; 2029 2030 if (Subtarget.isHVXVectorType(OpTy)) 2031 return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0); 2032 2033 assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?"); 2034 2035 SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG); 2036 SDValue WideRes = DAG.getNode(HexagonISD::VPACKL, dl, 
getWideTy(ResTy), 2037 WideOp); 2038 // If the original result wasn't legal and was supposed to be widened, 2039 // we're done. 2040 if (shouldWidenToHvx(ResTy, DAG)) 2041 return WideRes; 2042 2043 // The original result type wasn't meant to be widened to HVX, so 2044 // leave it as it is. Standard legalization should be able to deal 2045 // with it (since now it's a result of a target-idendependent ISD 2046 // node). 2047 assert(ResTy.isVector()); 2048 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy, 2049 {WideRes, getZero(dl, MVT::i32, DAG)}); 2050 } 2051 2052 SDValue 2053 HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { 2054 unsigned Opc = Op.getOpcode(); 2055 bool IsPairOp = isHvxPairTy(ty(Op)) || 2056 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) { 2057 return isHvxPairTy(ty(V)); 2058 }); 2059 2060 if (IsPairOp) { 2061 switch (Opc) { 2062 default: 2063 break; 2064 case ISD::LOAD: 2065 case ISD::STORE: 2066 case ISD::MLOAD: 2067 case ISD::MSTORE: 2068 return SplitHvxMemOp(Op, DAG); 2069 case ISD::CTPOP: 2070 case ISD::CTLZ: 2071 case ISD::CTTZ: 2072 case ISD::MUL: 2073 case ISD::MULHS: 2074 case ISD::MULHU: 2075 case ISD::AND: 2076 case ISD::OR: 2077 case ISD::XOR: 2078 case ISD::SRA: 2079 case ISD::SHL: 2080 case ISD::SRL: 2081 case ISD::SMIN: 2082 case ISD::SMAX: 2083 case ISD::UMIN: 2084 case ISD::UMAX: 2085 case ISD::SETCC: 2086 case ISD::VSELECT: 2087 case ISD::SIGN_EXTEND: 2088 case ISD::ZERO_EXTEND: 2089 case ISD::SIGN_EXTEND_INREG: 2090 case ISD::SPLAT_VECTOR: 2091 return SplitHvxPairOp(Op, DAG); 2092 } 2093 } 2094 2095 switch (Opc) { 2096 default: 2097 break; 2098 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG); 2099 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG); 2100 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG); 2101 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG); 2102 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, 
DAG); 2103 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG); 2104 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG); 2105 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG); 2106 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG); 2107 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG); 2108 case ISD::CTTZ: return LowerHvxCttz(Op, DAG); 2109 case ISD::SELECT: return LowerHvxSelect(Op, DAG); 2110 case ISD::SRA: 2111 case ISD::SHL: 2112 case ISD::SRL: return LowerHvxShift(Op, DAG); 2113 case ISD::MULHS: 2114 case ISD::MULHU: return LowerHvxMulh(Op, DAG); 2115 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG); 2116 case ISD::SETCC: 2117 case ISD::INTRINSIC_VOID: return Op; 2118 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG); 2119 case ISD::MLOAD: 2120 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG); 2121 // Unaligned loads will be handled by the default lowering. 2122 case ISD::LOAD: return SDValue(); 2123 } 2124 #ifndef NDEBUG 2125 Op.dumpr(&DAG); 2126 #endif 2127 llvm_unreachable("Unhandled HVX operation"); 2128 } 2129 2130 void 2131 HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, 2132 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 2133 unsigned Opc = N->getOpcode(); 2134 SDValue Op(N, 0); 2135 2136 switch (Opc) { 2137 case ISD::ANY_EXTEND: 2138 case ISD::SIGN_EXTEND: 2139 case ISD::ZERO_EXTEND: 2140 if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) { 2141 if (SDValue T = WidenHvxExtend(Op, DAG)) 2142 Results.push_back(T); 2143 } 2144 break; 2145 case ISD::SETCC: 2146 if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) { 2147 if (SDValue T = WidenHvxSetCC(Op, DAG)) 2148 Results.push_back(T); 2149 } 2150 break; 2151 case ISD::TRUNCATE: 2152 if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) { 2153 if (SDValue T = WidenHvxTruncate(Op, DAG)) 2154 Results.push_back(T); 2155 } 2156 break; 2157 case ISD::STORE: { 2158 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) { 
2159 SDValue Store = WidenHvxStore(Op, DAG); 2160 Results.push_back(Store); 2161 } 2162 break; 2163 } 2164 case ISD::MLOAD: 2165 if (isHvxPairTy(ty(Op))) { 2166 SDValue S = SplitHvxMemOp(Op, DAG); 2167 assert(S->getOpcode() == ISD::MERGE_VALUES); 2168 Results.push_back(S.getOperand(0)); 2169 Results.push_back(S.getOperand(1)); 2170 } 2171 break; 2172 case ISD::MSTORE: 2173 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value 2174 SDValue S = SplitHvxMemOp(Op, DAG); 2175 Results.push_back(S); 2176 } 2177 break; 2178 default: 2179 break; 2180 } 2181 } 2182 2183 void 2184 HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, 2185 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 2186 unsigned Opc = N->getOpcode(); 2187 SDValue Op(N, 0); 2188 switch (Opc) { 2189 case ISD::ANY_EXTEND: 2190 case ISD::SIGN_EXTEND: 2191 case ISD::ZERO_EXTEND: 2192 if (shouldWidenToHvx(ty(Op), DAG)) { 2193 if (SDValue T = WidenHvxExtend(Op, DAG)) 2194 Results.push_back(T); 2195 } 2196 break; 2197 case ISD::SETCC: 2198 if (shouldWidenToHvx(ty(Op), DAG)) { 2199 if (SDValue T = WidenHvxSetCC(Op, DAG)) 2200 Results.push_back(T); 2201 } 2202 break; 2203 case ISD::TRUNCATE: 2204 if (shouldWidenToHvx(ty(Op), DAG)) { 2205 if (SDValue T = WidenHvxTruncate(Op, DAG)) 2206 Results.push_back(T); 2207 } 2208 break; 2209 case ISD::LOAD: { 2210 if (shouldWidenToHvx(ty(Op), DAG)) { 2211 SDValue Load = WidenHvxLoad(Op, DAG); 2212 assert(Load->getOpcode() == ISD::MERGE_VALUES); 2213 Results.push_back(Load.getOperand(0)); 2214 Results.push_back(Load.getOperand(1)); 2215 } 2216 break; 2217 } 2218 case ISD::BITCAST: 2219 if (isHvxBoolTy(ty(N->getOperand(0)))) { 2220 SDValue Op(N, 0); 2221 SDValue C = LowerHvxBitcast(Op, DAG); 2222 Results.push_back(C); 2223 } 2224 break; 2225 default: 2226 break; 2227 } 2228 } 2229 2230 SDValue 2231 HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) 2232 const { 2233 const SDLoc &dl(N); 2234 SelectionDAG &DAG = DCI.DAG; 2235 SDValue 
Op(N, 0); 2236 unsigned Opc = Op.getOpcode(); 2237 if (DCI.isBeforeLegalizeOps()) 2238 return SDValue(); 2239 2240 SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end()); 2241 2242 switch (Opc) { 2243 case ISD::VSELECT: { 2244 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0) 2245 SDValue Cond = Ops[0]; 2246 if (Cond->getOpcode() == ISD::XOR) { 2247 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1); 2248 if (C1->getOpcode() == HexagonISD::QTRUE) 2249 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]); 2250 } 2251 break; 2252 } 2253 case HexagonISD::V2Q: 2254 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) { 2255 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0))) 2256 return C->isNullValue() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op)) 2257 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op)); 2258 } 2259 break; 2260 case HexagonISD::Q2V: 2261 if (Ops[0].getOpcode() == HexagonISD::QTRUE) 2262 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op), 2263 DAG.getConstant(-1, dl, MVT::i32)); 2264 if (Ops[0].getOpcode() == HexagonISD::QFALSE) 2265 return getZero(dl, ty(Op), DAG); 2266 break; 2267 case HexagonISD::VINSERTW0: 2268 if (isUndef(Ops[1])) 2269 return Ops[0];; 2270 break; 2271 case HexagonISD::VROR: { 2272 if (Ops[0].getOpcode() == HexagonISD::VROR) { 2273 SDValue Vec = Ops[0].getOperand(0); 2274 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1); 2275 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1}); 2276 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot}); 2277 } 2278 break; 2279 } 2280 } 2281 2282 return SDValue(); 2283 } 2284 2285 bool 2286 HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const { 2287 auto Action = getPreferredHvxVectorAction(Ty); 2288 if (Action == TargetLoweringBase::TypeWidenVector) { 2289 EVT WideTy = getTypeToTransformTo(*DAG.getContext(), Ty); 2290 assert(WideTy.isSimple()); 2291 return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true); 2292 } 2293 
return false; 2294 } 2295 2296 bool 2297 HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { 2298 if (!Subtarget.useHVXOps()) 2299 return false; 2300 // If the type of any result, or any operand type are HVX vector types, 2301 // this is an HVX operation. 2302 auto IsHvxTy = [this](EVT Ty) { 2303 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true); 2304 }; 2305 auto IsHvxOp = [this](SDValue Op) { 2306 return Op.getValueType().isSimple() && 2307 Subtarget.isHVXVectorType(ty(Op), true); 2308 }; 2309 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp)) 2310 return true; 2311 2312 // Check if this could be an HVX operation after type widening. 2313 auto IsWidenedToHvx = [this, &DAG](SDValue Op) { 2314 if (!Op.getValueType().isSimple()) 2315 return false; 2316 MVT ValTy = ty(Op); 2317 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG); 2318 }; 2319 2320 for (int i = 0, e = N->getNumValues(); i != e; ++i) { 2321 if (IsWidenedToHvx(SDValue(N, i))) 2322 return true; 2323 } 2324 return llvm::any_of(N->ops(), IsWidenedToHvx); 2325 } 2326