//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "HexagonISelLowering.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

// Vectors at least this many bytes wide may be widened to a full HVX vector
// instead of being split/scalarized (see getPreferredHvxVectorAction).
static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
  cl::Hidden, cl::init(16),
  cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));

// Legal single-register (V) and register-pair (W) integer vector types for
// the 64-byte and 128-byte HVX modes. Note that a 64B pair has the same
// shapes as a 128B single register.
static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };

// Register the HVX register classes and configure operation/cond-code
// actions for all HVX vector types. Called during target-lowering setup.
void
HexagonTargetLowering::initializeHVXLowering() {
  if (Subtarget.useHVX64BOps()) {
    // 64-byte mode: single vectors in HvxVR, pairs in HvxWR.
    addRegisterClass(MVT::v64i8,  &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
    // These "short" boolean vector types should be legal because
    // they will appear as results of vector compares. If they were
    // not legal, type legalization would try to make them legal
    // and that would require using operations that do not use or
    // produce such types. That, in turn, would imply using custom
    // nodes, which would be unoptimizable by the DAG combiner.
    // The idea is to rely on target-independent operations as much
    // as possible.
    addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
  } else if (Subtarget.useHVX128BOps()) {
    // 128-byte mode: same structure, double-width types.
    addRegisterClass(MVT::v128i8,  &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v64i16,  &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i32,  &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v256i8,  &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i32,  &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
    // Floating-point HVX types require v68+ and the FP feature.
    if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
      addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
      addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
    }
  }

  // Set up operation actions.

  bool Use64b = Subtarget.useHVX64BOps();
  ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
  ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
  // Byte-vector types for the current mode; shuffles are promoted to these.
  MVT ByteV = Use64b ?  MVT::v64i8 : MVT::v128i8;
  MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;

  // Helper: mark Opc on FromTy as Promote and record the promotion target.
  auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
    setOperationAction(Opc, FromTy, Promote);
    AddPromotedToType(Opc, FromTy, ToTy);
  };

  // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
  // Note: v16i1 -> i16 is handled in type legalization instead of op
  // legalization.
81 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 82 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 83 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 84 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); 85 setOperationAction(ISD::BITCAST, MVT::v128i1, Custom); 86 setOperationAction(ISD::BITCAST, MVT::i128, Custom); 87 setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); 88 setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); 89 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 90 91 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && 92 Subtarget.useHVXFloatingPoint()) { 93 94 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 }; 95 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 }; 96 97 for (MVT T : FloatV) { 98 setOperationAction(ISD::FADD, T, Legal); 99 setOperationAction(ISD::FSUB, T, Legal); 100 setOperationAction(ISD::FMUL, T, Legal); 101 setOperationAction(ISD::FMINNUM, T, Legal); 102 setOperationAction(ISD::FMAXNUM, T, Legal); 103 104 setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); 105 setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); 106 107 setOperationAction(ISD::SPLAT_VECTOR, T, Legal); 108 setOperationAction(ISD::SPLAT_VECTOR, T, Legal); 109 110 setOperationAction(ISD::MLOAD, T, Custom); 111 setOperationAction(ISD::MSTORE, T, Custom); 112 // Custom-lower BUILD_VECTOR. The standard (target-independent) 113 // handling of it would convert it to a load, which is not always 114 // the optimal choice. 115 setOperationAction(ISD::BUILD_VECTOR, T, Custom); 116 } 117 118 119 // BUILD_VECTOR with f16 operands cannot be promoted without 120 // promoting the result, so lower the node to vsplat or constant pool 121 setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom); 122 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom); 123 setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom); 124 125 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is 126 // generated. 
    // FP shuffles operate on the byte-vector forms (see comment above).
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);

    // FP vector-pair types: most operations are custom-lowered, typically
    // by splitting into operations on the two single-vector halves.
    for (MVT P : FloatW) {
      setOperationAction(ISD::LOAD, P, Custom);
      setOperationAction(ISD::STORE, P, Custom);
      setOperationAction(ISD::FADD, P, Custom);
      setOperationAction(ISD::FSUB, P, Custom);
      setOperationAction(ISD::FMUL, P, Custom);
      setOperationAction(ISD::FMINNUM, P, Custom);
      setOperationAction(ISD::FMAXNUM, P, Custom);
      setOperationAction(ISD::VSELECT, P, Custom);

      // Custom-lower BUILD_VECTOR. The standard (target-independent)
      // handling of it would convert it to a load, which is not always
      // the optimal choice.
      setOperationAction(ISD::BUILD_VECTOR, P, Custom);
      // Make concat-vectors custom to handle concats of more than 2 vectors.
      setOperationAction(ISD::CONCAT_VECTORS, P, Custom);

      setOperationAction(ISD::MLOAD, P, Custom);
      setOperationAction(ISD::MSTORE, P, Custom);
    }

    // FP_EXTEND of v64f16 depends on which FP flavor is in use.
    if (Subtarget.useHVXQFloatOps()) {
      setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
      setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
    } else if (Subtarget.useHVXIEEEFPOps()) {
      setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
      setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
    }
  }

  // Actions for single-register integer vector types.
  for (MVT T : LegalV) {
    setIndexedLoadAction(ISD::POST_INC, T, Legal);
    setIndexedStoreAction(ISD::POST_INC, T, Legal);

    setOperationAction(ISD::AND, T, Legal);
    setOperationAction(ISD::OR, T, Legal);
    setOperationAction(ISD::XOR, T, Legal);
    setOperationAction(ISD::ADD, T, Legal);
    setOperationAction(ISD::SUB, T, Legal);
    setOperationAction(ISD::MUL, T, Legal);
    setOperationAction(ISD::CTPOP, T, Legal);
    setOperationAction(ISD::CTLZ, T, Legal);
    setOperationAction(ISD::SELECT, T, Legal);
    setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
    if (T != ByteV) {
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
      setOperationAction(ISD::BSWAP, T, Legal);
    }

    setOperationAction(ISD::SMIN, T, Legal);
    setOperationAction(ISD::SMAX, T, Legal);
    // Unsigned min/max are legal only for the sub-word element types.
    if (T.getScalarType() != MVT::i32) {
      setOperationAction(ISD::UMIN, T, Legal);
      setOperationAction(ISD::UMAX, T, Legal);
    }

    setOperationAction(ISD::CTTZ, T, Custom);
    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::MLOAD, T, Custom);
    setOperationAction(ISD::MSTORE, T, Custom);
    setOperationAction(ISD::MULHS, T, Custom);
    setOperationAction(ISD::MULHU, T, Custom);
    setOperationAction(ISD::BUILD_VECTOR, T, Custom);
    // Make concat-vectors custom to handle concats of more than 2 vectors.
    setOperationAction(ISD::CONCAT_VECTORS, T, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
    setOperationAction(ISD::ANY_EXTEND, T, Custom);
    setOperationAction(ISD::SIGN_EXTEND, T, Custom);
    setOperationAction(ISD::ZERO_EXTEND, T, Custom);
    if (T != ByteV) {
      setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
      // HVX only has shifts of words and halfwords.
      setOperationAction(ISD::SRA, T, Custom);
      setOperationAction(ISD::SHL, T, Custom);
      setOperationAction(ISD::SRL, T, Custom);

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
    }

    // Int<->FP conversions: expanded under QFloat, custom under IEEE FP.
    if (Subtarget.useHVXQFloatOps()) {
      setOperationAction(ISD::SINT_TO_FP, T, Expand);
      setOperationAction(ISD::UINT_TO_FP, T, Expand);
      setOperationAction(ISD::FP_TO_SINT, T, Expand);
      setOperationAction(ISD::FP_TO_UINT, T, Expand);
    } else if (Subtarget.useHVXIEEEFPOps()) {
      setOperationAction(ISD::SINT_TO_FP, T, Custom);
      setOperationAction(ISD::UINT_TO_FP, T, Custom);
      setOperationAction(ISD::FP_TO_SINT, T, Custom);
      setOperationAction(ISD::FP_TO_UINT, T, Custom);
    }

    // Only EQ, GT and GTU compares exist in hardware; expand the rest
    // into combinations of those.
    setCondCodeAction(ISD::SETNE, T, Expand);
    setCondCodeAction(ISD::SETLE, T, Expand);
    setCondCodeAction(ISD::SETGE, T, Expand);
    setCondCodeAction(ISD::SETLT, T, Expand);
    setCondCodeAction(ISD::SETULE, T, Expand);
    setCondCodeAction(ISD::SETUGE, T, Expand);
    setCondCodeAction(ISD::SETULT, T, Expand);
  }

  // Actions for register-pair integer vector types.
  for (MVT T : LegalW) {
    // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
    // independent) handling of it would convert it to a load, which is
    // not always the optimal choice.
    setOperationAction(ISD::BUILD_VECTOR, T, Custom);
    // Make concat-vectors custom to handle concats of more than 2 vectors.
    setOperationAction(ISD::CONCAT_VECTORS, T, Custom);

    // Custom-lower these operations for pairs. Expand them into a concat
    // of the corresponding operations on individual vectors.
    setOperationAction(ISD::ANY_EXTEND, T, Custom);
    setOperationAction(ISD::SIGN_EXTEND, T, Custom);
    setOperationAction(ISD::ZERO_EXTEND, T, Custom);
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Custom);
    setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
    setOperationAction(ISD::SPLAT_VECTOR, T, Custom);

    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::STORE, T, Custom);
    setOperationAction(ISD::MLOAD, T, Custom);
    setOperationAction(ISD::MSTORE, T, Custom);
    setOperationAction(ISD::CTLZ, T, Custom);
    setOperationAction(ISD::CTTZ, T, Custom);
    setOperationAction(ISD::CTPOP, T, Custom);

    setOperationAction(ISD::ADD, T, Legal);
    setOperationAction(ISD::SUB, T, Legal);
    setOperationAction(ISD::MUL, T, Custom);
    setOperationAction(ISD::MULHS, T, Custom);
    setOperationAction(ISD::MULHU, T, Custom);
    setOperationAction(ISD::AND, T, Custom);
    setOperationAction(ISD::OR, T, Custom);
    setOperationAction(ISD::XOR, T, Custom);
    setOperationAction(ISD::SETCC, T, Custom);
    setOperationAction(ISD::VSELECT, T, Custom);
    if (T != ByteW) {
      // HVX only has shifts of words and halfwords.
      setOperationAction(ISD::SRA, T, Custom);
      setOperationAction(ISD::SHL, T, Custom);
      setOperationAction(ISD::SRL, T, Custom);

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
    }

    setOperationAction(ISD::SMIN, T, Custom);
    setOperationAction(ISD::SMAX, T, Custom);
    // Unsigned min/max are unavailable for word elements (cf. LegalV loop).
    if (T.getScalarType() != MVT::i32) {
      setOperationAction(ISD::UMIN, T, Custom);
      setOperationAction(ISD::UMAX, T, Custom);
    }

    setOperationAction(ISD::SINT_TO_FP, T, Custom);
    setOperationAction(ISD::UINT_TO_FP, T, Custom);
    setOperationAction(ISD::FP_TO_SINT, T, Custom);
    setOperationAction(ISD::FP_TO_UINT, T, Custom);
  }

  // FP compares: only the basic ordered forms are supported; expand the
  // remaining condition codes for the single-register FP types.
  setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);

  setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);

  // Boolean vectors.

  for (MVT T : LegalW) {
    // Boolean types for vector pairs will overlap with the boolean
    // types for single vectors, e.g.
    //   v64i8  -> v64i1 (single)
    //   v64i16 -> v64i1 (pair)
    // Set these actions first, and allow the single actions to overwrite
    // any duplicates.
    MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
    setOperationAction(ISD::SETCC, BoolW, Custom);
    setOperationAction(ISD::AND, BoolW, Custom);
    setOperationAction(ISD::OR, BoolW, Custom);
    setOperationAction(ISD::XOR, BoolW, Custom);
    // Masked load/store takes a mask that may need splitting.
    setOperationAction(ISD::MLOAD, BoolW, Custom);
    setOperationAction(ISD::MSTORE, BoolW, Custom);
  }

  // Boolean types for single vectors; these intentionally overwrite any
  // overlapping pair actions set in the loop above.
  for (MVT T : LegalV) {
    MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
    setOperationAction(ISD::BUILD_VECTOR, BoolV, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, BoolV, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, BoolV, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom);
    setOperationAction(ISD::SELECT, BoolV, Custom);
    setOperationAction(ISD::AND, BoolV, Legal);
    setOperationAction(ISD::OR, BoolV, Legal);
    setOperationAction(ISD::XOR, BoolV, Legal);
  }

  // SIGN_EXTEND_INREG is legal for a set of short (sub-HVX-width) vector
  // types, which differ between the 64B and 128B modes.
  if (Use64b) {
    for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
  } else {
    for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
  }

  // Handle store widening for short vectors.
  // For every power-of-2-sized short vector type that this target prefers
  // to widen, custom-lower the operations that must understand widening.
  unsigned HwLen = Subtarget.getVectorLength();
  for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
    if (ElemTy == MVT::i1)
      continue;
    int ElemWidth = ElemTy.getFixedSizeInBits();
    int MaxElems = (8*HwLen) / ElemWidth;
    for (int N = 2; N < MaxElems; N *= 2) {
      MVT VecTy = MVT::getVectorVT(ElemTy, N);
      auto Action = getPreferredVectorAction(VecTy);
      if (Action == TargetLoweringBase::TypeWidenVector) {
        setOperationAction(ISD::LOAD, VecTy, Custom);
        setOperationAction(ISD::STORE, VecTy, Custom);
        setOperationAction(ISD::SETCC, VecTy, Custom);
        setOperationAction(ISD::TRUNCATE, VecTy, Custom);
        setOperationAction(ISD::ANY_EXTEND, VecTy, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom);

        MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
        if (!isTypeLegal(BoolTy))
          setOperationAction(ISD::SETCC, BoolTy, Custom);
      }
    }
  }

  setTargetDAGCombine({ISD::SPLAT_VECTOR, ISD::VSELECT});
}

// Decide how the type legalizer should treat an illegal HVX-related vector
// type: split it, widen it, or (~0u) defer to the default policy.
unsigned
HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();

  // Split vectors of i1 that exceed byte vector length.
  if (ElemTy == MVT::i1 && VecLen > HwLen)
    return TargetLoweringBase::TypeSplitVector;

  ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
  // For shorter vectors of i1, widen them if any of the corresponding
  // vectors of integers needs to be widened.
  if (ElemTy == MVT::i1) {
    for (MVT T : Tys) {
      assert(T != MVT::i1);
      auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
      if (A != ~0u)
        return A;
    }
    return ~0u;
  }

  // If the size of VecTy is at least half of the vector length,
  // widen the vector. Note: the threshold was not selected in
  // any scientific way.
  if (llvm::is_contained(Tys, ElemTy)) {
    unsigned VecWidth = VecTy.getSizeInBits();
    // An explicit -hexagon-hvx-widen=N on the command line overrides the
    // default half-vector-length heuristic below.
    bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
    if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
      return TargetLoweringBase::TypeWidenVector;
    unsigned HwWidth = 8*HwLen;
    if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
      return TargetLoweringBase::TypeWidenVector;
  }

  // Defer to default.
  return ~0u;
}

// Build an INTRINSIC_WO_CHAIN node for intrinsic IntId with the given
// operands and result type.
SDValue
HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
                              const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<SDValue,4> IntOps;
  // The intrinsic id is always operand 0.
  IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
  append_range(IntOps, Ops);
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
}

// Return the vector type obtained by concatenating the two (same-element)
// types in Tys.
MVT
HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
  assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());

  MVT ElemTy = Tys.first.getVectorElementType();
  return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
                                  Tys.second.getVectorNumElements());
}

// Return the pair of half-sized types that VecTy splits into.
HexagonTargetLowering::TypePair
HexagonTargetLowering::typeSplit(MVT VecTy) const {
  assert(VecTy.isVector());
  unsigned NumElem = VecTy.getVectorNumElements();
  assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
  MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
  return { HalfTy, HalfTy };
}

// Same element count, integer elements Factor times wider.
MVT
HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
  MVT ElemTy = VecTy.getVectorElementType();
  MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
  return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
}

// Same element count, integer elements Factor times narrower.
MVT
HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
  MVT ElemTy = VecTy.getVectorElementType();
  MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
  return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
}

// Bitcast Vec so that its element type becomes ElemTy (no-op if it already
// is); total bit width is preserved.
SDValue
HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
                                  SelectionDAG &DAG) const {
  if (ty(Vec).getVectorElementType() == ElemTy)
    return Vec;
  MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
  return DAG.getBitcast(CastTy, Vec);
}

// Concatenate a vector pair into a single double-length vector. Note the
// operand order: second (hi) before first (lo).
SDValue
HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
                              SelectionDAG &DAG) const {
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
                     Ops.second, Ops.first);
}

// Split Vec into two half-sized vectors. A QCAT node is split by simply
// taking its two operands.
HexagonTargetLowering::VectorPair
HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
                               SelectionDAG &DAG) const {
  TypePair Tys = typeSplit(ty(Vec));
  if (Vec.getOpcode() == HexagonISD::QCAT)
    return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
  return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
}

// True if Ty is an HVX type occupying exactly one vector register.
bool
HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty) &&
         Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
}

// True if Ty is an HVX type occupying exactly one vector register pair.
bool
HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty) &&
         Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
}

// True if Ty is an HVX predicate (vector-of-i1) type.
bool
HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty, true) &&
         Ty.getVectorElementType() == MVT::i1;
}

bool HexagonTargetLowering::allowsHvxMemoryAccess(
    MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
  // Bool vectors are excluded by default, but make it explicit to
  // emphasize that bool vectors cannot be loaded or stored.
  // Also, disallow double vector stores (to prevent unnecessary
  // store widening in DAG combiner).
  // Reject pair-sized (and larger) accesses; see the comment above.
  if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
    return false;
  if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
    return false;
  if (Fast)
    *Fast = true;
  return true;
}

// Misaligned accesses are allowed for any HVX vector type, and reported
// as fast.
bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
    MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
  if (!Subtarget.isHVXVectorType(VecTy))
    return false;
  // XXX Should this be false? vmemu are a bit slower than vmem.
  if (Fast)
    *Fast = true;
  return true;
}

// Convert an element index into a byte index by scaling it with the
// element size (i.e. ElemIdx << log2(bytes-per-element)).
SDValue
HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
                                          SelectionDAG &DAG) const {
  if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
    ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);

  unsigned ElemWidth = ElemTy.getSizeInBits();
  if (ElemWidth == 8)
    return ElemIdx;

  unsigned L = Log2_32(ElemWidth/8);
  const SDLoc &dl(ElemIdx);
  return DAG.getNode(ISD::SHL, dl, MVT::i32,
                     {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
}

// Return the sub-index of element Idx within its containing 32-bit word,
// i.e. Idx modulo (elements-per-word).
SDValue
HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
                                        SelectionDAG &DAG) const {
  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  if (ElemWidth == 32)
    return Idx;

  if (ty(Idx) != MVT::i32)
    Idx = DAG.getBitcast(MVT::i32, Idx);
  const SDLoc &dl(Idx);
  // Elements per word is a power of 2, so the modulo is a simple mask.
  SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
  SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
  return SubIdx;
}

// Perform a shuffle of Op0/Op1 by first bitcasting both operands to byte
// vectors and expanding Mask accordingly (one mask entry per byte).
SDValue
HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
                                      SDValue Op1, ArrayRef<int> Mask,
                                      SelectionDAG &DAG) const {
  MVT OpTy = ty(Op0);
  assert(OpTy == ty(Op1));

  MVT ElemTy = OpTy.getVectorElementType();
  // Already a byte vector: shuffle directly.
  if (ElemTy == MVT::i8)
    return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
  assert(ElemTy.getSizeInBits() >= 8);

  MVT ResTy = tyVector(OpTy, MVT::i8);
  unsigned ElemSize = ElemTy.getSizeInBits() / 8;

  // Expand each element-level mask entry into ElemSize byte-level entries.
  SmallVector<int,128> ByteMask;
  for (int M : Mask) {
    if (M < 0) {
      // Undef element -> ElemSize undef bytes.
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(-1);
    } else {
      int NewM = M*ElemSize;
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(NewM+I);
    }
  }
  assert(ResTy.getVectorNumElements() == ByteMask.size());
  return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
                              opCastElem(Op1, MVT::i8, DAG), ByteMask);
}

// Build a single HVX vector register from the scalar Values (a lowered
// BUILD_VECTOR). Tries, in order: splat, constant pool, shuffle of an
// existing vector, and finally explicit word-by-word insertion.
SDValue
HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
                                         const SDLoc &dl, MVT VecTy,
                                         SelectionDAG &DAG) const {
  unsigned VecLen = Values.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned ElemSize = ElemWidth / 8;
  assert(ElemSize*VecLen == HwLen);
  SmallVector<SDValue,32> Words;

  // Collapse the input values into a list of 32-bit words (the granularity
  // at which the vector will be assembled).
  if (VecTy.getVectorElementType() != MVT::i32 &&
      !(Subtarget.useHVXFloatingPoint() &&
        VecTy.getVectorElementType() == MVT::f32)) {
    // Sub-word elements: pack groups of 4 (i8) or 2 (i16/f16) elements
    // into one 32-bit word each.
    assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
    unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
    MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
    for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
      SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
      Words.push_back(DAG.getBitcast(MVT::i32, W));
    }
  } else {
    // Word-sized elements: just bitcast each one.
    for (SDValue V : Values)
      Words.push_back(DAG.getBitcast(MVT::i32, V));
  }

  // Return true (setting SplatV) if all non-undef values are identical.
  auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
    unsigned NumValues = Values.size();
    assert(NumValues > 0);
    bool IsUndef = true;
    for (unsigned i = 0; i != NumValues; ++i) {
      if (Values[i].isUndef())
        continue;
      IsUndef = false;
      if (!SplatV.getNode())
        SplatV = Values[i];
      else if (SplatV != Values[i])
        return false;
    }
    if (IsUndef)
      SplatV = Values[0];
    return true;
  };

  unsigned NumWords = Words.size();
  SDValue SplatV;
  bool IsSplat = isSplat(Words, SplatV);
  if (IsSplat && isUndef(SplatV))
    return DAG.getUNDEF(VecTy);
  if (IsSplat) {
    assert(SplatV.getNode());
    // Splat of zero has a cheaper form.
    auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
    if (IdxN && IdxN->isZero())
      return getZero(dl, VecTy, DAG);
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
    return DAG.getBitcast(VecTy, S);
  }

  // Delay recognizing constant vectors until here, so that we can generate
  // a vsplat.
  SmallVector<ConstantInt*, 128> Consts(VecLen);
  bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
  if (AllConst) {
    // All-constant (non-splat) vector: load it from the constant pool.
    ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
                            (Constant**)Consts.end());
    Constant *CV = ConstantVector::get(Tmp);
    Align Alignment(HwLen);
    SDValue CP =
        LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
    return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
                       MachinePointerInfo::getConstantPool(MF), Alignment);
  }

  // A special case is a situation where the vector is built entirely from
  // elements extracted from another vector. This could be done via a shuffle
  // more efficiently, but typically, the size of the source vector will not
  // match the size of the vector being built (which precludes the use of a
  // shuffle directly).
  // This only handles a single source vector, and the vector being built
  // should be of a sub-vector type of the source vector type.
  auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
                                             SmallVectorImpl<int> &SrcIdx) {
    SDValue Vec;
    for (SDValue V : Values) {
      if (isUndef(V)) {
        SrcIdx.push_back(-1);
        continue;
      }
      if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
        return false;
      // All extracts should come from the same vector.
      SDValue T = V.getOperand(0);
      if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
        return false;
      Vec = T;
      // Only constant extract indices are handled.
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
      if (C == nullptr)
        return false;
      int I = C->getSExtValue();
      assert(I >= 0 && "Negative element index");
      SrcIdx.push_back(I);
    }
    SrcVec = Vec;
    return true;
  };

  SmallVector<int,128> ExtIdx;
  SDValue ExtVec;
  if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
    MVT ExtTy = ty(ExtVec);
    unsigned ExtLen = ExtTy.getVectorNumElements();
    if (ExtLen == VecLen || ExtLen == 2*VecLen) {
      // Construct a new shuffle mask that will produce a vector with the same
      // number of elements as the input vector, and such that the vector we
      // want will be the initial subvector of it.
      SmallVector<int,128> Mask;
      BitVector Used(ExtLen);

      for (int M : ExtIdx) {
        Mask.push_back(M);
        if (M >= 0)
          Used.set(M);
      }
      // Fill the rest of the mask with the unused elements of ExtVec in hopes
      // that it will result in a permutation of ExtVec's elements. It's still
      // fine if it doesn't (e.g. if undefs are present, or elements are
      // repeated), but permutations can always be done efficiently via vdelta
      // and vrdelta.
      for (unsigned I = 0; I != ExtLen; ++I) {
        if (Mask.size() == ExtLen)
          break;
        if (!Used.test(I))
          Mask.push_back(I);
      }

      SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
                                       DAG.getUNDEF(ExtTy), Mask);
      if (ExtLen == VecLen)
        return S;
      // Source was a pair: the desired vector is the low half of the shuffle.
      return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S);
    }
  }

  // Find most common element to initialize vector with. This is to avoid
  // unnecessary vinsert/valign for cases where the same value is present
  // many times. Creates a histogram of the vector's elements to find the
  // most common element n.
  // NOTE(review): the inner loop starts at j = i, so VecHist[i] counts only
  // occurrences at or after position i (not a full histogram). This still
  // identifies a frequently-occurring word; confirm whether this is the
  // intended counting scheme.
  assert(4*Words.size() == Subtarget.getVectorLength());
  int VecHist[32];
  int n = 0;
  for (unsigned i = 0; i != NumWords; ++i) {
    VecHist[i] = 0;
    if (Words[i].isUndef())
      continue;
    for (unsigned j = i; j != NumWords; ++j)
      if (Words[i] == Words[j])
        VecHist[i]++;

    if (VecHist[i] > VecHist[n])
      n = i;
  }

  SDValue HalfV = getZero(dl, VecTy, DAG);
  if (VecHist[n] > 1) {
    // Pre-fill both halves with the most common word via a splat.
    SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
    HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
                        {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
  }
  SDValue HalfV0 = HalfV;
  SDValue HalfV1 = HalfV;

  // Construct two halves in parallel, then or them together. Rn and Rm count
  // number of rotations needed before the next element. One last rotation is
  // performed post-loop to position the last element.
  int Rn = 0, Rm = 0;
  SDValue Sn, Sm;
  SDValue N = HalfV0;
  SDValue M = HalfV1;
  for (unsigned i = 0; i != NumWords/2; ++i) {
    // Rotate by element count since last insertion.
    // Words equal to the pre-splatted value need no insertion.
    if (Words[i] != Words[n] || VecHist[n] <= 1) {
      Sn = DAG.getConstant(Rn, dl, MVT::i32);
      HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
      N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV0, Words[i]});
      Rn = 0;
    }
    if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
      Sm = DAG.getConstant(Rm, dl, MVT::i32);
      HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
      M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV1, Words[i+NumWords/2]});
      Rm = 0;
    }
    Rn += 4;
    Rm += 4;
  }
  // Perform last rotation.
  Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
  Sm = DAG.getConstant(Rm, dl, MVT::i32);
  HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
  HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});

  // The two halves were built with disjoint word positions, so a bitwise
  // OR of the (i32-cast) halves merges them losslessly.
  SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
  SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);

  SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});

  // Cast back to the requested element type.
  SDValue OutV =
      DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
  return OutV;
}

// Convert the predicate PredV into a "prefix" byte vector: each i1 element
// is expanded to BitBytes consecutive bytes at the front of a full HVX byte
// vector. With ZeroFill, the bytes past the prefix are zeroed; otherwise
// they are unspecified.
SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  MVT PredTy = ty(PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);

  if (Subtarget.isHVXVectorType(PredTy, true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    // Shuffle every Scale-th byte to the front (the remaining positions of
    // Mask stay 0-initialized, i.e. they read byte 0 -- their content is
    // irrelevant unless ZeroFill masks them off below).
    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
    SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                         {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
    SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
    return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  // Number of bytes each predicate bit currently expands to (via P2D).
  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  // Two word lists used as ping-pong buffers; IdxW selects the current one.
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  auto Lo32 = [&DAG, &dl] (SDValue P) {
    return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, P);
  };
  auto Hi32 = [&DAG, &dl] (SDValue P) {
    return DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, P);
  };

  // P2D transfers the scalar predicate into a 64-bit register pair.
  SDValue W0 = isUndef(PredV)
                  ? DAG.getUNDEF(MVT::i64)
                  : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
  Words[IdxW].push_back(Hi32(W0));
  Words[IdxW].push_back(Lo32(W0));

  // Keep doubling the number of bytes per original predicate bit until it
  // reaches the requested BitBytes.
  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      // Each bit still covers less than a word: widen via expandPredicate.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(W, dl, DAG);
        Words[IdxW].push_back(Hi32(T));
        Words[IdxW].push_back(Lo32(T));
      }
    } else {
      // Whole words are simply duplicated from here on.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
        Words[IdxW].push_back(W);
      }
    }
    Bytes *= 2;
  }

  assert(Bytes == BitBytes);

  // Insert the words front-to-back: rotate by HwLen-4 bytes, then insert
  // the next word at slot 0.
  SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
  SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
  for (const SDValue &W : Words[IdxW]) {
    Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
    Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
  }

  return Vec;
}

// Build a vector predicate (vNi1) from the given boolean values.
SDValue
HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
                                          const SDLoc &dl, MVT VecTy,
                                          SelectionDAG &DAG) const {
  // Construct a vector V of bytes, such that a comparison V >u 0 would
  // produce the required vector predicate.
  unsigned VecLen = Values.size();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(VecLen <= HwLen || VecLen == 8*HwLen);
  SmallVector<SDValue,128> Bytes;
  // Track whether all constant inputs are true/false to emit QTRUE/QFALSE.
  bool AllT = true, AllF = true;

  auto IsTrue = [] (SDValue V) {
    if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
      return !N->isZero();
    return false;
  };
  auto IsFalse = [] (SDValue V) {
    if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
      return N->isZero();
    return false;
  };

  if (VecLen <= HwLen) {
    // In the hardware, each bit of a vector predicate corresponds to a byte
    // of a vector register. Calculate how many bytes does a bit of VecTy
    // correspond to.
    assert(HwLen % VecLen == 0);
    unsigned BitBytes = HwLen / VecLen;
    for (SDValue V : Values) {
      AllT &= IsTrue(V);
      AllF &= IsFalse(V);

      SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
                                 : DAG.getUNDEF(MVT::i8);
      for (unsigned B = 0; B != BitBytes; ++B)
        Bytes.push_back(Ext);
    }
  } else {
    // There are as many i1 values, as there are bits in a vector register.
    // Divide the values into groups of 8 and check that each group consists
    // of the same value (ignoring undefs).
    for (unsigned I = 0; I != VecLen; I += 8) {
      unsigned B = 0;
      // Find the first non-undef value in this group.
      for (; B != 8; ++B) {
        if (!Values[I+B].isUndef())
          break;
      }
      SDValue F = Values[I+B];
      AllT &= IsTrue(F);
      AllF &= IsFalse(F);

      SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
                            : DAG.getUNDEF(MVT::i8);
      Bytes.push_back(Ext);
      // Verify that the rest of values in the group are the same as the
      // first.
      for (; B != 8; ++B)
        assert(Values[I+B].isUndef() || Values[I+B] == F);
    }
  }

  if (AllT)
    return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
  if (AllF)
    return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);

  // General case: build the byte vector, then compare it against 0 (V2Q).
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}

// Extract a single element (8..32 bits wide) from an HVX vector register,
// returning it as a scalar of type ResTy.
SDValue
HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT ElemTy = ty(VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // VEXTRACTW extracts the aligned 32-bit word containing the element.
  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
  SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                               {VecV, ByteIdx});
  if (ElemTy == MVT::i32)
    return ExWord;

  // Have an extracted word, need to extract the smaller element out of it.
  // 1. Extract the bits of (the original) IdxV that correspond to the index
  //    of the desired element in the 32-bit word.
  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
  // 2. Extract the element from the word.
  SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
  return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
}

// Extract a single i1 element from an HVX vector predicate.
SDValue
HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  // Implement other return types if necessary.
  assert(ResTy == MVT::i1);

  // Expand the predicate to a byte vector, then extract the byte.
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);

  // Each i1 element corresponds to Scale consecutive bytes of ByteVec.
  unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
  SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
  IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);

  // A non-zero extracted byte means the predicate bit was set.
  SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
  SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
  return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
}

// Insert a single element (8..32 bits wide) into an HVX vector register.
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT ElemTy = ty(VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // Insert a 32-bit word at a (word-aligned) byte index: rotate the target
  // word down to slot 0, insert there, then rotate back.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    // Round the byte index down to a multiple of 4.
    SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32,
                                {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)});
    SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
    SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
    SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
  SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
                                {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
  SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
                                     dl, MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(ty(Ext), ElemTy);
  SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
                             ValV, SubIdx, dl, ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}

// Insert a single i1 element into an HVX vector predicate: do the insertion
// on the Q2V byte-vector form, then convert back with V2Q.
SDValue
HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);

  unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
  SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
  IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
  ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);

  SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
  return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
}

// Extract a subvector (a single vector of a pair, or a 32/64-bit piece)
// from an HVX vector register. IdxV must be constant.
SDValue
HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // If the source vector is a vector pair, get the single vector containing
  // the subvector of interest. The subvector will never overlap two single
  // vectors.
  if (isHvxPairTy(VecTy)) {
    // Select the half of the pair that holds the subvector, and rebase
    // Idx to be relative to the start of that single vector.
    unsigned SubIdx;
    if (Idx * ElemWidth >= 8*HwLen) {
      SubIdx = Hexagon::vsub_hi;
      Idx -= VecTy.getVectorNumElements() / 2;
    } else {
      SubIdx = Hexagon::vsub_lo;
    }
    VecTy = typeSplit(VecTy).first;
    VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
    if (VecTy == ResTy)
      return VecV;
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);

  MVT WordTy = tyVector(VecTy, MVT::i32);
  SDValue WordVec = DAG.getBitcast(WordTy, VecV);
  unsigned WordIdx = (Idx*ElemWidth) / 32;

  // Extract the first (or only) 32-bit word.
  SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
  SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
  if (ResTy.getSizeInBits() == 32)
    return DAG.getBitcast(ResTy, W0);

  // 64-bit result: extract the following word as well, and combine the two
  // words into a register pair.
  SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
  SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
  SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {W1, W0});
  return DAG.getBitcast(ResTy, WW);
}

// Extract a subvector from an HVX vector predicate. The result is either a
// shorter HVX predicate, or a scalar predicate (v2i1/v4i1/v8i1).
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // IdxV is required to be a constant.
  unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();

  unsigned ResLen = ResTy.getVectorNumElements();
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(ResTy, true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
1180 for (unsigned i = 0; i != ResLen; ++i) { 1181 for (unsigned j = 0; j != Rep; ++j) 1182 Mask.push_back(Offset + i*BitBytes); 1183 } 1184 } 1185 1186 SDValue Zero = getZero(dl, MVT::i32, DAG); 1187 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); 1188 // Combine the two low words from ShuffV into a v8i8, and byte-compare 1189 // them against 0. 1190 SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero}); 1191 SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, 1192 {ShuffV, DAG.getConstant(4, dl, MVT::i32)}); 1193 SDValue Vec64 = DAG.getNode(HexagonISD::COMBINE, dl, MVT::v8i8, {W1, W0}); 1194 return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy, 1195 {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG); 1196 } 1197 1198 SDValue 1199 HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV, 1200 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { 1201 MVT VecTy = ty(VecV); 1202 MVT SubTy = ty(SubV); 1203 unsigned HwLen = Subtarget.getVectorLength(); 1204 MVT ElemTy = VecTy.getVectorElementType(); 1205 unsigned ElemWidth = ElemTy.getSizeInBits(); 1206 1207 bool IsPair = isHvxPairTy(VecTy); 1208 MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth); 1209 // The two single vectors that VecV consists of, if it's a pair. 1210 SDValue V0, V1; 1211 SDValue SingleV = VecV; 1212 SDValue PickHi; 1213 1214 if (IsPair) { 1215 V0 = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, VecV); 1216 V1 = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, VecV); 1217 1218 SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(), 1219 dl, MVT::i32); 1220 PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT); 1221 if (isHvxSingleTy(SubTy)) { 1222 if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) { 1223 unsigned Idx = CN->getZExtValue(); 1224 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2); 1225 unsigned SubIdx = (Idx == 0) ? 
Hexagon::vsub_lo : Hexagon::vsub_hi; 1226 return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV); 1227 } 1228 // If IdxV is not a constant, generate the two variants: with the 1229 // SubV as the high and as the low subregister, and select the right 1230 // pair based on the IdxV. 1231 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1}); 1232 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV}); 1233 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); 1234 } 1235 // The subvector being inserted must be entirely contained in one of 1236 // the vectors V0 or V1. Set SingleV to the correct one, and update 1237 // IdxV to be the index relative to the beginning of that vector. 1238 SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV); 1239 IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV); 1240 SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0); 1241 } 1242 1243 // The only meaningful subvectors of a single HVX vector are those that 1244 // fit in a scalar register. 1245 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64); 1246 // Convert IdxV to be index in bytes. 1247 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode()); 1248 if (!IdxN || !IdxN->isZero()) { 1249 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, 1250 DAG.getConstant(ElemWidth/8, dl, MVT::i32)); 1251 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV); 1252 } 1253 // When inserting a single word, the rotation back to the original position 1254 // would be by HwLen-Idx, but if two words are inserted, it will need to be 1255 // by (HwLen-4)-Idx. 
1256 unsigned RolBase = HwLen; 1257 if (VecTy.getSizeInBits() == 32) { 1258 SDValue V = DAG.getBitcast(MVT::i32, SubV); 1259 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, V); 1260 } else { 1261 SDValue V = DAG.getBitcast(MVT::i64, SubV); 1262 SDValue R0 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, V); 1263 SDValue R1 = DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, V); 1264 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0); 1265 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, 1266 DAG.getConstant(4, dl, MVT::i32)); 1267 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1); 1268 RolBase = HwLen-4; 1269 } 1270 // If the vector wasn't ror'ed, don't ror it back. 1271 if (RolBase != 4 || !IdxN || !IdxN->isZero()) { 1272 SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32, 1273 DAG.getConstant(RolBase, dl, MVT::i32), IdxV); 1274 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV); 1275 } 1276 1277 if (IsPair) { 1278 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1}); 1279 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV}); 1280 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); 1281 } 1282 return SingleV; 1283 } 1284 1285 SDValue 1286 HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV, 1287 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { 1288 MVT VecTy = ty(VecV); 1289 MVT SubTy = ty(SubV); 1290 assert(Subtarget.isHVXVectorType(VecTy, true)); 1291 // VecV is an HVX vector predicate. SubV may be either an HVX vector 1292 // predicate as well, or it can be a scalar predicate. 
  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Ratio of the target and subvector element counts.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  unsigned BitBytes = HwLen / VecLen;
  unsigned BlockLen = HwLen / Scale;

  // Work on byte vectors: expand VecV with Q2V, and build the "prefix"
  // byte-vector form of SubV.
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to byte position 0.
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}

// Extend (sign/zero/any) the vector predicate VecV to the HVX vector type
// ResTy.
SDValue
HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
      MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
  // Sign- and any-extending of a vector predicate to a vector register is
  // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
  // a vector of 1s (where the 1s are of type matching the vector type).
  assert(Subtarget.isHVXVectorType(ResTy));
  if (!ZeroExt)
    return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);

  assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
  SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                             DAG.getConstant(1, dl, MVT::i32));
  SDValue False = getZero(dl, ResTy, DAG);
  return DAG.getSelect(dl, ResTy, VecV, True, False);
}

SDValue
HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
      MVT ResTy, SelectionDAG &DAG) const {
  // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
  // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
  // vector register. The remaining bits of the vector register are
  // unspecified.

  MachineFunction &MF = DAG.getMachineFunction();
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT PredTy = ty(VecQ);
  unsigned PredLen = PredTy.getVectorNumElements();
  assert(HwLen % PredLen == 0);
  MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);

  Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
  SmallVector<Constant*, 128> Tmp;
  // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
  // These are bytes with the LSB rotated left with respect to their index.
  for (unsigned i = 0; i != HwLen/8; ++i) {
    for (unsigned j = 0; j != 8; ++j)
      Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
  }
  Constant *CV = ConstantVector::get(Tmp);
  Align Alignment(HwLen);
  SDValue CP =
      LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
  SDValue Bytes =
      DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
                  MachinePointerInfo::getConstantPool(MF), Alignment);

  // Select the bytes that correspond to true bits in the vector predicate.
  SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
                              getZero(dl, VecTy, DAG));
  // Calculate the OR of all bytes in each group of 8. That will compress
  // all the individual bits into a single byte.
  // First, OR groups of 4, via vrmpy with 0x01010101.
  SDValue All1 =
      DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
  SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
  // Then rotate the accumulated vector by 4 bytes, and do the final OR.
  SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
                         {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)},
                         DAG);
  SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});

  // Pick every 8th byte and coalesce them at the beginning of the output.
  // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
  // byte and so on.
  SmallVector<int,128> Mask;
  for (unsigned i = 0; i != HwLen; ++i)
    Mask.push_back((8*i) % HwLen + i/(HwLen/8));
  SDValue Collect =
      DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
  return DAG.getBitcast(ResTy, Collect);
}

// Lower a BUILD_VECTOR of an HVX vector type.
SDValue
HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
      const {
  const SDLoc &dl(Op);
  MVT VecTy = ty(Op);

  unsigned Size = Op.getNumOperands();
  SmallVector<SDValue,128> Ops;
  for (unsigned i = 0; i != Size; ++i)
    Ops.push_back(Op.getOperand(i));

  // First, split the BUILD_VECTOR for vector pairs. We could generate
  // some pairs directly (via splat), but splats should be generated
  // by the combiner prior to getting here.
  if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
    ArrayRef<SDValue> A(Ops);
    MVT SingleTy = typeSplit(VecTy).first;
    SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
    SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
  }

  if (VecTy.getVectorElementType() == MVT::i1)
    return buildHvxVectorPred(Ops, dl, VecTy, DAG);

  // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
  // not a legal type, just bitcast the node to use i16
  // types and bitcast the result back to f16
  if (VecTy.getVectorElementType() == MVT::f16) {
    SmallVector<SDValue,64> NewOps;
    for (unsigned i = 0; i != Size; i++)
      NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));

    SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
                             tyVector(VecTy, MVT::i16), NewOps);
    return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
  }

  return buildHvxVectorReg(Ops, dl, VecTy, DAG);
}

// Lower SPLAT_VECTOR of f16 (splat the i16 bit pattern instead); other
// splats are left for default handling (returns empty SDValue).
SDValue
HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
      const {
  const SDLoc &dl(Op);
  MVT VecTy = ty(Op);
  MVT ArgTy = ty(Op.getOperand(0));

  if (ArgTy == MVT::f16) {
    MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
    SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
    SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
    SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
    return DAG.getBitcast(VecTy, Splat);
  }

  return SDValue();
}

SDValue
HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
      const {
  // Vector concatenation of two integer (non-bool) vectors does not need
  // special lowering. Custom-lower concats of bool vectors and expand
  // concats of more than 2 vectors.
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  unsigned NumOp = Op.getNumOperands();
  if (VecTy.getVectorElementType() != MVT::i1) {
    if (NumOp == 2)
      return Op;
    // Expand the other cases into a build-vector.
    SmallVector<SDValue,8> Elems;
    for (SDValue V : Op.getNode()->ops())
      DAG.ExtractVectorElements(V, Elems);
    // A vector of i16 will be broken up into a build_vector of i16's.
    // This is a problem, since at the time of operation legalization,
    // all operations are expected to be type-legalized, and i16 is not
    // a legal type. If any of the extracted elements is not of a valid
    // type, sign-extend it to a valid one.
    for (unsigned i = 0, e = Elems.size(); i != e; ++i) {
      SDValue V = Elems[i];
      MVT Ty = ty(V);
      if (!isTypeLegal(Ty)) {
        EVT NTy = getTypeToTransformTo(*DAG.getContext(), Ty);
        if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
          // Re-extract at the legal width, then sign-extend in-register.
          Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
                                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
                                             V.getOperand(0), V.getOperand(1)),
                                 DAG.getValueType(Ty));
          continue;
        }
        // A few less complicated cases.
        switch (V.getOpcode()) {
        case ISD::Constant:
          Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy);
          break;
        case ISD::UNDEF:
          Elems[i] = DAG.getUNDEF(NTy);
          break;
        case ISD::TRUNCATE:
          Elems[i] = V.getOperand(0);
          break;
        default:
          llvm_unreachable("Unexpected vector element");
        }
      }
    }
    return DAG.getBuildVector(VecTy, dl, Elems);
  }

  assert(VecTy.getVectorElementType() == MVT::i1);
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);

  SDValue Op0 = Op.getOperand(0);

  // If the operands are HVX types (i.e. not scalar predicates), then
  // defer the concatenation, and create QCAT instead.
  if (Subtarget.isHVXVectorType(ty(Op0), true)) {
    if (NumOp == 2)
      return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));

    ArrayRef<SDUse> U(Op.getNode()->ops());
    SmallVector<SDValue,4> SV(U.begin(), U.end());
    ArrayRef<SDValue> Ops(SV);

    // Reduce recursively: concatenate each half, then QCAT the halves.
    MVT HalfTy = typeSplit(VecTy).first;
    SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
                             Ops.take_front(NumOp/2));
    SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
                             Ops.take_back(NumOp/2));
    return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
  }

  // Count how many bytes (in a vector register) each bit in VecTy
  // corresponds to.
1542 unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); 1543 1544 SmallVector<SDValue,8> Prefixes; 1545 for (SDValue V : Op.getNode()->op_values()) { 1546 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG); 1547 Prefixes.push_back(P); 1548 } 1549 1550 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements(); 1551 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); 1552 SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32); 1553 SDValue Res = getZero(dl, ByteTy, DAG); 1554 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) { 1555 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S); 1556 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]); 1557 } 1558 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res); 1559 } 1560 1561 SDValue 1562 HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) 1563 const { 1564 // Change the type of the extracted element to i32. 1565 SDValue VecV = Op.getOperand(0); 1566 MVT ElemTy = ty(VecV).getVectorElementType(); 1567 const SDLoc &dl(Op); 1568 SDValue IdxV = Op.getOperand(1); 1569 if (ElemTy == MVT::i1) 1570 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG); 1571 1572 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG); 1573 } 1574 1575 SDValue 1576 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) 1577 const { 1578 const SDLoc &dl(Op); 1579 MVT VecTy = ty(Op); 1580 SDValue VecV = Op.getOperand(0); 1581 SDValue ValV = Op.getOperand(1); 1582 SDValue IdxV = Op.getOperand(2); 1583 MVT ElemTy = ty(VecV).getVectorElementType(); 1584 if (ElemTy == MVT::i1) 1585 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG); 1586 1587 if (ElemTy == MVT::f16) { 1588 SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, 1589 tyVector(VecTy, MVT::i16), 1590 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV), 1591 DAG.getBitcast(MVT::i16, ValV), IdxV); 1592 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0); 1593 } 1594 1595 return insertHvxElementReg(VecV, 
IdxV, ValV, dl, DAG); 1596 } 1597 1598 SDValue 1599 HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) 1600 const { 1601 SDValue SrcV = Op.getOperand(0); 1602 MVT SrcTy = ty(SrcV); 1603 MVT DstTy = ty(Op); 1604 SDValue IdxV = Op.getOperand(1); 1605 unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); 1606 assert(Idx % DstTy.getVectorNumElements() == 0); 1607 (void)Idx; 1608 const SDLoc &dl(Op); 1609 1610 MVT ElemTy = SrcTy.getVectorElementType(); 1611 if (ElemTy == MVT::i1) 1612 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG); 1613 1614 return extractHvxSubvectorReg(SrcV, IdxV, dl, DstTy, DAG); 1615 } 1616 1617 SDValue 1618 HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) 1619 const { 1620 // Idx does not need to be a constant. 1621 SDValue VecV = Op.getOperand(0); 1622 SDValue ValV = Op.getOperand(1); 1623 SDValue IdxV = Op.getOperand(2); 1624 1625 const SDLoc &dl(Op); 1626 MVT VecTy = ty(VecV); 1627 MVT ElemTy = VecTy.getVectorElementType(); 1628 if (ElemTy == MVT::i1) 1629 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG); 1630 1631 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG); 1632 } 1633 1634 SDValue 1635 HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const { 1636 // Lower any-extends of boolean vectors to sign-extends, since they 1637 // translate directly to Q2V. Zero-extending could also be done equally 1638 // fast, but Q2V is used/recognized in more places. 1639 // For all other vectors, use zero-extend. 
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  MVT ElemTy = ty(InpV).getVectorElementType();
  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
    return LowerHvxSignExt(Op, DAG);
  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
}

// Lower SIGN_EXTEND: only boolean-vector inputs need custom handling
// (extendHvxVectorPred with ZeroExt=false); everything else is left as is.
SDValue
HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  MVT ElemTy = ty(InpV).getVectorElementType();
  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
    return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
  return Op;
}

// Lower ZERO_EXTEND: only boolean-vector inputs need custom handling
// (extendHvxVectorPred with ZeroExt=true); everything else is left as is.
SDValue
HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  MVT ElemTy = ty(InpV).getVectorElementType();
  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
    return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
  return Op;
}

SDValue
HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
  // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
  // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
  const SDLoc &dl(Op);
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  assert(ResTy == ty(InpV));

  // Calculate the vectors of 1 and bitwidth(x).
  MVT ElemTy = ty(InpV).getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                             DAG.getConstant(1, dl, MVT::i32));
  SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                             DAG.getConstant(ElemWidth, dl, MVT::i32));
  SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                              DAG.getConstant(-1, dl, MVT::i32));

  // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
  // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
  // it separately in custom combine or selection).
  SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
                          {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
                           DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
  return DAG.getNode(ISD::SUB, dl, ResTy,
                     {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
}

// Lower MULHS/MULHU (high half of the widened product) for HVX vectors of
// i8, i16 and i32 elements.
SDValue
HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  assert(ResTy.isVector());
  const SDLoc &dl(Op);
  SmallVector<int,256> ShuffMask;

  MVT ElemTy = ResTy.getVectorElementType();
  unsigned VecLen = ResTy.getVectorNumElements();
  SDValue Vs = Op.getOperand(0);
  SDValue Vt = Op.getOperand(1);
  bool IsSigned = Op.getOpcode() == ISD::MULHS;

  if (ElemTy == MVT::i8 || ElemTy == MVT::i16) {
    // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
    // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
    // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
    // For i16, use V6_vmpyhv, which behaves in an analogous way to
    // V6_vmpybv: results Lo and Hi are products of even/odd elements
    // respectively.
    MVT ExtTy = typeExtElem(ResTy, 2);
    unsigned MpyOpc = ElemTy == MVT::i8
        ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv)
        : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv);
    SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);

    // Discard low halves of the resulting values, collect the high halves.
    for (unsigned I = 0; I < VecLen; I += 2) {
      ShuffMask.push_back(I+1);         // Pick even element.
      ShuffMask.push_back(I+VecLen+1);  // Pick odd element.
    }
    VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
    SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
    return DAG.getBitcast(ResTy, BS);
  }

  assert(ElemTy == MVT::i32);
  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // Signed 32-bit high-multiply for HVX v60 (no 32x32 widening multiply).
  auto MulHS_V60 = [&](SDValue Vs, SDValue Vt) {
    // mulhs(Vs,Vt) =
    //   = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32
    //   = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16
    //      + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32
    //   = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16
    //      + Lo(Vs) *us Vt] >> 32
    // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to
    // anything, so it cannot produce any carry over to higher bits),
    // so everything in [] can be shifted by 16 without loss of precision.
    //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16
    //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16
    // Denote Hi(Vs) = Vs':
    //   = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16
    //   = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16
    SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG);
    // Get Vs':
    SDValue S0 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
                          {T0, S0, Vt}, DAG);
    // Shift by 16:
    SDValue S2 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG);
    // Get Vs'*Hi(Vt):
    SDValue T2 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG);
    // Add:
    SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2});
    return T3;
  };

  // Signed 32-bit high-multiply for HVX v62+: use the 64-bit-result
  // multiply pair and keep the high vector of the pair.
  auto MulHS_V62 = [&](SDValue Vs, SDValue Vt) {
    MVT PairTy = typeJoin({ResTy, ResTy});
    SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {Vs, Vt}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy,
                          {T0, Vs, Vt}, DAG);
    return opSplit(T1, dl, DAG).second;
  };

  if (IsSigned) {
    if (Subtarget.useHVXV62Ops())
      return MulHS_V62(Vs, Vt);
    return MulHS_V60(Vs, Vt);
  }

  // Unsigned mulhw. (Would expansion using signed mulhw be better?)

  auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) {
    return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair);
  };
  auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) {
    return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair);
  };

  MVT PairTy = typeJoin({ResTy, ResTy});
  // P: vdelta control to swap the halfwords within each word of Vt.
  SDValue P = getInstr(Hexagon::V6_lvsplatw, dl, ResTy,
                       {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  // Multiply-unsigned halfwords:
  //   LoVec = Vs.uh[2i] * Vt.uh[2i],
  //   HiVec = Vs.uh[2i+1] * Vt.uh[2i+1]
  SDValue T0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG);
  // The low halves in the LoVec of the pair can be discarded. They are
  // not added to anything (in the full-precision product), so they cannot
  // produce a carry into the higher bits.
  SDValue T1 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG);
  // Swap low and high halves in Vt, and do the halfword multiplication
  // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i].
  SDValue D0 = getInstr(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG);
  SDValue T2 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG);
  // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs).
  // These products are words, but cannot be added directly because the
  // sums could overflow. Add these products, by halfwords, where each sum
  // of a pair of halfwords gives a word.
  SDValue T3 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {LoVec(T2), HiVec(T2)}, DAG);
  // Add the high halfwords from the products of the low halfwords.
  SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)});
  SDValue T5 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG);
  SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)});
  SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6});
  return T7;
}

// Lower BITCAST between HVX predicate vectors and scalar integers.
// Other bitcasts are left unchanged.
SDValue
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
  SDValue Val = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT ValTy = ty(Val);
  const SDLoc &dl(Op);

  // Predicate -> scalar integer (e.g. v32i1 -> i32): compress the
  // predicate into words and extract/combine them.
  if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
    unsigned HwLen = Subtarget.getVectorLength();
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
    unsigned BitWidth = ResTy.getSizeInBits();

    if (BitWidth < 64) {
      SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
          dl, MVT::i32, DAG);
      if (BitWidth == 32)
        return W0;
      assert(BitWidth < 32u);
      return DAG.getZExtOrTrunc(W0, dl, ResTy);
    }

    // The result is >= 64 bits. The only options are 64 or 128.
    assert(BitWidth == 64 || BitWidth == 128);
    SmallVector<SDValue,4> Words;
    for (unsigned i = 0; i != BitWidth/32; ++i) {
      SDValue W = extractHvxElementReg(
          VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
      Words.push_back(W);
    }
    // Pair up adjacent words into i64 values.
    SmallVector<SDValue,2> Combines;
    assert(Words.size() % 2 == 0);
    for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
      SDValue C = DAG.getNode(
          HexagonISD::COMBINE, dl, MVT::i64, {Words[i+1], Words[i]});
      Combines.push_back(C);
    }

    if (BitWidth == 64)
      return Combines[0];

    return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
  }
  if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
    // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
    unsigned BitWidth = ValTy.getSizeInBits();
    unsigned HwLen = Subtarget.getVectorLength();
    assert(BitWidth == HwLen);

    MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
    SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
    // Splat each byte of Val 8 times.
    // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
    // where b0, b1,..., b15 are least to most significant bytes of I.
    SmallVector<SDValue, 128> Bytes;
    // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
    // These are bytes with the LSB rotated left with respect to their index.
    SmallVector<SDValue, 128> Tmp;
    for (unsigned I = 0; I != HwLen / 8; ++I) {
      SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
      SDValue Byte =
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
      for (unsigned J = 0; J != 8; ++J) {
        Bytes.push_back(Byte);
        Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
      }
    }

    MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
    SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
    SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);

    // Each Byte in the I2V will be set iff corresponding bit is set in Val.
    I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
  }

  return Op;
}

// Lower ANY_EXTEND_VECTOR_INREG as ZERO_EXTEND_VECTOR_INREG.
SDValue
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
  // Sign- and zero-extends are legal.
  assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
  return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
                     Op.getOperand(0));
}

// Lower SELECT producing a predicate vector: select on the byte-vector
// (Q2V) forms of the true/false operands, then convert back with V2Q.
SDValue
HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  if (ResTy.getVectorElementType() != MVT::i1)
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned VecLen = ResTy.getVectorNumElements();
  assert(HwLen % VecLen == 0);
  unsigned ElemSize = HwLen / VecLen;

  MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
  SDValue S =
      DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
                  DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
                  DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
  return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
}

// Lower vector shifts with a splatted (scalar) shift amount; other shifts
// are left unchanged.
SDValue
HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
  if (SDValue S = getVectorShiftByInt(Op, DAG))
    return S;
  return Op;
}

// Lower INTRINSIC_WO_CHAIN: only the pred_typecast intrinsic needs custom
// handling (mapped to HexagonISD::TYPECAST, or elided when it's a no-op).
SDValue
HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  MVT ResTy = ty(Op);

  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  bool Use64b = Subtarget.useHVX64BOps();
  unsigned IntPredCast = Use64b ? Intrinsic::hexagon_V6_pred_typecast
                                : Intrinsic::hexagon_V6_pred_typecast_128B;
  if (IntNo == IntPredCast) {
    SDValue Vs = Op.getOperand(1);
    MVT OpTy = ty(Vs);
    if (isHvxBoolTy(ResTy) && isHvxBoolTy(OpTy)) {
      if (ResTy == OpTy)
        return Vs;
      return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Vs);
    }
  }

  return Op;
}

// Lower MLOAD/MSTORE. Masked loads become a full load plus VSELECT with
// the pass-through value; masked stores use the predicated store
// instruction (with a two-store sequence for unaligned cases).
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
    if (isUndef(Thru))
      return Load;
    SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
    return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));

  if (MaskN->getAlign().value() % HwLen == 0) {
    // Aligned: a single predicated store suffices.
    SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
                             {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
    return Store;
  }

  // Unaligned case.
  // Split V at the alignment boundary: LoV holds the part of V at or below
  // the boundary, HiV the part above it (each aligned to a full vector).
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, ty(V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
    SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
    return std::make_pair(LoV, HiV);
  };

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  // Align the mask the same way as the value, via its byte-vector form.
  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
                      DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  // Two aligned predicated stores cover the unaligned region.
  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
  SDValue StoreLo =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}

// Lower FP_EXTEND v64f16 -> v64f32 using QFloat: multiply by 1.0 to widen
// to qf32, convert each half to IEEE sf, and re-interleave the halves.
SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
                                                SelectionDAG &DAG) const {
  // This conversion only applies to QFloat.
  assert(Subtarget.useHVXQFloatOps());

  assert(Op->getOpcode() == ISD::FP_EXTEND);

  MVT VecTy = ty(Op);
  MVT ArgTy = ty(Op.getOperand(0));
  const SDLoc &dl(Op);
  assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);

  SDValue F16Vec = Op.getOperand(0);

  // Build an f16 splat of 1.0 to use as the multiplier.
  APFloat FloatVal = APFloat(1.0f);
  bool Ignored;
  FloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
  SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
  SDValue VmpyVec =
      getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);

  MVT HalfTy = typeSplit(VecTy).first;
  VectorPair Pair = opSplit(VmpyVec, dl, DAG);
  SDValue LoVec =
      getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
  SDValue HiVec =
      getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);

  // Interleave the two halves back into element order.
  SDValue ShuffVec =
      getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
               {HiVec, LoVec, DAG.getConstant(-4, dl, MVT::i32)}, DAG);

  return ShuffVec;
}

// Decide how FP<->int conversions are handled under IEEE HVX: return Op
// for the legal f16<->i8/i16 cases, SDValue() otherwise (default handling).
SDValue
HexagonTargetLowering::LowerHvxConvertFpInt(SDValue Op, SelectionDAG &DAG)
      const {
  // This conversion only applies to IEEE.
  assert(Subtarget.useHVXIEEEFPOps());

  unsigned Opc = Op.getOpcode();
  // Catch invalid conversion ops (just in case).
  assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
         Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
  MVT ResTy = ty(Op);

  if (Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT) {
    MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
    // There are only conversions of f16.
    if (FpTy != MVT::f16)
      return SDValue();

    MVT IntTy = ResTy.getVectorElementType();
    // Other int types aren't legal in HVX, so we shouldn't see them here.
    assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
    // Conversions to i8 and i16 are legal.
    if (IntTy == MVT::i8 || IntTy == MVT::i16)
      return Op;
  } else {
    // Converting int -> fp.
    if (ResTy.getVectorElementType() != MVT::f16)
      return SDValue();
    MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
    // Other int types aren't legal in HVX, so we shouldn't see them here.
    assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
    // i8, i16 -> f16 is legal.
    if (IntTy == MVT::i8 || IntTy == MVT::i16)
      return Op;
  }

  return SDValue();
}

// Split an operation on an HVX vector pair into two operations on the
// single-vector halves, then concatenate the results.
SDValue
HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
  assert(!Op.isMachineOpcode());
  SmallVector<SDValue,2> OpsL, OpsH;
  const SDLoc &dl(Op);

  // A VT operand (e.g. in SIGN_EXTEND_INREG) is "split" by halving the
  // vector type it denotes.
  auto SplitVTNode = [&DAG,this] (const VTSDNode *N) {
    MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
    SDValue TV = DAG.getValueType(Ty);
    return std::make_pair(TV, TV);
  };

  for (SDValue A : Op.getNode()->ops()) {
    VectorPair P = Subtarget.isHVXVectorType(ty(A), true)
                       ? opSplit(A, dl, DAG)
                       : std::make_pair(A, A);
    // Special case for type operand.
    if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
      if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
        P = SplitVTNode(N);
    }
    OpsL.push_back(P.first);
    OpsH.push_back(P.second);
  }

  MVT ResTy = ty(Op);
  MVT HalfTy = typeSplit(ResTy).first;
  SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
  SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
  SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H);
  return S;
}

// Split a (masked) load/store of an HVX vector pair into two single-vector
// memory operations at offsets 0 and HwLen.
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  auto *MemN = cast<MemSDNode>(Op.getNode());

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);
  unsigned MemOpc = MemN->getOpcode();

  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    // For masked ops the number of bytes actually accessed depends on the
    // mask, so the size is unknown.
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
    SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Load0.getValue(1), Load1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
                          Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
                          Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
                                         Masks.first, SingleTy, MOp0,
                                         ISD::UNINDEXED, false, false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
                                         Masks.second, SingleTy, MOp1,
                                         ISD::UNINDEXED, false, false);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
  llvm_unreachable(Name.c_str());
}

// Widen a too-short vector load to a full HVX vector: emit a masked load
// of a whole vector with only the first ResLen bytes enabled.
SDValue
HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  auto *LoadN = cast<LoadSDNode>(Op.getNode());
  assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
  assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening loads of i1 yet");

  SDValue Chain = LoadN->getChain();
  SDValue Base = LoadN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  MVT ResTy = ty(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned ResLen = ResTy.getStoreSize();
  assert(ResLen < HwLen && "vsetq(v1) prerequisite");

  // Mask enabling the first ResLen bytes.
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                          {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);

  MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);

  SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
                                   DAG.getUNDEF(LoadTy), LoadTy, MemOp,
                                   ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
  SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
  return DAG.getMergeValues({Value, Chain}, dl);
}

// Widen a too-short vector store to a full HVX vector: pad the value with
// undef up to a full vector and emit a masked store of the original bytes.
SDValue
HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  auto *StoreN = cast<StoreSDNode>(Op.getNode());
  assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
  assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening stores of i1 yet");

  SDValue Chain = StoreN->getChain();
  SDValue Base = StoreN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
  MVT ValueTy = ty(Value);
  unsigned ValueLen = ValueTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(ValueLen));

  // Keep doubling the value with undef in the upper half until it fills
  // a full HVX vector.
  for (unsigned Len = ValueLen; Len < HwLen; ) {
    Value = opJoin({DAG.getUNDEF(ty(Value)), Value}, dl, DAG);
    Len = ty(Value).getVectorNumElements(); // This is Len *= 2
  }
  assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia

  assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                          {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
  return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
                            MemOp, ISD::UNINDEXED, false, false);
}

// Widen a SETCC on a too-short vector type: pad the operands with undef to
// a full HVX type, compare, and extract the original-length result.
SDValue
HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
  MVT ElemTy = ty(Op0).getVectorElementType();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
  assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
  MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
  if (!Subtarget.isHVXVectorType(WideOpTy, true))
    return SDValue();

  SDValue WideOp0 =
      appendUndef(Op0, WideOpTy, DAG);
  SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
  EVT ResTy =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
  SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
                              {WideOp0, WideOp1, Op.getOperand(2)});

  // Extract the sub-result of the type the original operation was supposed
  // to be legalized to.
  EVT RetTy = getTypeToTransformTo(*DAG.getContext(), ty(Op));
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
                     {SetCC, getZero(dl, MVT::i32, DAG)});
}

// Widen a SIGN_EXTEND/ZERO_EXTEND whose operand type is too short for HVX:
// pad the input with undef and use VUNPACK/VUNPACKU to do the extension.
SDValue
HexagonTargetLowering::WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwWidth = 8*Subtarget.getVectorLength();

  SDValue Op0 = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT OpTy = ty(Op0);
  if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
    return SDValue();

  // .-res, op->  ScalarVec  Illegal      HVX
  // Scalar       ok         -            -
  // Illegal      widen(insert) widen     -
  // HVX          -          widen        ok

  // Number of times Ty must be repeated to fill a full HVX register (1 if
  // it's already at least that wide).
  auto getFactor = [HwWidth](MVT Ty) {
    unsigned Width = Ty.getSizeInBits();
    return HwWidth > Width ? HwWidth / Width : 1;
  };

  auto getWideTy = [getFactor](MVT Ty) {
    unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
    return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
  };

  unsigned Opcode = Op.getOpcode() == ISD::SIGN_EXTEND ? HexagonISD::VUNPACK
                                                       : HexagonISD::VUNPACKU;
  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
  SDValue WideRes = DAG.getNode(Opcode, dl, getWideTy(ResTy), WideOp);
  return WideRes;
}

// Widen a TRUNCATE whose operand or result type is too short for HVX:
// use VPACKL on the (possibly undef-padded) input.
SDValue
HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwWidth = 8*Subtarget.getVectorLength();

  SDValue Op0 = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT OpTy = ty(Op0);
  if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
    return SDValue();

  // .-res, op->  ScalarVec      Illegal   HVX
  // Scalar       ok             extract(widen)  -
  // Illegal      -              widen     widen
  // HVX          -              -         ok

  auto getFactor = [HwWidth](MVT Ty) {
    unsigned Width = Ty.getSizeInBits();
    assert(HwWidth % Width == 0);
    return HwWidth / Width;
  };

  auto getWideTy = [getFactor](MVT Ty) {
    unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
    return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
  };

  // The input is already a legal HVX type: pack directly.
  if (Subtarget.isHVXVectorType(OpTy))
    return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0);

  assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?");

  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
  SDValue WideRes = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy),
                                WideOp);
  // If the original result wasn't legal and was supposed to be widened,
  // we're done.
  if (shouldWidenToHvx(ResTy, DAG))
    return WideRes;

  // The original result type wasn't meant to be widened to HVX, so
  // leave it as it is. Standard legalization should be able to deal
  // with it (since now it's a result of a target-independent ISD
  // node).
  assert(ResTy.isVector());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
                     {WideRes, getZero(dl, MVT::i32, DAG)});
}

// Main custom-lowering dispatch for HVX operations. Pair-typed operations
// are split into single-vector halves first; the rest are routed to the
// per-opcode LowerHvx* handlers.
SDValue
HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  // An operation is a "pair op" if its result or any operand is an HVX
  // vector pair.
  bool IsPairOp = isHvxPairTy(ty(Op)) ||
                  llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
                    return isHvxPairTy(ty(V));
                  });

  if (IsPairOp) {
    switch (Opc) {
      default:
        break;
      case ISD::LOAD:
      case ISD::STORE:
      case ISD::MLOAD:
      case ISD::MSTORE:
        return SplitHvxMemOp(Op, DAG);
      case ISD::SINT_TO_FP:
      case ISD::UINT_TO_FP:
      case ISD::FP_TO_SINT:
      case ISD::FP_TO_UINT:
        // Only split when result and operand are the same width; otherwise
        // the halves would not line up.
        if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
          return SplitHvxPairOp(Op, DAG);
        break;
      case ISD::CTPOP:
      case ISD::CTLZ:
      case ISD::CTTZ:
      case ISD::MUL:
      case ISD::FADD:
      case ISD::FSUB:
      case ISD::FMUL:
      case ISD::FMINNUM:
      case ISD::FMAXNUM:
      case ISD::MULHS:
      case ISD::MULHU:
      case ISD::AND:
      case ISD::OR:
      case ISD::XOR:
      case ISD::SRA:
      case ISD::SHL:
      case ISD::SRL:
      case ISD::SMIN:
      case ISD::SMAX:
      case ISD::UMIN:
      case ISD::UMAX:
      case ISD::SETCC:
      case ISD::VSELECT:
      case ISD::SIGN_EXTEND:
      case ISD::ZERO_EXTEND:
      case ISD::SIGN_EXTEND_INREG:
      case ISD::SPLAT_VECTOR:
        return SplitHvxPairOp(Op, DAG);
    }
  }

  switch (Opc) {
    default:
      break;
    case ISD::BUILD_VECTOR:      return LowerHvxBuildVector(Op, DAG);
    case ISD::SPLAT_VECTOR:      return LowerHvxSplatVector(Op, DAG);
    case ISD::CONCAT_VECTORS:    return LowerHvxConcatVectors(Op, DAG);
    case ISD::INSERT_SUBVECTOR:  return LowerHvxInsertSubvector(Op, DAG);
    case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
    case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
    case
ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG); 2451 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG); 2452 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG); 2453 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG); 2454 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG); 2455 case ISD::CTTZ: return LowerHvxCttz(Op, DAG); 2456 case ISD::SELECT: return LowerHvxSelect(Op, DAG); 2457 case ISD::SRA: 2458 case ISD::SHL: 2459 case ISD::SRL: return LowerHvxShift(Op, DAG); 2460 case ISD::MULHS: 2461 case ISD::MULHU: return LowerHvxMulh(Op, DAG); 2462 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG); 2463 case ISD::SETCC: 2464 case ISD::INTRINSIC_VOID: return Op; 2465 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG); 2466 case ISD::MLOAD: 2467 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG); 2468 // Unaligned loads will be handled by the default lowering. 2469 case ISD::LOAD: return SDValue(); 2470 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG); 2471 case ISD::FP_TO_SINT: 2472 case ISD::FP_TO_UINT: 2473 case ISD::SINT_TO_FP: 2474 case ISD::UINT_TO_FP: return LowerHvxConvertFpInt(Op, DAG); 2475 } 2476 #ifndef NDEBUG 2477 Op.dumpr(&DAG); 2478 #endif 2479 llvm_unreachable("Unhandled HVX operation"); 2480 } 2481 2482 void 2483 HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, 2484 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 2485 unsigned Opc = N->getOpcode(); 2486 SDValue Op(N, 0); 2487 2488 switch (Opc) { 2489 case ISD::ANY_EXTEND: 2490 case ISD::SIGN_EXTEND: 2491 case ISD::ZERO_EXTEND: 2492 if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) { 2493 if (SDValue T = WidenHvxExtend(Op, DAG)) 2494 Results.push_back(T); 2495 } 2496 break; 2497 case ISD::SETCC: 2498 if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) { 2499 if (SDValue T = WidenHvxSetCC(Op, DAG)) 2500 Results.push_back(T); 2501 } 2502 break; 2503 case ISD::TRUNCATE: 2504 if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) { 
2505 if (SDValue T = WidenHvxTruncate(Op, DAG)) 2506 Results.push_back(T); 2507 } 2508 break; 2509 case ISD::STORE: { 2510 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) { 2511 SDValue Store = WidenHvxStore(Op, DAG); 2512 Results.push_back(Store); 2513 } 2514 break; 2515 } 2516 case ISD::MLOAD: 2517 if (isHvxPairTy(ty(Op))) { 2518 SDValue S = SplitHvxMemOp(Op, DAG); 2519 assert(S->getOpcode() == ISD::MERGE_VALUES); 2520 Results.push_back(S.getOperand(0)); 2521 Results.push_back(S.getOperand(1)); 2522 } 2523 break; 2524 case ISD::MSTORE: 2525 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value 2526 SDValue S = SplitHvxMemOp(Op, DAG); 2527 Results.push_back(S); 2528 } 2529 break; 2530 default: 2531 break; 2532 } 2533 } 2534 2535 void 2536 HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, 2537 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 2538 unsigned Opc = N->getOpcode(); 2539 SDValue Op(N, 0); 2540 switch (Opc) { 2541 case ISD::ANY_EXTEND: 2542 case ISD::SIGN_EXTEND: 2543 case ISD::ZERO_EXTEND: 2544 if (shouldWidenToHvx(ty(Op), DAG)) { 2545 if (SDValue T = WidenHvxExtend(Op, DAG)) 2546 Results.push_back(T); 2547 } 2548 break; 2549 case ISD::SETCC: 2550 if (shouldWidenToHvx(ty(Op), DAG)) { 2551 if (SDValue T = WidenHvxSetCC(Op, DAG)) 2552 Results.push_back(T); 2553 } 2554 break; 2555 case ISD::TRUNCATE: 2556 if (shouldWidenToHvx(ty(Op), DAG)) { 2557 if (SDValue T = WidenHvxTruncate(Op, DAG)) 2558 Results.push_back(T); 2559 } 2560 break; 2561 case ISD::LOAD: { 2562 if (shouldWidenToHvx(ty(Op), DAG)) { 2563 SDValue Load = WidenHvxLoad(Op, DAG); 2564 assert(Load->getOpcode() == ISD::MERGE_VALUES); 2565 Results.push_back(Load.getOperand(0)); 2566 Results.push_back(Load.getOperand(1)); 2567 } 2568 break; 2569 } 2570 case ISD::BITCAST: 2571 if (isHvxBoolTy(ty(N->getOperand(0)))) { 2572 SDValue Op(N, 0); 2573 SDValue C = LowerHvxBitcast(Op, DAG); 2574 Results.push_back(C); 2575 } 2576 break; 2577 default: 2578 break; 2579 } 
2580 } 2581 2582 SDValue 2583 HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) 2584 const { 2585 const SDLoc &dl(N); 2586 SelectionDAG &DAG = DCI.DAG; 2587 SDValue Op(N, 0); 2588 unsigned Opc = Op.getOpcode(); 2589 if (DCI.isBeforeLegalizeOps()) 2590 return SDValue(); 2591 2592 SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end()); 2593 2594 switch (Opc) { 2595 case ISD::VSELECT: { 2596 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0) 2597 SDValue Cond = Ops[0]; 2598 if (Cond->getOpcode() == ISD::XOR) { 2599 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1); 2600 if (C1->getOpcode() == HexagonISD::QTRUE) 2601 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]); 2602 } 2603 break; 2604 } 2605 case HexagonISD::V2Q: 2606 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) { 2607 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0))) 2608 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op)) 2609 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op)); 2610 } 2611 break; 2612 case HexagonISD::Q2V: 2613 if (Ops[0].getOpcode() == HexagonISD::QTRUE) 2614 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op), 2615 DAG.getConstant(-1, dl, MVT::i32)); 2616 if (Ops[0].getOpcode() == HexagonISD::QFALSE) 2617 return getZero(dl, ty(Op), DAG); 2618 break; 2619 case HexagonISD::VINSERTW0: 2620 if (isUndef(Ops[1])) 2621 return Ops[0];; 2622 break; 2623 case HexagonISD::VROR: { 2624 if (Ops[0].getOpcode() == HexagonISD::VROR) { 2625 SDValue Vec = Ops[0].getOperand(0); 2626 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1); 2627 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1}); 2628 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot}); 2629 } 2630 break; 2631 } 2632 } 2633 2634 return SDValue(); 2635 } 2636 2637 bool 2638 HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const { 2639 auto Action = getPreferredHvxVectorAction(Ty); 2640 if (Action == 
TargetLoweringBase::TypeWidenVector) { 2641 EVT WideTy = getTypeToTransformTo(*DAG.getContext(), Ty); 2642 assert(WideTy.isSimple()); 2643 return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true); 2644 } 2645 return false; 2646 } 2647 2648 bool 2649 HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { 2650 if (!Subtarget.useHVXOps()) 2651 return false; 2652 // If the type of any result, or any operand type are HVX vector types, 2653 // this is an HVX operation. 2654 auto IsHvxTy = [this](EVT Ty) { 2655 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true); 2656 }; 2657 auto IsHvxOp = [this](SDValue Op) { 2658 return Op.getValueType().isSimple() && 2659 Subtarget.isHVXVectorType(ty(Op), true); 2660 }; 2661 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp)) 2662 return true; 2663 2664 // Check if this could be an HVX operation after type widening. 2665 auto IsWidenedToHvx = [this, &DAG](SDValue Op) { 2666 if (!Op.getValueType().isSimple()) 2667 return false; 2668 MVT ValTy = ty(Op); 2669 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG); 2670 }; 2671 2672 for (int i = 0, e = N->getNumValues(); i != e; ++i) { 2673 if (IsWidenedToHvx(SDValue(N, i))) 2674 return true; 2675 } 2676 return llvm::any_of(N->ops(), IsWidenedToHvx); 2677 } 2678