//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "HexagonISelLowering.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

// Optional override of the widening threshold. It is only consulted (in
// getPreferredHvxVectorAction) when it was given explicitly on the command
// line; the initial value of 16 is not used as an implicit threshold.
static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
  cl::Hidden, cl::init(16),
  cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));

// Integer vector types that are made legal in each HVX mode. The "V" tables
// list the types placed in the single-vector register class, the "W" tables
// list the types placed in the vector-pair register class (see the
// addRegisterClass calls in initializeHVXLowering).
static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };


// Set up everything HVX-specific in the target lowering: register the
// register classes for the vector, vector-pair and predicate types matching
// the subtarget's HVX mode, then record how each operation on those types
// is legalized (Legal/Custom/Promote/Expand), and finally request target
// DAG combines for SPLAT_VECTOR and VSELECT.
void
HexagonTargetLowering::initializeHVXLowering() {
  if (Subtarget.useHVX64BOps()) {
    addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
    // These "short" boolean vector types should be legal because
    // they will appear as results of vector compares. If they were
    // not legal, type legalization would try to make them legal
    // and that would require using operations that do not use or
    // produce such types. That, in turn, would imply using custom
    // nodes, which would be unoptimizable by the DAG combiner.
    // The idea is to rely on target-independent operations as much
    // as possible.
    addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
  } else if (Subtarget.useHVX128BOps()) {
    addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
    // Floating-point vector types are only registered in 128-byte mode.
    if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
      addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
      addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
    }
  }

  // Set up operation actions.

  // Select the type tables and the byte-vector types for the current mode.
  // Anything other than 64-byte mode uses the 128-byte tables.
  bool Use64b = Subtarget.useHVX64BOps();
  ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
  ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
  MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
  MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;

  // Mark Opc on FromTy as Promote and record ToTy as the promoted type.
  auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
    setOperationAction(Opc, FromTy, Promote);
    AddPromotedToType(Opc, FromTy, ToTy);
  };

  // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
  // Note: v16i1 -> i16 is handled in type legalization instead of op
  // legalization.
  setOperationAction(ISD::BITCAST, MVT::i16, Custom);
  setOperationAction(ISD::BITCAST, MVT::i32, Custom);
  setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
  setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
  setOperationAction(ISD::BITCAST, MVT::i128, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Floating-point vector operations (same condition as the registration
  // of the floating-point register classes above).
  if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
      Subtarget.useHVXFloatingPoint()) {
    setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal);
    setOperationAction(ISD::FADD, MVT::v64f16, Legal);
    setOperationAction(ISD::FSUB, MVT::v64f16, Legal);
    setOperationAction(ISD::FMUL, MVT::v64f16, Legal);
    setOperationAction(ISD::FADD, MVT::v32f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v32f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v32f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal);
    setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);

    // Handle ISD::BUILD_VECTOR for v32f32 in a custom way to generate vsplat
    setOperationAction(ISD::BUILD_VECTOR, MVT::v32f32, Custom);

    // BUILD_VECTOR with f16 operands cannot be promoted without
    // promoting the result, so lower the node to vsplat or constant pool
    setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom);
    setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom);
    setOperationAction(ISD::SPLAT_VECTOR, MVT::v64f16, Legal);
    setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal);
    // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
    // generated.
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);

    // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
    // independent) handling of it would convert it to a load, which is
    // not always the optimal choice.
    setOperationAction(ISD::BUILD_VECTOR, MVT::v64f32, Custom);
    // Make concat-vectors custom to handle concats of more than 2 vectors.
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v128f16, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v64f32, Custom);

    setOperationAction(ISD::LOAD, MVT::v64f32, Custom);
    setOperationAction(ISD::STORE, MVT::v64f32, Custom);
    setOperationAction(ISD::FADD, MVT::v64f32, Custom);
    setOperationAction(ISD::FSUB, MVT::v64f32, Custom);
    setOperationAction(ISD::FMUL, MVT::v64f32, Custom);
    setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom);
    setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom);
    setOperationAction(ISD::VSELECT, MVT::v64f32, Custom);

    // FP_EXTEND to v64f32 is Custom with qfloat and Legal with IEEE FP;
    // FP_ROUND to v64f16 is Legal in both cases.
    if (Subtarget.useHVXQFloatOps()) {
      setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
      setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
    } else if (Subtarget.useHVXIEEEFPOps()) {
      setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
      setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
    }

    setOperationAction(ISD::MLOAD, MVT::v32f32, Custom);
    setOperationAction(ISD::MSTORE, MVT::v32f32, Custom);
    setOperationAction(ISD::MLOAD, MVT::v64f16, Custom);
    setOperationAction(ISD::MSTORE, MVT::v64f16, Custom);
    setOperationAction(ISD::MLOAD, MVT::v64f32, Custom);
    setOperationAction(ISD::MSTORE, MVT::v64f32, Custom);
  }

  // Actions for the integer types that fit in a single vector register.
  for (MVT T : LegalV) {
    setIndexedLoadAction(ISD::POST_INC, T, Legal);
    setIndexedStoreAction(ISD::POST_INC, T, Legal);

    setOperationAction(ISD::AND, T, Legal);
    setOperationAction(ISD::OR, T, Legal);
    setOperationAction(ISD::XOR, T, Legal);
    setOperationAction(ISD::ADD, T, Legal);
    setOperationAction(ISD::SUB, T, Legal);
    setOperationAction(ISD::MUL, T, Legal);
    setOperationAction(ISD::CTPOP, T, Legal);
    setOperationAction(ISD::CTLZ, T, Legal);
    setOperationAction(ISD::SELECT, T, Legal);
    setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
    if (T != ByteV) {
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
      setOperationAction(ISD::BSWAP, T, Legal);
    }

    setOperationAction(ISD::SMIN, T, Legal);
    setOperationAction(ISD::SMAX, T, Legal);
    // Unsigned min/max are only marked Legal for non-i32 element types.
    if (T.getScalarType() != MVT::i32) {
      setOperationAction(ISD::UMIN, T, Legal);
      setOperationAction(ISD::UMAX, T, Legal);
    }

    setOperationAction(ISD::CTTZ, T, Custom);
    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::MLOAD, T, Custom);
    setOperationAction(ISD::MSTORE, T, Custom);
    setOperationAction(ISD::MULHS, T, Custom);
    setOperationAction(ISD::MULHU, T, Custom);
    setOperationAction(ISD::BUILD_VECTOR, T, Custom);
    // Make concat-vectors custom to handle concats of more than 2 vectors.
    setOperationAction(ISD::CONCAT_VECTORS, T, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
    setOperationAction(ISD::ANY_EXTEND, T, Custom);
    setOperationAction(ISD::SIGN_EXTEND, T, Custom);
    setOperationAction(ISD::ZERO_EXTEND, T, Custom);
    if (T != ByteV) {
      setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
      // HVX only has shifts of words and halfwords.
      setOperationAction(ISD::SRA, T, Custom);
      setOperationAction(ISD::SHL, T, Custom);
      setOperationAction(ISD::SRL, T, Custom);

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
    }

    // Int<->FP conversions: Expand with qfloat, Custom with IEEE FP.
    if (Subtarget.useHVXQFloatOps()) {
      setOperationAction(ISD::SINT_TO_FP, T, Expand);
      setOperationAction(ISD::UINT_TO_FP, T, Expand);
      setOperationAction(ISD::FP_TO_SINT, T, Expand);
      setOperationAction(ISD::FP_TO_UINT, T, Expand);
    } else if (Subtarget.useHVXIEEEFPOps()) {
      setOperationAction(ISD::SINT_TO_FP, T, Custom);
      setOperationAction(ISD::UINT_TO_FP, T, Custom);
      setOperationAction(ISD::FP_TO_SINT, T, Custom);
      setOperationAction(ISD::FP_TO_UINT, T, Custom);
    }

    // These condition codes are expanded in terms of the remaining ones.
    setCondCodeAction(ISD::SETNE, T, Expand);
    setCondCodeAction(ISD::SETLE, T, Expand);
    setCondCodeAction(ISD::SETGE, T, Expand);
    setCondCodeAction(ISD::SETLT, T, Expand);
    setCondCodeAction(ISD::SETULE, T, Expand);
    setCondCodeAction(ISD::SETUGE, T, Expand);
    setCondCodeAction(ISD::SETULT, T, Expand);
  }

  // Actions for the integer types that occupy a vector pair.
  for (MVT T : LegalW) {
    // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
    // independent) handling of it would convert it to a load, which is
    // not always the optimal choice.
    setOperationAction(ISD::BUILD_VECTOR, T, Custom);
    // Make concat-vectors custom to handle concats of more than 2 vectors.
    setOperationAction(ISD::CONCAT_VECTORS, T, Custom);

    // Custom-lower these operations for pairs. Expand them into a concat
    // of the corresponding operations on individual vectors.
    setOperationAction(ISD::ANY_EXTEND, T, Custom);
    setOperationAction(ISD::SIGN_EXTEND, T, Custom);
    setOperationAction(ISD::ZERO_EXTEND, T, Custom);
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Custom);
    setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
    setOperationAction(ISD::SPLAT_VECTOR, T, Custom);

    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::STORE, T, Custom);
    setOperationAction(ISD::MLOAD, T, Custom);
    setOperationAction(ISD::MSTORE, T, Custom);
    setOperationAction(ISD::CTLZ, T, Custom);
    setOperationAction(ISD::CTTZ, T, Custom);
    setOperationAction(ISD::CTPOP, T, Custom);

    setOperationAction(ISD::ADD, T, Legal);
    setOperationAction(ISD::SUB, T, Legal);
    setOperationAction(ISD::MUL, T, Custom);
    setOperationAction(ISD::MULHS, T, Custom);
    setOperationAction(ISD::MULHU, T, Custom);
    setOperationAction(ISD::AND, T, Custom);
    setOperationAction(ISD::OR, T, Custom);
    setOperationAction(ISD::XOR, T, Custom);
    setOperationAction(ISD::SETCC, T, Custom);
    setOperationAction(ISD::VSELECT, T, Custom);
    if (T != ByteW) {
      setOperationAction(ISD::SRA, T, Custom);
      setOperationAction(ISD::SHL, T, Custom);
      setOperationAction(ISD::SRL, T, Custom);

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
    }

    setOperationAction(ISD::SMIN, T, Custom);
    setOperationAction(ISD::SMAX, T, Custom);
    if (T.getScalarType() != MVT::i32) {
      setOperationAction(ISD::UMIN, T, Custom);
      setOperationAction(ISD::UMAX, T, Custom);
    }

    setOperationAction(ISD::SINT_TO_FP, T, Custom);
    setOperationAction(ISD::UINT_TO_FP, T, Custom);
    setOperationAction(ISD::FP_TO_SINT, T, Custom);
    setOperationAction(ISD::FP_TO_UINT, T, Custom);
  }

  // Condition codes expanded for the single-vector floating-point types.
  // Note that these are set regardless of the subtarget's floating-point
  // support.
  setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);

  setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);

  // Boolean vectors.

  for (MVT T : LegalW) {
    // Boolean types for vector pairs will overlap with the boolean
    // types for single vectors, e.g.
    //   v64i8  -> v64i1 (single)
    //   v64i16 -> v64i1 (pair)
    // Set these actions first, and allow the single actions to overwrite
    // any duplicates.
    MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
    setOperationAction(ISD::SETCC, BoolW, Custom);
    setOperationAction(ISD::AND, BoolW, Custom);
    setOperationAction(ISD::OR, BoolW, Custom);
    setOperationAction(ISD::XOR, BoolW, Custom);
    // Masked load/store takes a mask that may need splitting.
    setOperationAction(ISD::MLOAD, BoolW, Custom);
    setOperationAction(ISD::MSTORE, BoolW, Custom);
  }

  // This loop must stay after the pair loop above: its AND/OR/XOR settings
  // intentionally replace the Custom ones for bool types shared by both.
  for (MVT T : LegalV) {
    MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
    setOperationAction(ISD::BUILD_VECTOR, BoolV, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, BoolV, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, BoolV, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom);
    setOperationAction(ISD::SELECT, BoolV, Custom);
    setOperationAction(ISD::AND, BoolV, Legal);
    setOperationAction(ISD::OR, BoolV, Legal);
    setOperationAction(ISD::XOR, BoolV, Legal);
  }

  // SIGN_EXTEND_INREG is marked Legal for a fixed list of types that are
  // shorter than a full vector in the current mode.
  if (Use64b) {
    for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
  } else {
    for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
  }

  // Handle store widening for short vectors.
  // For each non-i1 HVX element type, visit every power-of-2 element count
  // from 2 up to (but not including) a full vector. Types whose preferred
  // legalization action is widening get Custom handling for the memory,
  // compare, truncate and extend operations listed below.
  unsigned HwLen = Subtarget.getVectorLength();
  for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
    if (ElemTy == MVT::i1)
      continue;
    int ElemWidth = ElemTy.getFixedSizeInBits();
    int MaxElems = (8*HwLen) / ElemWidth;
    for (int N = 2; N < MaxElems; N *= 2) {
      MVT VecTy = MVT::getVectorVT(ElemTy, N);
      auto Action = getPreferredVectorAction(VecTy);
      if (Action == TargetLoweringBase::TypeWidenVector) {
        setOperationAction(ISD::LOAD, VecTy, Custom);
        setOperationAction(ISD::STORE, VecTy, Custom);
        setOperationAction(ISD::SETCC, VecTy, Custom);
        setOperationAction(ISD::TRUNCATE, VecTy, Custom);
        setOperationAction(ISD::ANY_EXTEND, VecTy, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom);

        // The matching bool vector also gets a Custom SETCC, unless that
        // bool type is already legal.
        MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
        if (!isTypeLegal(BoolTy))
          setOperationAction(ISD::SETCC, BoolTy, Custom);
      }
    }
  }

  setTargetDAGCombine(ISD::SPLAT_VECTOR);
  setTargetDAGCombine(ISD::VSELECT);
}

// Return the preferred type-legalization action for the vector type VecTy
// as far as HVX is concerned: TypeSplitVector, TypeWidenVector, or ~0u
// when this function has no preference and the default should be used.
unsigned
HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();

  // Split vectors of i1 that exceed byte vector length.
  if (ElemTy == MVT::i1 && VecLen > HwLen)
    return TargetLoweringBase::TypeSplitVector;

  ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
  // For shorter vectors of i1, widen them if any of the corresponding
  // vectors of integers needs to be widened.
  // (The first non-default action found among the same-length vectors of
  // the HVX element types is returned as is.)
  if (ElemTy == MVT::i1) {
    for (MVT T : Tys) {
      assert(T != MVT::i1);
      auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
      if (A != ~0u)
        return A;
    }
    return ~0u;
  }

  // If the size of VecTy is at least half of the vector length,
  // widen the vector. Note: the threshold was not selected in
  // any scientific way.
  // An explicitly given -hexagon-hvx-widen value (in bytes) takes
  // precedence: any vector at least that wide is widened.
  if (llvm::is_contained(Tys, ElemTy)) {
    unsigned VecWidth = VecTy.getSizeInBits();
    bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
    if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
      return TargetLoweringBase::TypeWidenVector;
    unsigned HwWidth = 8*HwLen;
    if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
      return TargetLoweringBase::TypeWidenVector;
  }

  // Defer to default.
  return ~0u;
}

// Create an INTRINSIC_WO_CHAIN node of type ResTy. The intrinsic id IntId
// is passed as the first operand (an i32 constant), followed by Ops.
SDValue
HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
                              const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<SDValue,4> IntOps;
  IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
  append_range(IntOps, Ops);
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
}

// Return the vector type with the (common) element type of both types in
// Tys, and with as many elements as the two types have together.
MVT
HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
  assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());

  MVT ElemTy = Tys.first.getVectorElementType();
  return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
                                  Tys.second.getVectorNumElements());
}

// Return a pair of identical vector types, each with the element type of
// VecTy and half of its elements. VecTy must have an even element count.
HexagonTargetLowering::TypePair
HexagonTargetLowering::typeSplit(MVT VecTy) const {
  assert(VecTy.isVector());
  unsigned NumElem = VecTy.getVectorNumElements();
  assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
  MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
  return { HalfTy, HalfTy };
}

// Return the vector type with the same number of elements as VecTy, whose
// elements are integers Factor times as wide as the elements of VecTy.
MVT
HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
  MVT ElemTy = VecTy.getVectorElementType();
  MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
  return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
}

// Return the vector type with the same number of elements as VecTy, whose
// elements are integers of the element width of VecTy divided by Factor.
MVT
HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
  MVT ElemTy = VecTy.getVectorElementType();
  MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
  return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
}

// Bitcast Vec to the type that tyVector produces for its type and ElemTy.
// Vec is returned unchanged if its element type already is ElemTy.
SDValue
HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
                                  SelectionDAG &DAG) const {
  if (ty(Vec).getVectorElementType() == ElemTy)
    return Vec;
  MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
  return DAG.getBitcast(CastTy, Vec);
}

// Concatenate the two vectors in Ops into a single CONCAT_VECTORS node of
// the joined type.
// NOTE(review): the operands are passed as (Ops.second, Ops.first), i.e. in
// the reverse of the pair order -- confirm that this ordering is intended.
SDValue
HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
                              SelectionDAG &DAG) const {
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
                     Ops.second, Ops.first);
}

// Split Vec into two halves. For a QCAT node the two operands of the node
// are returned directly, otherwise the split is delegated to
// SelectionDAG::SplitVector using the half types from typeSplit.
HexagonTargetLowering::VectorPair
HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
                               SelectionDAG &DAG) const {
  TypePair Tys = typeSplit(ty(Vec));
  if (Vec.getOpcode() == HexagonISD::QCAT)
    return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
  return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
}

// True if Ty is an HVX vector type that is exactly one vector register
// wide (8 * vector-length-in-bytes bits).
bool
HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty) &&
         Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
}

// True if Ty is an HVX vector type that is exactly two vector registers
// wide (16 * vector-length-in-bytes bits).
bool
HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty) &&
         Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
}

// True if Ty is an HVX vector type (bool types included in the query)
// whose element type is i1.
bool
HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty, true) &&
         Ty.getVectorElementType() == MVT::i1;
}

// Return true if a memory access of type VecTy is allowed. When the access
// is allowed and Fast is not null, *Fast is set to true. Flags is not
// examined.
bool HexagonTargetLowering::allowsHvxMemoryAccess(
    MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
  // Bool vectors are excluded by default, but make it explicit to
  // emphasize that bool vectors cannot be loaded or stored.
  // Also, disallow double vector stores (to prevent unnecessary
  // store widening in DAG combiner).
  if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
    return false;
  if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
    return false;
  if (Fast)
    *Fast = true;
  return true;
}

// Return true if a misaligned memory access of type VecTy is allowed, which
// is the case for every type accepted by isHVXVectorType. When the access
// is allowed and Fast is not null, *Fast is set to true. Flags is not
// examined.
bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
    MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
  if (!Subtarget.isHVXVectorType(VecTy))
    return false;
  // XXX Should this be false? vmemu are a bit slower than vmem.
  if (Fast)
    *Fast = true;
  return true;
}

// Convert the element index ElemIdx (for elements of type ElemTy) into an
// i32 byte index, i.e. ElemIdx shifted left by log2 of the element size in
// bytes. An index that is not already i32 is bitcast to i32 first.
SDValue
HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
                                          SelectionDAG &DAG) const {
  if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
    ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);

  unsigned ElemWidth = ElemTy.getSizeInBits();
  // Byte elements: the element index is the byte index.
  if (ElemWidth == 8)
    return ElemIdx;

  unsigned L = Log2_32(ElemWidth/8);
  const SDLoc &dl(ElemIdx);
  return DAG.getNode(ISD::SHL, dl, MVT::i32,
                     {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
}

// Return the position of element Idx inside the 32-bit word containing it,
// computed as Idx & (number-of-elements-per-word - 1). For 32-bit elements
// Idx is returned unchanged (without the bitcast to i32).
SDValue
HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
                                        SelectionDAG &DAG) const {
  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  if (ElemWidth == 32)
    return Idx;

  if (ty(Idx) != MVT::i32)
    Idx = DAG.getBitcast(MVT::i32, Idx);
  const SDLoc &dl(Idx);
  SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
  SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
  return SubIdx;
}

// Create a shuffle of Op0 and Op1 (which must have the same type) that is
// expressed on vectors of bytes. Mask is given in terms of the elements of
// the operand type; for non-byte elements each mask entry is expanded into
// one entry per byte of the element, and the operands are cast to i8
// vectors. The result has the byte-vector type in that case.
SDValue
HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
                                      SDValue Op1, ArrayRef<int> Mask,
                                      SelectionDAG &DAG) const {
  MVT OpTy = ty(Op0);
  assert(OpTy == ty(Op1));

  MVT ElemTy = OpTy.getVectorElementType();
  if (ElemTy == MVT::i8)
    return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
  assert(ElemTy.getSizeInBits() >= 8);

  MVT ResTy = tyVector(OpTy, MVT::i8);
  unsigned ElemSize = ElemTy.getSizeInBits() / 8;

  SmallVector<int,128> ByteMask;
  for (int M : Mask) {
    if (M < 0) {
      // An undefined element becomes ElemSize undefined bytes.
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(-1);
    } else {
      // Element M covers bytes M*ElemSize .. M*ElemSize+ElemSize-1.
      int NewM = M*ElemSize;
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(NewM+I);
    }
  }
  assert(ResTy.getVectorNumElements() == ByteMask.size());
  return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
                              opCastElem(Op1, MVT::i8, DAG), ByteMask);
}

// Build a value of the single-vector type VecTy from the scalar elements
// in Values (one per vector element; together they must fill exactly one
// vector register). The elements are first packed into 32-bit words, and
// then the following strategies are tried in order:
//   1. all words undefined or identical: UNDEF, zero, or a word splat,
//   2. all elements constant integers: load from the constant pool,
//   3. all elements extracted from one vector with as many, or twice as
//      many, elements: shuffle of that vector,
//   4. otherwise: insert the words one by one, building the two halves of
//      the vector in parallel, and OR the halves together.
SDValue
HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
                                         const SDLoc &dl, MVT VecTy,
                                         SelectionDAG &DAG) const {
  unsigned VecLen = Values.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned ElemSize = ElemWidth / 8;
  assert(ElemSize*VecLen == HwLen);
  SmallVector<SDValue,32> Words;

  // Elements other than i32 (and f32, when HVX floating point is in use)
  // are 1 or 2 bytes long: pack 4 or 2 of them into each 32-bit word.
  // 32-bit elements only need a bitcast to i32.
  if (VecTy.getVectorElementType() != MVT::i32 &&
      !(Subtarget.useHVXFloatingPoint() &&
      VecTy.getVectorElementType() == MVT::f32)) {
    assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
    unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
    MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
    for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
      SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
      Words.push_back(DAG.getBitcast(MVT::i32, W));
    }
  } else {
    for (SDValue V : Values)
      Words.push_back(DAG.getBitcast(MVT::i32, V));
  }
  // Return true if all values that are not undef are the same value. In
  // that case SplatV is set to that value, or to Values[0] if every value
  // is undef. SplatV is expected to be empty on entry.
  auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
    unsigned NumValues = Values.size();
    assert(NumValues > 0);
    bool IsUndef = true;
    for (unsigned i = 0; i != NumValues; ++i) {
      if (Values[i].isUndef())
        continue;
      IsUndef = false;
      if (!SplatV.getNode())
        SplatV = Values[i];
      else if (SplatV != Values[i])
        return false;
    }
    if (IsUndef)
      SplatV = Values[0];
    return true;
  };

  unsigned NumWords = Words.size();
  SDValue SplatV;
  bool IsSplat = isSplat(Words, SplatV);
  if (IsSplat && isUndef(SplatV))
    return DAG.getUNDEF(VecTy);
  if (IsSplat) {
    assert(SplatV.getNode());
    auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
    if (IdxN && IdxN->isZero())
      return getZero(dl, VecTy, DAG);
    // Splat the 32-bit word and cast the result back to the wanted type.
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
    return DAG.getBitcast(VecTy, S);
  }

  // Delay recognizing constant vectors until here, so that we can generate
  // a vsplat.
  SmallVector<ConstantInt*, 128> Consts(VecLen);
  bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
  if (AllConst) {
    ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
                            (Constant**)Consts.end());
    Constant *CV = ConstantVector::get(Tmp);
    // The constant pool entry is aligned to the vector length.
    Align Alignment(HwLen);
    SDValue CP =
        LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
    return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
                       MachinePointerInfo::getConstantPool(MF), Alignment);
  }

  // A special case is a situation where the vector is built entirely from
  // elements extracted from another vector. This could be done via a shuffle
  // more efficiently, but typically, the size of the source vector will not
  // match the size of the vector being built (which precludes the use of a
  // shuffle directly).
  // This only handles a single source vector, and the vector being built
  // should be of a sub-vector type of the source vector type.
  // On success SrcVec is the source vector and SrcIdx holds, for each value,
  // the constant index it was extracted from (-1 for undef values). SrcIdx
  // may have been partially filled when false is returned.
  auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
                                             SmallVectorImpl<int> &SrcIdx) {
    SDValue Vec;
    for (SDValue V : Values) {
      if (isUndef(V)) {
        SrcIdx.push_back(-1);
        continue;
      }
      if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
        return false;
      // All extracts should come from the same vector.
      SDValue T = V.getOperand(0);
      if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
        return false;
      Vec = T;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
      if (C == nullptr)
        return false;
      int I = C->getSExtValue();
      assert(I >= 0 && "Negative element index");
      SrcIdx.push_back(I);
    }
    SrcVec = Vec;
    return true;
  };

  SmallVector<int,128> ExtIdx;
  SDValue ExtVec;
  if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
    MVT ExtTy = ty(ExtVec);
    unsigned ExtLen = ExtTy.getVectorNumElements();
    if (ExtLen == VecLen || ExtLen == 2*VecLen) {
      // Construct a new shuffle mask that will produce a vector with the same
      // number of elements as the input vector, and such that the vector we
      // want will be the initial subvector of it.
      SmallVector<int,128> Mask;
      BitVector Used(ExtLen);

      for (int M : ExtIdx) {
        Mask.push_back(M);
        if (M >= 0)
          Used.set(M);
      }
      // Fill the rest of the mask with the unused elements of ExtVec in hopes
      // that it will result in a permutation of ExtVec's elements. It's still
      // fine if it doesn't (e.g. if undefs are present, or elements are
      // repeated), but permutations can always be done efficiently via vdelta
      // and vrdelta.
      for (unsigned I = 0; I != ExtLen; ++I) {
        if (Mask.size() == ExtLen)
          break;
        if (!Used.test(I))
          Mask.push_back(I);
      }

      SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
                                       DAG.getUNDEF(ExtTy), Mask);
      if (ExtLen == VecLen)
        return S;
      // The source is twice as long: the result is the low subregister of
      // the shuffled vector.
      return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S);
    }
  }

  // Find most common element to initialize vector with. This is to avoid
  // unnecessary vinsert/valign for cases where the same value is present
  // many times. Creates a histogram of the vector's elements to find the
  // most common element n.
  // VecHist[i] counts the words equal to Words[i] at positions i and above
  // (it stays 0 for undef words); n is the first index with the largest
  // count.
  // NOTE(review): VecHist has 32 entries while NumWords is HwLen/4, so this
  // assumes HwLen <= 128 -- confirm.
  assert(4*Words.size() == Subtarget.getVectorLength());
  int VecHist[32];
  int n = 0;
  for (unsigned i = 0; i != NumWords; ++i) {
    VecHist[i] = 0;
    if (Words[i].isUndef())
      continue;
    for (unsigned j = i; j != NumWords; ++j)
      if (Words[i] == Words[j])
        VecHist[i]++;

    if (VecHist[i] > VecHist[n])
      n = i;
  }

  // Start both halves from zero, or, if some word occurs more than once,
  // from a VALIGN of zero and a splat of that word.
  SDValue HalfV = getZero(dl, VecTy, DAG);
  if (VecHist[n] > 1) {
    SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
    HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
                       {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
  }
  SDValue HalfV0 = HalfV;
  SDValue HalfV1 = HalfV;

  // Construct two halves in parallel, then or them together. Rn and Rm count
  // number of rotations needed before the next element. One last rotation is
  // performed post-loop to position the last element.
  int Rn = 0, Rm = 0;
  SDValue Sn, Sm;
  SDValue N = HalfV0;
  SDValue M = HalfV1;
  for (unsigned i = 0; i != NumWords/2; ++i) {
    // Rotate by element count since last insertion.
    // Words equal to the pre-filled most common word are skipped (only
    // when that word was actually used for the initial value).
    if (Words[i] != Words[n] || VecHist[n] <= 1) {
      Sn = DAG.getConstant(Rn, dl, MVT::i32);
      HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
      N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV0, Words[i]});
      Rn = 0;
    }
    if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
      Sm = DAG.getConstant(Rm, dl, MVT::i32);
      HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
      M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV1, Words[i+NumWords/2]});
      Rm = 0;
    }
    // Each word accounts for 4 bytes of rotation.
    Rn += 4;
    Rm += 4;
  }
  // Perform last rotation.
  // The first half is rotated by an additional half of the vector length.
  Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
  Sm = DAG.getConstant(Rm, dl, MVT::i32);
  HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
  HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});

  // Combine the halves with an OR performed on vectors of i32, then cast
  // the result back to the element type of VecTy.
  SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
  SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);

  SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});

  SDValue OutV =
      DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
  return OutV;
}

SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  MVT PredTy = ty(PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);

  if (Subtarget.isHVXVectorType(PredTy, true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
    SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                         {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
    SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
    return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  auto Lo32 = [&DAG, &dl] (SDValue P) {
    return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, P);
  };
  auto Hi32 = [&DAG, &dl] (SDValue P) {
    return DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, P);
  };

  SDValue W0 = isUndef(PredV)
                  ? DAG.getUNDEF(MVT::i64)
                  : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
  Words[IdxW].push_back(Hi32(W0));
  Words[IdxW].push_back(Lo32(W0));

  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(W, dl, DAG);
        Words[IdxW].push_back(Hi32(T));
        Words[IdxW].push_back(Lo32(T));
      }
    } else {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
        Words[IdxW].push_back(W);
      }
    }
    Bytes *= 2;
  }

  assert(Bytes == BitBytes);

  SDValue Vec = ZeroFill ?
getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy); 901 SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32); 902 for (const SDValue &W : Words[IdxW]) { 903 Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4); 904 Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W); 905 } 906 907 return Vec; 908 } 909 910 SDValue 911 HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values, 912 const SDLoc &dl, MVT VecTy, 913 SelectionDAG &DAG) const { 914 // Construct a vector V of bytes, such that a comparison V >u 0 would 915 // produce the required vector predicate. 916 unsigned VecLen = Values.size(); 917 unsigned HwLen = Subtarget.getVectorLength(); 918 assert(VecLen <= HwLen || VecLen == 8*HwLen); 919 SmallVector<SDValue,128> Bytes; 920 bool AllT = true, AllF = true; 921 922 auto IsTrue = [] (SDValue V) { 923 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode())) 924 return !N->isZero(); 925 return false; 926 }; 927 auto IsFalse = [] (SDValue V) { 928 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode())) 929 return N->isZero(); 930 return false; 931 }; 932 933 if (VecLen <= HwLen) { 934 // In the hardware, each bit of a vector predicate corresponds to a byte 935 // of a vector register. Calculate how many bytes does a bit of VecTy 936 // correspond to. 937 assert(HwLen % VecLen == 0); 938 unsigned BitBytes = HwLen / VecLen; 939 for (SDValue V : Values) { 940 AllT &= IsTrue(V); 941 AllF &= IsFalse(V); 942 943 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8) 944 : DAG.getUNDEF(MVT::i8); 945 for (unsigned B = 0; B != BitBytes; ++B) 946 Bytes.push_back(Ext); 947 } 948 } else { 949 // There are as many i1 values, as there are bits in a vector register. 950 // Divide the values into groups of 8 and check that each group consists 951 // of the same value (ignoring undefs). 952 for (unsigned I = 0; I != VecLen; I += 8) { 953 unsigned B = 0; 954 // Find the first non-undef value in this group. 
955 for (; B != 8; ++B) { 956 if (!Values[I+B].isUndef()) 957 break; 958 } 959 SDValue F = Values[I+B]; 960 AllT &= IsTrue(F); 961 AllF &= IsFalse(F); 962 963 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8) 964 : DAG.getUNDEF(MVT::i8); 965 Bytes.push_back(Ext); 966 // Verify that the rest of values in the group are the same as the 967 // first. 968 for (; B != 8; ++B) 969 assert(Values[I+B].isUndef() || Values[I+B] == F); 970 } 971 } 972 973 if (AllT) 974 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy); 975 if (AllF) 976 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy); 977 978 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); 979 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG); 980 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec); 981 } 982 983 SDValue 984 HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV, 985 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { 986 MVT ElemTy = ty(VecV).getVectorElementType(); 987 988 unsigned ElemWidth = ElemTy.getSizeInBits(); 989 assert(ElemWidth >= 8 && ElemWidth <= 32); 990 (void)ElemWidth; 991 992 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); 993 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, 994 {VecV, ByteIdx}); 995 if (ElemTy == MVT::i32) 996 return ExWord; 997 998 // Have an extracted word, need to extract the smaller element out of it. 999 // 1. Extract the bits of (the original) IdxV that correspond to the index 1000 // of the desired element in the 32-bit word. 1001 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG); 1002 // 2. Extract the element from the word. 1003 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord); 1004 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG); 1005 } 1006 1007 SDValue 1008 HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV, 1009 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { 1010 // Implement other return types if necessary. 
assert(ResTy == MVT::i1);

  // Expand the predicate into a byte vector, extract the first byte that
  // corresponds to the requested element, and compare it against 0.
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);

  // Scale is the number of bytes in ByteVec per predicate element; multiply
  // the element index by it to get the byte index.
  unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
  SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
  IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);

  SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
  SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
  return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
}

// Insert the value ValV at index IdxV into the HVX vector VecV. The element
// width must be between 8 and 32 bits (asserted). Elements narrower than
// 32 bits are handled by extracting the containing word, updating it, and
// inserting the word back.
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT ElemTy = ty(VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // Insert the 32-bit value ValV into VecV at the word containing the byte
  // index ByteIdxV: rotate the vector by the word-aligned byte index (the
  // AND with -4 clears the two low bits), insert at word 0, then rotate by
  // HwLen minus that amount.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32,
                                {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)});
    SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
    SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
    SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
                                {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
  SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
                                     dl, MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(ty(Ext), ElemTy);
  SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
                             ValV, SubIdx, dl, ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}

// Insert the value ValV at index IdxV into the HVX vector predicate VecV.
// The predicate is expanded to a byte vector with Q2V, the sign-extended
// value is inserted at the byte index of the element, and the result is
// converted back with V2Q.
SDValue
HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);

  // Scale is the number of bytes in ByteVec per predicate element.
  unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
  SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
  IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
  ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);

  SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
  return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
}

// Extract a subvector of type ResTy starting at the constant element index
// IdxV from the HVX vector or vector pair VecV. The result is either a
// whole single vector taken out of a pair, or a 32- or 64-bit value
// (asserted).
SDValue
HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // If the source vector is a vector pair, get the single vector containing
  // the subvector of interest. The subvector will never overlap two single
  // vectors.
1099 if (isHvxPairTy(VecTy)) { 1100 unsigned SubIdx; 1101 if (Idx * ElemWidth >= 8*HwLen) { 1102 SubIdx = Hexagon::vsub_hi; 1103 Idx -= VecTy.getVectorNumElements() / 2; 1104 } else { 1105 SubIdx = Hexagon::vsub_lo; 1106 } 1107 VecTy = typeSplit(VecTy).first; 1108 VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV); 1109 if (VecTy == ResTy) 1110 return VecV; 1111 } 1112 1113 // The only meaningful subvectors of a single HVX vector are those that 1114 // fit in a scalar register. 1115 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64); 1116 1117 MVT WordTy = tyVector(VecTy, MVT::i32); 1118 SDValue WordVec = DAG.getBitcast(WordTy, VecV); 1119 unsigned WordIdx = (Idx*ElemWidth) / 32; 1120 1121 SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32); 1122 SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG); 1123 if (ResTy.getSizeInBits() == 32) 1124 return DAG.getBitcast(ResTy, W0); 1125 1126 SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32); 1127 SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG); 1128 SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {W1, W0}); 1129 return DAG.getBitcast(ResTy, WW); 1130 } 1131 1132 SDValue 1133 HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, 1134 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { 1135 MVT VecTy = ty(VecV); 1136 unsigned HwLen = Subtarget.getVectorLength(); 1137 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); 1138 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); 1139 // IdxV is required to be a constant. 
1140 unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); 1141 1142 unsigned ResLen = ResTy.getVectorNumElements(); 1143 unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); 1144 unsigned Offset = Idx * BitBytes; 1145 SDValue Undef = DAG.getUNDEF(ByteTy); 1146 SmallVector<int,128> Mask; 1147 1148 if (Subtarget.isHVXVectorType(ResTy, true)) { 1149 // Converting between two vector predicates. Since the result is shorter 1150 // than the source, it will correspond to a vector predicate with the 1151 // relevant bits replicated. The replication count is the ratio of the 1152 // source and target vector lengths. 1153 unsigned Rep = VecTy.getVectorNumElements() / ResLen; 1154 assert(isPowerOf2_32(Rep) && HwLen % Rep == 0); 1155 for (unsigned i = 0; i != HwLen/Rep; ++i) { 1156 for (unsigned j = 0; j != Rep; ++j) 1157 Mask.push_back(i + Offset); 1158 } 1159 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); 1160 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV); 1161 } 1162 1163 // Converting between a vector predicate and a scalar predicate. In the 1164 // vector predicate, a group of BitBytes bits will correspond to a single 1165 // i1 element of the source vector type. Those bits will all have the same 1166 // value. The same will be true for ByteVec, where each byte corresponds 1167 // to a bit in the vector predicate. 1168 // The algorithm is to traverse the ByteVec, going over the i1 values from 1169 // the source vector, and generate the corresponding representation in an 1170 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the 1171 // elements so that the interesting 8 bytes will be in the low end of the 1172 // vector. 1173 unsigned Rep = 8 / ResLen; 1174 // Make sure the output fill the entire vector register, so repeat the 1175 // 8-byte groups as many times as necessary. 1176 for (unsigned r = 0; r != HwLen/ResLen; ++r) { 1177 // This will generate the indexes of the 8 interesting bytes. 
1178 for (unsigned i = 0; i != ResLen; ++i) { 1179 for (unsigned j = 0; j != Rep; ++j) 1180 Mask.push_back(Offset + i*BitBytes); 1181 } 1182 } 1183 1184 SDValue Zero = getZero(dl, MVT::i32, DAG); 1185 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); 1186 // Combine the two low words from ShuffV into a v8i8, and byte-compare 1187 // them against 0. 1188 SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero}); 1189 SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, 1190 {ShuffV, DAG.getConstant(4, dl, MVT::i32)}); 1191 SDValue Vec64 = DAG.getNode(HexagonISD::COMBINE, dl, MVT::v8i8, {W1, W0}); 1192 return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy, 1193 {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG); 1194 } 1195 1196 SDValue 1197 HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV, 1198 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { 1199 MVT VecTy = ty(VecV); 1200 MVT SubTy = ty(SubV); 1201 unsigned HwLen = Subtarget.getVectorLength(); 1202 MVT ElemTy = VecTy.getVectorElementType(); 1203 unsigned ElemWidth = ElemTy.getSizeInBits(); 1204 1205 bool IsPair = isHvxPairTy(VecTy); 1206 MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth); 1207 // The two single vectors that VecV consists of, if it's a pair. 1208 SDValue V0, V1; 1209 SDValue SingleV = VecV; 1210 SDValue PickHi; 1211 1212 if (IsPair) { 1213 V0 = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, VecV); 1214 V1 = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, VecV); 1215 1216 SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(), 1217 dl, MVT::i32); 1218 PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT); 1219 if (isHvxSingleTy(SubTy)) { 1220 if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) { 1221 unsigned Idx = CN->getZExtValue(); 1222 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2); 1223 unsigned SubIdx = (Idx == 0) ? 
Hexagon::vsub_lo : Hexagon::vsub_hi; 1224 return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV); 1225 } 1226 // If IdxV is not a constant, generate the two variants: with the 1227 // SubV as the high and as the low subregister, and select the right 1228 // pair based on the IdxV. 1229 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1}); 1230 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV}); 1231 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); 1232 } 1233 // The subvector being inserted must be entirely contained in one of 1234 // the vectors V0 or V1. Set SingleV to the correct one, and update 1235 // IdxV to be the index relative to the beginning of that vector. 1236 SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV); 1237 IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV); 1238 SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0); 1239 } 1240 1241 // The only meaningful subvectors of a single HVX vector are those that 1242 // fit in a scalar register. 1243 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64); 1244 // Convert IdxV to be index in bytes. 1245 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode()); 1246 if (!IdxN || !IdxN->isZero()) { 1247 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, 1248 DAG.getConstant(ElemWidth/8, dl, MVT::i32)); 1249 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV); 1250 } 1251 // When inserting a single word, the rotation back to the original position 1252 // would be by HwLen-Idx, but if two words are inserted, it will need to be 1253 // by (HwLen-4)-Idx. 
1254 unsigned RolBase = HwLen; 1255 if (VecTy.getSizeInBits() == 32) { 1256 SDValue V = DAG.getBitcast(MVT::i32, SubV); 1257 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, V); 1258 } else { 1259 SDValue V = DAG.getBitcast(MVT::i64, SubV); 1260 SDValue R0 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, V); 1261 SDValue R1 = DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, V); 1262 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0); 1263 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, 1264 DAG.getConstant(4, dl, MVT::i32)); 1265 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1); 1266 RolBase = HwLen-4; 1267 } 1268 // If the vector wasn't ror'ed, don't ror it back. 1269 if (RolBase != 4 || !IdxN || !IdxN->isZero()) { 1270 SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32, 1271 DAG.getConstant(RolBase, dl, MVT::i32), IdxV); 1272 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV); 1273 } 1274 1275 if (IsPair) { 1276 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1}); 1277 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV}); 1278 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); 1279 } 1280 return SingleV; 1281 } 1282 1283 SDValue 1284 HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV, 1285 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { 1286 MVT VecTy = ty(VecV); 1287 MVT SubTy = ty(SubV); 1288 assert(Subtarget.isHVXVectorType(VecTy, true)); 1289 // VecV is an HVX vector predicate. SubV may be either an HVX vector 1290 // predicate as well, or it can be a scalar predicate. 
unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Scale:    ratio of the element counts of VecV and SubV.
  // BitBytes: number of bytes in a vector register per element of VecTy.
  // BlockLen: number of bytes that SubV occupies in that representation.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  unsigned BitBytes = HwLen / VecLen;
  unsigned BlockLen = HwLen / Scale;

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  // The rotation is skipped only when IdxV is the constant 0.
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}

// Extend the HVX vector predicate VecV to the HVX vector type ResTy. If
// ZeroExt is false the result is Q2V of VecV; otherwise it is a select
// between a splat of 1 and a zero vector, controlled by VecV.
SDValue
HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
      MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
  // Sign- and any-extending of a vector predicate to a vector register is
  // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
  // a vector of 1s (where the 1s are of type matching the vector type).
  assert(Subtarget.isHVXVectorType(ResTy));
  if (!ZeroExt)
    return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);

  assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
  SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                             DAG.getConstant(1, dl, MVT::i32));
  SDValue False = getZero(dl, ResTy, DAG);
  return DAG.getSelect(dl, ResTy, VecV, True, False);
}

SDValue
HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
      MVT ResTy, SelectionDAG &DAG) const {
  // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
  // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
  // vector register. The remaining bits of the vector register are
  // unspecified.

  MachineFunction &MF = DAG.getMachineFunction();
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT PredTy = ty(VecQ);
  unsigned PredLen = PredTy.getVectorNumElements();
  assert(HwLen % PredLen == 0);
  // VecTy has one integer element per predicate element and spans the
  // whole vector register (PredLen elements of 8*HwLen/PredLen bits each).
  MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);

  Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
  SmallVector<Constant*, 128> Tmp;
  // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
  // These are bytes with the LSB rotated left with respect to their index.
for (unsigned i = 0; i != HwLen/8; ++i) {
    for (unsigned j = 0; j != 8; ++j)
      Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
  }
  // Put the byte pattern into the constant pool and load it as a vector
  // of HwLen bytes, using HwLen as the alignment.
  Constant *CV = ConstantVector::get(Tmp);
  Align Alignment(HwLen);
  SDValue CP =
      LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
  SDValue Bytes =
      DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
                  MachinePointerInfo::getConstantPool(MF), Alignment);

  // Select the bytes that correspond to true bits in the vector predicate.
  SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
      getZero(dl, VecTy, DAG));
  // Calculate the OR of all bytes in each group of 8. That will compress
  // all the individual bits into a single byte.
  // First, OR groups of 4, via vrmpy with 0x01010101.
  SDValue All1 =
      DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
  SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
  // Then rotate the accumulated vector by 4 bytes, and do the final OR.
  SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
      {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
  SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});

  // Pick every 8th byte and coalesce them at the beginning of the output.
  // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
  // byte and so on.
1395 SmallVector<int,128> Mask; 1396 for (unsigned i = 0; i != HwLen; ++i) 1397 Mask.push_back((8*i) % HwLen + i/(HwLen/8)); 1398 SDValue Collect = 1399 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask); 1400 return DAG.getBitcast(ResTy, Collect); 1401 } 1402 1403 SDValue 1404 HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) 1405 const { 1406 const SDLoc &dl(Op); 1407 MVT VecTy = ty(Op); 1408 1409 unsigned Size = Op.getNumOperands(); 1410 SmallVector<SDValue,128> Ops; 1411 for (unsigned i = 0; i != Size; ++i) 1412 Ops.push_back(Op.getOperand(i)); 1413 1414 if (VecTy.getVectorElementType() == MVT::i1) 1415 return buildHvxVectorPred(Ops, dl, VecTy, DAG); 1416 1417 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is 1418 // not a legal type, just bitcast the node to use i16 1419 // types and bitcast the result back to f16 1420 if (VecTy.getVectorElementType() == MVT::f16) { 1421 SmallVector<SDValue,64> NewOps; 1422 for (unsigned i = 0; i != Size; i++) 1423 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i])); 1424 1425 SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl, 1426 tyVector(VecTy, MVT::i16), NewOps); 1427 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0); 1428 } 1429 1430 if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { 1431 ArrayRef<SDValue> A(Ops); 1432 MVT SingleTy = typeSplit(VecTy).first; 1433 SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG); 1434 SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG); 1435 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); 1436 } 1437 1438 return buildHvxVectorReg(Ops, dl, VecTy, DAG); 1439 } 1440 1441 SDValue 1442 HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG) 1443 const { 1444 const SDLoc &dl(Op); 1445 MVT VecTy = ty(Op); 1446 MVT ArgTy = ty(Op.getOperand(0)); 1447 1448 if (ArgTy == MVT::f16) { 1449 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements()); 1450 SDValue 
ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0)); 1451 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16); 1452 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32); 1453 return DAG.getBitcast(VecTy, Splat); 1454 } 1455 1456 return SDValue(); 1457 } 1458 1459 SDValue 1460 HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) 1461 const { 1462 // Vector concatenation of two integer (non-bool) vectors does not need 1463 // special lowering. Custom-lower concats of bool vectors and expand 1464 // concats of more than 2 vectors. 1465 MVT VecTy = ty(Op); 1466 const SDLoc &dl(Op); 1467 unsigned NumOp = Op.getNumOperands(); 1468 if (VecTy.getVectorElementType() != MVT::i1) { 1469 if (NumOp == 2) 1470 return Op; 1471 // Expand the other cases into a build-vector. 1472 SmallVector<SDValue,8> Elems; 1473 for (SDValue V : Op.getNode()->ops()) 1474 DAG.ExtractVectorElements(V, Elems); 1475 // A vector of i16 will be broken up into a build_vector of i16's. 1476 // This is a problem, since at the time of operation legalization, 1477 // all operations are expected to be type-legalized, and i16 is not 1478 // a legal type. If any of the extracted elements is not of a valid 1479 // type, sign-extend it to a valid one. 1480 for (unsigned i = 0, e = Elems.size(); i != e; ++i) { 1481 SDValue V = Elems[i]; 1482 MVT Ty = ty(V); 1483 if (!isTypeLegal(Ty)) { 1484 EVT NTy = getTypeToTransformTo(*DAG.getContext(), Ty); 1485 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 1486 Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy, 1487 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy, 1488 V.getOperand(0), V.getOperand(1)), 1489 DAG.getValueType(Ty)); 1490 continue; 1491 } 1492 // A few less complicated cases. 
1493 switch (V.getOpcode()) { 1494 case ISD::Constant: 1495 Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy); 1496 break; 1497 case ISD::UNDEF: 1498 Elems[i] = DAG.getUNDEF(NTy); 1499 break; 1500 case ISD::TRUNCATE: 1501 Elems[i] = V.getOperand(0); 1502 break; 1503 default: 1504 llvm_unreachable("Unexpected vector element"); 1505 } 1506 } 1507 } 1508 return DAG.getBuildVector(VecTy, dl, Elems); 1509 } 1510 1511 assert(VecTy.getVectorElementType() == MVT::i1); 1512 unsigned HwLen = Subtarget.getVectorLength(); 1513 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0); 1514 1515 SDValue Op0 = Op.getOperand(0); 1516 1517 // If the operands are HVX types (i.e. not scalar predicates), then 1518 // defer the concatenation, and create QCAT instead. 1519 if (Subtarget.isHVXVectorType(ty(Op0), true)) { 1520 if (NumOp == 2) 1521 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1)); 1522 1523 ArrayRef<SDUse> U(Op.getNode()->ops()); 1524 SmallVector<SDValue,4> SV(U.begin(), U.end()); 1525 ArrayRef<SDValue> Ops(SV); 1526 1527 MVT HalfTy = typeSplit(VecTy).first; 1528 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy, 1529 Ops.take_front(NumOp/2)); 1530 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy, 1531 Ops.take_back(NumOp/2)); 1532 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1); 1533 } 1534 1535 // Count how many bytes (in a vector register) each bit in VecTy 1536 // corresponds to. 
unsigned BitBytes = HwLen / VecTy.getVectorNumElements();

  // Run createHvxPrefixPred on every operand and collect the results in
  // operand order.
  SmallVector<SDValue,8> Prefixes;
  for (SDValue V : Op.getNode()->op_values()) {
    SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
    Prefixes.push_back(P);
  }

  // S is the rotation amount in bytes: the number of elements in one
  // operand times the number of bytes that each element stands for.
  unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32);
  SDValue Res = getZero(dl, ByteTy, DAG);
  // Starting from zero, visit the prefixes from the last operand to the
  // first: rotate the accumulated bytes by S, then OR in the next prefix.
  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
    Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
    Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
  }
  // Turn the accumulated byte vector into a predicate of the result type.
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
}

// Lower an element extraction from an HVX vector.
// Operand 0 is the source vector and operand 1 is the index. Vectors of i1
// are handled by extractHvxElementPred, all other element types by
// extractHvxElementReg. The requested result type is ty(Op) in both cases.
SDValue
HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
      const {
  // Change the type of the extracted element to i32.
  SDValue VecV = Op.getOperand(0);
  MVT ElemTy = ty(VecV).getVectorElementType();
  const SDLoc &dl(Op);
  SDValue IdxV = Op.getOperand(1);
  if (ElemTy == MVT::i1)
    return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);

  return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
}

// Lower an element insertion into an HVX vector.
// Operand 0 is the vector, operand 1 the value to insert, operand 2 the
// index. Vectors of i1 go to insertHvxElementPred. Vectors of f16 are
// rewritten as an INSERT_VECTOR_ELT on the same vector viewed as i16
// elements, with the result cast back to f16. Everything else goes to
// insertHvxElementReg.
SDValue
HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
      const {
  const SDLoc &dl(Op);
  MVT VecTy = ty(Op);
  SDValue VecV = Op.getOperand(0);
  SDValue ValV = Op.getOperand(1);
  SDValue IdxV = Op.getOperand(2);
  MVT ElemTy = ty(VecV).getVectorElementType();
  if (ElemTy == MVT::i1)
    return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);

  if (ElemTy == MVT::f16) {
    // Do the insertion on the i16 view of both the vector and the scalar,
    // then reinterpret the result as a vector of f16 again.
    SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
        tyVector(VecTy, MVT::i16),
        DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
        DAG.getBitcast(MVT::i16, ValV), IdxV);
    return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
  }

  return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
}

// Lower a subvector extraction from an HVX vector.
// Operand 0 is the source vector and operand 1 the starting index, which
// must be a constant (it is read through cast<ConstantSDNode>) and is
// asserted to be a multiple of the result's element count. Sources with
// i1 elements go to extractHvxSubvectorPred, others to
// extractHvxSubvectorReg.
SDValue
HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
      const {
  SDValue SrcV = Op.getOperand(0);
  MVT SrcTy = ty(SrcV);
  MVT DstTy = ty(Op);
  SDValue IdxV = Op.getOperand(1);
  unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
  assert(Idx % DstTy.getVectorNumElements() == 0);
  // Idx is only used in the assertion above; keep builds without
  // assertions free of unused-variable warnings.
  (void)Idx;
  const SDLoc &dl(Op);

  MVT ElemTy = SrcTy.getVectorElementType();
  if (ElemTy == MVT::i1)
    return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);

  return extractHvxSubvectorReg(SrcV, IdxV, dl, DstTy, DAG);
}

// Lower a subvector insertion into an HVX vector.
// Operand 0 is the vector being modified, operand 1 the subvector to
// insert and operand 2 the index. Vectors of i1 go to
// insertHvxSubvectorPred, others to insertHvxSubvectorReg.
SDValue
HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
      const {
  // Idx does not need to be a constant.
  SDValue VecV = Op.getOperand(0);
  SDValue ValV = Op.getOperand(1);
  SDValue IdxV = Op.getOperand(2);

  const SDLoc &dl(Op);
  MVT VecTy = ty(VecV);
  MVT ElemTy = VecTy.getVectorElementType();
  if (ElemTy == MVT::i1)
    return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);

  return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
}

// Lower ANY_EXTEND of an HVX vector: an i1 input with an HVX result type
// is handled as a sign-extend, anything else becomes ZERO_EXTEND.
SDValue
HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
  // Lower any-extends of boolean vectors to sign-extends, since they
  // translate directly to Q2V. Zero-extending could also be done equally
  // fast, but Q2V is used/recognized in more places.
  // For all other vectors, use zero-extend.
MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  MVT ElemTy = ty(InpV).getVectorElementType();
  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
    return LowerHvxSignExt(Op, DAG);
  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
}

// Lower SIGN_EXTEND of an HVX vector.
// When the input has i1 elements and the result is an HVX vector type,
// the extension is built by extendHvxVectorPred with its boolean argument
// set to false (the zero-extend lowering passes true). Any other
// combination is returned unchanged.
SDValue
HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  MVT ElemTy = ty(InpV).getVectorElementType();
  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
    return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
  return Op;
}

// Lower ZERO_EXTEND of an HVX vector.
// When the input has i1 elements and the result is an HVX vector type,
// the extension is built by extendHvxVectorPred with its boolean argument
// set to true (the sign-extend lowering passes false). Any other
// combination is returned unchanged.
SDValue
HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  MVT ElemTy = ty(InpV).getVectorElementType();
  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
    return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
  return Op;
}

// Lower vector CTTZ by expressing it through CTLZ, SUB, XOR and AND on the
// same vector type. The input and result types are asserted to be equal.
SDValue
HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
  // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
  // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
  const SDLoc &dl(Op);
  MVT ResTy = ty(Op);
  SDValue InpV = Op.getOperand(0);
  assert(ResTy == ty(InpV));

  // Calculate the vectors of 1 and bitwidth(x).
  MVT ElemTy = ty(InpV).getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // The splat operands are always built as i32 constants, whatever the
  // element type of ResTy is.
  SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                             DAG.getConstant(1, dl, MVT::i32));
  SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                             DAG.getConstant(ElemWidth, dl, MVT::i32));
  SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
                              DAG.getConstant(-1, dl, MVT::i32));

  // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
  // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
  // it separately in custom combine or selection).
  // A = (x ^ -1) & (x - 1), i.e. ~x & (x-1).
  SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
                          {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
                           DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
  return DAG.getNode(ISD::SUB, dl, ResTy,
                     {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
}

// Lower MULHS/MULHU (the high half of an element-wise product) on HVX
// vectors. i8 and i16 elements use a widening multiply followed by a byte
// shuffle; i32 elements are asserted as the only remaining case and are
// expanded into sequences of HVX multiply instructions.
SDValue
HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  assert(ResTy.isVector());
  const SDLoc &dl(Op);
  SmallVector<int,256> ShuffMask;

  MVT ElemTy = ResTy.getVectorElementType();
  unsigned VecLen = ResTy.getVectorNumElements();
  SDValue Vs = Op.getOperand(0);
  SDValue Vt = Op.getOperand(1);
  bool IsSigned = Op.getOpcode() == ISD::MULHS;

  if (ElemTy == MVT::i8 || ElemTy == MVT::i16) {
    // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
    // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
    // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
    // For i16, use V6_vmpyhv, which behaves in an analogous way to
    // V6_vmpybv: results Lo and Hi are products of even/odd elements
    // respectively.
    MVT ExtTy = typeExtElem(ResTy, 2);
    // Pick the multiply by element width and signedness.
    unsigned MpyOpc = ElemTy == MVT::i8
        ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv)
        : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv);
    SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);

    // Discard low halves of the resulting values, collect the high halves.
    for (unsigned I = 0; I < VecLen; I += 2) {
      ShuffMask.push_back(I+1);         // Pick even element.
      ShuffMask.push_back(I+VecLen+1);  // Pick odd element.
}
    // View the product as ElemTy-sized elements, split it into its two
    // halves and shuffle them with the mask built above.
    VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
    SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
    return DAG.getBitcast(ResTy, BS);
  }

  assert(ElemTy == MVT::i32);
  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // Signed 32-bit mulh; used below when the subtarget lacks HVX v62 ops.
  auto MulHS_V60 = [&](SDValue Vs, SDValue Vt) {
    // mulhs(Vs,Vt) =
    //   = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32
    //   = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16
    //      + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32
    //   = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16
    //      + Lo(Vs) *us Vt] >> 32
    // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to
    // anything, so it cannot produce any carry over to higher bits),
    // so everything in [] can be shifted by 16 without loss of precision.
    //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16
    //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16
    // Denote Hi(Vs) = Vs':
    //   = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16
    //   = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16
    SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG);
    // Get Vs':
    SDValue S0 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
                          {T0, S0, Vt}, DAG);
    // Shift by 16:
    SDValue S2 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG);
    // Get Vs'*Hi(Vt):
    SDValue T2 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG);
    // Add:
    SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2});
    return T3;
  };

  // Signed 32-bit mulh for subtargets with HVX v62 ops: two instructions
  // produce a vector pair, and the second half of that pair is returned.
  // NOTE(review): presumably the pair holds the 64-bit products with the
  // high words in the second half -- confirm against the HVX manual.
  auto MulHS_V62 = [&](SDValue Vs, SDValue Vt) {
    MVT PairTy = typeJoin({ResTy, ResTy});
    SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {Vs, Vt}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy,
                          {T0, Vs, Vt}, DAG);
    return opSplit(T1, dl, DAG).second;
  };

  if (IsSigned) {
    if (Subtarget.useHVXV62Ops())
      return MulHS_V62(Vs, Vt);
    return MulHS_V60(Vs, Vt);
  }

  // Unsigned mulhw. (Would expansion using signed mulhw be better?)

  // Helpers extracting the vsub_lo / vsub_hi subregister of a vector pair.
  auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) {
    return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair);
  };
  auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) {
    return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair);
  };

  MVT PairTy = typeJoin({ResTy, ResTy});
  // P is the control vector for the V6_vdelta below, which swaps the
  // halfwords within each word of Vt.
  SDValue P = getInstr(Hexagon::V6_lvsplatw, dl, ResTy,
                       {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  // Multiply-unsigned halfwords:
  //   LoVec = Vs.uh[2i] * Vt.uh[2i],
  //   HiVec = Vs.uh[2i+1] * Vt.uh[2i+1]
  SDValue T0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG);
  // The low halves in the LoVec of the pair can be discarded. They are
  // not added to anything (in the full-precision product), so they cannot
  // produce a carry into the higher bits.
  SDValue T1 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG);
  // Swap low and high halves in Vt, and do the halfword multiplication
  // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i].
  SDValue D0 = getInstr(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG);
  SDValue T2 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG);
  // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs).
  // These products are words, but cannot be added directly because the
  // sums could overflow. Add these products, by halfwords, where each sum
  // of a pair of halfwords gives a word.
  SDValue T3 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {LoVec(T2), HiVec(T2)}, DAG);
  // Add the high halfwords from the products of the low halfwords.
SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)});
  // Shift the low partial sum right by 16 and add it to the sum of the
  // remaining two terms (the high halves of T0 and T3).
  SDValue T5 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG);
  SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)});
  SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6});
  return T7;
}

// Lower BITCAST between HVX predicate vectors and scalar integers.
//  - HVX bool vector -> scalar integer: the predicate is compressed into a
//    vector of 32-bit words, and as many words as the result needs are
//    extracted and combined.
//  - scalar integer -> HVX bool vector: handled in the second branch.
// Any other bitcast is returned unchanged.
SDValue
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
  SDValue Val = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT ValTy = ty(Val);
  const SDLoc &dl(Op);

  if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
    unsigned HwLen = Subtarget.getVectorLength();
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
    unsigned BitWidth = ResTy.getSizeInBits();

    if (BitWidth < 64) {
      // Everything that is needed is in word 0 of the compressed vector.
      SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
          dl, MVT::i32, DAG);
      if (BitWidth == 32)
        return W0;
      assert(BitWidth < 32u);
      return DAG.getZExtOrTrunc(W0, dl, ResTy);
    }

    // The result is >= 64 bits. The only options are 64 or 128.
    assert(BitWidth == 64 || BitWidth == 128);
    // Extract the first BitWidth/32 words of the compressed vector.
    SmallVector<SDValue,4> Words;
    for (unsigned i = 0; i != BitWidth/32; ++i) {
      SDValue W = extractHvxElementReg(
          VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
      Words.push_back(W);
    }
    // Combine consecutive word pairs into i64 values; the odd-numbered
    // word is passed as the first COMBINE operand.
    SmallVector<SDValue,2> Combines;
    assert(Words.size() % 2 == 0);
    for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
      SDValue C = DAG.getNode(
          HexagonISD::COMBINE, dl, MVT::i64, {Words[i+1], Words[i]});
      Combines.push_back(C);
    }

    if (BitWidth == 64)
      return Combines[0];

    // 128-bit result: pair up the two i64 halves.
    return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
  }
  if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
    // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
unsigned BitWidth = ValTy.getSizeInBits();
    unsigned HwLen = Subtarget.getVectorLength();
    // The scalar must have exactly one bit per byte of the HVX vector.
    assert(BitWidth == HwLen);

    MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
    SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
    // Splat each byte of Val 8 times.
    // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
    // where b0, b1,..., b15 are least to most significant bytes of Val.
    SmallVector<SDValue, 128> Bytes;
    // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
    // These are bytes with the LSB rotated left with respect to their index.
    SmallVector<SDValue, 128> Tmp;
    for (unsigned I = 0; I != HwLen / 8; ++I) {
      SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
      SDValue Byte =
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
      // Emit byte I eight times, each paired with a distinct single-bit
      // mask (1 << J).
      for (unsigned J = 0; J != 8; ++J) {
        Bytes.push_back(Byte);
        Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
      }
    }

    MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
    SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
    SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);

    // Each Byte in the I2V will be set iff corresponding bit is set in Val.
    I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
  }

  // Not a predicate <-> scalar bitcast: leave the node alone.
  return Op;
}

// Lower ANY_EXTEND_VECTOR_INREG (the only opcode accepted, by assertion)
// by re-emitting it as ZERO_EXTEND_VECTOR_INREG with the same operand and
// result type.
SDValue
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
  // Sign- and zero-extends are legal.
assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
  return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
                     Op.getOperand(0));
}

// Lower SELECT whose result is a vector of i1.
// Both value operands are converted with Q2V to a vector whose elements
// are HwLen/VecLen bytes wide, the SELECT is done on that type, and the
// result is converted back with V2Q. A SELECT producing any other element
// type is returned unchanged.
SDValue
HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  if (ResTy.getVectorElementType() != MVT::i1)
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned VecLen = ResTy.getVectorNumElements();
  assert(HwLen % VecLen == 0);
  // Number of bytes of an HVX register that each predicate element maps to.
  unsigned ElemSize = HwLen / VecLen;

  MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
  SDValue S =
      DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
                  DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
                  DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
  return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
}

// Lower SRA/SHL/SRL on HVX vectors: use the result of getVectorShiftByInt
// when it produces one, otherwise return the node unchanged.
SDValue
HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
  if (SDValue S = getVectorShiftByInt(Op, DAG))
    return S;
  return Op;
}

// Lower an intrinsic node whose ID is in operand 0.
// Only the predicate typecast intrinsic matching the current HVX mode
// (64-byte or 128-byte) is rewritten, and only when both the operand and
// the result are HVX bool types: it folds to its operand when the types
// are equal, and otherwise becomes a HexagonISD::TYPECAST node. All other
// intrinsics are returned unchanged.
SDValue
HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  MVT ResTy = ty(Op);

  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  bool Use64b = Subtarget.useHVX64BOps();
  unsigned IntPredCast = Use64b ? Intrinsic::hexagon_V6_pred_typecast
                                : Intrinsic::hexagon_V6_pred_typecast_128B;
  if (IntNo == IntPredCast) {
    SDValue Vs = Op.getOperand(1);
    MVT OpTy = ty(Vs);
    if (isHvxBoolTy(ResTy) && isHvxBoolTy(OpTy)) {
      if (ResTy == OpTy)
        return Vs;
      return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Vs);
    }
  }

  return Op;
}

// Lower MLOAD and MSTORE (asserted to be the only opcodes) on HVX vectors.
//  - MLOAD becomes an ordinary load of the whole vector. When the
//    pass-through operand is not undef, a VSELECT on the mask merges the
//    loaded value with it.
//  - MSTORE becomes one predicated store when the node's alignment is a
//    multiple of the vector length, and a pair of predicated stores
//    otherwise.
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  // Memory operand derived from the original one, at offset 0 and with
  // size HwLen.
  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
    // With an undef pass-through the mask does not affect the result that
    // is returned here.
    if (isUndef(Thru))
      return Load;
    SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
    // Return the merged value together with the chain of the load.
    return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));

  // Aligned case: a single predicated store at offset 0.
  if (MaskN->getAlign().value() % HwLen == 0) {
    SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
                             {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
    return Store;
  }

  // Unaligned case.
auto StoreAlign = [&](SDValue V, SDValue A) {
    // Produce the two vectors obtained by V6_vlalignb of V against a zero
    // vector, once with V as the first operand and once as the second,
    // both using A as the amount.
    SDValue Z = getZero(dl, ty(V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
    SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
    return std::make_pair(LoV, HiV);
  };

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  // The mask is aligned as a byte vector (Q2V), using the base pointer as
  // the amount, and each half is converted back to a predicate (V2Q).
  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
                      DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
  // The stored value is aligned in the same way.
  VectorPair ValueU = StoreAlign(Value, Base);

  // Two predicated stores, at offsets 0 and HwLen from the base. Both take
  // the incoming chain and both are given the same memory operand.
  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
  SDValue StoreLo =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}

// Lower FP_EXTEND from v64f16 to v64f32 (both types are asserted) for
// subtargets with HVX QFloat operations.
SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
                                                SelectionDAG &DAG) const {
  // This conversion only applies to QFloat.
assert(Subtarget.useHVXQFloatOps());

  assert(Op->getOpcode() == ISD::FP_EXTEND);

  MVT VecTy = ty(Op);
  MVT ArgTy = ty(Op.getOperand(0));
  const SDLoc &dl(Op);
  assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);

  SDValue F16Vec = Op.getOperand(0);

  // Build the constant 1.0 in IEEE half precision, as a value of the
  // input vector type.
  APFloat FloatVal = APFloat(1.0f);
  bool Ignored;
  FloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
  SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
  // Multiply the input by 1.0 with V6_vmpy_qf32_hf; the result has the
  // (pair) type VecTy.
  SDValue VmpyVec =
      getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);

  // Convert each half of the product with V6_vconv_sf_qf32.
  MVT HalfTy = typeSplit(VecTy).first;
  VectorPair Pair = opSplit(VmpyVec, dl, DAG);
  SDValue LoVec =
      getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
  SDValue HiVec =
      getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);

  // Recombine the two halves into the result with V6_vshuffvdd.
  // NOTE(review): the -4 control value presumably interleaves 32-bit words
  // to restore the element order -- confirm against the HVX manual.
  SDValue ShuffVec =
      getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
               {HiVec, LoVec, DAG.getConstant(-4, dl, MVT::i32)}, DAG);

  return ShuffVec;
}

// Lower FP_TO_SINT, FP_TO_UINT, SINT_TO_FP and UINT_TO_FP for subtargets
// with HVX IEEE floating-point operations.
// Returns the node unchanged when the conversion is between f16 and i8 or
// i16 elements, and an empty SDValue in every other case (non-f16
// floating-point types, or i32 integer elements).
SDValue
HexagonTargetLowering::LowerHvxConvertFpInt(SDValue Op, SelectionDAG &DAG)
    const {
  // This conversion only applies to IEEE.
  assert(Subtarget.useHVXIEEEFPOps());

  unsigned Opc = Op.getOpcode();
  // Catch invalid conversion ops (just in case).
  assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
         Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
  MVT ResTy = ty(Op);

  if (Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT) {
    MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
    // There are only conversions of f16.
    if (FpTy != MVT::f16)
      return SDValue();

    MVT IntTy = ResTy.getVectorElementType();
    // Other int types aren't legal in HVX, so we shouldn't see them here.
    assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
    // Conversions to i8 and i16 are legal.
if (IntTy == MVT::i8 || IntTy == MVT::i16)
      return Op;
  } else {
    // Converting int -> fp.
    if (ResTy.getVectorElementType() != MVT::f16)
      return SDValue();
    MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
    // Other int types aren't legal in HVX, so we shouldn't see them here.
    assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
    // i8, i16 -> f16 is legal.
    if (IntTy == MVT::i8 || IntTy == MVT::i16)
      return Op;
  }

  // Conversions involving i32 elements end up here.
  return SDValue();
}

// Split an operation on HVX vector pairs into two operations on single
// vectors and concatenate the results.
// Every operand that is an HVX vector type is split with opSplit; every
// other operand is passed unchanged to both halves. The same opcode is
// then emitted twice with the half of the result type, and the two
// results are joined with CONCAT_VECTORS.
SDValue
HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
  assert(!Op.isMachineOpcode());
  SmallVector<SDValue,2> OpsL, OpsH;
  const SDLoc &dl(Op);

  // For a value-type operand, both halves get a value-type node holding
  // the first half of the original type.
  auto SplitVTNode = [&DAG,this] (const VTSDNode *N) {
    MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
    SDValue TV = DAG.getValueType(Ty);
    return std::make_pair(TV, TV);
  };

  for (SDValue A : Op.getNode()->ops()) {
    VectorPair P = Subtarget.isHVXVectorType(ty(A), true)
                    ? opSplit(A, dl, DAG)
                    : std::make_pair(A, A);
    // Special case for type operand.
if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
      if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
        P = SplitVTNode(N);
    }
    OpsL.push_back(P.first);
    OpsH.push_back(P.second);
  }

  // Emit the operation once per half and concatenate the two results.
  MVT ResTy = ty(Op);
  MVT HalfTy = typeSplit(ResTy).first;
  SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
  SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
  SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H);
  return S;
}

// Split a memory operation (LOAD, STORE, MLOAD or MSTORE) on an HVX vector
// pair into two operations on single vectors, at addresses Base and
// Base + HwLen.
// The node is returned unchanged when its memory type is not an HVX pair.
// Loads return the concatenated value merged with a TokenFactor of the two
// load chains; stores return a TokenFactor of the two stores.
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  auto *MemN = cast<MemSDNode>(Op.getNode());

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);
  unsigned MemOpc = MemN->getOpcode();

  // Derive one memory operand per half, at offsets 0 and HwLen. The size
  // is HwLen for plain loads and stores, and "unknown" for the masked
  // variants.
  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    // Both loads take the incoming chain; their output chains are joined.
    SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
    SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Load0.getValue(1), Load1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  // Masked variants: the mask is split in the same way as the data.
  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
  // The masked operations are unindexed, so the offset operand is undef.
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
                          Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
                          Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals =
opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG); 2195 SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset, 2196 Masks.first, SingleTy, MOp0, 2197 ISD::UNINDEXED, false, false); 2198 SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset, 2199 Masks.second, SingleTy, MOp1, 2200 ISD::UNINDEXED, false, false); 2201 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1); 2202 } 2203 2204 std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG); 2205 llvm_unreachable(Name.c_str()); 2206 } 2207 2208 SDValue 2209 HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const { 2210 const SDLoc &dl(Op); 2211 auto *LoadN = cast<LoadSDNode>(Op.getNode()); 2212 assert(LoadN->isUnindexed() && "Not widening indexed loads yet"); 2213 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 && 2214 "Not widening loads of i1 yet"); 2215 2216 SDValue Chain = LoadN->getChain(); 2217 SDValue Base = LoadN->getBasePtr(); 2218 SDValue Offset = DAG.getUNDEF(MVT::i32); 2219 2220 MVT ResTy = ty(Op); 2221 unsigned HwLen = Subtarget.getVectorLength(); 2222 unsigned ResLen = ResTy.getStoreSize(); 2223 assert(ResLen < HwLen && "vsetq(v1) prerequisite"); 2224 2225 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); 2226 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, 2227 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG); 2228 2229 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen); 2230 MachineFunction &MF = DAG.getMachineFunction(); 2231 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen); 2232 2233 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask, 2234 DAG.getUNDEF(LoadTy), LoadTy, MemOp, 2235 ISD::UNINDEXED, ISD::NON_EXTLOAD, false); 2236 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG); 2237 return DAG.getMergeValues({Value, Chain}, dl); 2238 } 2239 2240 SDValue 2241 HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG 
&DAG) const {
  const SDLoc &dl(Op);
  auto *StoreN = cast<StoreSDNode>(Op.getNode());
  assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
  assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening stores of i1 yet");

  SDValue Chain = StoreN->getChain();
  SDValue Base = StoreN->getBasePtr();
  // The masked store is unindexed, so the offset operand is undef.
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  // Work on the stored value as a vector of bytes.
  SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
  MVT ValueTy = ty(Value);
  unsigned ValueLen = ValueTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(ValueLen));

  // Keep joining the value with an undef vector of its own type, doubling
  // its length each time, until it is HwLen bytes long.
  // NOTE(review): which lanes the original bytes end up in depends on
  // the operand order used by opJoin -- confirm it matches the mask below.
  for (unsigned Len = ValueLen; Len < HwLen; ) {
    Value = opJoin({DAG.getUNDEF(ty(Value)), Value}, dl, DAG);
    Len = ty(Value).getVectorNumElements(); // This is Len *= 2
  }
  assert(ty(Value).getVectorNumElements() == HwLen);  // Paranoia

  // Build the byte mask from the original number of bytes.
  assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                          {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
  return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
                            MemOp, ISD::UNINDEXED, false, false);
}

// Widen a SETCC whose operands are shorter than an HVX register.
// Both operands are extended with appendUndef to a vector of the same
// element type that fills one HVX register, the comparison is done on the
// widened operands, and the leading part of the widened result is
// extracted. Returns an empty SDValue when the widened operand type is not
// an HVX vector type.
SDValue
HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
  MVT ElemTy = ty(Op0).getVectorElementType();
  unsigned HwLen = Subtarget.getVectorLength();

  // Number of ElemTy elements that fit in one HVX register.
  unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
  assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
  MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
  if (!Subtarget.isHVXVectorType(WideOpTy, true))
    return SDValue();

  SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
  SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
  EVT ResTy =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
  // Operand 2 is passed through unchanged to the widened SETCC.
  SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
                              {WideOp0, WideOp1, Op.getOperand(2)});

  // Extract, starting at index 0, a subvector of the type that the
  // original result type is transformed to.
  EVT RetTy = getTypeToTransformTo(*DAG.getContext(), ty(Op));
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
                     {SetCC, getZero(dl, MVT::i32, DAG)});
}

// Widen an extend whose operand or result is shorter than an HVX register.
// The operand is extended with appendUndef to its widened type, and a
// VUNPACK (for SIGN_EXTEND) or VUNPACKU (for any other opcode) node is
// created with the widened result type. Returns an empty SDValue when the
// operand or result element type is not an HVX element type.
SDValue
HexagonTargetLowering::WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwWidth = 8*Subtarget.getVectorLength();

  SDValue Op0 = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT OpTy = ty(Op0);
  if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
    return SDValue();

  // .-res, op->      ScalarVec      Illegal         HVX
  // Scalar           ok             -               -
  // Illegal          widen(insert)  widen           -
  // HVX              -              widen           ok

  // Factor by which Ty must grow to fill an HVX register; 1 when Ty is
  // already at least that wide.
  auto getFactor = [HwWidth](MVT Ty) {
    unsigned Width = Ty.getSizeInBits();
    return HwWidth > Width ? HwWidth / Width : 1;
  };

  // Ty with its element count multiplied by that factor.
  auto getWideTy = [getFactor](MVT Ty) {
    unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
    return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
  };

  unsigned Opcode = Op.getOpcode() == ISD::SIGN_EXTEND ? HexagonISD::VUNPACK
                                                       : HexagonISD::VUNPACKU;
  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
  SDValue WideRes = DAG.getNode(Opcode, dl, getWideTy(ResTy), WideOp);
  return WideRes;
}

// Widen a truncate whose operand or result is shorter than an HVX
// register, using a VPACKL node with the widened result type.
// Returns an empty SDValue when the operand or result element type is not
// an HVX element type.
SDValue
HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwWidth = 8*Subtarget.getVectorLength();

  SDValue Op0 = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT OpTy = ty(Op0);
  if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
    return SDValue();

  // .-res, op->      ScalarVec      Illegal         HVX
  // Scalar           ok             extract(widen)  -
  // Illegal          -              widen           widen
  // HVX              -              -               ok

  // Factor by which Ty must grow to fill an HVX register. Unlike the
  // extend case, the type's width is asserted to divide the register width.
  auto getFactor = [HwWidth](MVT Ty) {
    unsigned Width = Ty.getSizeInBits();
    assert(HwWidth % Width == 0);
    return HwWidth / Width;
  };

  // Ty with its element count multiplied by that factor.
  auto getWideTy = [getFactor](MVT Ty) {
    unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
    return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
  };

  // The operand is already an HVX vector: only the result is widened.
  if (Subtarget.isHVXVectorType(OpTy))
    return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0);

  assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?");

  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
  SDValue WideRes = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy),
                                WideOp);
  // If the original result wasn't legal and was supposed to be widened,
  // we're done.
  if (shouldWidenToHvx(ResTy, DAG))
    return WideRes;

  // The original result type wasn't meant to be widened to HVX, so
  // leave it as it is. Standard legalization should be able to deal
  // with it (since now it's a result of a target-independent ISD
  // node).
assert(ResTy.isVector());
  // Extract the original result type from the start of the widened result.
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
                     {WideRes, getZero(dl, MVT::i32, DAG)});
}

// Entry point for custom lowering of HVX operations.
// Operations whose result or any operand is an HVX vector pair are first
// offered to the splitting routines (SplitHvxMemOp for memory operations,
// SplitHvxPairOp for the listed arithmetic, logic, compare, select, extend
// and splat opcodes). Everything else, including pair operations that are
// not split, is dispatched by opcode to the matching LowerHvx* routine.
// An opcode that neither switch handles is a fatal error.
SDValue
HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  bool IsPairOp = isHvxPairTy(ty(Op)) ||
                  llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
                    return isHvxPairTy(ty(V));
                  });

  if (IsPairOp) {
    switch (Opc) {
      default:
        break;
      case ISD::LOAD:
      case ISD::STORE:
      case ISD::MLOAD:
      case ISD::MSTORE:
        return SplitHvxMemOp(Op, DAG);
      case ISD::SINT_TO_FP:
      case ISD::UINT_TO_FP:
      case ISD::FP_TO_SINT:
      case ISD::FP_TO_UINT:
        // Conversions are split only when the operand and the result have
        // the same total size; otherwise they fall through to the second
        // switch.
        if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
          return SplitHvxPairOp(Op, DAG);
        break;
      case ISD::CTPOP:
      case ISD::CTLZ:
      case ISD::CTTZ:
      case ISD::MUL:
      case ISD::FADD:
      case ISD::FSUB:
      case ISD::FMUL:
      case ISD::FMINNUM:
      case ISD::FMAXNUM:
      case ISD::MULHS:
      case ISD::MULHU:
      case ISD::AND:
      case ISD::OR:
      case ISD::XOR:
      case ISD::SRA:
      case ISD::SHL:
      case ISD::SRL:
      case ISD::SMIN:
      case ISD::SMAX:
      case ISD::UMIN:
      case ISD::UMAX:
      case ISD::SETCC:
      case ISD::VSELECT:
      case ISD::SIGN_EXTEND:
      case ISD::ZERO_EXTEND:
      case ISD::SIGN_EXTEND_INREG:
      case ISD::SPLAT_VECTOR:
        return SplitHvxPairOp(Op, DAG);
    }
  }

  switch (Opc) {
    default:
      break;
    case ISD::BUILD_VECTOR:            return LowerHvxBuildVector(Op, DAG);
    case ISD::SPLAT_VECTOR:            return LowerHvxSplatVector(Op, DAG);
    case ISD::CONCAT_VECTORS:          return LowerHvxConcatVectors(Op, DAG);
    case ISD::INSERT_SUBVECTOR:        return LowerHvxInsertSubvector(Op, DAG);
    case ISD::INSERT_VECTOR_ELT:       return LowerHvxInsertElement(Op, DAG);
    case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG);
    case ISD::BITCAST:                 return LowerHvxBitcast(Op, DAG);
    case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);
    case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);
    case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG);
    case ISD::CTTZ:                    return LowerHvxCttz(Op, DAG);
    case ISD::SELECT:                  return LowerHvxSelect(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:                     return LowerHvxShift(Op, DAG);
    case ISD::MULHS:
    case ISD::MULHU:                   return LowerHvxMulh(Op, DAG);
    case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
    // SETCC and void intrinsics are returned as they are.
    case ISD::SETCC:
    case ISD::INTRINSIC_VOID:          return Op;
    case ISD::INTRINSIC_WO_CHAIN:      return LowerHvxIntrinsic(Op, DAG);
    case ISD::MLOAD:
    case ISD::MSTORE:                  return LowerHvxMaskedOp(Op, DAG);
    // Unaligned loads will be handled by the default lowering.
    case ISD::LOAD:                    return SDValue();
    case ISD::FP_EXTEND:               return LowerHvxFpExtend(Op, DAG);
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:              return LowerHvxConvertFpInt(Op, DAG);
  }
  // Print the offending node (in builds with assertions) before aborting.
#ifndef NDEBUG
  Op.dumpr(&DAG);
#endif
  llvm_unreachable("Unhandled HVX operation");
}

// Custom handling for nodes whose operands need to be widened or split.
// For extends, SETCC and TRUNCATE the decision is based on the type of
// operand 0; for STORE on the type of the stored value. Masked loads and
// stores of HVX vector pairs are split. Replacement values are appended to
// Results; Results is left untouched when nothing applies.
void
HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
      SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  unsigned Opc = N->getOpcode();
  SDValue Op(N, 0);

  switch (Opc) {
    case ISD::ANY_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
      if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
        if (SDValue T = WidenHvxExtend(Op, DAG))
          Results.push_back(T);
      }
      break;
    case ISD::SETCC:
      if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
        if (SDValue T = WidenHvxSetCC(Op, DAG))
          Results.push_back(T);
      }
      break;
    case ISD::TRUNCATE:
      if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
2500 if (SDValue T = WidenHvxTruncate(Op, DAG)) 2501 Results.push_back(T); 2502 } 2503 break; 2504 case ISD::STORE: { 2505 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) { 2506 SDValue Store = WidenHvxStore(Op, DAG); 2507 Results.push_back(Store); 2508 } 2509 break; 2510 } 2511 case ISD::MLOAD: 2512 if (isHvxPairTy(ty(Op))) { 2513 SDValue S = SplitHvxMemOp(Op, DAG); 2514 assert(S->getOpcode() == ISD::MERGE_VALUES); 2515 Results.push_back(S.getOperand(0)); 2516 Results.push_back(S.getOperand(1)); 2517 } 2518 break; 2519 case ISD::MSTORE: 2520 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value 2521 SDValue S = SplitHvxMemOp(Op, DAG); 2522 Results.push_back(S); 2523 } 2524 break; 2525 default: 2526 break; 2527 } 2528 } 2529 2530 void 2531 HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, 2532 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 2533 unsigned Opc = N->getOpcode(); 2534 SDValue Op(N, 0); 2535 switch (Opc) { 2536 case ISD::ANY_EXTEND: 2537 case ISD::SIGN_EXTEND: 2538 case ISD::ZERO_EXTEND: 2539 if (shouldWidenToHvx(ty(Op), DAG)) { 2540 if (SDValue T = WidenHvxExtend(Op, DAG)) 2541 Results.push_back(T); 2542 } 2543 break; 2544 case ISD::SETCC: 2545 if (shouldWidenToHvx(ty(Op), DAG)) { 2546 if (SDValue T = WidenHvxSetCC(Op, DAG)) 2547 Results.push_back(T); 2548 } 2549 break; 2550 case ISD::TRUNCATE: 2551 if (shouldWidenToHvx(ty(Op), DAG)) { 2552 if (SDValue T = WidenHvxTruncate(Op, DAG)) 2553 Results.push_back(T); 2554 } 2555 break; 2556 case ISD::LOAD: { 2557 if (shouldWidenToHvx(ty(Op), DAG)) { 2558 SDValue Load = WidenHvxLoad(Op, DAG); 2559 assert(Load->getOpcode() == ISD::MERGE_VALUES); 2560 Results.push_back(Load.getOperand(0)); 2561 Results.push_back(Load.getOperand(1)); 2562 } 2563 break; 2564 } 2565 case ISD::BITCAST: 2566 if (isHvxBoolTy(ty(N->getOperand(0)))) { 2567 SDValue Op(N, 0); 2568 SDValue C = LowerHvxBitcast(Op, DAG); 2569 Results.push_back(C); 2570 } 2571 break; 2572 default: 2573 break; 2574 } 
2575 } 2576 2577 SDValue 2578 HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) 2579 const { 2580 const SDLoc &dl(N); 2581 SelectionDAG &DAG = DCI.DAG; 2582 SDValue Op(N, 0); 2583 unsigned Opc = Op.getOpcode(); 2584 if (DCI.isBeforeLegalizeOps()) 2585 return SDValue(); 2586 2587 SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end()); 2588 2589 switch (Opc) { 2590 case ISD::VSELECT: { 2591 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0) 2592 SDValue Cond = Ops[0]; 2593 if (Cond->getOpcode() == ISD::XOR) { 2594 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1); 2595 if (C1->getOpcode() == HexagonISD::QTRUE) 2596 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]); 2597 } 2598 break; 2599 } 2600 case HexagonISD::V2Q: 2601 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) { 2602 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0))) 2603 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op)) 2604 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op)); 2605 } 2606 break; 2607 case HexagonISD::Q2V: 2608 if (Ops[0].getOpcode() == HexagonISD::QTRUE) 2609 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op), 2610 DAG.getConstant(-1, dl, MVT::i32)); 2611 if (Ops[0].getOpcode() == HexagonISD::QFALSE) 2612 return getZero(dl, ty(Op), DAG); 2613 break; 2614 case HexagonISD::VINSERTW0: 2615 if (isUndef(Ops[1])) 2616 return Ops[0];; 2617 break; 2618 case HexagonISD::VROR: { 2619 if (Ops[0].getOpcode() == HexagonISD::VROR) { 2620 SDValue Vec = Ops[0].getOperand(0); 2621 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1); 2622 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1}); 2623 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot}); 2624 } 2625 break; 2626 } 2627 } 2628 2629 return SDValue(); 2630 } 2631 2632 bool 2633 HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const { 2634 auto Action = getPreferredHvxVectorAction(Ty); 2635 if (Action == 
TargetLoweringBase::TypeWidenVector) { 2636 EVT WideTy = getTypeToTransformTo(*DAG.getContext(), Ty); 2637 assert(WideTy.isSimple()); 2638 return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true); 2639 } 2640 return false; 2641 } 2642 2643 bool 2644 HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { 2645 if (!Subtarget.useHVXOps()) 2646 return false; 2647 // If the type of any result, or any operand type are HVX vector types, 2648 // this is an HVX operation. 2649 auto IsHvxTy = [this](EVT Ty) { 2650 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true); 2651 }; 2652 auto IsHvxOp = [this](SDValue Op) { 2653 return Op.getValueType().isSimple() && 2654 Subtarget.isHVXVectorType(ty(Op), true); 2655 }; 2656 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp)) 2657 return true; 2658 2659 // Check if this could be an HVX operation after type widening. 2660 auto IsWidenedToHvx = [this, &DAG](SDValue Op) { 2661 if (!Op.getValueType().isSimple()) 2662 return false; 2663 MVT ValTy = ty(Op); 2664 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG); 2665 }; 2666 2667 for (int i = 0, e = N->getNumValues(); i != e; ++i) { 2668 if (IsWidenedToHvx(SDValue(N, i))) 2669 return true; 2670 } 2671 return llvm::any_of(N->ops(), IsWidenedToHvx); 2672 } 2673