//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the WebAssemblyTargetLowering class.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyISelLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "Utils/WebAssemblyTypeUtilities.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-lower"

WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
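  // Note that all of the 128-bit SIMD types below share the single V128
  // register class; the lane interpretation (v16i8, v4f32, ...) is carried by
  // the value type rather than by the register class.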
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  }
  if (Subtarget->hasFP16()) {
    addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
  }
  if (Subtarget->hasReferenceTypes()) {
    addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
    addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
    if (Subtarget->hasExceptionHandling()) {
      addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
    }
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Transform loads and stores to pointers in address space 1 to loads and
  // stores to WebAssembly global variables, outside linear memory.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::STORE, T, Custom);
  }
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64}) {
      setOperationAction(ISD::LOAD, T, Custom);
      setOperationAction(ISD::STORE, T, Custom);
    }
  }
  if (Subtarget->hasFP16()) {
    setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
    setOperationAction(ISD::STORE, MVT::v8f16, Custom);
  }
  if (Subtarget->hasReferenceTypes()) {
    // We need custom load and store lowering for externref, funcref, and
    // Other. The MVT::Other here represents tables of reference types.
    for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
      setOperationAction(ISD::LOAD, T, Custom);
      setOperationAction(ISD::STORE, T, Custom);
    }
  }

  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);
  setOperationAction(ISD::CLEAR_CACHE, MVT::Other, Custom);

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we do that custom.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
    if (!Subtarget->hasFP16() && T == MVT::v8f16) {
      continue;
    }
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op :
         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Note supported floating-point library function operators that otherwise
    // default to expand.
    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
                    ISD::FRINT, ISD::FROUNDEVEN})
      setOperationAction(Op, T, Legal);
    // Support minimum and maximum, which otherwise default to expand.
    setOperationAction(ISD::FMINIMUM, T, Legal);
    setOperationAction(ISD::FMAXIMUM, T, Legal);
    // When experimental v8f16 support is enabled these instructions don't need
    // to be expanded.
    if (T != MVT::v8f16) {
      setOperationAction(ISD::FP16_TO_FP, T, Expand);
      setOperationAction(ISD::FP_TO_FP16, T, Expand);
    }
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  // Expand unavailable integer operations.
  for (auto Op :
       {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
        ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
        ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
    for (auto T : {MVT::i32, MVT::i64})
      setOperationAction(Op, T, Expand);
    if (Subtarget->hasSIMD128())
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);
  }

  if (Subtarget->hasWideArithmetic()) {
    setOperationAction(ISD::ADD, MVT::i128, Custom);
    setOperationAction(ISD::SUB, MVT::i128, Custom);
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UADDO, MVT::i64, Custom);
  }

  if (Subtarget->hasNontrappingFPToInt())
    for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
      for (auto T : {MVT::i32, MVT::i64})
        setOperationAction(Op, T, Custom);

  // SIMD-specific configuration
  if (Subtarget->hasSIMD128()) {

    // Combine partial.reduce.add before legalization gets confused.
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

    // Combine wide-vector muls, with extend inputs, to extmul_half.
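    // For illustration (a sketch of the kind of pattern targeted): a mul of
    // sign-extended low halves such as
    //   (v8i16 (mul (sext_inreg (extract_subvector $x, 0)),
    //               (sext_inreg (extract_subvector $y, 0))))
    // can become i16x8.extmul_low_i8x16_s, avoiding the explicit extends.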
    setTargetDAGCombine(ISD::MUL);

    // Combine vector mask reductions into alltrue/anytrue
    setTargetDAGCombine(ISD::SETCC);

    // Convert vector-to-integer bitcasts to bitmask
    setTargetDAGCombine(ISD::BITCAST);

    // Hoist bitcasts out of shuffles
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

    // Combine extends of extract_subvectors into widening ops
    setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND});

    // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
    // conversion ops
    setTargetDAGCombine({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_EXTEND,
                         ISD::EXTRACT_SUBVECTOR});

    // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
    // into conversion ops
    setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
                         ISD::FP_ROUND, ISD::CONCAT_VECTORS});

    setTargetDAGCombine(ISD::TRUNCATE);

    // Support saturating add/sub for i8x16 and i16x8
    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
      for (auto T : {MVT::v16i8, MVT::v8i16})
        setOperationAction(Op, T, Legal);

    // Support integer abs
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction(ISD::ABS, T, Legal);

    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::BUILD_VECTOR, T, Custom);

    if (Subtarget->hasFP16())
      setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);

    // We have custom shuffle lowering to expose the shuffle mask
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

    if (Subtarget->hasFP16())
      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f16, Custom);

    // Support splatting
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::SPLAT_VECTOR, T, Legal);

    // Custom lowering since wasm shifts must have a scalar shift amount
    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Custom);

    // Custom lower lane accesses to expand out variable indices
    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32,
                     MVT::v2i64, MVT::v2f64})
        setOperationAction(Op, T, Custom);

    // There is no i8x16.mul instruction
    setOperationAction(ISD::MUL, MVT::v16i8, Expand);

    // There is no vector conditional select instruction
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::SELECT_CC, T, Expand);

    // Expand integer operations supported for scalars but not SIMD
    for (auto Op :
         {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);

    // But we do have integer min and max operations
    for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Legal);

    // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
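    // For illustration: the generic expansion can compute, e.g.,
    //   cttz(x) = popcnt(~x & (x - 1))
    // so having i8x16.popcnt legal gives usable lowerings for both ctlz and
    // cttz without scalarizing.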
    setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
    setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
    setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);

    // Custom lower bit counting operations for other types to scalarize them.
    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
      for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Custom);

    // Expand float operations supported for scalars but not SIMD
    for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                    ISD::FEXP, ISD::FEXP2})
      for (auto T : {MVT::v4f32, MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // Unsigned comparison operations are unavailable for i64x2 vectors.
    for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE})
      setCondCodeAction(CC, MVT::v2i64, Custom);

    // 64x2 conversions are not in the spec
    for (auto Op :
         {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // But saturating fp_to_int conversions are
    for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) {
      setOperationAction(Op, MVT::v4i32, Custom);
      if (Subtarget->hasFP16()) {
        setOperationAction(Op, MVT::v8i16, Custom);
      }
    }

    // Support vector extending
    for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
    }
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Sign extends are legal only when extending a vector extract
    auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  }
  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  //  - truncating SIMD stores and most extending loads
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                   MVT::v2f64}) {
      for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
        if (MVT(T) != MemT) {
          setTruncStoreAction(T, MemT, Expand);
          for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
            setLoadExtAction(Ext, T, MemT, Expand);
        }
      }
    }
    // But some vector extending loads are legal
    for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
      setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
      setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
      setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
    }
    setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
  }

  // Don't do anything clever with build_pairs
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  setMaxAtomicSizeInBitsSupported(64);

  // Always convert switches to br_tables unless there is only one case, which
  // is equivalent to a simple branch. This reduces code size for wasm, and we
  // defer possible jump table optimizations to the VM.
  setMinimumJumpTableEntries(2);
}

MVT WebAssemblyTargetLowering::getPointerTy(const DataLayout &DL,
                                            uint32_t AS) const {
  if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
    return MVT::externref;
  if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
    return MVT::funcref;
  return TargetLowering::getPointerTy(DL, AS);
}

MVT WebAssemblyTargetLowering::getPointerMemTy(const DataLayout &DL,
                                               uint32_t AS) const {
  if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
    return MVT::externref;
  if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
    return MVT::funcref;
  return TargetLowering::getPointerMemTy(DL, AS);
}

bool WebAssemblyTargetLowering::shouldExpandPartialReductionIntrinsic(
    const IntrinsicInst *I) const {
  if (I->getIntrinsicID() != Intrinsic::experimental_vector_partial_reduce_add)
    return true;

  EVT VT = EVT::getEVT(I->getType());
  auto Op1 = I->getOperand(1);

  if (auto *InputInst = dyn_cast<Instruction>(Op1)) {
    if (InstructionOpcodeToISD(InputInst->getOpcode()) != ISD::MUL)
      return true;

    if (isa<Instruction>(InputInst->getOperand(0)) &&
        isa<Instruction>(InputInst->getOperand(1))) {
      // dot only supports signed inputs, but we also support lowering
      // unsigned.
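      // For illustration (a sketch of the accepted shape):
      //   partial.reduce.add(v4i32 %acc,
      //                      mul(sext(v16i8 %a), sext(v16i8 %b)))
      // where both extends have the same signedness and the input has 2x or
      // 4x the element count of the result, as checked below.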
      if (cast<Instruction>(InputInst->getOperand(0))->getOpcode() !=
          cast<Instruction>(InputInst->getOperand(1))->getOpcode())
        return true;

      EVT Op1VT = EVT::getEVT(Op1->getType());
      if (Op1VT.getVectorElementType() == VT.getVectorElementType() &&
          ((VT.getVectorElementCount() * 2 == Op1VT.getVectorElementCount()) ||
           (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount())))
        return false;
    }
  }
  return true;
}

TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have wasm instructions for these
  switch (AI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Xchg:
    return AtomicExpansionKind::None;
  default:
    break;
  }
  return AtomicExpansionKind::CmpXChg;
}

bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  // Implementation copied from X86TargetLowering.
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?
  if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
}

FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}

MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                      EVT VT) const {
  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
  if (BitWidth > 1 && BitWidth < 8)
    BitWidth = 8;

  if (BitWidth > 64) {
    // The shift will be lowered to a libcall, and compiler-rt libcalls expect
    // the count to be an i32.
    BitWidth = 32;
    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
           "32-bit shift counts ought to be enough for anyone");
  }

  MVT Result = MVT::getIntegerVT(BitWidth);
  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
         "Unable to represent scalar shift amount type");
  return Result;
}

// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                       MachineBasicBlock *BB,
                                       const TargetInstrInfo &TII,
                                       bool IsUnsigned, bool Int64,
                                       bool Float64, unsigned LoweredOpcode) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  Register OutReg = MI.getOperand(0).getReg();
  Register InReg = MI.getOperand(1).getReg();

  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
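  // A sketch of the range check built below, using f32 -> i32 signed as an
  // illustration: the trapping conversion is safe iff fabs(x) < 2^31, i.e.
  // fabs(x) < -(double)INT32_MIN. For unsigned conversions the check is
  // instead x < 2^32 && x >= 0. Out-of-range inputs take the substitute
  // value (0 for unsigned, INT_MIN for signed) rather than trapping.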
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
  }
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    Register SecondCmpReg =
        MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion or using
  // the substitute value.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}

// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
// instruction to handle the zero-length case.
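//
// The emitted control flow is roughly the following (a sketch in wasm-like
// pseudocode; the actual output is MIR blocks that later passes turn into
// structured wasm):
//
//   local.get $len
//   i32.eqz
//   br_if $done        ;; skip the copy when len == 0
//   memory.copy        ;; consumes dst, src, len
//   ;; $done: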
static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
                                      MachineBasicBlock *BB,
                                      const TargetInstrInfo &TII, bool Int64) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  MachineOperand DstMem = MI.getOperand(0);
  MachineOperand SrcMem = MI.getOperand(1);
  MachineOperand Dst = MI.getOperand(2);
  MachineOperand Src = MI.getOperand(3);
  MachineOperand Len = MI.getOperand(4);

  // We're going to add an extra use to `Len` to test if it's zero; that
  // use shouldn't be a kill, even if the original use is.
  MachineOperand NoKillLen = Len;
  NoKillLen.setIsKill(false);

  // Decide on which `MachineInstr` opcode we're going to use.
  unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
  unsigned MemoryCopy =
      Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;

  // Create two new basic blocks; one for the new `memory.copy` that we can
  // branch over, and one for the rest of the instructions after the original
  // `memory.copy`.
  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  // Connect the CFG edges.
  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(DoneMBB);
  TrueMBB->addSuccessor(DoneMBB);

  // Create a virtual register for the `Eqz` result.
  unsigned EqzReg;
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);

  // Erase the original `memory.copy`.
  MI.eraseFromParent();

  // Test if `Len` is zero.
  BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);

  // Insert a new `memory.copy`.
  BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
      .add(DstMem)
      .add(SrcMem)
      .add(Dst)
      .add(Src)
      .add(Len);

  // Create the CFG triangle.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
  BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);

  return DoneMBB;
}

// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
// instruction to handle the zero-length case.
static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
                                      MachineBasicBlock *BB,
                                      const TargetInstrInfo &TII, bool Int64) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  MachineOperand Mem = MI.getOperand(0);
  MachineOperand Dst = MI.getOperand(1);
  MachineOperand Val = MI.getOperand(2);
  MachineOperand Len = MI.getOperand(3);

  // We're going to add an extra use to `Len` to test if it's zero; that
  // use shouldn't be a kill, even if the original use is.
  MachineOperand NoKillLen = Len;
  NoKillLen.setIsKill(false);

  // Decide on which `MachineInstr` opcode we're going to use.
  unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
  unsigned MemoryFill =
      Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;

  // Create two new basic blocks; one for the new `memory.fill` that we can
  // branch over, and one for the rest of the instructions after the original
  // `memory.fill`.
  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  // Connect the CFG edges.
  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(DoneMBB);
  TrueMBB->addSuccessor(DoneMBB);

  // Create a virtual register for the `Eqz` result.
  unsigned EqzReg;
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);

  // Erase the original `memory.fill`.
  MI.eraseFromParent();

  // Test if `Len` is zero.
  BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);

  // Insert a new `memory.fill`.
  BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);

  // Create the CFG triangle.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
  BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);

  return DoneMBB;
}

static MachineBasicBlock *
LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
                 const WebAssemblySubtarget *Subtarget,
                 const TargetInstrInfo &TII) {
  MachineInstr &CallParams = *CallResults.getPrevNode();
  assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
  assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
         CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);

  bool IsIndirect =
      CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
  bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;

  bool IsFuncrefCall = false;
  if (IsIndirect && CallParams.getOperand(0).isReg()) {
    Register Reg = CallParams.getOperand(0).getReg();
    const MachineFunction *MF = BB->getParent();
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
    IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
    assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
  }

  unsigned CallOp;
  if (IsIndirect && IsRetCall) {
    CallOp = WebAssembly::RET_CALL_INDIRECT;
  } else if (IsIndirect) {
    CallOp = WebAssembly::CALL_INDIRECT;
  } else if (IsRetCall) {
    CallOp = WebAssembly::RET_CALL;
  } else {
    CallOp = WebAssembly::CALL;
  }

  MachineFunction &MF = *BB->getParent();
  const MCInstrDesc &MCID = TII.get(CallOp);
  MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));

  // Move the function pointer to the end of the arguments for indirect calls.
  if (IsIndirect) {
    auto FnPtr = CallParams.getOperand(0);
    CallParams.removeOperand(0);

    // For funcrefs, call_indirect is done through __funcref_call_table and the
    // funcref is always installed in slot 0 of the table, therefore instead of
    // having the function pointer added at the end of the params list, a zero
    // (the index in __funcref_call_table) is added.
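    // (The funcref itself is installed into slot 0 by a table.set emitted on
    // the SelectionDAG side; see the funcref handling in LowerCall below.)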
    if (IsFuncrefCall) {
      Register RegZero =
          MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
      MachineInstrBuilder MIBC0 =
          BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);

      BB->insert(CallResults.getIterator(), MIBC0);
      MachineInstrBuilder(MF, CallParams).addReg(RegZero);
    } else
      CallParams.addOperand(FnPtr);
  }

  for (auto Def : CallResults.defs())
    MIB.add(Def);

  if (IsIndirect) {
    // Placeholder for the type index.
    MIB.addImm(0);
    // The table into which this call_indirect indexes.
    MCSymbolWasm *Table = IsFuncrefCall
                              ? WebAssembly::getOrCreateFuncrefCallTableSymbol(
                                    MF.getContext(), Subtarget)
                              : WebAssembly::getOrCreateFunctionTableSymbol(
                                    MF.getContext(), Subtarget);
    if (Subtarget->hasCallIndirectOverlong()) {
      MIB.addSym(Table);
    } else {
      // For the MVP there is at most one table whose number is 0, but we can't
      // write a table symbol or issue relocations. Instead we just ensure the
      // table is live and write a zero.
      Table->setNoStrip();
      MIB.addImm(0);
    }
  }

  for (auto Use : CallParams.uses())
    MIB.add(Use);

  BB->insert(CallResults.getIterator(), MIB);
  CallParams.eraseFromParent();
  CallResults.eraseFromParent();

  // If this is a funcref call, to avoid hidden GC roots, we need to clear the
  // table slot with ref.null upon call_indirect return.
  //
  // This generates the following code, which comes right after a call_indirect
  // of a funcref:
  //
  //   i32.const 0
  //   ref.null func
  //   table.set __funcref_call_table
  if (IsIndirect && IsFuncrefCall) {
    MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
        MF.getContext(), Subtarget);
    Register RegZero =
        MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
    MachineInstr *Const0 =
        BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
    BB->insertAfter(MIB.getInstr()->getIterator(), Const0);

    Register RegFuncref =
        MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
    MachineInstr *RefNull =
        BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
    BB->insertAfter(Const0->getIterator(), RefNull);

    MachineInstr *TableSet =
        BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
            .addSym(Table)
            .addReg(RegZero)
            .addReg(RegFuncref);
    BB->insertAfter(RefNull->getIterator(), TableSet);
  }

  return BB;
}

MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case WebAssembly::FP_TO_SINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, false, false,
                        WebAssembly::I32_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, false, false,
                        WebAssembly::I32_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, true, false,
                        WebAssembly::I64_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, true, false,
                        WebAssembly::I64_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, false, true,
                        WebAssembly::I32_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, false, true,
                        WebAssembly::I32_TRUNC_U_F64);
  case WebAssembly::FP_TO_SINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, true, true,
                        WebAssembly::I64_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, true, true,
                        WebAssembly::I64_TRUNC_U_F64);
  case WebAssembly::MEMCPY_A32:
    return LowerMemcpy(MI, DL, BB, TII, false);
  case WebAssembly::MEMCPY_A64:
    return LowerMemcpy(MI, DL, BB, TII, true);
  case WebAssembly::MEMSET_A32:
    return LowerMemset(MI, DL, BB, TII, false);
  case WebAssembly::MEMSET_A64:
    return LowerMemset(MI, DL, BB, TII, true);
  case WebAssembly::CALL_RESULTS:
  case WebAssembly::RET_CALL_RESULTS:
    return LowerCallResults(MI, DL, BB, Subtarget, TII);
  }
}

const char *
WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
  case WebAssemblyISD::FIRST_NUMBER:
    break;
#define HANDLE_NODETYPE(NODE)                                                  \
  case WebAssemblyISD::NODE:                                                   \
    return "WebAssemblyISD::" #NODE;
#include "WebAssemblyISD.def"
#undef HANDLE_NODETYPE
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // WebAssembly register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
      if (Subtarget->hasSIMD128() && VT.isVector()) {
        if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &WebAssembly::V128RegClass);
      }
      if (VT.isInteger() && !VT.isVector()) {
        if (VT.getSizeInBits() <= 32)
          return std::make_pair(0U, &WebAssembly::I32RegClass);
        if (VT.getSizeInBits() <= 64)
          return std::make_pair(0U, &WebAssembly::I64RegClass);
      }
      if (VT.isFloatingPoint() && !VT.isVector()) {
        switch (VT.getSizeInBits()) {
        case 32:
          return std::make_pair(0U, &WebAssembly::F32RegClass);
        case 64:
          return std::make_pair(0U, &WebAssembly::F64RegClass);
        default:
          break;
        }
      }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
  // Assume ctz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
  // Assume clz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                      const AddrMode &AM,
                                                      Type *Ty, unsigned AS,
                                                      Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode gives us no way to determine if wrapping could be
  // happening, so we approximate this by accepting only non-negative offsets.
  if (AM.BaseOffs < 0)
    return false;

  // WebAssembly has no scale register operands.
  if (AM.Scale != 0)
    return false;

  // Everything else is legal.
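  // For illustration: a mode like (base + 16) can fold into the unsigned
  // offset immediate of a wasm load/store, while a scaled index such as
  // (base + 4 * i) has no encoding and is rejected above.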
  return true;
}

bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
    MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
  // WebAssembly supports unaligned accesses, though it should be declared
  // with the p2align attribute on loads and stores which do so, and there
  // may be a performance impact. We tell LLVM they're "fast" because
  // for the kinds of things that LLVM uses this for (merging adjacent stores
  // of constants, etc.), WebAssembly implementations will either want the
  // unaligned access or they'll split anyway.
  if (Fast)
    *Fast = 1;
  return true;
}

bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
                                              AttributeList Attr) const {
  // The current thinking is that wasm engines will perform this optimization,
  // so we can save on code size.
  return true;
}

bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  EVT ExtT = ExtVal.getValueType();
  EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}

bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // Wasm doesn't support function addresses with offsets
  const GlobalValue *GV = GA->getGlobal();
  return isa<Function>(GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
}

EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                  LLVMContext &C,
                                                  EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  // So far, all branch instructions in Wasm take an I32 condition.
  // The default TargetLowering::getSetCCResultType returns the pointer size,
  // which would be useful to reduce instruction counts when testing
  // against 64-bit pointers/values if at some point Wasm supports that.
  return EVT::getIntegerVT(C, 32);
}

bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                   const CallInst &I,
                                                   MachineFunction &MF,
                                                   unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_memory_atomic_notify:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // atomic.notify instruction does not really load the memory specified with
    // this argument, but MachineMemOperand should either be load or store, so
    // we set this to a load.
    // FIXME Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatiles in the backend, so we should be
    // consistent. The same applies for wasm_atomic_wait intrinsics too.
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_memory_atomic_wait32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_memory_atomic_wait64:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_loadf16_f32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::f16;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(2);
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_storef16_f32:
    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = MVT::f16;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = Align(2);
    Info.flags = MachineMemOperand::MOStore;
    return true;
  default:
    return false;
  }
}

void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
    const SelectionDAG &DAG, unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Op.getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::wasm_bitmask: {
      unsigned BitWidth = Known.getBitWidth();
      EVT VT = Op.getOperand(1).getSimpleValueType();
      unsigned PossibleBits = VT.getVectorNumElements();
      APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
      Known.Zero |= ZeroMask;
      break;
    }
    }
    break;
  }

  // For 128-bit addition, if the upper halves of both operands are zero, then
  // all bits of the high half of the result are known to be zero except for
  // the lowest bit (the carry out of the low half).
  case WebAssemblyISD::I64_ADD128:
    if (Op.getResNo() == 1) {
      SDValue LHS_HI = Op.getOperand(1);
      SDValue RHS_HI = Op.getOperand(3);
      if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
}

TargetLoweringBase::LegalizeTypeAction
WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
  if (VT.isFixedLengthVector()) {
    MVT EltVT = VT.getVectorElementType();
    // We have legal vector types with these lane types, so widening the
    // vector would let us use some of the lanes directly without having to
    // extend or truncate values.
    if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
        EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
      return TypeWidenVector;
  }

  return TargetLoweringBase::getPreferredVectorAction(VT);
}

bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
    SDValue Op, const TargetLoweringOpt &TLO) const {
  // The ISel process runs DAGCombiner after legalization; this step is called
  // the SelectionDAG optimization phase.
  // This post-legalization combining process runs DAGCombiner on each node,
  // and if there was a change to be made, re-runs legalization again on it
  // and its user nodes to make sure everything is in a legalized state.
  //
  // The legalization calls lowering routines, and we do our custom lowering
  // for build_vectors (LowerBUILD_VECTOR), which converts undef vector
  // elements into zeros. But there is a set of routines in DAGCombiner that
  // turns unused (= not demanded) nodes into undef, among which
  // SimplifyDemandedVectorElts turns unused vector elements into undefs. But
  // this routine does not work with our custom LowerBUILD_VECTOR, which turns
  // undefs into zeros. This combination can result in an infinite loop, in
  // which undefs are converted to zeros in legalization and back to undefs in
  // combining.
  //
  // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
  // running for build_vectors.
  if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
    return false;
  return true;
}

//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}

// Test whether the given calling convention is supported.
static bool callingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent target-independent
  // conventions. We don't yet have a way to annotate calls with properties
  // like "cold", and we don't have any call-clobbered registers, so these are
  // mostly all handled the same.
  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
         CallConv == CallingConv::Cold ||
         CallConv == CallingConv::PreserveMost ||
         CallConv == CallingConv::PreserveAll ||
         CallConv == CallingConv::CXX_FAST_TLS ||
         CallConv == CallingConv::WASM_EmscriptenInvoke ||
         CallConv == CallingConv::Swift;
}

SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    auto NoTail = [&](const char *Msg) {
      if (CLI.CB && CLI.CB->isMustTailCall())
        fail(DL, DAG, Msg);
      CLI.IsTailCall = false;
    };

    if (!Subtarget->hasTailCall())
      NoTail("WebAssembly 'tail-call' feature not enabled");

    // Varargs calls cannot be tail calls because the buffer is on the stack
    if (CLI.IsVarArg)
      NoTail("WebAssembly does not support varargs tail calls");

    // Do not tail call unless caller and callee return types match
    const Function &F = MF.getFunction();
    const TargetMachine &TM = getTargetMachine();
    Type *RetTy = F.getReturnType();
    SmallVector<MVT, 4> CallerRetTys;
    SmallVector<MVT, 4> CalleeRetTys;
    computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
    computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
    bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                      std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                 CalleeRetTys.begin());
    if (!TypesMatch)
      NoTail("WebAssembly tail call requires caller and callee return types "
             "to match");

    // If pointers to local stack values are passed, we cannot tail call
    if (CLI.CB) {
      for (auto &Arg : CLI.CB->args()) {
        Value *Val = Arg.get();
        // Trace the value back through pointer operations
        while (true) {
          Value *Src = Val->stripPointerCastsAndAliases();
          if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
            Src = GEP->getPointerOperand();
          if (Val == Src)
            break;
          Val = Src;
        }
        if (isa<AllocaInst>(Val)) {
          NoTail(
              "WebAssembly does not support tail calling with stack arguments");
          break;
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  bool HasSwiftSelfArg = false;
  bool HasSwiftErrorArg = false;
  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
    HasSwiftErrorArg |= Out.Flags.isSwiftError();
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getNonZeroByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
                            Out.Flags.getNonZeroByValAlign(),
                            /*isVolatile*/ false, /*AlwaysInline=*/false,
                            /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
                            MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // For swiftcc, emit additional swiftself and swifterror arguments
  // if there aren't any. These additional arguments are also added to the
  // callee signature; they are necessary to match caller and callee
  // signatures for indirect calls.
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftSelf();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
    if (!HasSwiftErrorArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftError();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
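    // For illustration (a sketch): for printf("%f %d\n", 2.0, 3), the two
    // non-fixed arguments would get offsets 0 (f64) and 8 (i32) under natural
    // alignment, and a pointer to the resulting buffer is passed as a final
    // implicit argument below.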
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      Align Alignment =
          std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
      unsigned Offset =
          CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    MaybeAlign StackAlign = Layout.getStackAlignment();
    assert(StackAlign && "data layout string is missing stack alignment");
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
                       MachinePointerInfo::getFixedStack(MF, FI, Offset)));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
    // doesn't add MO_GOT, which is not needed for direct calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                        getPointerTy(DAG.getDataLayout()),
                                        GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  // Lastly, if this is a call to a funcref, we need to add a table.set
  // instruction to the chain and transform the call.
  if (CLI.CB && WebAssembly::isWebAssemblyFuncrefType(
                    CLI.CB->getCalledOperand()->getType())) {
    // In the absence of the function references proposal, where a funcref
    // call would be lowered to call_ref, we use reference types to generate
    // a table.set that installs the funcref in a special table used solely
    // for this purpose, followed by a call_indirect. Here we just generate
    // the table.set, and return the SDValue of the table.set so that
    // LowerCall can finalize the lowering by generating the call_indirect.
    SDValue Chain = Ops[0];

    MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
        MF.getContext(), Subtarget);
    SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
    SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
    SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
    SDValue TableSet = DAG.getMemIntrinsicNode(
        WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
        MVT::funcref,
        // Machine Mem Operand args
        MachinePointerInfo(
            WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF),
        CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
        MachineMemOperand::MOStore);

    Ops[0] = TableSet; // The new chain is the TableSet itself
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);

  for (size_t I = 0; I < Ins.size(); ++I)
    InVals.push_back(Res.getValue(I));

  // Return the chain
  return Res.getValue(Ins.size());
}

bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
    const Type *RetTy) const {
  // WebAssembly can only handle returning tuples with multivalue enabled
  return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
}

SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
         "MVP WebAssembly can only return up to one value");
1490 if (!callingConvSupported(CallConv)) 1491 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); 1492 1493 SmallVector<SDValue, 4> RetOps(1, Chain); 1494 RetOps.append(OutVals.begin(), OutVals.end()); 1495 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps); 1496 1497 // Record the number and types of the return values. 1498 for (const ISD::OutputArg &Out : Outs) { 1499 assert(!Out.Flags.isByVal() && "byval is not valid for return values"); 1500 assert(!Out.Flags.isNest() && "nest is not valid for return values"); 1501 assert(Out.IsFixed && "non-fixed return value is not valid"); 1502 if (Out.Flags.isInAlloca()) 1503 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results"); 1504 if (Out.Flags.isInConsecutiveRegs()) 1505 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results"); 1506 if (Out.Flags.isInConsecutiveRegsLast()) 1507 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results"); 1508 } 1509 1510 return Chain; 1511 } 1512 1513 SDValue WebAssemblyTargetLowering::LowerFormalArguments( 1514 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 1515 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 1516 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 1517 if (!callingConvSupported(CallConv)) 1518 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); 1519 1520 MachineFunction &MF = DAG.getMachineFunction(); 1521 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>(); 1522 1523 // Set up the incoming ARGUMENTS value, which serves to represent the liveness 1524 // of the incoming values before they're represented by virtual registers. 1525 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS); 1526 1527 bool HasSwiftErrorArg = false; 1528 bool HasSwiftSelfArg = false; 1529 for (const ISD::InputArg &In : Ins) { 1530 HasSwiftSelfArg |= In.Flags.isSwiftSelf(); 1531 HasSwiftErrorArg |= In.Flags.isSwiftError(); 1532 if (In.Flags.isInAlloca()) 1533 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); 1534 if (In.Flags.isNest()) 1535 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); 1536 if (In.Flags.isInConsecutiveRegs()) 1537 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); 1538 if (In.Flags.isInConsecutiveRegsLast()) 1539 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); 1540 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in 1541 // registers. 1542 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT, 1543 DAG.getTargetConstant(InVals.size(), 1544 DL, MVT::i32)) 1545 : DAG.getUNDEF(In.VT)); 1546 1547 // Record the number and types of arguments. 1548 MFI->addParam(In.VT); 1549 } 1550 1551 // For swiftcc, emit additional swiftself and swifterror arguments if they 1552 // aren't present. These additional arguments are also added to the callee 1553 // signature; they are necessary to match the callee and caller signatures 1554 // for indirect calls. 1555 auto PtrVT = getPointerTy(MF.getDataLayout()); 1556 if (CallConv == CallingConv::Swift) { 1557 if (!HasSwiftSelfArg) { 1558 MFI->addParam(PtrVT); 1559 } 1560 if (!HasSwiftErrorArg) { 1561 MFI->addParam(PtrVT); 1562 } 1563 } 1564 // Varargs are copied into a buffer allocated by the caller, and a pointer to 1565 // the buffer is passed as an argument.
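// For example (illustrative sketch): a callee declared `int f(int n, ...)`
// is given the wasm signature (i32, i32) -> i32 on wasm32 (or (i32, i64) ->
// i32 on wasm64), where the trailing parameter is the pointer to the
// caller-allocated vararg buffer; LowerVASTART later stores this pointer
// into the va_list.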
1566 if (IsVarArg) { 1567 MVT PtrVT = getPointerTy(MF.getDataLayout()); 1568 Register VarargVreg = 1569 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT)); 1570 MFI->setVarargBufferVreg(VarargVreg); 1571 Chain = DAG.getCopyToReg( 1572 Chain, DL, VarargVreg, 1573 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT, 1574 DAG.getTargetConstant(Ins.size(), DL, MVT::i32))); 1575 MFI->addParam(PtrVT); 1576 } 1577 1578 // Record the number and types of arguments and results. 1579 SmallVector<MVT, 4> Params; 1580 SmallVector<MVT, 4> Results; 1581 computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(), 1582 MF.getFunction(), DAG.getTarget(), Params, Results); 1583 for (MVT VT : Results) 1584 MFI->addResult(VT); 1585 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify 1586 // the param logic here with ComputeSignatureVTs 1587 assert(MFI->getParams().size() == Params.size() && 1588 std::equal(MFI->getParams().begin(), MFI->getParams().end(), 1589 Params.begin())); 1590 1591 return Chain; 1592 } 1593 1594 void WebAssemblyTargetLowering::ReplaceNodeResults( 1595 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 1596 switch (N->getOpcode()) { 1597 case ISD::SIGN_EXTEND_INREG: 1598 // Do not add any results, signifying that N should not be custom lowered 1599 // after all. This happens because simd128 turns on custom lowering for 1600 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an 1601 // illegal type. 1602 break; 1603 case ISD::SIGN_EXTEND_VECTOR_INREG: 1604 case ISD::ZERO_EXTEND_VECTOR_INREG: 1605 // Do not add any results, signifying that N should not be custom lowered. 1606 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all. 1607 break; 1608 case ISD::ADD: 1609 case ISD::SUB: 1610 Results.push_back(Replace128Op(N, DAG)); 1611 break; 1612 default: 1613 llvm_unreachable( 1614 "ReplaceNodeResults not implemented for this op for WebAssembly!"); 1615 } 1616 } 1617 1618 //===----------------------------------------------------------------------===// 1619 // Custom lowering hooks. 
1620 //===----------------------------------------------------------------------===// 1621 1622 SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, 1623 SelectionDAG &DAG) const { 1624 SDLoc DL(Op); 1625 switch (Op.getOpcode()) { 1626 default: 1627 llvm_unreachable("unimplemented operation lowering"); 1628 return SDValue(); 1629 case ISD::FrameIndex: 1630 return LowerFrameIndex(Op, DAG); 1631 case ISD::GlobalAddress: 1632 return LowerGlobalAddress(Op, DAG); 1633 case ISD::GlobalTLSAddress: 1634 return LowerGlobalTLSAddress(Op, DAG); 1635 case ISD::ExternalSymbol: 1636 return LowerExternalSymbol(Op, DAG); 1637 case ISD::JumpTable: 1638 return LowerJumpTable(Op, DAG); 1639 case ISD::BR_JT: 1640 return LowerBR_JT(Op, DAG); 1641 case ISD::VASTART: 1642 return LowerVASTART(Op, DAG); 1643 case ISD::BlockAddress: 1644 case ISD::BRIND: 1645 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos"); 1646 return SDValue(); 1647 case ISD::RETURNADDR: 1648 return LowerRETURNADDR(Op, DAG); 1649 case ISD::FRAMEADDR: 1650 return LowerFRAMEADDR(Op, DAG); 1651 case ISD::CopyToReg: 1652 return LowerCopyToReg(Op, DAG); 1653 case ISD::EXTRACT_VECTOR_ELT: 1654 case ISD::INSERT_VECTOR_ELT: 1655 return LowerAccessVectorElement(Op, DAG); 1656 case ISD::INTRINSIC_VOID: 1657 case ISD::INTRINSIC_WO_CHAIN: 1658 case ISD::INTRINSIC_W_CHAIN: 1659 return LowerIntrinsic(Op, DAG); 1660 case ISD::SIGN_EXTEND_INREG: 1661 return LowerSIGN_EXTEND_INREG(Op, DAG); 1662 case ISD::ZERO_EXTEND_VECTOR_INREG: 1663 case ISD::SIGN_EXTEND_VECTOR_INREG: 1664 return LowerEXTEND_VECTOR_INREG(Op, DAG); 1665 case ISD::BUILD_VECTOR: 1666 return LowerBUILD_VECTOR(Op, DAG); 1667 case ISD::VECTOR_SHUFFLE: 1668 return LowerVECTOR_SHUFFLE(Op, DAG); 1669 case ISD::SETCC: 1670 return LowerSETCC(Op, DAG); 1671 case ISD::SHL: 1672 case ISD::SRA: 1673 case ISD::SRL: 1674 return LowerShift(Op, DAG); 1675 case ISD::FP_TO_SINT_SAT: 1676 case ISD::FP_TO_UINT_SAT: 1677 return LowerFP_TO_INT_SAT(Op, DAG); 1678 case ISD::LOAD: 1679 return LowerLoad(Op, DAG); 1680 case ISD::STORE: 1681 return LowerStore(Op, DAG); 1682 case ISD::CTPOP: 1683 case ISD::CTLZ: 1684 case ISD::CTTZ: 1685 return DAG.UnrollVectorOp(Op.getNode()); 1686 case ISD::CLEAR_CACHE: 1687 report_fatal_error("llvm.clear_cache is not supported on wasm"); 1688 case ISD::SMUL_LOHI: 1689 case ISD::UMUL_LOHI: 1690 return LowerMUL_LOHI(Op, DAG); 1691 case ISD::UADDO: 1692 return LowerUADDO(Op, DAG); 1693 } 1694 } 1695 1696 static bool IsWebAssemblyGlobal(SDValue Op) { 1697 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) 1698 return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace()); 1699 1700 return false; 1701 } 1702 1703 static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op, 1704 SelectionDAG &DAG) { 1705 const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op); 1706 if (!FI) 1707 return std::nullopt; 1708 1709 auto &MF = DAG.getMachineFunction(); 1710 return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex()); 1711 } 1712 1713 SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op, 1714 SelectionDAG &DAG) const { 1715 SDLoc DL(Op); 1716 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode()); 1717 const SDValue &Value = SN->getValue(); 1718 const SDValue &Base = SN->getBasePtr(); 1719 const SDValue &Offset = SN->getOffset(); 1720 1721 if (IsWebAssemblyGlobal(Base)) { 1722 if (!Offset->isUndef()) 1723 report_fatal_error("unexpected offset when storing to webassembly global", 1724 false); 1725 1726 SDVTList Tys = 
DAG.getVTList(MVT::Other); 1727 SDValue Ops[] = {SN->getChain(), Value, Base}; 1728 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops, 1729 SN->getMemoryVT(), SN->getMemOperand()); 1730 } 1731 1732 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) { 1733 if (!Offset->isUndef()) 1734 report_fatal_error("unexpected offset when storing to webassembly local", 1735 false); 1736 1737 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32); 1738 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain. 1739 SDValue Ops[] = {SN->getChain(), Idx, Value}; 1740 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops); 1741 } 1742 1743 if (WebAssembly::isWasmVarAddressSpace(SN->getAddressSpace())) 1744 report_fatal_error( 1745 "Encountered an unlowerable store to the wasm_var address space", 1746 false); 1747 1748 return Op; 1749 } 1750 1751 SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op, 1752 SelectionDAG &DAG) const { 1753 SDLoc DL(Op); 1754 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); 1755 const SDValue &Base = LN->getBasePtr(); 1756 const SDValue &Offset = LN->getOffset(); 1757 1758 if (IsWebAssemblyGlobal(Base)) { 1759 if (!Offset->isUndef()) 1760 report_fatal_error( 1761 "unexpected offset when loading from webassembly global", false); 1762 1763 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other); 1764 SDValue Ops[] = {LN->getChain(), Base}; 1765 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops, 1766 LN->getMemoryVT(), LN->getMemOperand()); 1767 } 1768 1769 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) { 1770 if (!Offset->isUndef()) 1771 report_fatal_error( 1772 "unexpected offset when loading from webassembly local", false); 1773 1774 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32); 1775 EVT LocalVT = LN->getValueType(0); 1776 SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT, 1777 {LN->getChain(), Idx}); 1778 SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL); 1779 assert(Result->getNumValues() == 2 && "Loads must carry a chain!"); 1780 return Result; 1781 } 1782 1783 if (WebAssembly::isWasmVarAddressSpace(LN->getAddressSpace())) 1784 report_fatal_error( 1785 "Encountered an unlowerable load from the wasm_var address space", 1786 false); 1787 1788 return Op; 1789 } 1790 1791 SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op, 1792 SelectionDAG &DAG) const { 1793 assert(Subtarget->hasWideArithmetic()); 1794 assert(Op.getValueType() == MVT::i64); 1795 SDLoc DL(Op); 1796 unsigned Opcode; 1797 switch (Op.getOpcode()) { 1798 case ISD::UMUL_LOHI: 1799 Opcode = WebAssemblyISD::I64_MUL_WIDE_U; 1800 break; 1801 case ISD::SMUL_LOHI: 1802 Opcode = WebAssemblyISD::I64_MUL_WIDE_S; 1803 break; 1804 default: 1805 llvm_unreachable("unexpected opcode"); 1806 } 1807 SDValue LHS = Op.getOperand(0); 1808 SDValue RHS = Op.getOperand(1); 1809 SDValue Lo = 1810 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS); 1811 SDValue Hi(Lo.getNode(), 1); 1812 SDValue Ops[] = {Lo, Hi}; 1813 return DAG.getMergeValues(Ops, DL); 1814 } 1815 1816 // Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled. 1817 // 1818 // This enables generating a single wasm instruction for this operation where 1819 // the upper half of both operands are constant zeros. The upper half of the 1820 // result is then whether the overflow happened. 
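// Illustrative sketch of the intended transform (assuming wide arithmetic
// is enabled):
//   (uaddo $x, $y)
//     -> (I64_ADD128 $x, 0, $y, 0)  ; low result = sum, high result = carry
// with the carry (an i64 that is 0 or 1) truncated to i32 to serve as
// UADDO's overflow flag.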
1821 SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op, 1822 SelectionDAG &DAG) const { 1823 assert(Subtarget->hasWideArithmetic()); 1824 assert(Op.getValueType() == MVT::i64); 1825 assert(Op.getOpcode() == ISD::UADDO); 1826 SDLoc DL(Op); 1827 SDValue LHS = Op.getOperand(0); 1828 SDValue RHS = Op.getOperand(1); 1829 SDValue Zero = DAG.getConstant(0, DL, MVT::i64); 1830 SDValue Result = 1831 DAG.getNode(WebAssemblyISD::I64_ADD128, DL, 1832 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero); 1833 SDValue CarryI64(Result.getNode(), 1); 1834 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64); 1835 SDValue Ops[] = {Result, CarryI32}; 1836 return DAG.getMergeValues(Ops, DL); 1837 } 1838 1839 SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N, 1840 SelectionDAG &DAG) const { 1841 assert(Subtarget->hasWideArithmetic()); 1842 assert(N->getValueType(0) == MVT::i128); 1843 SDLoc DL(N); 1844 unsigned Opcode; 1845 switch (N->getOpcode()) { 1846 case ISD::ADD: 1847 Opcode = WebAssemblyISD::I64_ADD128; 1848 break; 1849 case ISD::SUB: 1850 Opcode = WebAssemblyISD::I64_SUB128; 1851 break; 1852 default: 1853 llvm_unreachable("unexpected opcode"); 1854 } 1855 SDValue LHS = N->getOperand(0); 1856 SDValue RHS = N->getOperand(1); 1857 1858 SDValue C0 = DAG.getConstant(0, DL, MVT::i64); 1859 SDValue C1 = DAG.getConstant(1, DL, MVT::i64); 1860 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0); 1861 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1); 1862 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0); 1863 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1); 1864 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), 1865 LHS_0, LHS_1, RHS_0, RHS_1); 1866 SDValue Result_HI(Result_LO.getNode(), 1); 1867 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI); 1868 } 1869 1870 SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op, 1871 SelectionDAG &DAG) const { 1872 SDValue Src = Op.getOperand(2); 1873 if (isa<FrameIndexSDNode>(Src.getNode())) { 1874 // CopyToReg nodes don't support FrameIndex operands. Other targets select 1875 // the FI to some LEA-like instruction, but since we don't have that, we 1876 // need to insert some kind of instruction that can take an FI operand and 1877 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy 1878 // local.copy between Op and its FI operand. 1879 SDValue Chain = Op.getOperand(0); 1880 SDLoc DL(Op); 1881 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg(); 1882 EVT VT = Src.getValueType(); 1883 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32 1884 : WebAssembly::COPY_I64, 1885 DL, VT, Src), 1886 0); 1887 return Op.getNode()->getNumValues() == 1 1888 ? DAG.getCopyToReg(Chain, DL, Reg, Copy) 1889 : DAG.getCopyToReg(Chain, DL, Reg, Copy, 1890 Op.getNumOperands() == 4 ? 
Op.getOperand(3) 1891 : SDValue()); 1892 } 1893 return SDValue(); 1894 } 1895 1896 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op, 1897 SelectionDAG &DAG) const { 1898 int FI = cast<FrameIndexSDNode>(Op)->getIndex(); 1899 return DAG.getTargetFrameIndex(FI, Op.getValueType()); 1900 } 1901 1902 SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op, 1903 SelectionDAG &DAG) const { 1904 SDLoc DL(Op); 1905 1906 if (!Subtarget->getTargetTriple().isOSEmscripten()) { 1907 fail(DL, DAG, 1908 "Non-Emscripten WebAssembly hasn't implemented " 1909 "__builtin_return_address"); 1910 return SDValue(); 1911 } 1912 1913 unsigned Depth = Op.getConstantOperandVal(0); 1914 MakeLibCallOptions CallOptions; 1915 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(), 1916 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL) 1917 .first; 1918 } 1919 1920 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op, 1921 SelectionDAG &DAG) const { 1922 // Non-zero depths are not supported by WebAssembly currently. Use the 1923 // legalizer's default expansion, which is to return 0 (what this function is 1924 // documented to do). 1925 if (Op.getConstantOperandVal(0) > 0) 1926 return SDValue(); 1927 1928 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true); 1929 EVT VT = Op.getValueType(); 1930 Register FP = 1931 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); 1932 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT); 1933 } 1934 1935 SDValue 1936 WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op, 1937 SelectionDAG &DAG) const { 1938 SDLoc DL(Op); 1939 const auto *GA = cast<GlobalAddressSDNode>(Op); 1940 1941 MachineFunction &MF = DAG.getMachineFunction(); 1942 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory()) 1943 report_fatal_error("cannot use thread-local storage without bulk memory", 1944 false); 1945 1946 const GlobalValue *GV = GA->getGlobal(); 1947 1948 // Currently only Emscripten supports dynamic linking with threads. Therefore, 1949 // on other targets, if we have thread-local storage, only the local-exec 1950 // model is possible. 1951 auto model = Subtarget->getTargetTriple().isOSEmscripten() 1952 ? GV->getThreadLocalMode() 1953 : GlobalValue::LocalExecTLSModel; 1954 1955 // Unsupported TLS modes 1956 assert(model != GlobalValue::NotThreadLocal); 1957 assert(model != GlobalValue::InitialExecTLSModel); 1958 1959 if (model == GlobalValue::LocalExecTLSModel || 1960 model == GlobalValue::LocalDynamicTLSModel || 1961 (model == GlobalValue::GeneralDynamicTLSModel && 1962 getTargetMachine().shouldAssumeDSOLocal(GV))) { 1963 // For DSO-local TLS variables we use offset from __tls_base 1964 1965 MVT PtrVT = getPointerTy(DAG.getDataLayout()); 1966 auto GlobalGet = PtrVT == MVT::i64 ? 
WebAssembly::GLOBAL_GET_I64 1967 : WebAssembly::GLOBAL_GET_I32; 1968 const char *BaseName = MF.createExternalSymbolName("__tls_base"); 1969 1970 SDValue BaseAddr( 1971 DAG.getMachineNode(GlobalGet, DL, PtrVT, 1972 DAG.getTargetExternalSymbol(BaseName, PtrVT)), 1973 0); 1974 1975 SDValue TLSOffset = DAG.getTargetGlobalAddress( 1976 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL); 1977 SDValue SymOffset = 1978 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset); 1979 1980 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset); 1981 } 1982 1983 assert(model == GlobalValue::GeneralDynamicTLSModel); 1984 1985 EVT VT = Op.getValueType(); 1986 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, 1987 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, 1988 GA->getOffset(), 1989 WebAssemblyII::MO_GOT_TLS)); 1990 } 1991 1992 SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, 1993 SelectionDAG &DAG) const { 1994 SDLoc DL(Op); 1995 const auto *GA = cast<GlobalAddressSDNode>(Op); 1996 EVT VT = Op.getValueType(); 1997 assert(GA->getTargetFlags() == 0 && 1998 "Unexpected target flags on generic GlobalAddressSDNode"); 1999 if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace())) 2000 fail(DL, DAG, "Invalid address space for WebAssembly target"); 2001 2002 unsigned OperandFlags = 0; 2003 const GlobalValue *GV = GA->getGlobal(); 2004 // Since WebAssembly tables cannot yet be shared across modules, we don't 2005 // need special treatment for tables in PIC mode. 2006 if (isPositionIndependent() && 2007 !WebAssembly::isWebAssemblyTableType(GV->getValueType())) { 2008 if (getTargetMachine().shouldAssumeDSOLocal(GV)) { 2009 MachineFunction &MF = DAG.getMachineFunction(); 2010 MVT PtrVT = getPointerTy(MF.getDataLayout()); 2011 const char *BaseName; 2012 if (GV->getValueType()->isFunctionTy()) { 2013 BaseName = MF.createExternalSymbolName("__table_base"); 2014 OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL; 2015 } else { 2016 BaseName = MF.createExternalSymbolName("__memory_base"); 2017 OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL; 2018 } 2019 SDValue BaseAddr = 2020 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, 2021 DAG.getTargetExternalSymbol(BaseName, PtrVT)); 2022 2023 SDValue SymAddr = DAG.getNode( 2024 WebAssemblyISD::WrapperREL, DL, VT, 2025 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(), 2026 OperandFlags)); 2027 2028 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr); 2029 } 2030 OperandFlags = WebAssemblyII::MO_GOT; 2031 } 2032 2033 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, 2034 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, 2035 GA->getOffset(), OperandFlags)); 2036 } 2037 2038 SDValue 2039 WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op, 2040 SelectionDAG &DAG) const { 2041 SDLoc DL(Op); 2042 const auto *ES = cast<ExternalSymbolSDNode>(Op); 2043 EVT VT = Op.getValueType(); 2044 assert(ES->getTargetFlags() == 0 && 2045 "Unexpected target flags on generic ExternalSymbolSDNode"); 2046 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, 2047 DAG.getTargetExternalSymbol(ES->getSymbol(), VT)); 2048 } 2049 2050 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op, 2051 SelectionDAG &DAG) const { 2052 // There's no need for a Wrapper node because we always incorporate a jump 2053 // table operand into a BR_TABLE instruction, rather than ever 2054 // materializing it in a register.
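// For example (illustrative sketch), the jump-table index computed for a
// switch feeds a BR_TABLE node (see LowerBR_JT below) whose operands are the
// case basic blocks themselves, so the table never becomes addressable data.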
2055 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 2056 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(), 2057 JT->getTargetFlags()); 2058 } 2059 2060 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, 2061 SelectionDAG &DAG) const { 2062 SDLoc DL(Op); 2063 SDValue Chain = Op.getOperand(0); 2064 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1)); 2065 SDValue Index = Op.getOperand(2); 2066 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); 2067 2068 SmallVector<SDValue, 8> Ops; 2069 Ops.push_back(Chain); 2070 Ops.push_back(Index); 2071 2072 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo(); 2073 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs; 2074 2075 // Add an operand for each case. 2076 for (auto *MBB : MBBs) 2077 Ops.push_back(DAG.getBasicBlock(MBB)); 2078 2079 // Add the first MBB as a dummy default target for now. This will be replaced 2080 // with the proper default target (and the preceding range check eliminated) 2081 // if possible by WebAssemblyFixBrTableDefaults. 2082 Ops.push_back(DAG.getBasicBlock(*MBBs.begin())); 2083 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops); 2084 } 2085 2086 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, 2087 SelectionDAG &DAG) const { 2088 SDLoc DL(Op); 2089 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout()); 2090 2091 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>(); 2092 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2093 2094 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL, 2095 MFI->getVarargBufferVreg(), PtrVT); 2096 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1), 2097 MachinePointerInfo(SV)); 2098 } 2099 2100 // Try to lower partial.reduce.add to a dot or fallback to a sequence with 2101 // extmul and adds. 
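// For example (illustrative sketch), with v8i16 inputs and matching sign
// extensions:
//   partial.reduce.add($acc, (mul (sext $a), (sext $b)))
//     -> (add $acc, (DOT $a, $b))        ; i32x4.dot_i16x8_s
// For unsigned or v16i8 inputs there is no single dot instruction, so the
// code below builds the sum from extend_low/extend_high and mul nodes.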
2102 SDValue performLowerPartialReduction(SDNode *N, SelectionDAG &DAG) { 2103 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN); 2104 if (N->getConstantOperandVal(0) != 2105 Intrinsic::experimental_vector_partial_reduce_add) 2106 return SDValue(); 2107 2108 assert(N->getValueType(0) == MVT::v4i32 && "can only support v4i32"); 2109 SDLoc DL(N); 2110 SDValue Mul = N->getOperand(2); 2111 assert(Mul->getOpcode() == ISD::MUL && "expected mul input"); 2112 2113 SDValue ExtendLHS = Mul->getOperand(0); 2114 SDValue ExtendRHS = Mul->getOperand(1); 2115 assert((ISD::isExtOpcode(ExtendLHS.getOpcode()) && 2116 ISD::isExtOpcode(ExtendRHS.getOpcode())) && 2117 "expected widening mul"); 2118 assert(ExtendLHS.getOpcode() == ExtendRHS.getOpcode() && 2119 "expected mul to use the same extend for both operands"); 2120 2121 SDValue ExtendInLHS = ExtendLHS->getOperand(0); 2122 SDValue ExtendInRHS = ExtendRHS->getOperand(0); 2123 bool IsSigned = ExtendLHS->getOpcode() == ISD::SIGN_EXTEND; 2124 2125 if (ExtendInLHS->getValueType(0) == MVT::v8i16) { 2126 if (IsSigned) { 2127 // i32x4.dot_i16x8_s 2128 SDValue Dot = DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, 2129 ExtendInLHS, ExtendInRHS); 2130 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Dot); 2131 } 2132 2133 unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U; 2134 unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U; 2135 2136 // (add (add (extmul_low_sx lhs, rhs), (extmul_high_sx lhs, rhs))) 2137 SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v4i32, ExtendInLHS); 2138 SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v4i32, ExtendInRHS); 2139 SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v4i32, ExtendInLHS); 2140 SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v4i32, ExtendInRHS); 2141 2142 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v4i32, LowLHS, LowRHS); 2143 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v4i32, HighLHS, HighRHS); 2144 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, MulLow, MulHigh); 2145 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add); 2146 } else { 2147 assert(ExtendInLHS->getValueType(0) == MVT::v16i8 && 2148 "expected v16i8 input types"); 2149 // Lower to a wider tree, using twice the operations compared to above. 
2150 if (IsSigned) { 2151 // Use two dots 2152 unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_S; 2153 unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_S; 2154 SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInLHS); 2155 SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInRHS); 2156 SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInLHS); 2157 SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInRHS); 2158 SDValue DotLHS = 2159 DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, LowLHS, LowRHS); 2160 SDValue DotRHS = 2161 DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, HighLHS, HighRHS); 2162 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, DotLHS, DotRHS); 2163 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add); 2164 } 2165 2166 unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U; 2167 unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U; 2168 SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInLHS); 2169 SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInRHS); 2170 SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInLHS); 2171 SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInRHS); 2172 2173 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS); 2174 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS); 2175 2176 SDValue LowLow = DAG.getNode(LowOpc, DL, MVT::v4i32, MulLow); 2177 SDValue LowHigh = DAG.getNode(LowOpc, DL, MVT::v4i32, MulHigh); 2178 SDValue HighLow = DAG.getNode(HighOpc, DL, MVT::v4i32, MulLow); 2179 SDValue HighHigh = DAG.getNode(HighOpc, DL, MVT::v4i32, MulHigh); 2180 2181 SDValue AddLow = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowLow, HighLow); 2182 SDValue AddHigh = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowHigh, HighHigh); 2183 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, AddLow, AddHigh); 2184 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add); 2185 } 2186 } 2187 2188 SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, 2189 SelectionDAG &DAG) const { 2190 MachineFunction &MF = DAG.getMachineFunction(); 2191 unsigned IntNo; 2192 switch (Op.getOpcode()) { 2193 case ISD::INTRINSIC_VOID: 2194 case ISD::INTRINSIC_W_CHAIN: 2195 IntNo = Op.getConstantOperandVal(1); 2196 break; 2197 case ISD::INTRINSIC_WO_CHAIN: 2198 IntNo = Op.getConstantOperandVal(0); 2199 break; 2200 default: 2201 llvm_unreachable("Invalid intrinsic"); 2202 } 2203 SDLoc DL(Op); 2204 2205 switch (IntNo) { 2206 default: 2207 return SDValue(); // Don't custom lower most intrinsics. 
2208 2209 case Intrinsic::wasm_lsda: { 2210 auto PtrVT = getPointerTy(MF.getDataLayout()); 2211 const char *SymName = MF.createExternalSymbolName( 2212 "GCC_except_table" + std::to_string(MF.getFunctionNumber())); 2213 if (isPositionIndependent()) { 2214 SDValue Node = DAG.getTargetExternalSymbol( 2215 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL); 2216 const char *BaseName = MF.createExternalSymbolName("__memory_base"); 2217 SDValue BaseAddr = 2218 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, 2219 DAG.getTargetExternalSymbol(BaseName, PtrVT)); 2220 SDValue SymAddr = 2221 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node); 2222 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr); 2223 } 2224 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT); 2225 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node); 2226 } 2227 2228 case Intrinsic::wasm_shuffle: { 2229 // Drop in-chain and replace undefs, but otherwise pass through unchanged 2230 SDValue Ops[18]; 2231 size_t OpIdx = 0; 2232 Ops[OpIdx++] = Op.getOperand(1); 2233 Ops[OpIdx++] = Op.getOperand(2); 2234 while (OpIdx < 18) { 2235 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1); 2236 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) { 2237 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant; 2238 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget); 2239 } else { 2240 Ops[OpIdx++] = MaskIdx; 2241 } 2242 } 2243 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); 2244 } 2245 2246 case Intrinsic::thread_pointer: { 2247 MVT PtrVT = getPointerTy(DAG.getDataLayout()); 2248 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64 2249 : WebAssembly::GLOBAL_GET_I32; 2250 const char *TlsBase = MF.createExternalSymbolName("__tls_base"); 2251 return SDValue( 2252 DAG.getMachineNode(GlobalGet, DL, PtrVT, 2253 DAG.getTargetExternalSymbol(TlsBase, PtrVT)), 2254 0); 2255 } 2256 } 2257 } 2258 2259 SDValue 2260 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, 2261 SelectionDAG &DAG) const { 2262 SDLoc DL(Op); 2263 // If sign extension operations are disabled, allow sext_inreg only if operand 2264 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign 2265 // extension operations, but allowing sext_inreg in this context lets us have 2266 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg 2267 // everywhere would be simpler in this file, but would necessitate large and 2268 // brittle patterns to undo the expansion and select extract_lane_s 2269 // instructions. 
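// For example (illustrative sketch):
//   (sext_inreg (extract_vector_elt (v16i8 $v), $i), i8)
// stays as-is and is later selected directly to i8x16.extract_lane_s,
// instead of being expanded into an unsigned extract plus scalar shifts.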
2270 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128()); 2271 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT) 2272 return SDValue(); 2273 2274 const SDValue &Extract = Op.getOperand(0); 2275 MVT VecT = Extract.getOperand(0).getSimpleValueType(); 2276 if (VecT.getVectorElementType().getSizeInBits() > 32) 2277 return SDValue(); 2278 MVT ExtractedLaneT = 2279 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT(); 2280 MVT ExtractedVecT = 2281 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits()); 2282 if (ExtractedVecT == VecT) 2283 return Op; 2284 2285 // Bitcast vector to appropriate type to ensure ISel pattern coverage 2286 const SDNode *Index = Extract.getOperand(1).getNode(); 2287 if (!isa<ConstantSDNode>(Index)) 2288 return SDValue(); 2289 unsigned IndexVal = Index->getAsZExtVal(); 2290 unsigned Scale = 2291 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements(); 2292 assert(Scale > 1); 2293 SDValue NewIndex = 2294 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0)); 2295 SDValue NewExtract = DAG.getNode( 2296 ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(), 2297 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex); 2298 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract, 2299 Op.getOperand(1)); 2300 } 2301 2302 static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT, 2303 SelectionDAG &DAG) { 2304 if (Op.getOpcode() != ISD::VECTOR_SHUFFLE) 2305 return SDValue(); 2306 2307 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U || 2308 UserOpc == WebAssemblyISD::EXTEND_LOW_S) && 2309 "expected extend_low"); 2310 auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode()); 2311 2312 ArrayRef<int> Mask = Shuffle->getMask(); 2313 // Look for a shuffle which moves from the high half to the low half. 2314 size_t FirstIdx = Mask.size() / 2; 2315 for (size_t i = 0; i < Mask.size() / 2; ++i) { 2316 if (Mask[i] != static_cast<int>(FirstIdx + i)) { 2317 return SDValue(); 2318 } 2319 } 2320 2321 SDLoc DL(Op); 2322 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S 2323 ? WebAssemblyISD::EXTEND_HIGH_S 2324 : WebAssemblyISD::EXTEND_HIGH_U; 2325 return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0)); 2326 } 2327 2328 SDValue 2329 WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op, 2330 SelectionDAG &DAG) const { 2331 SDLoc DL(Op); 2332 EVT VT = Op.getValueType(); 2333 SDValue Src = Op.getOperand(0); 2334 EVT SrcVT = Src.getValueType(); 2335 2336 if (SrcVT.getVectorElementType() == MVT::i1 || 2337 SrcVT.getVectorElementType() == MVT::i64) 2338 return SDValue(); 2339 2340 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 && 2341 "Unexpected extension factor."); 2342 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits(); 2343 2344 if (Scale != 2 && Scale != 4 && Scale != 8) 2345 return SDValue(); 2346 2347 unsigned Ext; 2348 switch (Op.getOpcode()) { 2349 case ISD::ZERO_EXTEND_VECTOR_INREG: 2350 Ext = WebAssemblyISD::EXTEND_LOW_U; 2351 break; 2352 case ISD::SIGN_EXTEND_VECTOR_INREG: 2353 Ext = WebAssemblyISD::EXTEND_LOW_S; 2354 break; 2355 } 2356 2357 if (Scale == 2) { 2358 // See if we can use EXTEND_HIGH. 
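// E.g. (illustrative sketch): if the operand is a shuffle that moved the
// high half of $v into the low lanes, (extend_low_s (shuffle $v, undef, ...))
// can be rewritten as (extend_high_s $v), making the shuffle dead.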
2359 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG)) 2360 return ExtendHigh; 2361 } 2362 2363 SDValue Ret = Src; 2364 while (Scale != 1) { 2365 Ret = DAG.getNode(Ext, DL, 2366 Ret.getValueType() 2367 .widenIntegerVectorElementType(*DAG.getContext()) 2368 .getHalfNumVectorElementsVT(*DAG.getContext()), 2369 Ret); 2370 Scale /= 2; 2371 } 2372 assert(Ret.getValueType() == VT); 2373 return Ret; 2374 } 2375 2376 static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) { 2377 SDLoc DL(Op); 2378 if (Op.getValueType() != MVT::v2f64) 2379 return SDValue(); 2380 2381 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec, 2382 unsigned &Index) -> bool { 2383 switch (Op.getOpcode()) { 2384 case ISD::SINT_TO_FP: 2385 Opcode = WebAssemblyISD::CONVERT_LOW_S; 2386 break; 2387 case ISD::UINT_TO_FP: 2388 Opcode = WebAssemblyISD::CONVERT_LOW_U; 2389 break; 2390 case ISD::FP_EXTEND: 2391 Opcode = WebAssemblyISD::PROMOTE_LOW; 2392 break; 2393 default: 2394 return false; 2395 } 2396 2397 auto ExtractVector = Op.getOperand(0); 2398 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 2399 return false; 2400 2401 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode())) 2402 return false; 2403 2404 SrcVec = ExtractVector.getOperand(0); 2405 Index = ExtractVector.getConstantOperandVal(1); 2406 return true; 2407 }; 2408 2409 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex; 2410 SDValue LHSSrcVec, RHSSrcVec; 2411 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) || 2412 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex)) 2413 return SDValue(); 2414 2415 if (LHSOpcode != RHSOpcode) 2416 return SDValue(); 2417 2418 MVT ExpectedSrcVT; 2419 switch (LHSOpcode) { 2420 case WebAssemblyISD::CONVERT_LOW_S: 2421 case WebAssemblyISD::CONVERT_LOW_U: 2422 ExpectedSrcVT = MVT::v4i32; 2423 break; 2424 case WebAssemblyISD::PROMOTE_LOW: 2425 ExpectedSrcVT = MVT::v4f32; 2426 break; 2427 } 2428 if (LHSSrcVec.getValueType() != ExpectedSrcVT) 2429 return SDValue(); 2430 2431 auto Src = LHSSrcVec; 2432 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) { 2433 // Shuffle the source vector so that the converted lanes are the low lanes. 2434 Src = DAG.getVectorShuffle( 2435 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec, 2436 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1}); 2437 } 2438 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src); 2439 } 2440 2441 SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, 2442 SelectionDAG &DAG) const { 2443 MVT VT = Op.getSimpleValueType(); 2444 if (VT == MVT::v8f16) { 2445 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar 2446 // FP16 type, so cast them to I16s.
2447 MVT IVT = VT.changeVectorElementType(MVT::i16); 2448 SmallVector<SDValue, 8> NewOps; 2449 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I) 2450 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I))); 2451 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps); 2452 return DAG.getBitcast(VT, Res); 2453 } 2454 2455 if (auto ConvertLow = LowerConvertLow(Op, DAG)) 2456 return ConvertLow; 2457 2458 SDLoc DL(Op); 2459 const EVT VecT = Op.getValueType(); 2460 const EVT LaneT = Op.getOperand(0).getValueType(); 2461 const size_t Lanes = Op.getNumOperands(); 2462 bool CanSwizzle = VecT == MVT::v16i8; 2463 2464 // BUILD_VECTORs are lowered to the instruction that initializes the highest 2465 // possible number of lanes at once followed by a sequence of replace_lane 2466 // instructions to individually initialize any remaining lanes. 2467 2468 // TODO: Tune this. For example, lanewise swizzling is very expensive, so 2469 // swizzled lanes should be given greater weight. 2470 2471 // TODO: Investigate looping rather than always extracting/replacing specific 2472 // lanes to fill gaps. 2473 2474 auto IsConstant = [](const SDValue &V) { 2475 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP; 2476 }; 2477 2478 // Returns the source vector and index vector pair if they exist. Checks for: 2479 // (extract_vector_elt 2480 // $src, 2481 // (sign_extend_inreg (extract_vector_elt $indices, $i)) 2482 // ) 2483 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) { 2484 auto Bail = std::make_pair(SDValue(), SDValue()); 2485 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 2486 return Bail; 2487 const SDValue &SwizzleSrc = Lane->getOperand(0); 2488 const SDValue &IndexExt = Lane->getOperand(1); 2489 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG) 2490 return Bail; 2491 const SDValue &Index = IndexExt->getOperand(0); 2492 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 2493 return Bail; 2494 const SDValue &SwizzleIndices = Index->getOperand(0); 2495 if (SwizzleSrc.getValueType() != MVT::v16i8 || 2496 SwizzleIndices.getValueType() != MVT::v16i8 || 2497 Index->getOperand(1)->getOpcode() != ISD::Constant || 2498 Index->getConstantOperandVal(1) != I) 2499 return Bail; 2500 return std::make_pair(SwizzleSrc, SwizzleIndices); 2501 }; 2502 2503 // If the lane is extracted from another vector at a constant index, return 2504 // that vector. The source vector must not have more lanes than the dest 2505 // because the shufflevector indices are in terms of the destination lanes and 2506 // would not be able to address the smaller individual source lanes. 
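// For instance (illustrative sketch), when building a v4i32, a lane
// extracted from a v4i32 or v2i64 source at a constant index qualifies, but
// a lane from a v16i8 source does not, since a v4i32 shuffle mask cannot
// address its 16 lanes individually.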
2507 auto GetShuffleSrc = [&](const SDValue &Lane) { 2508 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 2509 return SDValue(); 2510 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode())) 2511 return SDValue(); 2512 if (Lane->getOperand(0).getValueType().getVectorNumElements() > 2513 VecT.getVectorNumElements()) 2514 return SDValue(); 2515 return Lane->getOperand(0); 2516 }; 2517 2518 using ValueEntry = std::pair<SDValue, size_t>; 2519 SmallVector<ValueEntry, 16> SplatValueCounts; 2520 2521 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>; 2522 SmallVector<SwizzleEntry, 16> SwizzleCounts; 2523 2524 using ShuffleEntry = std::pair<SDValue, size_t>; 2525 SmallVector<ShuffleEntry, 16> ShuffleCounts; 2526 2527 auto AddCount = [](auto &Counts, const auto &Val) { 2528 auto CountIt = 2529 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; }); 2530 if (CountIt == Counts.end()) { 2531 Counts.emplace_back(Val, 1); 2532 } else { 2533 CountIt->second++; 2534 } 2535 }; 2536 2537 auto GetMostCommon = [](auto &Counts) { 2538 auto CommonIt = llvm::max_element(Counts, llvm::less_second()); 2539 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector"); 2540 return *CommonIt; 2541 }; 2542 2543 size_t NumConstantLanes = 0; 2544 2545 // Count eligible lanes for each type of vector creation op 2546 for (size_t I = 0; I < Lanes; ++I) { 2547 const SDValue &Lane = Op->getOperand(I); 2548 if (Lane.isUndef()) 2549 continue; 2550 2551 AddCount(SplatValueCounts, Lane); 2552 2553 if (IsConstant(Lane)) 2554 NumConstantLanes++; 2555 if (auto ShuffleSrc = GetShuffleSrc(Lane)) 2556 AddCount(ShuffleCounts, ShuffleSrc); 2557 if (CanSwizzle) { 2558 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane); 2559 if (SwizzleSrcs.first) 2560 AddCount(SwizzleCounts, SwizzleSrcs); 2561 } 2562 } 2563 2564 SDValue SplatValue; 2565 size_t NumSplatLanes; 2566 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts); 2567 2568 SDValue SwizzleSrc; 2569 SDValue SwizzleIndices; 2570 size_t NumSwizzleLanes = 0; 2571 if (SwizzleCounts.size()) 2572 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices), 2573 NumSwizzleLanes) = GetMostCommon(SwizzleCounts); 2574 2575 // Shuffles can draw from up to two vectors, so find the two most common 2576 // sources. 
2577 SDValue ShuffleSrc1, ShuffleSrc2; 2578 size_t NumShuffleLanes = 0; 2579 if (ShuffleCounts.size()) { 2580 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts); 2581 llvm::erase_if(ShuffleCounts, 2582 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; }); 2583 } 2584 if (ShuffleCounts.size()) { 2585 size_t AdditionalShuffleLanes; 2586 std::tie(ShuffleSrc2, AdditionalShuffleLanes) = 2587 GetMostCommon(ShuffleCounts); 2588 NumShuffleLanes += AdditionalShuffleLanes; 2589 } 2590 2591 // Predicate returning true if the lane is properly initialized by the 2592 // original instruction 2593 std::function<bool(size_t, const SDValue &)> IsLaneConstructed; 2594 SDValue Result; 2595 // Prefer swizzles over shuffles over vector consts over splats 2596 if (NumSwizzleLanes >= NumShuffleLanes && 2597 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) { 2598 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc, 2599 SwizzleIndices); 2600 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices); 2601 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) { 2602 return Swizzled == GetSwizzleSrcs(I, Lane); 2603 }; 2604 } else if (NumShuffleLanes >= NumConstantLanes && 2605 NumShuffleLanes >= NumSplatLanes) { 2606 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8; 2607 size_t DestLaneCount = VecT.getVectorNumElements(); 2608 size_t Scale1 = 1; 2609 size_t Scale2 = 1; 2610 SDValue Src1 = ShuffleSrc1; 2611 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT); 2612 if (Src1.getValueType() != VecT) { 2613 size_t LaneSize = 2614 Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8; 2615 assert(LaneSize > DestLaneSize); 2616 Scale1 = LaneSize / DestLaneSize; 2617 Src1 = DAG.getBitcast(VecT, Src1); 2618 } 2619 if (Src2.getValueType() != VecT) { 2620 size_t LaneSize = 2621 Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8; 2622 assert(LaneSize > DestLaneSize); 2623 Scale2 = LaneSize / DestLaneSize; 2624 Src2 = DAG.getBitcast(VecT, Src2); 2625 } 2626 2627 int Mask[16]; 2628 assert(DestLaneCount <= 16); 2629 for (size_t I = 0; I < DestLaneCount; ++I) { 2630 const SDValue &Lane = Op->getOperand(I); 2631 SDValue Src = GetShuffleSrc(Lane); 2632 if (Src == ShuffleSrc1) { 2633 Mask[I] = Lane->getConstantOperandVal(1) * Scale1; 2634 } else if (Src && Src == ShuffleSrc2) { 2635 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2; 2636 } else { 2637 Mask[I] = -1; 2638 } 2639 } 2640 ArrayRef<int> MaskRef(Mask, DestLaneCount); 2641 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef); 2642 IsLaneConstructed = [&](size_t, const SDValue &Lane) { 2643 auto Src = GetShuffleSrc(Lane); 2644 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2); 2645 }; 2646 } else if (NumConstantLanes >= NumSplatLanes) { 2647 SmallVector<SDValue, 16> ConstLanes; 2648 for (const SDValue &Lane : Op->op_values()) { 2649 if (IsConstant(Lane)) { 2650 // Values may need to be fixed so that they will sign extend to be 2651 // within the expected range during ISel. Check whether the value is in 2652 // bounds based on the lane bit width and if it is out of bounds, lop 2653 // off the extra bits and subtract 2^n to reflect giving the high bit 2654 // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it 2655 // cannot possibly be out of range. 2656 auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode()); 2657 int64_t Val = Const ? 
Const->getSExtValue() : 0; 2658 uint64_t LaneBits = 128 / Lanes; 2659 assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) && 2660 "Unexpected out of bounds negative value"); 2661 if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) { 2662 uint64_t Mask = (1ll << LaneBits) - 1; 2663 auto NewVal = (((uint64_t)Val & Mask) - (1ll << LaneBits)) & Mask; 2664 ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT)); 2665 } else { 2666 ConstLanes.push_back(Lane); 2667 } 2668 } else if (LaneT.isFloatingPoint()) { 2669 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT)); 2670 } else { 2671 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT)); 2672 } 2673 } 2674 Result = DAG.getBuildVector(VecT, DL, ConstLanes); 2675 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) { 2676 return IsConstant(Lane); 2677 }; 2678 } else { 2679 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits(); 2680 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue && 2681 (DestLaneSize == 32 || DestLaneSize == 64)) { 2682 // Could be selected to load_zero. 2683 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue); 2684 } else { 2685 // Use a splat (which might be selected as a load splat) 2686 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); 2687 } 2688 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) { 2689 return Lane == SplatValue; 2690 }; 2691 } 2692 2693 assert(Result); 2694 assert(IsLaneConstructed); 2695 2696 // Add replace_lane instructions for any unhandled values 2697 for (size_t I = 0; I < Lanes; ++I) { 2698 const SDValue &Lane = Op->getOperand(I); 2699 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane)) 2700 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, 2701 DAG.getConstant(I, DL, MVT::i32)); 2702 } 2703 2704 return Result; 2705 } 2706 2707 SDValue 2708 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 2709 SelectionDAG &DAG) const { 2710 SDLoc DL(Op); 2711 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask(); 2712 MVT VecType = Op.getOperand(0).getSimpleValueType(); 2713 assert(VecType.is128BitVector() && "Unexpected shuffle vector type"); 2714 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8; 2715 2716 // Space for two vector args and sixteen mask indices 2717 SDValue Ops[18]; 2718 size_t OpIdx = 0; 2719 Ops[OpIdx++] = Op.getOperand(0); 2720 Ops[OpIdx++] = Op.getOperand(1); 2721 2722 // Expand mask indices to byte indices and materialize them as operands 2723 for (int M : Mask) { 2724 for (size_t J = 0; J < LaneBytes; ++J) { 2725 // Lower undefs (represented by -1 in mask) to {0..J}, which use a 2726 // whole lane of vector input, to allow further reduction at VM. E.g. 2727 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle. 2728 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J; 2729 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32); 2730 } 2731 } 2732 2733 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); 2734 } 2735 2736 SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op, 2737 SelectionDAG &DAG) const { 2738 SDLoc DL(Op); 2739 // The legalizer does not know how to expand the unsupported comparison modes 2740 // of i64x2 vectors, so we manually unroll them here. 
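// Illustrative sketch: (setcc (v2i64 $a), (v2i64 $b), cc) becomes two scalar
// (select_cc $a[i], $b[i], -1, 0, cc) nodes whose all-ones/all-zero results
// are reassembled with build_vector.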
2741 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64); 2742 SmallVector<SDValue, 2> LHS, RHS; 2743 DAG.ExtractVectorElements(Op->getOperand(0), LHS); 2744 DAG.ExtractVectorElements(Op->getOperand(1), RHS); 2745 const SDValue &CC = Op->getOperand(2); 2746 auto MakeLane = [&](unsigned I) { 2747 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I], 2748 DAG.getConstant(uint64_t(-1), DL, MVT::i64), 2749 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC); 2750 }; 2751 return DAG.getBuildVector(Op->getValueType(0), DL, 2752 {MakeLane(0), MakeLane(1)}); 2753 } 2754 2755 SDValue 2756 WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op, 2757 SelectionDAG &DAG) const { 2758 // Allow constant lane indices, expand variable lane indices 2759 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode(); 2760 if (isa<ConstantSDNode>(IdxNode)) { 2761 // Ensure the index type is i32 to match the tablegen patterns 2762 uint64_t Idx = IdxNode->getAsZExtVal(); 2763 SmallVector<SDValue, 3> Ops(Op.getNode()->ops()); 2764 Ops[Op.getNumOperands() - 1] = 2765 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32); 2766 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops); 2767 } 2768 // Perform default expansion 2769 return SDValue(); 2770 } 2771 2772 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) { 2773 EVT LaneT = Op.getSimpleValueType().getVectorElementType(); 2774 // 32-bit and 64-bit unrolled shifts will have proper semantics 2775 if (LaneT.bitsGE(MVT::i32)) 2776 return DAG.UnrollVectorOp(Op.getNode()); 2777 // Otherwise mask the shift value to get proper semantics from 32-bit shift 2778 SDLoc DL(Op); 2779 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements(); 2780 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32); 2781 unsigned ShiftOpcode = Op.getOpcode(); 2782 SmallVector<SDValue, 16> ShiftedElements; 2783 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32); 2784 SmallVector<SDValue, 16> ShiftElements; 2785 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32); 2786 SmallVector<SDValue, 16> UnrolledOps; 2787 for (size_t i = 0; i < NumLanes; ++i) { 2788 SDValue MaskedShiftValue = 2789 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask); 2790 SDValue ShiftedValue = ShiftedElements[i]; 2791 if (ShiftOpcode == ISD::SRA) 2792 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, 2793 ShiftedValue, DAG.getValueType(LaneT)); 2794 UnrolledOps.push_back( 2795 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue)); 2796 } 2797 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps); 2798 } 2799 2800 SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, 2801 SelectionDAG &DAG) const { 2802 SDLoc DL(Op); 2803 2804 // Only manually lower vector shifts 2805 assert(Op.getSimpleValueType().isVector()); 2806 2807 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits(); 2808 auto ShiftVal = Op.getOperand(1); 2809 2810 // Try to skip bitmask operation since it is implied inside shift instruction 2811 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) { 2812 if (MaskOp.getOpcode() != ISD::AND) 2813 return MaskOp; 2814 SDValue LHS = MaskOp.getOperand(0); 2815 SDValue RHS = MaskOp.getOperand(1); 2816 if (MaskOp.getValueType().isVector()) { 2817 APInt MaskVal; 2818 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal)) 2819 std::swap(LHS, RHS); 2820 2821 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) && 2822 MaskVal == 
MaskBits) 2823 MaskOp = LHS; 2824 } else { 2825 if (!isa<ConstantSDNode>(RHS.getNode())) 2826 std::swap(LHS, RHS); 2827 2828 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode()); 2829 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits) 2830 MaskOp = LHS; 2831 } 2832 2833 return MaskOp; 2834 }; 2835 2836 // Skip vector and operation 2837 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1); 2838 ShiftVal = DAG.getSplatValue(ShiftVal); 2839 if (!ShiftVal) 2840 return unrollVectorShift(Op, DAG); 2841 2842 // Skip scalar and operation 2843 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1); 2844 // Use anyext because none of the high bits can affect the shift 2845 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32); 2846 2847 unsigned Opcode; 2848 switch (Op.getOpcode()) { 2849 case ISD::SHL: 2850 Opcode = WebAssemblyISD::VEC_SHL; 2851 break; 2852 case ISD::SRA: 2853 Opcode = WebAssemblyISD::VEC_SHR_S; 2854 break; 2855 case ISD::SRL: 2856 Opcode = WebAssemblyISD::VEC_SHR_U; 2857 break; 2858 default: 2859 llvm_unreachable("unexpected opcode"); 2860 } 2861 2862 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal); 2863 } 2864 2865 SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op, 2866 SelectionDAG &DAG) const { 2867 EVT ResT = Op.getValueType(); 2868 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 2869 2870 if ((ResT == MVT::i32 || ResT == MVT::i64) && 2871 (SatVT == MVT::i32 || SatVT == MVT::i64)) 2872 return Op; 2873 2874 if (ResT == MVT::v4i32 && SatVT == MVT::i32) 2875 return Op; 2876 2877 if (ResT == MVT::v8i16 && SatVT == MVT::i16) 2878 return Op; 2879 2880 return SDValue(); 2881 } 2882 2883 //===----------------------------------------------------------------------===// 2884 // Custom DAG combine hooks 2885 //===----------------------------------------------------------------------===// 2886 static SDValue 2887 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 2888 auto &DAG = DCI.DAG; 2889 auto Shuffle = cast<ShuffleVectorSDNode>(N); 2890 2891 // Hoist vector bitcasts that don't change the number of lanes out of unary 2892 // shuffles, where they are less likely to get in the way of other combines. 2893 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) -> 2894 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask)))) 2895 SDValue Bitcast = N->getOperand(0); 2896 if (Bitcast.getOpcode() != ISD::BITCAST) 2897 return SDValue(); 2898 if (!N->getOperand(1).isUndef()) 2899 return SDValue(); 2900 SDValue CastOp = Bitcast.getOperand(0); 2901 EVT SrcType = CastOp.getValueType(); 2902 EVT DstType = Bitcast.getValueType(); 2903 if (!SrcType.is128BitVector() || 2904 SrcType.getVectorNumElements() != DstType.getVectorNumElements()) 2905 return SDValue(); 2906 SDValue NewShuffle = DAG.getVectorShuffle( 2907 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask()); 2908 return DAG.getBitcast(DstType, NewShuffle); 2909 } 2910 2911 /// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get 2912 /// split up into scalar instructions during legalization, and the vector 2913 /// extending instructions are selected in performVectorExtendCombine below. 
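/// E.g. (illustrative sketch): (v4f32 (sitofp (v4i16 $x))) becomes
/// (v4f32 (sitofp (v4i32 (sign_extend (v4i16 $x))))), which avoids
/// scalarization and lets the extend be selected as a vector extend
/// instruction.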
2914 static SDValue 2915 performVectorExtendToFPCombine(SDNode *N, 2916 TargetLowering::DAGCombinerInfo &DCI) { 2917 auto &DAG = DCI.DAG; 2918 assert(N->getOpcode() == ISD::UINT_TO_FP || 2919 N->getOpcode() == ISD::SINT_TO_FP); 2920 2921 EVT InVT = N->getOperand(0)->getValueType(0); 2922 EVT ResVT = N->getValueType(0); 2923 MVT ExtVT; 2924 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8)) 2925 ExtVT = MVT::v4i32; 2926 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8)) 2927 ExtVT = MVT::v2i32; 2928 else 2929 return SDValue(); 2930 2931 unsigned Op = 2932 N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; 2933 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0)); 2934 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv); 2935 } 2936 2937 static SDValue 2938 performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 2939 auto &DAG = DCI.DAG; 2940 assert(N->getOpcode() == ISD::SIGN_EXTEND || 2941 N->getOpcode() == ISD::ZERO_EXTEND); 2942 2943 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if 2944 // possible before the extract_subvector can be expanded. 2945 auto Extract = N->getOperand(0); 2946 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) 2947 return SDValue(); 2948 auto Source = Extract.getOperand(0); 2949 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1)); 2950 if (IndexNode == nullptr) 2951 return SDValue(); 2952 auto Index = IndexNode->getZExtValue(); 2953 2954 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the 2955 // extracted subvector is the low or high half of its source. 2956 EVT ResVT = N->getValueType(0); 2957 if (ResVT == MVT::v8i16) { 2958 if (Extract.getValueType() != MVT::v8i8 || 2959 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8)) 2960 return SDValue(); 2961 } else if (ResVT == MVT::v4i32) { 2962 if (Extract.getValueType() != MVT::v4i16 || 2963 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4)) 2964 return SDValue(); 2965 } else if (ResVT == MVT::v2i64) { 2966 if (Extract.getValueType() != MVT::v2i32 || 2967 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2)) 2968 return SDValue(); 2969 } else { 2970 return SDValue(); 2971 } 2972 2973 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND; 2974 bool IsLow = Index == 0; 2975 2976 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S 2977 : WebAssemblyISD::EXTEND_HIGH_S) 2978 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U 2979 : WebAssemblyISD::EXTEND_HIGH_U); 2980 2981 return DAG.getNode(Op, SDLoc(N), ResVT, Source); 2982 } 2983 2984 static SDValue 2985 performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 2986 auto &DAG = DCI.DAG; 2987 2988 auto GetWasmConversionOp = [](unsigned Op) { 2989 switch (Op) { 2990 case ISD::FP_TO_SINT_SAT: 2991 return WebAssemblyISD::TRUNC_SAT_ZERO_S; 2992 case ISD::FP_TO_UINT_SAT: 2993 return WebAssemblyISD::TRUNC_SAT_ZERO_U; 2994 case ISD::FP_ROUND: 2995 return WebAssemblyISD::DEMOTE_ZERO; 2996 } 2997 llvm_unreachable("unexpected op"); 2998 }; 2999 3000 auto IsZeroSplat = [](SDValue SplatVal) { 3001 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode()); 3002 APInt SplatValue, SplatUndef; 3003 unsigned SplatBitSize; 3004 bool HasAnyUndefs; 3005 // Endianness doesn't matter in this context because we are looking for 3006 // an all-zero value. 
static SDValue
performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;

  auto GetWasmConversionOp = [](unsigned Op) {
    switch (Op) {
    case ISD::FP_TO_SINT_SAT:
      return WebAssemblyISD::TRUNC_SAT_ZERO_S;
    case ISD::FP_TO_UINT_SAT:
      return WebAssemblyISD::TRUNC_SAT_ZERO_U;
    case ISD::FP_ROUND:
      return WebAssemblyISD::DEMOTE_ZERO;
    }
    llvm_unreachable("unexpected op");
  };

  auto IsZeroSplat = [](SDValue SplatVal) {
    auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
    APInt SplatValue, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    // Endianness doesn't matter in this context because we are looking for
    // an all-zero value.
    return Splat &&
           Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                                  HasAnyUndefs) &&
           SplatValue == 0;
  };

  if (N->getOpcode() == ISD::CONCAT_VECTORS) {
    // Combine this:
    //
    //   (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
    //
    // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
    //
    // Or this:
    //
    //   (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
    //
    // into (f32x4.demote_zero_f64x2 $x).
    EVT ResVT;
    EVT ExpectedConversionType;
    auto Conversion = N->getOperand(0);
    auto ConversionOp = Conversion.getOpcode();
    switch (ConversionOp) {
    case ISD::FP_TO_SINT_SAT:
    case ISD::FP_TO_UINT_SAT:
      ResVT = MVT::v4i32;
      ExpectedConversionType = MVT::v2i32;
      break;
    case ISD::FP_ROUND:
      ResVT = MVT::v4f32;
      ExpectedConversionType = MVT::v2f32;
      break;
    default:
      return SDValue();
    }

    if (N->getValueType(0) != ResVT)
      return SDValue();

    if (Conversion.getValueType() != ExpectedConversionType)
      return SDValue();

    auto Source = Conversion.getOperand(0);
    if (Source.getValueType() != MVT::v2f64)
      return SDValue();

    if (!IsZeroSplat(N->getOperand(1)) ||
        N->getOperand(1).getValueType() != ExpectedConversionType)
      return SDValue();

    unsigned Op = GetWasmConversionOp(ConversionOp);
    return DAG.getNode(Op, SDLoc(N), ResVT, Source);
  }

  // Combine this:
  //
  //   (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
  //
  // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
  //
  // Or this:
  //
  //   (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
  //
  // into (f32x4.demote_zero_f64x2 $x).
  EVT ResVT;
  auto ConversionOp = N->getOpcode();
  switch (ConversionOp) {
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    ResVT = MVT::v4i32;
    break;
  case ISD::FP_ROUND:
    ResVT = MVT::v4f32;
    break;
  default:
    llvm_unreachable("unexpected op");
  }

  if (N->getValueType(0) != ResVT)
    return SDValue();

  auto Concat = N->getOperand(0);
  if (Concat.getValueType() != MVT::v4f64)
    return SDValue();

  auto Source = Concat.getOperand(0);
  if (Source.getValueType() != MVT::v2f64)
    return SDValue();

  if (!IsZeroSplat(Concat.getOperand(1)) ||
      Concat.getOperand(1).getValueType() != MVT::v2f64)
    return SDValue();

  unsigned Op = GetWasmConversionOp(ConversionOp);
  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}

// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
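// For example, with Vec : v4i32, IdxVal = 2, and VectorWidth = 64, this
// returns the v2i32 subvector holding lanes 2 and 3 of Vec.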
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
                                const SDLoc &DL, unsigned VectorWidth) {
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  unsigned Factor = VT.getSizeInBits() / VectorWidth;
  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
                                  VT.getVectorNumElements() / Factor);

  // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR.
  unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

  // This is the index of the first element of the VectorWidth-bit chunk we
  // want. Since ElemsPerChunk is a power of 2, we just need to clear bits.
  IdxVal &= ~(ElemsPerChunk - 1);

  // If the input is a buildvector, just emit a smaller one.
  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
    return DAG.getBuildVector(ResultVT, DL,
                              Vec->ops().slice(IdxVal, ElemsPerChunk));

  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
}

// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
// is the expected destination value type after recursion. In is the initial
// input. Note that the input should have enough leading zero bits to prevent
// NARROW_U from saturating results.
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
                                        SelectionDAG &DAG) {
  EVT SrcVT = In.getValueType();

  // No truncation required; we might get here due to recursive calls.
  if (SrcVT == DstVT)
    return In;

  unsigned SrcSizeInBits = SrcVT.getSizeInBits();
  unsigned NumElems = SrcVT.getVectorNumElements();
  if (!isPowerOf2_32(NumElems))
    return SDValue();
  assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
  assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");

  LLVMContext &Ctx = *DAG.getContext();
  EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);

  // Narrow to the largest type possible:
  // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
  EVT InVT = MVT::i16, OutVT = MVT::i8;
  if (SrcVT.getScalarSizeInBits() > 16) {
    InVT = MVT::i32;
    OutVT = MVT::i16;
  }
  unsigned SubSizeInBits = SrcSizeInBits / 2;
  InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
  OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());

  // Split lower/upper subvectors.
  SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
  SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);

  // 256-bit -> 128-bit truncate: narrow the lower/upper 128-bit subvectors.
  if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
    Lo = DAG.getBitcast(InVT, Lo);
    Hi = DAG.getBitcast(InVT, Hi);
    SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
    return DAG.getBitcast(DstVT, Res);
  }

  // Recursively narrow lower/upper subvectors, concat the results, and narrow
  // again.
  EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
  Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
  Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);

  PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
  SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
  return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
}
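// Lower a wide integer truncate to NARROW_U. For example (illustrative):
//   (v16i8 (truncate (v16i16 x)))
// masks each lane to its low 8 bits so the narrow cannot saturate, then
// combines the two v8i16 halves of x with a single i8x16.narrow_i16x8_u.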
static SDValue performTruncateCombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;

  SDValue In = N->getOperand(0);
  EVT InVT = In.getValueType();
  if (!InVT.isSimple())
    return SDValue();

  EVT OutVT = N->getValueType(0);
  if (!OutVT.isVector())
    return SDValue();

  EVT OutSVT = OutVT.getVectorElementType();
  EVT InSVT = InVT.getVectorElementType();
  // Currently we only handle truncates to v16i8 or v8i16.
  if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
        (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
    return SDValue();

  SDLoc DL(N);
  APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
                                    OutVT.getScalarSizeInBits());
  In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
  return truncateVectorWithNARROW(OutVT, In, DL, DAG);
}

static SDValue performBitcastCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  using namespace llvm::SDPatternMatch;
  auto &DAG = DCI.DAG;
  SDLoc DL(N);
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = Src.getValueType();

  if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
        SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
    return SDValue();

  unsigned NumElts = SrcVT.getVectorNumElements();
  EVT Width = MVT::getIntegerVT(128 / NumElts);

  // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
  //   ==> bitmask
  if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
    return DAG.getZExtOrTrunc(
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
                    {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
                     DAG.getSExtOrTrunc(N->getOperand(0), DL,
                                        SrcVT.changeVectorElementType(Width))}),
        DL, VT);
  }

  // bitcast <N x i1> (setcc ...) to iN, where N = 32 or 64 (illegal)
  if (NumElts == 32 || NumElts == 64) {
    // Strategy: compare the operands 128 bits at a time as v16i8 -> v16i1,
    // bitcast each v16i1 result to an i16 mask, extend it to i32 or i64, and
    // accumulate the masks by repeatedly shifting left 16 and adding.
    SDValue Concat, SetCCVector;
    ISD::CondCode SetCond;

    if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
                                         m_CondCode(SetCond)))))
      return SDValue();
    if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
      return SDValue();

    uint64_t ElementWidth =
        SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();

    SmallVector<SDValue> VectorsToShuffle;
    for (size_t I = 0; I < Concat->ops().size(); I++) {
      VectorsToShuffle.push_back(DAG.getBitcast(
          MVT::i16,
          DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
                       extractSubVector(SetCCVector, I * (128 / ElementWidth),
                                        DAG, DL, 128),
                       SetCond)));
    }

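    // Fold the 16-bit masks into a single integer: each iteration shifts the
    // accumulated value left by 16 and adds the next mask.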
    MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
    SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);

    for (SDValue V : VectorsToShuffle) {
      ReturningInteger = DAG.getNode(
          ISD::SHL, DL, ReturnType,
          {ReturningInteger, DAG.getShiftAmountConstant(16, ReturnType, DL)});

      SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
      ReturningInteger =
          DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
    }

    return ReturningInteger;
  }

  return SDValue();
}

static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
  // any_true (setcc <X>, 0, eq) => (not (all_true X))
  // all_true (setcc <X>, 0, eq) => (not (any_true X))
  // any_true (setcc <X>, 0, ne) => (any_true X)
  // all_true (setcc <X>, 0, ne) => (all_true X)
  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
  using namespace llvm::SDPatternMatch;

  SDValue LHS;
  if (!sd_match(N->getOperand(1),
                m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
    return SDValue();
  EVT LT = LHS.getValueType();
  if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
    return SDValue();

  auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
                                 ISD::CondCode SetType,
                                 Intrinsic::WASMIntrinsics InPost) {
    if (N->getConstantOperandVal(0) != InPre)
      return SDValue();

    SDValue LHS;
    if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
                                              m_SpecificCondCode(SetType))))
      return SDValue();

    SDLoc DL(N);
    SDValue Ret = DAG.getZExtOrTrunc(
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
                    {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
        DL, MVT::i1);
    if (SetType == ISD::SETEQ)
      Ret = DAG.getNOT(DL, Ret, MVT::i1);
    return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
  };

  if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
                                       Intrinsic::wasm_alltrue))
    return AnyTrueEQ;
  if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
                                       Intrinsic::wasm_anytrue))
    return AllTrueEQ;
  if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
                                       Intrinsic::wasm_anytrue))
    return AnyTrueNE;
  if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
                                       Intrinsic::wasm_alltrue))
    return AllTrueNE;

  return SDValue();
}

template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
          Intrinsic::ID Intrin>
static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Cond = N->getOperand(2);
  if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
    return SDValue();

  if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
    return SDValue();

  SDLoc DL(N);
  SDValue Ret = DAG.getZExtOrTrunc(
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
                  {DAG.getConstant(Intrin, DL, MVT::i32),
                   DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
      DL, MVT::i1);
  if (RequiresNegate)
    Ret = DAG.getNOT(DL, Ret, MVT::i1);
  return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
}
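// Fold a setcc against a constant on a scalar bitcast of a mask vector into
// any_true/all_true, e.g.
//   (setcc (i16 (bitcast (v16i1 X))), -1, eq) ==> (all_true X)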
static SDValue performSETCCCombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI) {
  if (!DCI.isBeforeLegalize())
    return SDValue();

  EVT VT = N->getValueType(0);
  if (!VT.isScalarInteger())
    return SDValue();

  SDValue LHS = N->getOperand(0);
  if (LHS->getOpcode() != ISD::BITCAST)
    return SDValue();

  EVT FromVT = LHS->getOperand(0).getValueType();
  if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
    return SDValue();

  unsigned NumElts = FromVT.getVectorNumElements();
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return SDValue();

  if (!isa<ConstantSDNode>(N->getOperand(1)))
    return SDValue();

  EVT VecVT = FromVT.changeVectorElementType(MVT::getIntegerVT(128 / NumElts));
  auto &DAG = DCI.DAG;
  // setcc (iN (bitcast (vNi1 X))), 0, ne
  //   ==> any_true (vNi1 X)
  if (auto Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
          N, VecVT, DAG)) {
    return Match;
  }
  // setcc (iN (bitcast (vNi1 X))), 0, eq
  //   ==> xor (any_true (vNi1 X)), -1
  if (auto Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
          N, VecVT, DAG)) {
    return Match;
  }
  // setcc (iN (bitcast (vNi1 X))), -1, eq
  //   ==> all_true (vNi1 X)
  if (auto Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
          N, VecVT, DAG)) {
    return Match;
  }
  // setcc (iN (bitcast (vNi1 X))), -1, ne
  //   ==> xor (all_true (vNi1 X)), -1
  if (auto Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
          N, VecVT, DAG)) {
    return Match;
  }
  return SDValue();
}

static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::MUL);
  EVT VT = N->getValueType(0);
  if (VT != MVT::v8i32 && VT != MVT::v16i32)
    return SDValue();

  // Mul with extending inputs.
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  if (LHS.getOpcode() != RHS.getOpcode())
    return SDValue();

  if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
      LHS.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
    return SDValue();

  EVT FromVT = LHS->getOperand(0).getValueType();
  EVT EltTy = FromVT.getVectorElementType();
  if (EltTy != MVT::i8)
    return SDValue();

  // For an input DAG that looks like this:
  //   %a = input_type
  //   %b = input_type
  //   %lhs = extend %a to output_type
  //   %rhs = extend %b to output_type
  //   %mul = mul %lhs, %rhs
  //
  // input_type | output_type | instructions
  // v16i8      | v16i32      | %low = i16x8.extmul_low_i8x16_ %a, %b
  //            |             | %high = i16x8.extmul_high_i8x16_ %a, %b
  //            |             | %low_low = i32x4.ext_low_i16x8_ %low
  //            |             | %low_high = i32x4.ext_high_i16x8_ %low
  //            |             | %high_low = i32x4.ext_low_i16x8_ %high
  //            |             | %high_high = i32x4.ext_high_i16x8_ %high
  //            |             | %res = concat_vector(...)
  // v8i8       | v8i32       | %low = i16x8.extmul_low_i8x16_ %a, %b
  //            |             | %low_low = i32x4.ext_low_i16x8_ %low
  //            |             | %low_high = i32x4.ext_high_i16x8_ %low
  //            |             | %res = concat_vector(%low_low, %low_high)
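
  // Each extend_low/extend_high + mul pair emitted below is expected to be
  // folded into a single extmul_{low,high} instruction during instruction
  // selection.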
  SDLoc DL(N);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue ExtendInLHS = LHS->getOperand(0);
  SDValue ExtendInRHS = RHS->getOperand(0);
  bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
  unsigned ExtendLowOpc =
      IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
  unsigned ExtendHighOpc =
      IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;

  auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
    return DAG.getNode(ExtendLowOpc, DL, VT, Op);
  };
  auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
    return DAG.getNode(ExtendHighOpc, DL, VT, Op);
  };

  if (NumElts == 16) {
    SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
    SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
    SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
    SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
    SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
    SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
    SDValue SubVectors[] = {
        GetExtendLow(MVT::v4i32, MulLow),
        GetExtendHigh(MVT::v4i32, MulLow),
        GetExtendLow(MVT::v4i32, MulHigh),
        GetExtendHigh(MVT::v4i32, MulHigh),
    };
    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
  }

  assert(NumElts == 8);
  SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
  SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
  SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
  SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
  SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}

SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default:
    return SDValue();
  case ISD::BITCAST:
    return performBitcastCombine(N, DCI);
  case ISD::SETCC:
    return performSETCCCombine(N, DCI);
  case ISD::VECTOR_SHUFFLE:
    return performVECTOR_SHUFFLECombine(N, DCI);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    return performVectorExtendCombine(N, DCI);
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
    return performVectorExtendToFPCombine(N, DCI);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::FP_ROUND:
  case ISD::CONCAT_VECTORS:
    return performVectorTruncZeroCombine(N, DCI);
  case ISD::TRUNCATE:
    return performTruncateCombine(N, DCI);
  case ISD::INTRINSIC_WO_CHAIN: {
    if (auto AnyAllCombine = performAnyAllCombine(N, DCI.DAG))
      return AnyAllCombine;
    return performLowerPartialReduction(N, DCI.DAG);
  }
  case ISD::MUL:
    return performMulCombine(N, DCI.DAG);
  }
}