10b57cec5SDimitry Andric //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// This is the parent TargetLowering class for hardware code gen 110b57cec5SDimitry Andric /// targets. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "AMDGPUISelLowering.h" 160b57cec5SDimitry Andric #include "AMDGPU.h" 17e8d8bef9SDimitry Andric #include "AMDGPUInstrInfo.h" 18e8d8bef9SDimitry Andric #include "AMDGPUMachineFunction.h" 19e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 200b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h" 210b57cec5SDimitry Andric #include "llvm/CodeGen/Analysis.h" 220b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 23e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 24e8d8bef9SDimitry Andric #include "llvm/Support/CommandLine.h" 250b57cec5SDimitry Andric #include "llvm/Support/KnownBits.h" 26e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 27e8d8bef9SDimitry Andric 280b57cec5SDimitry Andric using namespace llvm; 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric #include "AMDGPUGenCallingConv.inc" 310b57cec5SDimitry Andric 325ffd83dbSDimitry Andric static cl::opt<bool> AMDGPUBypassSlowDiv( 335ffd83dbSDimitry Andric "amdgpu-bypass-slow-div", 345ffd83dbSDimitry Andric cl::desc("Skip 64-bit divide for dynamic 32-bit values"), 355ffd83dbSDimitry Andric cl::init(true)); 365ffd83dbSDimitry Andric 370b57cec5SDimitry Andric // Find a larger type to do a load / store of a vector with. 380b57cec5SDimitry Andric EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { 390b57cec5SDimitry Andric unsigned StoreSize = VT.getStoreSizeInBits(); 400b57cec5SDimitry Andric if (StoreSize <= 32) 410b57cec5SDimitry Andric return EVT::getIntegerVT(Ctx, StoreSize); 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric assert(StoreSize % 32 == 0 && "Store size not a multiple of 32"); 440b57cec5SDimitry Andric return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32); 450b57cec5SDimitry Andric } 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) { 48349cc55cSDimitry Andric return DAG.computeKnownBits(Op).countMaxActiveBits(); 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) { 520b57cec5SDimitry Andric // In order for this to be a signed 24-bit value, bit 23, must 530b57cec5SDimitry Andric // be a sign bit. 54*04eeddc0SDimitry Andric return DAG.ComputeMaxSignificantBits(Op); 550b57cec5SDimitry Andric } 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, 580b57cec5SDimitry Andric const AMDGPUSubtarget &STI) 590b57cec5SDimitry Andric : TargetLowering(TM), Subtarget(&STI) { 600b57cec5SDimitry Andric // Lower floating point store/load to integer store/load to reduce the number 610b57cec5SDimitry Andric // of patterns in tablegen. 620b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::f32, Promote); 630b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v2f32, Promote); 660b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v3f32, Promote); 690b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v3f32, MVT::v3i32); 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v4f32, Promote); 720b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v5f32, Promote); 750b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32); 760b57cec5SDimitry Andric 77fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v6f32, Promote); 78fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v6f32, MVT::v6i32); 79fe6060f1SDimitry Andric 80fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v7f32, Promote); 81fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v7f32, MVT::v7i32); 82fe6060f1SDimitry Andric 830b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v8f32, Promote); 840b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32); 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v16f32, Promote); 870b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32); 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v32f32, Promote); 900b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32); 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::i64, Promote); 930b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v2i64, Promote); 960b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32); 970b57cec5SDimitry Andric 980b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::f64, Promote); 990b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32); 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v2f64, Promote); 1020b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v4i32); 1030b57cec5SDimitry Andric 104fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v3i64, Promote); 105fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v3i64, MVT::v6i32); 106fe6060f1SDimitry Andric 1075ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v4i64, Promote); 1085ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v4i64, MVT::v8i32); 1095ffd83dbSDimitry Andric 110fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v3f64, Promote); 111fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v3f64, MVT::v6i32); 112fe6060f1SDimitry Andric 1135ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v4f64, Promote); 1145ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v4f64, MVT::v8i32); 1155ffd83dbSDimitry Andric 1165ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v8i64, Promote); 1175ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v8i64, MVT::v16i32); 1185ffd83dbSDimitry Andric 1195ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v8f64, Promote); 1205ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v8f64, MVT::v16i32); 1215ffd83dbSDimitry Andric 1225ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v16i64, Promote); 1235ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v16i64, MVT::v32i32); 1245ffd83dbSDimitry Andric 1255ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v16f64, Promote); 1265ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v16f64, MVT::v32i32); 1275ffd83dbSDimitry Andric 1280b57cec5SDimitry Andric // There are no 64-bit extloads. These should be done as a 32-bit extload and 1290b57cec5SDimitry Andric // an extension to 64-bit. 1300b57cec5SDimitry Andric for (MVT VT : MVT::integer_valuetypes()) { 1310b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand); 1320b57cec5SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand); 1330b57cec5SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand); 1340b57cec5SDimitry Andric } 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric for (MVT VT : MVT::integer_valuetypes()) { 1370b57cec5SDimitry Andric if (VT == MVT::i64) 1380b57cec5SDimitry Andric continue; 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 1410b57cec5SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal); 1420b57cec5SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal); 1430b57cec5SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 1460b57cec5SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal); 1470b57cec5SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal); 1480b57cec5SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); 1510b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal); 1520b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal); 1530b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric 1568bcb0991SDimitry Andric for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { 1570b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand); 1580b57cec5SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand); 1590b57cec5SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand); 1600b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand); 1610b57cec5SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand); 1620b57cec5SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand); 1630b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand); 1640b57cec5SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand); 1650b57cec5SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand); 1668bcb0991SDimitry Andric setLoadExtAction(ISD::EXTLOAD, VT, MVT::v3i16, Expand); 1678bcb0991SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v3i16, Expand); 1688bcb0991SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v3i16, Expand); 1690b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand); 1700b57cec5SDimitry Andric setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand); 1710b57cec5SDimitry Andric setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand); 1720b57cec5SDimitry Andric } 1730b57cec5SDimitry Andric 1740b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 1750b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); 1768bcb0991SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand); 1770b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); 1780b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); 1798bcb0991SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand); 1808bcb0991SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand); 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 1830b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); 184fe6060f1SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f32, Expand); 1850b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); 1860b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f32, Expand); 1875ffd83dbSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f32, Expand); 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 1900b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); 191fe6060f1SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f16, Expand); 1920b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); 1930b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand); 1945ffd83dbSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand); 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::f32, Promote); 1970b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v2f32, Promote); 2000b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32); 2010b57cec5SDimitry Andric 2020b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v3f32, Promote); 2030b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v3f32, MVT::v3i32); 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v4f32, Promote); 2060b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v5f32, Promote); 2090b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32); 2100b57cec5SDimitry Andric 211fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v6f32, Promote); 212fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v6f32, MVT::v6i32); 213fe6060f1SDimitry Andric 214fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v7f32, Promote); 215fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v7f32, MVT::v7i32); 216fe6060f1SDimitry Andric 2170b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v8f32, Promote); 2180b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32); 2190b57cec5SDimitry Andric 2200b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v16f32, Promote); 2210b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32); 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v32f32, Promote); 2240b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32); 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::i64, Promote); 2270b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v2i64, Promote); 2300b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32); 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::f64, Promote); 2330b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32); 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v2f64, Promote); 2360b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32); 2370b57cec5SDimitry Andric 238fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v3i64, Promote); 239fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v3i64, MVT::v6i32); 240fe6060f1SDimitry Andric 241fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v3f64, Promote); 242fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v3f64, MVT::v6i32); 243fe6060f1SDimitry Andric 2445ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v4i64, Promote); 2455ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v4i64, MVT::v8i32); 2465ffd83dbSDimitry Andric 2475ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v4f64, Promote); 2485ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v4f64, MVT::v8i32); 2495ffd83dbSDimitry Andric 2505ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v8i64, Promote); 2515ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v8i64, MVT::v16i32); 2525ffd83dbSDimitry Andric 2535ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v8f64, Promote); 2545ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v8f64, MVT::v16i32); 2555ffd83dbSDimitry Andric 2565ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v16i64, Promote); 2575ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v16i64, MVT::v32i32); 2585ffd83dbSDimitry Andric 2595ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v16f64, Promote); 2605ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v16f64, MVT::v32i32); 2615ffd83dbSDimitry Andric 2620b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i1, Expand); 2630b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i8, Expand); 2640b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i16, Expand); 2650b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i32, Expand); 2660b57cec5SDimitry Andric 2670b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand); 2680b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i8, Expand); 2690b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i16, Expand); 2700b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand); 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric setTruncStoreAction(MVT::f32, MVT::f16, Expand); 2730b57cec5SDimitry Andric setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); 2748bcb0991SDimitry Andric setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand); 2750b57cec5SDimitry Andric setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); 2760b57cec5SDimitry Andric setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); 2778bcb0991SDimitry Andric setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand); 2788bcb0991SDimitry Andric setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand); 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric setTruncStoreAction(MVT::f64, MVT::f16, Expand); 2810b57cec5SDimitry Andric setTruncStoreAction(MVT::f64, MVT::f32, Expand); 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); 2840b57cec5SDimitry Andric setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand); 2850b57cec5SDimitry Andric 286fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3i64, MVT::v3i32, Expand); 287fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3i64, MVT::v3i16, Expand); 288fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3f64, MVT::v3f32, Expand); 289fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3f64, MVT::v3f16, Expand); 290fe6060f1SDimitry Andric 2915ffd83dbSDimitry Andric setTruncStoreAction(MVT::v4i64, MVT::v4i32, Expand); 2925ffd83dbSDimitry Andric setTruncStoreAction(MVT::v4i64, MVT::v4i16, Expand); 2930b57cec5SDimitry Andric setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand); 2940b57cec5SDimitry Andric setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand); 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand); 2970b57cec5SDimitry Andric setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand); 2980b57cec5SDimitry Andric 2995ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16f64, MVT::v16f32, Expand); 3005ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16f64, MVT::v16f16, Expand); 3015ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand); 3025ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand); 3035ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand); 3045ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand); 3055ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i1, Expand); 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric setOperationAction(ISD::Constant, MVT::i32, Legal); 3080b57cec5SDimitry Andric setOperationAction(ISD::Constant, MVT::i64, Legal); 3090b57cec5SDimitry Andric setOperationAction(ISD::ConstantFP, MVT::f32, Legal); 3100b57cec5SDimitry Andric setOperationAction(ISD::ConstantFP, MVT::f64, Legal); 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric setOperationAction(ISD::BR_JT, MVT::Other, Expand); 3130b57cec5SDimitry Andric setOperationAction(ISD::BRIND, MVT::Other, Expand); 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric // This is totally unsupported, just custom lower to produce an error. 3160b57cec5SDimitry Andric setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric // Library functions. These default to Expand, but we have instructions 3190b57cec5SDimitry Andric // for them. 3200b57cec5SDimitry Andric setOperationAction(ISD::FCEIL, MVT::f32, Legal); 3210b57cec5SDimitry Andric setOperationAction(ISD::FEXP2, MVT::f32, Legal); 3220b57cec5SDimitry Andric setOperationAction(ISD::FPOW, MVT::f32, Legal); 3230b57cec5SDimitry Andric setOperationAction(ISD::FLOG2, MVT::f32, Legal); 3240b57cec5SDimitry Andric setOperationAction(ISD::FABS, MVT::f32, Legal); 3250b57cec5SDimitry Andric setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 3260b57cec5SDimitry Andric setOperationAction(ISD::FRINT, MVT::f32, Legal); 3270b57cec5SDimitry Andric setOperationAction(ISD::FTRUNC, MVT::f32, Legal); 3280b57cec5SDimitry Andric setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 3290b57cec5SDimitry Andric setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 3300b57cec5SDimitry Andric 3310b57cec5SDimitry Andric setOperationAction(ISD::FROUND, MVT::f32, Custom); 3320b57cec5SDimitry Andric setOperationAction(ISD::FROUND, MVT::f64, Custom); 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric setOperationAction(ISD::FLOG, MVT::f32, Custom); 3350b57cec5SDimitry Andric setOperationAction(ISD::FLOG10, MVT::f32, Custom); 3360b57cec5SDimitry Andric setOperationAction(ISD::FEXP, MVT::f32, Custom); 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); 3400b57cec5SDimitry Andric setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); 3410b57cec5SDimitry Andric 342e8d8bef9SDimitry Andric setOperationAction(ISD::FREM, MVT::f16, Custom); 3430b57cec5SDimitry Andric setOperationAction(ISD::FREM, MVT::f32, Custom); 3440b57cec5SDimitry Andric setOperationAction(ISD::FREM, MVT::f64, Custom); 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric // Expand to fneg + fadd. 3470b57cec5SDimitry Andric setOperationAction(ISD::FSUB, MVT::f64, Expand); 3480b57cec5SDimitry Andric 3490b57cec5SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v3i32, Custom); 3500b57cec5SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v3f32, Custom); 3510b57cec5SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); 3520b57cec5SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom); 3530b57cec5SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v5i32, Custom); 3540b57cec5SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v5f32, Custom); 355fe6060f1SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v6i32, Custom); 356fe6060f1SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v6f32, Custom); 357fe6060f1SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v7i32, Custom); 358fe6060f1SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v7f32, Custom); 3590b57cec5SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom); 3600b57cec5SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom); 361fe6060f1SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f16, Custom); 362fe6060f1SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom); 363*04eeddc0SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f16, Custom); 364*04eeddc0SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom); 3650b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom); 3660b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom); 3670b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f32, Custom); 3680b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i32, Custom); 3690b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom); 3700b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom); 3710b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5f32, Custom); 3720b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom); 373fe6060f1SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v6f32, Custom); 374fe6060f1SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v6i32, Custom); 375fe6060f1SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v7f32, Custom); 376fe6060f1SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v7i32, Custom); 3770b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom); 3780b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom); 3790b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f32, Custom); 3800b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom); 3810b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom); 3820b57cec5SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom); 3835ffd83dbSDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom); 3845ffd83dbSDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom); 385fe6060f1SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f64, Custom); 386fe6060f1SDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i64, Custom); 3875ffd83dbSDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Custom); 3885ffd83dbSDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i64, Custom); 3895ffd83dbSDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f64, Custom); 3905ffd83dbSDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i64, Custom); 3915ffd83dbSDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f64, Custom); 3925ffd83dbSDimitry Andric setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i64, Custom); 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); 3950b57cec5SDimitry Andric setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom); 3960b57cec5SDimitry Andric setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom); 3970b57cec5SDimitry Andric 3980b57cec5SDimitry Andric const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; 3990b57cec5SDimitry Andric for (MVT VT : ScalarIntVTs) { 4000b57cec5SDimitry Andric // These should use [SU]DIVREM, so set them to expand 4010b57cec5SDimitry Andric setOperationAction(ISD::SDIV, VT, Expand); 4020b57cec5SDimitry Andric setOperationAction(ISD::UDIV, VT, Expand); 4030b57cec5SDimitry Andric setOperationAction(ISD::SREM, VT, Expand); 4040b57cec5SDimitry Andric setOperationAction(ISD::UREM, VT, Expand); 4050b57cec5SDimitry Andric 4060b57cec5SDimitry Andric // GPU does not have divrem function for signed or unsigned. 4070b57cec5SDimitry Andric setOperationAction(ISD::SDIVREM, VT, Custom); 4080b57cec5SDimitry Andric setOperationAction(ISD::UDIVREM, VT, Custom); 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric // GPU does not have [S|U]MUL_LOHI functions as a single instruction. 4110b57cec5SDimitry Andric setOperationAction(ISD::SMUL_LOHI, VT, Expand); 4120b57cec5SDimitry Andric setOperationAction(ISD::UMUL_LOHI, VT, Expand); 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric setOperationAction(ISD::BSWAP, VT, Expand); 4150b57cec5SDimitry Andric setOperationAction(ISD::CTTZ, VT, Expand); 4160b57cec5SDimitry Andric setOperationAction(ISD::CTLZ, VT, Expand); 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric // AMDGPU uses ADDC/SUBC/ADDE/SUBE 4190b57cec5SDimitry Andric setOperationAction(ISD::ADDC, VT, Legal); 4200b57cec5SDimitry Andric setOperationAction(ISD::SUBC, VT, Legal); 4210b57cec5SDimitry Andric setOperationAction(ISD::ADDE, VT, Legal); 4220b57cec5SDimitry Andric setOperationAction(ISD::SUBE, VT, Legal); 4230b57cec5SDimitry Andric } 4240b57cec5SDimitry Andric 4255ffd83dbSDimitry Andric // The hardware supports 32-bit FSHR, but not FSHL. 4265ffd83dbSDimitry Andric setOperationAction(ISD::FSHR, MVT::i32, Legal); 4275ffd83dbSDimitry Andric 4280b57cec5SDimitry Andric // The hardware supports 32-bit ROTR, but not ROTL. 4290b57cec5SDimitry Andric setOperationAction(ISD::ROTL, MVT::i32, Expand); 4300b57cec5SDimitry Andric setOperationAction(ISD::ROTL, MVT::i64, Expand); 4310b57cec5SDimitry Andric setOperationAction(ISD::ROTR, MVT::i64, Expand); 4320b57cec5SDimitry Andric 433e8d8bef9SDimitry Andric setOperationAction(ISD::MULHU, MVT::i16, Expand); 434e8d8bef9SDimitry Andric setOperationAction(ISD::MULHS, MVT::i16, Expand); 435e8d8bef9SDimitry Andric 4360b57cec5SDimitry Andric setOperationAction(ISD::MUL, MVT::i64, Expand); 4370b57cec5SDimitry Andric setOperationAction(ISD::MULHU, MVT::i64, Expand); 4380b57cec5SDimitry Andric setOperationAction(ISD::MULHS, MVT::i64, Expand); 4390b57cec5SDimitry Andric setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 4400b57cec5SDimitry Andric setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 4410b57cec5SDimitry Andric setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 4420b57cec5SDimitry Andric setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); 4430b57cec5SDimitry Andric setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric setOperationAction(ISD::SMIN, MVT::i32, Legal); 4460b57cec5SDimitry Andric setOperationAction(ISD::UMIN, MVT::i32, Legal); 4470b57cec5SDimitry Andric setOperationAction(ISD::SMAX, MVT::i32, Legal); 4480b57cec5SDimitry Andric setOperationAction(ISD::UMAX, MVT::i32, Legal); 4490b57cec5SDimitry Andric 4500b57cec5SDimitry Andric setOperationAction(ISD::CTTZ, MVT::i64, Custom); 4510b57cec5SDimitry Andric setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom); 4520b57cec5SDimitry Andric setOperationAction(ISD::CTLZ, MVT::i64, Custom); 4530b57cec5SDimitry Andric setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric static const MVT::SimpleValueType VectorIntTypes[] = { 456fe6060f1SDimitry Andric MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32}; 4570b57cec5SDimitry Andric 4580b57cec5SDimitry Andric for (MVT VT : VectorIntTypes) { 4590b57cec5SDimitry Andric // Expand the following operations for the current type by default. 4600b57cec5SDimitry Andric setOperationAction(ISD::ADD, VT, Expand); 4610b57cec5SDimitry Andric setOperationAction(ISD::AND, VT, Expand); 4620b57cec5SDimitry Andric setOperationAction(ISD::FP_TO_SINT, VT, Expand); 4630b57cec5SDimitry Andric setOperationAction(ISD::FP_TO_UINT, VT, Expand); 4640b57cec5SDimitry Andric setOperationAction(ISD::MUL, VT, Expand); 4650b57cec5SDimitry Andric setOperationAction(ISD::MULHU, VT, Expand); 4660b57cec5SDimitry Andric setOperationAction(ISD::MULHS, VT, Expand); 4670b57cec5SDimitry Andric setOperationAction(ISD::OR, VT, Expand); 4680b57cec5SDimitry Andric setOperationAction(ISD::SHL, VT, Expand); 4690b57cec5SDimitry Andric setOperationAction(ISD::SRA, VT, Expand); 4700b57cec5SDimitry Andric setOperationAction(ISD::SRL, VT, Expand); 4710b57cec5SDimitry Andric setOperationAction(ISD::ROTL, VT, Expand); 4720b57cec5SDimitry Andric setOperationAction(ISD::ROTR, VT, Expand); 4730b57cec5SDimitry Andric setOperationAction(ISD::SUB, VT, Expand); 4740b57cec5SDimitry Andric setOperationAction(ISD::SINT_TO_FP, VT, Expand); 4750b57cec5SDimitry Andric setOperationAction(ISD::UINT_TO_FP, VT, Expand); 4760b57cec5SDimitry Andric setOperationAction(ISD::SDIV, VT, Expand); 4770b57cec5SDimitry Andric setOperationAction(ISD::UDIV, VT, Expand); 4780b57cec5SDimitry Andric setOperationAction(ISD::SREM, VT, Expand); 4790b57cec5SDimitry Andric setOperationAction(ISD::UREM, VT, Expand); 4800b57cec5SDimitry Andric setOperationAction(ISD::SMUL_LOHI, VT, Expand); 4810b57cec5SDimitry Andric setOperationAction(ISD::UMUL_LOHI, VT, Expand); 4825ffd83dbSDimitry Andric setOperationAction(ISD::SDIVREM, VT, Expand); 4830b57cec5SDimitry Andric setOperationAction(ISD::UDIVREM, VT, Expand); 4840b57cec5SDimitry Andric setOperationAction(ISD::SELECT, VT, Expand); 4850b57cec5SDimitry Andric setOperationAction(ISD::VSELECT, VT, Expand); 4860b57cec5SDimitry Andric setOperationAction(ISD::SELECT_CC, VT, Expand); 4870b57cec5SDimitry Andric setOperationAction(ISD::XOR, VT, Expand); 4880b57cec5SDimitry Andric setOperationAction(ISD::BSWAP, VT, Expand); 4890b57cec5SDimitry Andric setOperationAction(ISD::CTPOP, VT, Expand); 4900b57cec5SDimitry Andric setOperationAction(ISD::CTTZ, VT, Expand); 4910b57cec5SDimitry Andric setOperationAction(ISD::CTLZ, VT, Expand); 4920b57cec5SDimitry Andric setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); 4930b57cec5SDimitry Andric setOperationAction(ISD::SETCC, VT, Expand); 4940b57cec5SDimitry Andric } 4950b57cec5SDimitry Andric 4960b57cec5SDimitry Andric static const MVT::SimpleValueType FloatVectorTypes[] = { 497fe6060f1SDimitry Andric MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32}; 4980b57cec5SDimitry Andric 4990b57cec5SDimitry Andric for (MVT VT : FloatVectorTypes) { 5000b57cec5SDimitry Andric setOperationAction(ISD::FABS, VT, Expand); 5010b57cec5SDimitry Andric setOperationAction(ISD::FMINNUM, VT, Expand); 5020b57cec5SDimitry Andric setOperationAction(ISD::FMAXNUM, VT, Expand); 5030b57cec5SDimitry Andric setOperationAction(ISD::FADD, VT, Expand); 5040b57cec5SDimitry Andric setOperationAction(ISD::FCEIL, VT, Expand); 5050b57cec5SDimitry Andric setOperationAction(ISD::FCOS, VT, Expand); 5060b57cec5SDimitry Andric setOperationAction(ISD::FDIV, VT, Expand); 5070b57cec5SDimitry Andric setOperationAction(ISD::FEXP2, VT, Expand); 5080b57cec5SDimitry Andric setOperationAction(ISD::FEXP, VT, Expand); 5090b57cec5SDimitry Andric setOperationAction(ISD::FLOG2, VT, Expand); 5100b57cec5SDimitry Andric setOperationAction(ISD::FREM, VT, Expand); 5110b57cec5SDimitry Andric setOperationAction(ISD::FLOG, VT, Expand); 5120b57cec5SDimitry Andric setOperationAction(ISD::FLOG10, VT, Expand); 5130b57cec5SDimitry Andric setOperationAction(ISD::FPOW, VT, Expand); 5140b57cec5SDimitry Andric setOperationAction(ISD::FFLOOR, VT, Expand); 5150b57cec5SDimitry Andric setOperationAction(ISD::FTRUNC, VT, Expand); 5160b57cec5SDimitry Andric setOperationAction(ISD::FMUL, VT, Expand); 5170b57cec5SDimitry Andric setOperationAction(ISD::FMA, VT, Expand); 5180b57cec5SDimitry Andric setOperationAction(ISD::FRINT, VT, Expand); 5190b57cec5SDimitry Andric setOperationAction(ISD::FNEARBYINT, VT, Expand); 5200b57cec5SDimitry Andric setOperationAction(ISD::FSQRT, VT, Expand); 5210b57cec5SDimitry Andric setOperationAction(ISD::FSIN, VT, Expand); 5220b57cec5SDimitry Andric setOperationAction(ISD::FSUB, VT, Expand); 5230b57cec5SDimitry Andric setOperationAction(ISD::FNEG, VT, Expand); 5240b57cec5SDimitry Andric setOperationAction(ISD::VSELECT, VT, Expand); 5250b57cec5SDimitry Andric setOperationAction(ISD::SELECT_CC, VT, Expand); 5260b57cec5SDimitry Andric setOperationAction(ISD::FCOPYSIGN, VT, Expand); 5270b57cec5SDimitry Andric setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); 5280b57cec5SDimitry Andric setOperationAction(ISD::SETCC, VT, Expand); 5290b57cec5SDimitry Andric setOperationAction(ISD::FCANONICALIZE, VT, Expand); 5300b57cec5SDimitry Andric } 5310b57cec5SDimitry Andric 5320b57cec5SDimitry Andric // This causes using an unrolled select operation rather than expansion with 5330b57cec5SDimitry Andric // bit operations. This is in general better, but the alternative using BFI 5340b57cec5SDimitry Andric // instructions may be better if the select sources are SGPRs. 5350b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v2f32, Promote); 5360b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v2f32, MVT::v2i32); 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v3f32, Promote); 5390b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v3f32, MVT::v3i32); 5400b57cec5SDimitry Andric 5410b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v4f32, Promote); 5420b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32); 5430b57cec5SDimitry Andric 5440b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v5f32, Promote); 5450b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32); 5460b57cec5SDimitry Andric 547fe6060f1SDimitry Andric setOperationAction(ISD::SELECT, MVT::v6f32, Promote); 548fe6060f1SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v6f32, MVT::v6i32); 549fe6060f1SDimitry Andric 550fe6060f1SDimitry Andric setOperationAction(ISD::SELECT, MVT::v7f32, Promote); 551fe6060f1SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v7f32, MVT::v7i32); 552fe6060f1SDimitry Andric 5530b57cec5SDimitry Andric // There are no libcalls of any kind. 5540b57cec5SDimitry Andric for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) 5550b57cec5SDimitry Andric setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr); 5560b57cec5SDimitry Andric 5570b57cec5SDimitry Andric setSchedulingPreference(Sched::RegPressure); 5580b57cec5SDimitry Andric setJumpIsExpensive(true); 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric // FIXME: This is only partially true. If we have to do vector compares, any 5610b57cec5SDimitry Andric // SGPR pair can be a condition register. If we have a uniform condition, we 5620b57cec5SDimitry Andric // are better off doing SALU operations, where there is only one SCC. For now, 5630b57cec5SDimitry Andric // we don't have a way of knowing during instruction selection if a condition 5640b57cec5SDimitry Andric // will be uniform and we always use vector compares. Assume we are using 5650b57cec5SDimitry Andric // vector compares until that is fixed. 5660b57cec5SDimitry Andric setHasMultipleConditionRegisters(true); 5670b57cec5SDimitry Andric 5680b57cec5SDimitry Andric setMinCmpXchgSizeInBits(32); 5690b57cec5SDimitry Andric setSupportsUnalignedAtomics(false); 5700b57cec5SDimitry Andric 5710b57cec5SDimitry Andric PredictableSelectIsExpensive = false; 5720b57cec5SDimitry Andric 5730b57cec5SDimitry Andric // We want to find all load dependencies for long chains of stores to enable 5740b57cec5SDimitry Andric // merging into very wide vectors. The problem is with vectors with > 4 5750b57cec5SDimitry Andric // elements. MergeConsecutiveStores will attempt to merge these because x8/x16 5760b57cec5SDimitry Andric // vectors are a legal type, even though we have to split the loads 5770b57cec5SDimitry Andric // usually. When we can more precisely specify load legality per address 5780b57cec5SDimitry Andric // space, we should be able to make FindBetterChain/MergeConsecutiveStores 5790b57cec5SDimitry Andric // smarter so that they can figure out what to do in 2 iterations without all 5800b57cec5SDimitry Andric // N > 4 stores on the same chain. 5810b57cec5SDimitry Andric GatherAllAliasesMaxDepth = 16; 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry 5840b57cec5SDimitry Andric // about these during lowering. 5850b57cec5SDimitry Andric MaxStoresPerMemcpy = 0xffffffff; 5860b57cec5SDimitry Andric MaxStoresPerMemmove = 0xffffffff; 5870b57cec5SDimitry Andric MaxStoresPerMemset = 0xffffffff; 5880b57cec5SDimitry Andric 5895ffd83dbSDimitry Andric // The expansion for 64-bit division is enormous. 5905ffd83dbSDimitry Andric if (AMDGPUBypassSlowDiv) 5915ffd83dbSDimitry Andric addBypassSlowDiv(64, 32); 5925ffd83dbSDimitry Andric 5930b57cec5SDimitry Andric setTargetDAGCombine(ISD::BITCAST); 5940b57cec5SDimitry Andric setTargetDAGCombine(ISD::SHL); 5950b57cec5SDimitry Andric setTargetDAGCombine(ISD::SRA); 5960b57cec5SDimitry Andric setTargetDAGCombine(ISD::SRL); 5970b57cec5SDimitry Andric setTargetDAGCombine(ISD::TRUNCATE); 5980b57cec5SDimitry Andric setTargetDAGCombine(ISD::MUL); 5994824e7fdSDimitry Andric setTargetDAGCombine(ISD::SMUL_LOHI); 6004824e7fdSDimitry Andric setTargetDAGCombine(ISD::UMUL_LOHI); 6010b57cec5SDimitry Andric setTargetDAGCombine(ISD::MULHU); 6020b57cec5SDimitry Andric setTargetDAGCombine(ISD::MULHS); 6030b57cec5SDimitry Andric setTargetDAGCombine(ISD::SELECT); 6040b57cec5SDimitry Andric setTargetDAGCombine(ISD::SELECT_CC); 6050b57cec5SDimitry Andric setTargetDAGCombine(ISD::STORE); 6060b57cec5SDimitry Andric setTargetDAGCombine(ISD::FADD); 6070b57cec5SDimitry Andric setTargetDAGCombine(ISD::FSUB); 6080b57cec5SDimitry Andric setTargetDAGCombine(ISD::FNEG); 6090b57cec5SDimitry Andric setTargetDAGCombine(ISD::FABS); 6100b57cec5SDimitry Andric setTargetDAGCombine(ISD::AssertZext); 6110b57cec5SDimitry Andric setTargetDAGCombine(ISD::AssertSext); 6128bcb0991SDimitry Andric setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 6130b57cec5SDimitry Andric } 6140b57cec5SDimitry Andric 615e8d8bef9SDimitry Andric bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const { 616e8d8bef9SDimitry Andric if (getTargetMachine().Options.NoSignedZerosFPMath) 617e8d8bef9SDimitry Andric return true; 618e8d8bef9SDimitry Andric 619e8d8bef9SDimitry Andric const auto Flags = Op.getNode()->getFlags(); 620e8d8bef9SDimitry Andric if (Flags.hasNoSignedZeros()) 621e8d8bef9SDimitry Andric return true; 622e8d8bef9SDimitry Andric 623e8d8bef9SDimitry Andric return false; 624e8d8bef9SDimitry Andric } 625e8d8bef9SDimitry Andric 6260b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 6270b57cec5SDimitry Andric // Target Information 6280b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 6290b57cec5SDimitry Andric 6300b57cec5SDimitry Andric LLVM_READNONE 6310b57cec5SDimitry Andric static bool fnegFoldsIntoOp(unsigned Opc) { 6320b57cec5SDimitry Andric switch (Opc) { 6330b57cec5SDimitry Andric case ISD::FADD: 6340b57cec5SDimitry Andric case ISD::FSUB: 6350b57cec5SDimitry Andric case ISD::FMUL: 6360b57cec5SDimitry Andric case ISD::FMA: 6370b57cec5SDimitry Andric case ISD::FMAD: 6380b57cec5SDimitry Andric case ISD::FMINNUM: 6390b57cec5SDimitry Andric case ISD::FMAXNUM: 6400b57cec5SDimitry Andric case ISD::FMINNUM_IEEE: 6410b57cec5SDimitry Andric case ISD::FMAXNUM_IEEE: 6420b57cec5SDimitry Andric case ISD::FSIN: 6430b57cec5SDimitry Andric case ISD::FTRUNC: 6440b57cec5SDimitry Andric case ISD::FRINT: 6450b57cec5SDimitry Andric case ISD::FNEARBYINT: 6460b57cec5SDimitry Andric case ISD::FCANONICALIZE: 6470b57cec5SDimitry Andric case AMDGPUISD::RCP: 6480b57cec5SDimitry Andric case AMDGPUISD::RCP_LEGACY: 6490b57cec5SDimitry Andric case AMDGPUISD::RCP_IFLAG: 6500b57cec5SDimitry Andric case AMDGPUISD::SIN_HW: 6510b57cec5SDimitry Andric case AMDGPUISD::FMUL_LEGACY: 6520b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: 6530b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: 6540b57cec5SDimitry Andric case AMDGPUISD::FMED3: 655e8d8bef9SDimitry Andric // TODO: handle llvm.amdgcn.fma.legacy 6560b57cec5SDimitry Andric return true; 6570b57cec5SDimitry Andric default: 6580b57cec5SDimitry Andric return false; 6590b57cec5SDimitry Andric } 6600b57cec5SDimitry Andric } 6610b57cec5SDimitry Andric 6620b57cec5SDimitry Andric /// \p returns true if the operation will definitely need to use a 64-bit 6630b57cec5SDimitry Andric /// encoding, and thus will use a VOP3 encoding regardless of the source 6640b57cec5SDimitry Andric /// modifiers. 6650b57cec5SDimitry Andric LLVM_READONLY 6660b57cec5SDimitry Andric static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) { 6670b57cec5SDimitry Andric return N->getNumOperands() > 2 || VT == MVT::f64; 6680b57cec5SDimitry Andric } 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric // Most FP instructions support source modifiers, but this could be refined 6710b57cec5SDimitry Andric // slightly. 6720b57cec5SDimitry Andric LLVM_READONLY 6730b57cec5SDimitry Andric static bool hasSourceMods(const SDNode *N) { 6740b57cec5SDimitry Andric if (isa<MemSDNode>(N)) 6750b57cec5SDimitry Andric return false; 6760b57cec5SDimitry Andric 6770b57cec5SDimitry Andric switch (N->getOpcode()) { 6780b57cec5SDimitry Andric case ISD::CopyToReg: 6790b57cec5SDimitry Andric case ISD::SELECT: 6800b57cec5SDimitry Andric case ISD::FDIV: 6810b57cec5SDimitry Andric case ISD::FREM: 6820b57cec5SDimitry Andric case ISD::INLINEASM: 6830b57cec5SDimitry Andric case ISD::INLINEASM_BR: 6840b57cec5SDimitry Andric case AMDGPUISD::DIV_SCALE: 6858bcb0991SDimitry Andric case ISD::INTRINSIC_W_CHAIN: 6860b57cec5SDimitry Andric 6870b57cec5SDimitry Andric // TODO: Should really be looking at the users of the bitcast. These are 6880b57cec5SDimitry Andric // problematic because bitcasts are used to legalize all stores to integer 6890b57cec5SDimitry Andric // types. 6900b57cec5SDimitry Andric case ISD::BITCAST: 6910b57cec5SDimitry Andric return false; 6928bcb0991SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 6938bcb0991SDimitry Andric switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) { 6948bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p1: 6958bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p2: 6968bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_mov: 6978bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p1_f16: 6988bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p2_f16: 6998bcb0991SDimitry Andric return false; 7008bcb0991SDimitry Andric default: 7018bcb0991SDimitry Andric return true; 7028bcb0991SDimitry Andric } 7038bcb0991SDimitry Andric } 7040b57cec5SDimitry Andric default: 7050b57cec5SDimitry Andric return true; 7060b57cec5SDimitry Andric } 7070b57cec5SDimitry Andric } 7080b57cec5SDimitry Andric 7090b57cec5SDimitry Andric bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N, 7100b57cec5SDimitry Andric unsigned CostThreshold) { 7110b57cec5SDimitry Andric // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus 7120b57cec5SDimitry Andric // it is truly free to use a source modifier in all cases. If there are 7130b57cec5SDimitry Andric // multiple users but for each one will necessitate using VOP3, there will be 7140b57cec5SDimitry Andric // a code size increase. Try to avoid increasing code size unless we know it 7150b57cec5SDimitry Andric // will save on the instruction count. 7160b57cec5SDimitry Andric unsigned NumMayIncreaseSize = 0; 7170b57cec5SDimitry Andric MVT VT = N->getValueType(0).getScalarType().getSimpleVT(); 7180b57cec5SDimitry Andric 7190b57cec5SDimitry Andric // XXX - Should this limit number of uses to check? 7200b57cec5SDimitry Andric for (const SDNode *U : N->uses()) { 7210b57cec5SDimitry Andric if (!hasSourceMods(U)) 7220b57cec5SDimitry Andric return false; 7230b57cec5SDimitry Andric 7240b57cec5SDimitry Andric if (!opMustUseVOP3Encoding(U, VT)) { 7250b57cec5SDimitry Andric if (++NumMayIncreaseSize > CostThreshold) 7260b57cec5SDimitry Andric return false; 7270b57cec5SDimitry Andric } 7280b57cec5SDimitry Andric } 7290b57cec5SDimitry Andric 7300b57cec5SDimitry Andric return true; 7310b57cec5SDimitry Andric } 7320b57cec5SDimitry Andric 7335ffd83dbSDimitry Andric EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, 7345ffd83dbSDimitry Andric ISD::NodeType ExtendKind) const { 7355ffd83dbSDimitry Andric assert(!VT.isVector() && "only scalar expected"); 7365ffd83dbSDimitry Andric 7375ffd83dbSDimitry Andric // Round to the next multiple of 32-bits. 7385ffd83dbSDimitry Andric unsigned Size = VT.getSizeInBits(); 7395ffd83dbSDimitry Andric if (Size <= 32) 7405ffd83dbSDimitry Andric return MVT::i32; 7415ffd83dbSDimitry Andric return EVT::getIntegerVT(Context, 32 * ((Size + 31) / 32)); 7425ffd83dbSDimitry Andric } 7435ffd83dbSDimitry Andric 7440b57cec5SDimitry Andric MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const { 7450b57cec5SDimitry Andric return MVT::i32; 7460b57cec5SDimitry Andric } 7470b57cec5SDimitry Andric 7480b57cec5SDimitry Andric bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const { 7490b57cec5SDimitry Andric return true; 7500b57cec5SDimitry Andric } 7510b57cec5SDimitry Andric 7520b57cec5SDimitry Andric // The backend supports 32 and 64 bit floating point immediates. 7530b57cec5SDimitry Andric // FIXME: Why are we reporting vectors of FP immediates as legal? 7540b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 7550b57cec5SDimitry Andric bool ForCodeSize) const { 7560b57cec5SDimitry Andric EVT ScalarVT = VT.getScalarType(); 7570b57cec5SDimitry Andric return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 || 7580b57cec5SDimitry Andric (ScalarVT == MVT::f16 && Subtarget->has16BitInsts())); 7590b57cec5SDimitry Andric } 7600b57cec5SDimitry Andric 7610b57cec5SDimitry Andric // We don't want to shrink f64 / f32 constants. 7620b57cec5SDimitry Andric bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const { 7630b57cec5SDimitry Andric EVT ScalarVT = VT.getScalarType(); 7640b57cec5SDimitry Andric return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64); 7650b57cec5SDimitry Andric } 7660b57cec5SDimitry Andric 7670b57cec5SDimitry Andric bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N, 7680b57cec5SDimitry Andric ISD::LoadExtType ExtTy, 7690b57cec5SDimitry Andric EVT NewVT) const { 7700b57cec5SDimitry Andric // TODO: This may be worth removing. Check regression tests for diffs. 7710b57cec5SDimitry Andric if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT)) 7720b57cec5SDimitry Andric return false; 7730b57cec5SDimitry Andric 7740b57cec5SDimitry Andric unsigned NewSize = NewVT.getStoreSizeInBits(); 7750b57cec5SDimitry Andric 7765ffd83dbSDimitry Andric // If we are reducing to a 32-bit load or a smaller multi-dword load, 7775ffd83dbSDimitry Andric // this is always better. 7785ffd83dbSDimitry Andric if (NewSize >= 32) 7790b57cec5SDimitry Andric return true; 7800b57cec5SDimitry Andric 7810b57cec5SDimitry Andric EVT OldVT = N->getValueType(0); 7820b57cec5SDimitry Andric unsigned OldSize = OldVT.getStoreSizeInBits(); 7830b57cec5SDimitry Andric 7840b57cec5SDimitry Andric MemSDNode *MN = cast<MemSDNode>(N); 7850b57cec5SDimitry Andric unsigned AS = MN->getAddressSpace(); 7860b57cec5SDimitry Andric // Do not shrink an aligned scalar load to sub-dword. 7870b57cec5SDimitry Andric // Scalar engine cannot do sub-dword loads. 7880b57cec5SDimitry Andric if (OldSize >= 32 && NewSize < 32 && MN->getAlignment() >= 4 && 7890b57cec5SDimitry Andric (AS == AMDGPUAS::CONSTANT_ADDRESS || 7900b57cec5SDimitry Andric AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || 7910b57cec5SDimitry Andric (isa<LoadSDNode>(N) && 7920b57cec5SDimitry Andric AS == AMDGPUAS::GLOBAL_ADDRESS && MN->isInvariant())) && 7930b57cec5SDimitry Andric AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand())) 7940b57cec5SDimitry Andric return false; 7950b57cec5SDimitry Andric 7960b57cec5SDimitry Andric // Don't produce extloads from sub 32-bit types. SI doesn't have scalar 7970b57cec5SDimitry Andric // extloads, so doing one requires using a buffer_load. In cases where we 7980b57cec5SDimitry Andric // still couldn't use a scalar load, using the wider load shouldn't really 7990b57cec5SDimitry Andric // hurt anything. 8000b57cec5SDimitry Andric 8010b57cec5SDimitry Andric // If the old size already had to be an extload, there's no harm in continuing 8020b57cec5SDimitry Andric // to reduce the width. 8030b57cec5SDimitry Andric return (OldSize < 32); 8040b57cec5SDimitry Andric } 8050b57cec5SDimitry Andric 8060b57cec5SDimitry Andric bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy, 8070b57cec5SDimitry Andric const SelectionDAG &DAG, 8080b57cec5SDimitry Andric const MachineMemOperand &MMO) const { 8090b57cec5SDimitry Andric 8100b57cec5SDimitry Andric assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits()); 8110b57cec5SDimitry Andric 8120b57cec5SDimitry Andric if (LoadTy.getScalarType() == MVT::i32) 8130b57cec5SDimitry Andric return false; 8140b57cec5SDimitry Andric 8150b57cec5SDimitry Andric unsigned LScalarSize = LoadTy.getScalarSizeInBits(); 8160b57cec5SDimitry Andric unsigned CastScalarSize = CastTy.getScalarSizeInBits(); 8170b57cec5SDimitry Andric 8180b57cec5SDimitry Andric if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32)) 8190b57cec5SDimitry Andric return false; 8200b57cec5SDimitry Andric 8210b57cec5SDimitry Andric bool Fast = false; 8228bcb0991SDimitry Andric return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 8238bcb0991SDimitry Andric CastTy, MMO, &Fast) && 8248bcb0991SDimitry Andric Fast; 8250b57cec5SDimitry Andric } 8260b57cec5SDimitry Andric 8270b57cec5SDimitry Andric // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also 8280b57cec5SDimitry Andric // profitable with the expansion for 64-bit since it's generally good to 8290b57cec5SDimitry Andric // speculate things. 8300b57cec5SDimitry Andric // FIXME: These should really have the size as a parameter. 8310b57cec5SDimitry Andric bool AMDGPUTargetLowering::isCheapToSpeculateCttz() const { 8320b57cec5SDimitry Andric return true; 8330b57cec5SDimitry Andric } 8340b57cec5SDimitry Andric 8350b57cec5SDimitry Andric bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const { 8360b57cec5SDimitry Andric return true; 8370b57cec5SDimitry Andric } 8380b57cec5SDimitry Andric 8390b57cec5SDimitry Andric bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const { 8400b57cec5SDimitry Andric switch (N->getOpcode()) { 8410b57cec5SDimitry Andric case ISD::EntryToken: 8420b57cec5SDimitry Andric case ISD::TokenFactor: 8430b57cec5SDimitry Andric return true; 844e8d8bef9SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 8450b57cec5SDimitry Andric unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 8460b57cec5SDimitry Andric switch (IntrID) { 8470b57cec5SDimitry Andric case Intrinsic::amdgcn_readfirstlane: 8480b57cec5SDimitry Andric case Intrinsic::amdgcn_readlane: 8490b57cec5SDimitry Andric return true; 8500b57cec5SDimitry Andric } 851e8d8bef9SDimitry Andric return false; 8520b57cec5SDimitry Andric } 8530b57cec5SDimitry Andric case ISD::LOAD: 8548bcb0991SDimitry Andric if (cast<LoadSDNode>(N)->getMemOperand()->getAddrSpace() == 8558bcb0991SDimitry Andric AMDGPUAS::CONSTANT_ADDRESS_32BIT) 8560b57cec5SDimitry Andric return true; 8570b57cec5SDimitry Andric return false; 8580b57cec5SDimitry Andric } 859e8d8bef9SDimitry Andric return false; 8600b57cec5SDimitry Andric } 8610b57cec5SDimitry Andric 8625ffd83dbSDimitry Andric SDValue AMDGPUTargetLowering::getNegatedExpression( 8635ffd83dbSDimitry Andric SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, 8645ffd83dbSDimitry Andric NegatibleCost &Cost, unsigned Depth) const { 8655ffd83dbSDimitry Andric 8665ffd83dbSDimitry Andric switch (Op.getOpcode()) { 8675ffd83dbSDimitry Andric case ISD::FMA: 8685ffd83dbSDimitry Andric case ISD::FMAD: { 8695ffd83dbSDimitry Andric // Negating a fma is not free if it has users without source mods. 8705ffd83dbSDimitry Andric if (!allUsesHaveSourceMods(Op.getNode())) 8715ffd83dbSDimitry Andric return SDValue(); 8725ffd83dbSDimitry Andric break; 8735ffd83dbSDimitry Andric } 8745ffd83dbSDimitry Andric default: 8755ffd83dbSDimitry Andric break; 8765ffd83dbSDimitry Andric } 8775ffd83dbSDimitry Andric 8785ffd83dbSDimitry Andric return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations, 8795ffd83dbSDimitry Andric ForCodeSize, Cost, Depth); 8805ffd83dbSDimitry Andric } 8815ffd83dbSDimitry Andric 8820b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 8830b57cec5SDimitry Andric // Target Properties 8840b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 8850b57cec5SDimitry Andric 8860b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { 8870b57cec5SDimitry Andric assert(VT.isFloatingPoint()); 8880b57cec5SDimitry Andric 8890b57cec5SDimitry Andric // Packed operations do not have a fabs modifier. 8900b57cec5SDimitry Andric return VT == MVT::f32 || VT == MVT::f64 || 8910b57cec5SDimitry Andric (Subtarget->has16BitInsts() && VT == MVT::f16); 8920b57cec5SDimitry Andric } 8930b57cec5SDimitry Andric 8940b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { 8950b57cec5SDimitry Andric assert(VT.isFloatingPoint()); 896fe6060f1SDimitry Andric // Report this based on the end legalized type. 897fe6060f1SDimitry Andric VT = VT.getScalarType(); 898fe6060f1SDimitry Andric return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16; 8990b57cec5SDimitry Andric } 9000b57cec5SDimitry Andric 9010b57cec5SDimitry Andric bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT, 9020b57cec5SDimitry Andric unsigned NumElem, 9030b57cec5SDimitry Andric unsigned AS) const { 9040b57cec5SDimitry Andric return true; 9050b57cec5SDimitry Andric } 9060b57cec5SDimitry Andric 9070b57cec5SDimitry Andric bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const { 9080b57cec5SDimitry Andric // There are few operations which truly have vector input operands. Any vector 9090b57cec5SDimitry Andric // operation is going to involve operations on each component, and a 9100b57cec5SDimitry Andric // build_vector will be a copy per element, so it always makes sense to use a 9110b57cec5SDimitry Andric // build_vector input in place of the extracted element to avoid a copy into a 9120b57cec5SDimitry Andric // super register. 9130b57cec5SDimitry Andric // 9140b57cec5SDimitry Andric // We should probably only do this if all users are extracts only, but this 9150b57cec5SDimitry Andric // should be the common case. 9160b57cec5SDimitry Andric return true; 9170b57cec5SDimitry Andric } 9180b57cec5SDimitry Andric 9190b57cec5SDimitry Andric bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const { 9200b57cec5SDimitry Andric // Truncate is just accessing a subregister. 9210b57cec5SDimitry Andric 9220b57cec5SDimitry Andric unsigned SrcSize = Source.getSizeInBits(); 9230b57cec5SDimitry Andric unsigned DestSize = Dest.getSizeInBits(); 9240b57cec5SDimitry Andric 9250b57cec5SDimitry Andric return DestSize < SrcSize && DestSize % 32 == 0 ; 9260b57cec5SDimitry Andric } 9270b57cec5SDimitry Andric 9280b57cec5SDimitry Andric bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const { 9290b57cec5SDimitry Andric // Truncate is just accessing a subregister. 9300b57cec5SDimitry Andric 9310b57cec5SDimitry Andric unsigned SrcSize = Source->getScalarSizeInBits(); 9320b57cec5SDimitry Andric unsigned DestSize = Dest->getScalarSizeInBits(); 9330b57cec5SDimitry Andric 9340b57cec5SDimitry Andric if (DestSize== 16 && Subtarget->has16BitInsts()) 9350b57cec5SDimitry Andric return SrcSize >= 32; 9360b57cec5SDimitry Andric 9370b57cec5SDimitry Andric return DestSize < SrcSize && DestSize % 32 == 0; 9380b57cec5SDimitry Andric } 9390b57cec5SDimitry Andric 9400b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const { 9410b57cec5SDimitry Andric unsigned SrcSize = Src->getScalarSizeInBits(); 9420b57cec5SDimitry Andric unsigned DestSize = Dest->getScalarSizeInBits(); 9430b57cec5SDimitry Andric 9440b57cec5SDimitry Andric if (SrcSize == 16 && Subtarget->has16BitInsts()) 9450b57cec5SDimitry Andric return DestSize >= 32; 9460b57cec5SDimitry Andric 9470b57cec5SDimitry Andric return SrcSize == 32 && DestSize == 64; 9480b57cec5SDimitry Andric } 9490b57cec5SDimitry Andric 9500b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const { 9510b57cec5SDimitry Andric // Any register load of a 64-bit value really requires 2 32-bit moves. For all 9520b57cec5SDimitry Andric // practical purposes, the extra mov 0 to load a 64-bit is free. As used, 9530b57cec5SDimitry Andric // this will enable reducing 64-bit operations the 32-bit, which is always 9540b57cec5SDimitry Andric // good. 9550b57cec5SDimitry Andric 9560b57cec5SDimitry Andric if (Src == MVT::i16) 9570b57cec5SDimitry Andric return Dest == MVT::i32 ||Dest == MVT::i64 ; 9580b57cec5SDimitry Andric 9590b57cec5SDimitry Andric return Src == MVT::i32 && Dest == MVT::i64; 9600b57cec5SDimitry Andric } 9610b57cec5SDimitry Andric 9620b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 9630b57cec5SDimitry Andric return isZExtFree(Val.getValueType(), VT2); 9640b57cec5SDimitry Andric } 9650b57cec5SDimitry Andric 9660b57cec5SDimitry Andric bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const { 9670b57cec5SDimitry Andric // There aren't really 64-bit registers, but pairs of 32-bit ones and only a 9680b57cec5SDimitry Andric // limited number of native 64-bit operations. Shrinking an operation to fit 9690b57cec5SDimitry Andric // in a single 32-bit register should always be helpful. As currently used, 9700b57cec5SDimitry Andric // this is much less general than the name suggests, and is only used in 9710b57cec5SDimitry Andric // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is 9720b57cec5SDimitry Andric // not profitable, and may actually be harmful. 9730b57cec5SDimitry Andric return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32; 9740b57cec5SDimitry Andric } 9750b57cec5SDimitry Andric 9760b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 9770b57cec5SDimitry Andric // TargetLowering Callbacks 9780b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 9790b57cec5SDimitry Andric 9800b57cec5SDimitry Andric CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC, 9810b57cec5SDimitry Andric bool IsVarArg) { 9820b57cec5SDimitry Andric switch (CC) { 9830b57cec5SDimitry Andric case CallingConv::AMDGPU_VS: 9840b57cec5SDimitry Andric case CallingConv::AMDGPU_GS: 9850b57cec5SDimitry Andric case CallingConv::AMDGPU_PS: 9860b57cec5SDimitry Andric case CallingConv::AMDGPU_CS: 9870b57cec5SDimitry Andric case CallingConv::AMDGPU_HS: 9880b57cec5SDimitry Andric case CallingConv::AMDGPU_ES: 9890b57cec5SDimitry Andric case CallingConv::AMDGPU_LS: 9900b57cec5SDimitry Andric return CC_AMDGPU; 9910b57cec5SDimitry Andric case CallingConv::C: 9920b57cec5SDimitry Andric case CallingConv::Fast: 9930b57cec5SDimitry Andric case CallingConv::Cold: 9940b57cec5SDimitry Andric return CC_AMDGPU_Func; 995e8d8bef9SDimitry Andric case CallingConv::AMDGPU_Gfx: 996e8d8bef9SDimitry Andric return CC_SI_Gfx; 9970b57cec5SDimitry Andric case CallingConv::AMDGPU_KERNEL: 9980b57cec5SDimitry Andric case CallingConv::SPIR_KERNEL: 9990b57cec5SDimitry Andric default: 10000b57cec5SDimitry Andric report_fatal_error("Unsupported calling convention for call"); 10010b57cec5SDimitry Andric } 10020b57cec5SDimitry Andric } 10030b57cec5SDimitry Andric 10040b57cec5SDimitry Andric CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC, 10050b57cec5SDimitry Andric bool IsVarArg) { 10060b57cec5SDimitry Andric switch (CC) { 10070b57cec5SDimitry Andric case CallingConv::AMDGPU_KERNEL: 10080b57cec5SDimitry Andric case CallingConv::SPIR_KERNEL: 10090b57cec5SDimitry Andric llvm_unreachable("kernels should not be handled here"); 10100b57cec5SDimitry Andric case CallingConv::AMDGPU_VS: 10110b57cec5SDimitry Andric case CallingConv::AMDGPU_GS: 10120b57cec5SDimitry Andric case CallingConv::AMDGPU_PS: 10130b57cec5SDimitry Andric case CallingConv::AMDGPU_CS: 10140b57cec5SDimitry Andric case CallingConv::AMDGPU_HS: 10150b57cec5SDimitry Andric case CallingConv::AMDGPU_ES: 10160b57cec5SDimitry Andric case CallingConv::AMDGPU_LS: 10170b57cec5SDimitry Andric return RetCC_SI_Shader; 1018e8d8bef9SDimitry Andric case CallingConv::AMDGPU_Gfx: 1019e8d8bef9SDimitry Andric return RetCC_SI_Gfx; 10200b57cec5SDimitry Andric case CallingConv::C: 10210b57cec5SDimitry Andric case CallingConv::Fast: 10220b57cec5SDimitry Andric case CallingConv::Cold: 10230b57cec5SDimitry Andric return RetCC_AMDGPU_Func; 10240b57cec5SDimitry Andric default: 10250b57cec5SDimitry Andric report_fatal_error("Unsupported calling convention."); 10260b57cec5SDimitry Andric } 10270b57cec5SDimitry Andric } 10280b57cec5SDimitry Andric 10290b57cec5SDimitry Andric /// The SelectionDAGBuilder will automatically promote function arguments 10300b57cec5SDimitry Andric /// with illegal types. However, this does not work for the AMDGPU targets 10310b57cec5SDimitry Andric /// since the function arguments are stored in memory as these illegal types. 10320b57cec5SDimitry Andric /// In order to handle this properly we need to get the original types sizes 10330b57cec5SDimitry Andric /// from the LLVM IR Function and fixup the ISD:InputArg values before 10340b57cec5SDimitry Andric /// passing them to AnalyzeFormalArguments() 10350b57cec5SDimitry Andric 10360b57cec5SDimitry Andric /// When the SelectionDAGBuilder computes the Ins, it takes care of splitting 10370b57cec5SDimitry Andric /// input values across multiple registers. Each item in the Ins array 10380b57cec5SDimitry Andric /// represents a single value that will be stored in registers. Ins[x].VT is 10390b57cec5SDimitry Andric /// the value type of the value that will be stored in the register, so 10400b57cec5SDimitry Andric /// whatever SDNode we lower the argument to needs to be this type. 10410b57cec5SDimitry Andric /// 10420b57cec5SDimitry Andric /// In order to correctly lower the arguments we need to know the size of each 10430b57cec5SDimitry Andric /// argument. Since Ins[x].VT gives us the size of the register that will 10440b57cec5SDimitry Andric /// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type 1045349cc55cSDimitry Andric /// for the original function argument so that we can deduce the correct memory 10460b57cec5SDimitry Andric /// type to use for Ins[x]. In most cases the correct memory type will be 10470b57cec5SDimitry Andric /// Ins[x].ArgVT. However, this will not always be the case. If, for example, 10480b57cec5SDimitry Andric /// we have a kernel argument of type v8i8, this argument will be split into 10490b57cec5SDimitry Andric /// 8 parts and each part will be represented by its own item in the Ins array. 10500b57cec5SDimitry Andric /// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of 10510b57cec5SDimitry Andric /// the argument before it was split. From this, we deduce that the memory type 10520b57cec5SDimitry Andric /// for each individual part is i8. We pass the memory type as LocVT to the 10530b57cec5SDimitry Andric /// calling convention analysis function and the register type (Ins[x].VT) as 10540b57cec5SDimitry Andric /// the ValVT. 10550b57cec5SDimitry Andric void AMDGPUTargetLowering::analyzeFormalArgumentsCompute( 10560b57cec5SDimitry Andric CCState &State, 10570b57cec5SDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins) const { 10580b57cec5SDimitry Andric const MachineFunction &MF = State.getMachineFunction(); 10590b57cec5SDimitry Andric const Function &Fn = MF.getFunction(); 10600b57cec5SDimitry Andric LLVMContext &Ctx = Fn.getParent()->getContext(); 10610b57cec5SDimitry Andric const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); 10620b57cec5SDimitry Andric const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn); 10630b57cec5SDimitry Andric CallingConv::ID CC = Fn.getCallingConv(); 10640b57cec5SDimitry Andric 10655ffd83dbSDimitry Andric Align MaxAlign = Align(1); 10660b57cec5SDimitry Andric uint64_t ExplicitArgOffset = 0; 10670b57cec5SDimitry Andric const DataLayout &DL = Fn.getParent()->getDataLayout(); 10680b57cec5SDimitry Andric 10690b57cec5SDimitry Andric unsigned InIndex = 0; 10700b57cec5SDimitry Andric 10710b57cec5SDimitry Andric for (const Argument &Arg : Fn.args()) { 1072e8d8bef9SDimitry Andric const bool IsByRef = Arg.hasByRefAttr(); 10730b57cec5SDimitry Andric Type *BaseArgTy = Arg.getType(); 1074e8d8bef9SDimitry Andric Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy; 1075e8d8bef9SDimitry Andric MaybeAlign Alignment = IsByRef ? Arg.getParamAlign() : None; 1076e8d8bef9SDimitry Andric if (!Alignment) 1077e8d8bef9SDimitry Andric Alignment = DL.getABITypeAlign(MemArgTy); 1078e8d8bef9SDimitry Andric MaxAlign = max(Alignment, MaxAlign); 1079e8d8bef9SDimitry Andric uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy); 10800b57cec5SDimitry Andric 10815ffd83dbSDimitry Andric uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset; 10825ffd83dbSDimitry Andric ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize; 10830b57cec5SDimitry Andric 10840b57cec5SDimitry Andric // We're basically throwing away everything passed into us and starting over 10850b57cec5SDimitry Andric // to get accurate in-memory offsets. The "PartOffset" is completely useless 10860b57cec5SDimitry Andric // to us as computed in Ins. 10870b57cec5SDimitry Andric // 10880b57cec5SDimitry Andric // We also need to figure out what type legalization is trying to do to get 10890b57cec5SDimitry Andric // the correct memory offsets. 10900b57cec5SDimitry Andric 10910b57cec5SDimitry Andric SmallVector<EVT, 16> ValueVTs; 10920b57cec5SDimitry Andric SmallVector<uint64_t, 16> Offsets; 10930b57cec5SDimitry Andric ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset); 10940b57cec5SDimitry Andric 10950b57cec5SDimitry Andric for (unsigned Value = 0, NumValues = ValueVTs.size(); 10960b57cec5SDimitry Andric Value != NumValues; ++Value) { 10970b57cec5SDimitry Andric uint64_t BasePartOffset = Offsets[Value]; 10980b57cec5SDimitry Andric 10990b57cec5SDimitry Andric EVT ArgVT = ValueVTs[Value]; 11000b57cec5SDimitry Andric EVT MemVT = ArgVT; 11010b57cec5SDimitry Andric MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT); 11020b57cec5SDimitry Andric unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT); 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric if (NumRegs == 1) { 11050b57cec5SDimitry Andric // This argument is not split, so the IR type is the memory type. 11060b57cec5SDimitry Andric if (ArgVT.isExtended()) { 11070b57cec5SDimitry Andric // We have an extended type, like i24, so we should just use the 11080b57cec5SDimitry Andric // register type. 11090b57cec5SDimitry Andric MemVT = RegisterVT; 11100b57cec5SDimitry Andric } else { 11110b57cec5SDimitry Andric MemVT = ArgVT; 11120b57cec5SDimitry Andric } 11130b57cec5SDimitry Andric } else if (ArgVT.isVector() && RegisterVT.isVector() && 11140b57cec5SDimitry Andric ArgVT.getScalarType() == RegisterVT.getScalarType()) { 11150b57cec5SDimitry Andric assert(ArgVT.getVectorNumElements() > RegisterVT.getVectorNumElements()); 11160b57cec5SDimitry Andric // We have a vector value which has been split into a vector with 11170b57cec5SDimitry Andric // the same scalar type, but fewer elements. This should handle 11180b57cec5SDimitry Andric // all the floating-point vector types. 11190b57cec5SDimitry Andric MemVT = RegisterVT; 11200b57cec5SDimitry Andric } else if (ArgVT.isVector() && 11210b57cec5SDimitry Andric ArgVT.getVectorNumElements() == NumRegs) { 11220b57cec5SDimitry Andric // This arg has been split so that each element is stored in a separate 11230b57cec5SDimitry Andric // register. 11240b57cec5SDimitry Andric MemVT = ArgVT.getScalarType(); 11250b57cec5SDimitry Andric } else if (ArgVT.isExtended()) { 11260b57cec5SDimitry Andric // We have an extended type, like i65. 11270b57cec5SDimitry Andric MemVT = RegisterVT; 11280b57cec5SDimitry Andric } else { 11290b57cec5SDimitry Andric unsigned MemoryBits = ArgVT.getStoreSizeInBits() / NumRegs; 11300b57cec5SDimitry Andric assert(ArgVT.getStoreSizeInBits() % NumRegs == 0); 11310b57cec5SDimitry Andric if (RegisterVT.isInteger()) { 11320b57cec5SDimitry Andric MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits); 11330b57cec5SDimitry Andric } else if (RegisterVT.isVector()) { 11340b57cec5SDimitry Andric assert(!RegisterVT.getScalarType().isFloatingPoint()); 11350b57cec5SDimitry Andric unsigned NumElements = RegisterVT.getVectorNumElements(); 11360b57cec5SDimitry Andric assert(MemoryBits % NumElements == 0); 11370b57cec5SDimitry Andric // This vector type has been split into another vector type with 11380b57cec5SDimitry Andric // a different elements size. 11390b57cec5SDimitry Andric EVT ScalarVT = EVT::getIntegerVT(State.getContext(), 11400b57cec5SDimitry Andric MemoryBits / NumElements); 11410b57cec5SDimitry Andric MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements); 11420b57cec5SDimitry Andric } else { 11430b57cec5SDimitry Andric llvm_unreachable("cannot deduce memory type."); 11440b57cec5SDimitry Andric } 11450b57cec5SDimitry Andric } 11460b57cec5SDimitry Andric 11470b57cec5SDimitry Andric // Convert one element vectors to scalar. 11480b57cec5SDimitry Andric if (MemVT.isVector() && MemVT.getVectorNumElements() == 1) 11490b57cec5SDimitry Andric MemVT = MemVT.getScalarType(); 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric // Round up vec3/vec5 argument. 11520b57cec5SDimitry Andric if (MemVT.isVector() && !MemVT.isPow2VectorType()) { 11530b57cec5SDimitry Andric assert(MemVT.getVectorNumElements() == 3 || 11540b57cec5SDimitry Andric MemVT.getVectorNumElements() == 5); 11550b57cec5SDimitry Andric MemVT = MemVT.getPow2VectorType(State.getContext()); 11565ffd83dbSDimitry Andric } else if (!MemVT.isSimple() && !MemVT.isVector()) { 11575ffd83dbSDimitry Andric MemVT = MemVT.getRoundIntegerType(State.getContext()); 11580b57cec5SDimitry Andric } 11590b57cec5SDimitry Andric 11600b57cec5SDimitry Andric unsigned PartOffset = 0; 11610b57cec5SDimitry Andric for (unsigned i = 0; i != NumRegs; ++i) { 11620b57cec5SDimitry Andric State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT, 11630b57cec5SDimitry Andric BasePartOffset + PartOffset, 11640b57cec5SDimitry Andric MemVT.getSimpleVT(), 11650b57cec5SDimitry Andric CCValAssign::Full)); 11660b57cec5SDimitry Andric PartOffset += MemVT.getStoreSize(); 11670b57cec5SDimitry Andric } 11680b57cec5SDimitry Andric } 11690b57cec5SDimitry Andric } 11700b57cec5SDimitry Andric } 11710b57cec5SDimitry Andric 11720b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerReturn( 11730b57cec5SDimitry Andric SDValue Chain, CallingConv::ID CallConv, 11740b57cec5SDimitry Andric bool isVarArg, 11750b57cec5SDimitry Andric const SmallVectorImpl<ISD::OutputArg> &Outs, 11760b57cec5SDimitry Andric const SmallVectorImpl<SDValue> &OutVals, 11770b57cec5SDimitry Andric const SDLoc &DL, SelectionDAG &DAG) const { 11780b57cec5SDimitry Andric // FIXME: Fails for r600 tests 11790b57cec5SDimitry Andric //assert(!isVarArg && Outs.empty() && OutVals.empty() && 11800b57cec5SDimitry Andric // "wave terminate should not have return values"); 11810b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain); 11820b57cec5SDimitry Andric } 11830b57cec5SDimitry Andric 11840b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 11850b57cec5SDimitry Andric // Target specific lowering 11860b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 11870b57cec5SDimitry Andric 11880b57cec5SDimitry Andric /// Selects the correct CCAssignFn for a given CallingConvention value. 11890b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC, 11900b57cec5SDimitry Andric bool IsVarArg) { 11910b57cec5SDimitry Andric return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg); 11920b57cec5SDimitry Andric } 11930b57cec5SDimitry Andric 11940b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, 11950b57cec5SDimitry Andric bool IsVarArg) { 11960b57cec5SDimitry Andric return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg); 11970b57cec5SDimitry Andric } 11980b57cec5SDimitry Andric 11990b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain, 12000b57cec5SDimitry Andric SelectionDAG &DAG, 12010b57cec5SDimitry Andric MachineFrameInfo &MFI, 12020b57cec5SDimitry Andric int ClobberedFI) const { 12030b57cec5SDimitry Andric SmallVector<SDValue, 8> ArgChains; 12040b57cec5SDimitry Andric int64_t FirstByte = MFI.getObjectOffset(ClobberedFI); 12050b57cec5SDimitry Andric int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1; 12060b57cec5SDimitry Andric 12070b57cec5SDimitry Andric // Include the original chain at the beginning of the list. When this is 12080b57cec5SDimitry Andric // used by target LowerCall hooks, this helps legalize find the 12090b57cec5SDimitry Andric // CALLSEQ_BEGIN node. 12100b57cec5SDimitry Andric ArgChains.push_back(Chain); 12110b57cec5SDimitry Andric 12120b57cec5SDimitry Andric // Add a chain value for each stack argument corresponding 1213349cc55cSDimitry Andric for (SDNode *U : DAG.getEntryNode().getNode()->uses()) { 1214349cc55cSDimitry Andric if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) { 12150b57cec5SDimitry Andric if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) { 12160b57cec5SDimitry Andric if (FI->getIndex() < 0) { 12170b57cec5SDimitry Andric int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex()); 12180b57cec5SDimitry Andric int64_t InLastByte = InFirstByte; 12190b57cec5SDimitry Andric InLastByte += MFI.getObjectSize(FI->getIndex()) - 1; 12200b57cec5SDimitry Andric 12210b57cec5SDimitry Andric if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || 12220b57cec5SDimitry Andric (FirstByte <= InFirstByte && InFirstByte <= LastByte)) 12230b57cec5SDimitry Andric ArgChains.push_back(SDValue(L, 1)); 12240b57cec5SDimitry Andric } 12250b57cec5SDimitry Andric } 12260b57cec5SDimitry Andric } 12270b57cec5SDimitry Andric } 12280b57cec5SDimitry Andric 12290b57cec5SDimitry Andric // Build a tokenfactor for all the chains. 12300b57cec5SDimitry Andric return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); 12310b57cec5SDimitry Andric } 12320b57cec5SDimitry Andric 12330b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI, 12340b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals, 12350b57cec5SDimitry Andric StringRef Reason) const { 12360b57cec5SDimitry Andric SDValue Callee = CLI.Callee; 12370b57cec5SDimitry Andric SelectionDAG &DAG = CLI.DAG; 12380b57cec5SDimitry Andric 12390b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction(); 12400b57cec5SDimitry Andric 12410b57cec5SDimitry Andric StringRef FuncName("<unknown>"); 12420b57cec5SDimitry Andric 12430b57cec5SDimitry Andric if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee)) 12440b57cec5SDimitry Andric FuncName = G->getSymbol(); 12450b57cec5SDimitry Andric else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 12460b57cec5SDimitry Andric FuncName = G->getGlobal()->getName(); 12470b57cec5SDimitry Andric 12480b57cec5SDimitry Andric DiagnosticInfoUnsupported NoCalls( 12490b57cec5SDimitry Andric Fn, Reason + FuncName, CLI.DL.getDebugLoc()); 12500b57cec5SDimitry Andric DAG.getContext()->diagnose(NoCalls); 12510b57cec5SDimitry Andric 12520b57cec5SDimitry Andric if (!CLI.IsTailCall) { 12530b57cec5SDimitry Andric for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I) 12540b57cec5SDimitry Andric InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT)); 12550b57cec5SDimitry Andric } 12560b57cec5SDimitry Andric 12570b57cec5SDimitry Andric return DAG.getEntryNode(); 12580b57cec5SDimitry Andric } 12590b57cec5SDimitry Andric 12600b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, 12610b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals) const { 12620b57cec5SDimitry Andric return lowerUnhandledCall(CLI, InVals, "unsupported call to function "); 12630b57cec5SDimitry Andric } 12640b57cec5SDimitry Andric 12650b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 12660b57cec5SDimitry Andric SelectionDAG &DAG) const { 12670b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction(); 12680b57cec5SDimitry Andric 12690b57cec5SDimitry Andric DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca", 12700b57cec5SDimitry Andric SDLoc(Op).getDebugLoc()); 12710b57cec5SDimitry Andric DAG.getContext()->diagnose(NoDynamicAlloca); 12720b57cec5SDimitry Andric auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)}; 12730b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SDLoc()); 12740b57cec5SDimitry Andric } 12750b57cec5SDimitry Andric 12760b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, 12770b57cec5SDimitry Andric SelectionDAG &DAG) const { 12780b57cec5SDimitry Andric switch (Op.getOpcode()) { 12790b57cec5SDimitry Andric default: 12800b57cec5SDimitry Andric Op->print(errs(), &DAG); 12810b57cec5SDimitry Andric llvm_unreachable("Custom lowering code for this " 12820b57cec5SDimitry Andric "instruction is not implemented yet!"); 12830b57cec5SDimitry Andric break; 12840b57cec5SDimitry Andric case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 12850b57cec5SDimitry Andric case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 12860b57cec5SDimitry Andric case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); 12870b57cec5SDimitry Andric case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 12880b57cec5SDimitry Andric case ISD::SDIVREM: return LowerSDIVREM(Op, DAG); 12890b57cec5SDimitry Andric case ISD::FREM: return LowerFREM(Op, DAG); 12900b57cec5SDimitry Andric case ISD::FCEIL: return LowerFCEIL(Op, DAG); 12910b57cec5SDimitry Andric case ISD::FTRUNC: return LowerFTRUNC(Op, DAG); 12920b57cec5SDimitry Andric case ISD::FRINT: return LowerFRINT(Op, DAG); 12930b57cec5SDimitry Andric case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG); 12940b57cec5SDimitry Andric case ISD::FROUND: return LowerFROUND(Op, DAG); 12950b57cec5SDimitry Andric case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); 12960b57cec5SDimitry Andric case ISD::FLOG: 12975ffd83dbSDimitry Andric return LowerFLOG(Op, DAG, numbers::ln2f); 12980b57cec5SDimitry Andric case ISD::FLOG10: 12998bcb0991SDimitry Andric return LowerFLOG(Op, DAG, numbers::ln2f / numbers::ln10f); 13000b57cec5SDimitry Andric case ISD::FEXP: 13010b57cec5SDimitry Andric return lowerFEXP(Op, DAG); 13020b57cec5SDimitry Andric case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 13030b57cec5SDimitry Andric case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); 13040b57cec5SDimitry Andric case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG); 1305fe6060f1SDimitry Andric case ISD::FP_TO_SINT: 1306fe6060f1SDimitry Andric case ISD::FP_TO_UINT: 1307fe6060f1SDimitry Andric return LowerFP_TO_INT(Op, DAG); 13080b57cec5SDimitry Andric case ISD::CTTZ: 13090b57cec5SDimitry Andric case ISD::CTTZ_ZERO_UNDEF: 13100b57cec5SDimitry Andric case ISD::CTLZ: 13110b57cec5SDimitry Andric case ISD::CTLZ_ZERO_UNDEF: 13120b57cec5SDimitry Andric return LowerCTLZ_CTTZ(Op, DAG); 13130b57cec5SDimitry Andric case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 13140b57cec5SDimitry Andric } 13150b57cec5SDimitry Andric return Op; 13160b57cec5SDimitry Andric } 13170b57cec5SDimitry Andric 13180b57cec5SDimitry Andric void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N, 13190b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results, 13200b57cec5SDimitry Andric SelectionDAG &DAG) const { 13210b57cec5SDimitry Andric switch (N->getOpcode()) { 13220b57cec5SDimitry Andric case ISD::SIGN_EXTEND_INREG: 13230b57cec5SDimitry Andric // Different parts of legalization seem to interpret which type of 13240b57cec5SDimitry Andric // sign_extend_inreg is the one to check for custom lowering. The extended 13250b57cec5SDimitry Andric // from type is what really matters, but some places check for custom 13260b57cec5SDimitry Andric // lowering of the result type. This results in trying to use 13270b57cec5SDimitry Andric // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do 13280b57cec5SDimitry Andric // nothing here and let the illegal result integer be handled normally. 13290b57cec5SDimitry Andric return; 13300b57cec5SDimitry Andric default: 13310b57cec5SDimitry Andric return; 13320b57cec5SDimitry Andric } 13330b57cec5SDimitry Andric } 13340b57cec5SDimitry Andric 13350b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, 13360b57cec5SDimitry Andric SDValue Op, 13370b57cec5SDimitry Andric SelectionDAG &DAG) const { 13380b57cec5SDimitry Andric 13390b57cec5SDimitry Andric const DataLayout &DL = DAG.getDataLayout(); 13400b57cec5SDimitry Andric GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); 13410b57cec5SDimitry Andric const GlobalValue *GV = G->getGlobal(); 13420b57cec5SDimitry Andric 13430b57cec5SDimitry Andric if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || 13440b57cec5SDimitry Andric G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) { 1345fe6060f1SDimitry Andric if (!MFI->isModuleEntryFunction() && 1346fe6060f1SDimitry Andric !GV->getName().equals("llvm.amdgcn.module.lds")) { 13475ffd83dbSDimitry Andric SDLoc DL(Op); 13480b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction(); 13490b57cec5SDimitry Andric DiagnosticInfoUnsupported BadLDSDecl( 13505ffd83dbSDimitry Andric Fn, "local memory global used by non-kernel function", 13515ffd83dbSDimitry Andric DL.getDebugLoc(), DS_Warning); 13520b57cec5SDimitry Andric DAG.getContext()->diagnose(BadLDSDecl); 13535ffd83dbSDimitry Andric 13545ffd83dbSDimitry Andric // We currently don't have a way to correctly allocate LDS objects that 13555ffd83dbSDimitry Andric // aren't directly associated with a kernel. We do force inlining of 13565ffd83dbSDimitry Andric // functions that use local objects. However, if these dead functions are 13575ffd83dbSDimitry Andric // not eliminated, we don't want a compile time error. Just emit a warning 13585ffd83dbSDimitry Andric // and a trap, since there should be no callable path here. 13595ffd83dbSDimitry Andric SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode()); 13605ffd83dbSDimitry Andric SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 13615ffd83dbSDimitry Andric Trap, DAG.getRoot()); 13625ffd83dbSDimitry Andric DAG.setRoot(OutputChain); 13635ffd83dbSDimitry Andric return DAG.getUNDEF(Op.getValueType()); 13640b57cec5SDimitry Andric } 13650b57cec5SDimitry Andric 13660b57cec5SDimitry Andric // XXX: What does the value of G->getOffset() mean? 13670b57cec5SDimitry Andric assert(G->getOffset() == 0 && 13680b57cec5SDimitry Andric "Do not know what to do with an non-zero offset"); 13690b57cec5SDimitry Andric 13700b57cec5SDimitry Andric // TODO: We could emit code to handle the initialization somewhere. 1371349cc55cSDimitry Andric // We ignore the initializer for now and legalize it to allow selection. 1372349cc55cSDimitry Andric // The initializer will anyway get errored out during assembly emission. 13735ffd83dbSDimitry Andric unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV)); 13740b57cec5SDimitry Andric return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); 13750b57cec5SDimitry Andric } 13760b57cec5SDimitry Andric return SDValue(); 13770b57cec5SDimitry Andric } 13780b57cec5SDimitry Andric 13790b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, 13800b57cec5SDimitry Andric SelectionDAG &DAG) const { 13810b57cec5SDimitry Andric SmallVector<SDValue, 8> Args; 13820b57cec5SDimitry Andric 13830b57cec5SDimitry Andric EVT VT = Op.getValueType(); 13840b57cec5SDimitry Andric if (VT == MVT::v4i16 || VT == MVT::v4f16) { 13850b57cec5SDimitry Andric SDLoc SL(Op); 13860b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(0)); 13870b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(1)); 13880b57cec5SDimitry Andric 13890b57cec5SDimitry Andric SDValue BV = DAG.getBuildVector(MVT::v2i32, SL, { Lo, Hi }); 13900b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, VT, BV); 13910b57cec5SDimitry Andric } 13920b57cec5SDimitry Andric 13930b57cec5SDimitry Andric for (const SDUse &U : Op->ops()) 13940b57cec5SDimitry Andric DAG.ExtractVectorElements(U.get(), Args); 13950b57cec5SDimitry Andric 13960b57cec5SDimitry Andric return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args); 13970b57cec5SDimitry Andric } 13980b57cec5SDimitry Andric 13990b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 14000b57cec5SDimitry Andric SelectionDAG &DAG) const { 14010b57cec5SDimitry Andric 14020b57cec5SDimitry Andric SmallVector<SDValue, 8> Args; 14030b57cec5SDimitry Andric unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 14040b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1405fe6060f1SDimitry Andric EVT SrcVT = Op.getOperand(0).getValueType(); 1406fe6060f1SDimitry Andric 1407fe6060f1SDimitry Andric // For these types, we have some TableGen patterns except if the index is 1 1408fe6060f1SDimitry Andric if (((SrcVT == MVT::v4f16 && VT == MVT::v2f16) || 1409fe6060f1SDimitry Andric (SrcVT == MVT::v4i16 && VT == MVT::v2i16)) && 1410fe6060f1SDimitry Andric Start != 1) 1411fe6060f1SDimitry Andric return Op; 1412fe6060f1SDimitry Andric 1413*04eeddc0SDimitry Andric if (((SrcVT == MVT::v8f16 && VT == MVT::v4f16) || 1414*04eeddc0SDimitry Andric (SrcVT == MVT::v8i16 && VT == MVT::v4i16)) && 1415*04eeddc0SDimitry Andric (Start == 0 || Start == 4)) 1416*04eeddc0SDimitry Andric return Op; 1417*04eeddc0SDimitry Andric 14180b57cec5SDimitry Andric DAG.ExtractVectorElements(Op.getOperand(0), Args, Start, 14190b57cec5SDimitry Andric VT.getVectorNumElements()); 14200b57cec5SDimitry Andric 14210b57cec5SDimitry Andric return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args); 14220b57cec5SDimitry Andric } 14230b57cec5SDimitry Andric 14240b57cec5SDimitry Andric /// Generate Min/Max node 14250b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT, 14260b57cec5SDimitry Andric SDValue LHS, SDValue RHS, 14270b57cec5SDimitry Andric SDValue True, SDValue False, 14280b57cec5SDimitry Andric SDValue CC, 14290b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 14300b57cec5SDimitry Andric if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) 14310b57cec5SDimitry Andric return SDValue(); 14320b57cec5SDimitry Andric 14330b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 14340b57cec5SDimitry Andric ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 14350b57cec5SDimitry Andric switch (CCOpcode) { 14360b57cec5SDimitry Andric case ISD::SETOEQ: 14370b57cec5SDimitry Andric case ISD::SETONE: 14380b57cec5SDimitry Andric case ISD::SETUNE: 14390b57cec5SDimitry Andric case ISD::SETNE: 14400b57cec5SDimitry Andric case ISD::SETUEQ: 14410b57cec5SDimitry Andric case ISD::SETEQ: 14420b57cec5SDimitry Andric case ISD::SETFALSE: 14430b57cec5SDimitry Andric case ISD::SETFALSE2: 14440b57cec5SDimitry Andric case ISD::SETTRUE: 14450b57cec5SDimitry Andric case ISD::SETTRUE2: 14460b57cec5SDimitry Andric case ISD::SETUO: 14470b57cec5SDimitry Andric case ISD::SETO: 14480b57cec5SDimitry Andric break; 14490b57cec5SDimitry Andric case ISD::SETULE: 14500b57cec5SDimitry Andric case ISD::SETULT: { 14510b57cec5SDimitry Andric if (LHS == True) 14520b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); 14530b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); 14540b57cec5SDimitry Andric } 14550b57cec5SDimitry Andric case ISD::SETOLE: 14560b57cec5SDimitry Andric case ISD::SETOLT: 14570b57cec5SDimitry Andric case ISD::SETLE: 14580b57cec5SDimitry Andric case ISD::SETLT: { 14590b57cec5SDimitry Andric // Ordered. Assume ordered for undefined. 14600b57cec5SDimitry Andric 14610b57cec5SDimitry Andric // Only do this after legalization to avoid interfering with other combines 14620b57cec5SDimitry Andric // which might occur. 14630b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && 14640b57cec5SDimitry Andric !DCI.isCalledByLegalizer()) 14650b57cec5SDimitry Andric return SDValue(); 14660b57cec5SDimitry Andric 14670b57cec5SDimitry Andric // We need to permute the operands to get the correct NaN behavior. The 14680b57cec5SDimitry Andric // selected operand is the second one based on the failing compare with NaN, 14690b57cec5SDimitry Andric // so permute it based on the compare type the hardware uses. 14700b57cec5SDimitry Andric if (LHS == True) 14710b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); 14720b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); 14730b57cec5SDimitry Andric } 14740b57cec5SDimitry Andric case ISD::SETUGE: 14750b57cec5SDimitry Andric case ISD::SETUGT: { 14760b57cec5SDimitry Andric if (LHS == True) 14770b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); 14780b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); 14790b57cec5SDimitry Andric } 14800b57cec5SDimitry Andric case ISD::SETGT: 14810b57cec5SDimitry Andric case ISD::SETGE: 14820b57cec5SDimitry Andric case ISD::SETOGE: 14830b57cec5SDimitry Andric case ISD::SETOGT: { 14840b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && 14850b57cec5SDimitry Andric !DCI.isCalledByLegalizer()) 14860b57cec5SDimitry Andric return SDValue(); 14870b57cec5SDimitry Andric 14880b57cec5SDimitry Andric if (LHS == True) 14890b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); 14900b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); 14910b57cec5SDimitry Andric } 14920b57cec5SDimitry Andric case ISD::SETCC_INVALID: 14930b57cec5SDimitry Andric llvm_unreachable("Invalid setcc condcode!"); 14940b57cec5SDimitry Andric } 14950b57cec5SDimitry Andric return SDValue(); 14960b57cec5SDimitry Andric } 14970b57cec5SDimitry Andric 14980b57cec5SDimitry Andric std::pair<SDValue, SDValue> 14990b57cec5SDimitry Andric AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const { 15000b57cec5SDimitry Andric SDLoc SL(Op); 15010b57cec5SDimitry Andric 15020b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); 15030b57cec5SDimitry Andric 15040b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 15050b57cec5SDimitry Andric const SDValue One = DAG.getConstant(1, SL, MVT::i32); 15060b57cec5SDimitry Andric 15070b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); 15080b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); 15090b57cec5SDimitry Andric 15100b57cec5SDimitry Andric return std::make_pair(Lo, Hi); 15110b57cec5SDimitry Andric } 15120b57cec5SDimitry Andric 15130b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getLoHalf64(SDValue Op, SelectionDAG &DAG) const { 15140b57cec5SDimitry Andric SDLoc SL(Op); 15150b57cec5SDimitry Andric 15160b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); 15170b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 15180b57cec5SDimitry Andric return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); 15190b57cec5SDimitry Andric } 15200b57cec5SDimitry Andric 15210b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const { 15220b57cec5SDimitry Andric SDLoc SL(Op); 15230b57cec5SDimitry Andric 15240b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); 15250b57cec5SDimitry Andric const SDValue One = DAG.getConstant(1, SL, MVT::i32); 15260b57cec5SDimitry Andric return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); 15270b57cec5SDimitry Andric } 15280b57cec5SDimitry Andric 15290b57cec5SDimitry Andric // Split a vector type into two parts. The first part is a power of two vector. 15300b57cec5SDimitry Andric // The second part is whatever is left over, and is a scalar if it would 15310b57cec5SDimitry Andric // otherwise be a 1-vector. 15320b57cec5SDimitry Andric std::pair<EVT, EVT> 15330b57cec5SDimitry Andric AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const { 15340b57cec5SDimitry Andric EVT LoVT, HiVT; 15350b57cec5SDimitry Andric EVT EltVT = VT.getVectorElementType(); 15360b57cec5SDimitry Andric unsigned NumElts = VT.getVectorNumElements(); 15370b57cec5SDimitry Andric unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2); 15380b57cec5SDimitry Andric LoVT = EVT::getVectorVT(*DAG.getContext(), EltVT, LoNumElts); 15390b57cec5SDimitry Andric HiVT = NumElts - LoNumElts == 1 15400b57cec5SDimitry Andric ? EltVT 15410b57cec5SDimitry Andric : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts - LoNumElts); 15420b57cec5SDimitry Andric return std::make_pair(LoVT, HiVT); 15430b57cec5SDimitry Andric } 15440b57cec5SDimitry Andric 15450b57cec5SDimitry Andric // Split a vector value into two parts of types LoVT and HiVT. HiVT could be 15460b57cec5SDimitry Andric // scalar. 15470b57cec5SDimitry Andric std::pair<SDValue, SDValue> 15480b57cec5SDimitry Andric AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL, 15490b57cec5SDimitry Andric const EVT &LoVT, const EVT &HiVT, 15500b57cec5SDimitry Andric SelectionDAG &DAG) const { 15510b57cec5SDimitry Andric assert(LoVT.getVectorNumElements() + 15520b57cec5SDimitry Andric (HiVT.isVector() ? HiVT.getVectorNumElements() : 1) <= 15530b57cec5SDimitry Andric N.getValueType().getVectorNumElements() && 15540b57cec5SDimitry Andric "More vector elements requested than available!"); 15550b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, 15565ffd83dbSDimitry Andric DAG.getVectorIdxConstant(0, DL)); 15570b57cec5SDimitry Andric SDValue Hi = DAG.getNode( 15580b57cec5SDimitry Andric HiVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT, DL, 15595ffd83dbSDimitry Andric HiVT, N, DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), DL)); 15600b57cec5SDimitry Andric return std::make_pair(Lo, Hi); 15610b57cec5SDimitry Andric } 15620b57cec5SDimitry Andric 15630b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, 15640b57cec5SDimitry Andric SelectionDAG &DAG) const { 15650b57cec5SDimitry Andric LoadSDNode *Load = cast<LoadSDNode>(Op); 15660b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1567480093f4SDimitry Andric SDLoc SL(Op); 15680b57cec5SDimitry Andric 15690b57cec5SDimitry Andric 15700b57cec5SDimitry Andric // If this is a 2 element vector, we really want to scalarize and not create 15710b57cec5SDimitry Andric // weird 1 element vectors. 1572480093f4SDimitry Andric if (VT.getVectorNumElements() == 2) { 1573480093f4SDimitry Andric SDValue Ops[2]; 1574480093f4SDimitry Andric std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG); 1575480093f4SDimitry Andric return DAG.getMergeValues(Ops, SL); 1576480093f4SDimitry Andric } 15770b57cec5SDimitry Andric 15780b57cec5SDimitry Andric SDValue BasePtr = Load->getBasePtr(); 15790b57cec5SDimitry Andric EVT MemVT = Load->getMemoryVT(); 15800b57cec5SDimitry Andric 15810b57cec5SDimitry Andric const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo(); 15820b57cec5SDimitry Andric 15830b57cec5SDimitry Andric EVT LoVT, HiVT; 15840b57cec5SDimitry Andric EVT LoMemVT, HiMemVT; 15850b57cec5SDimitry Andric SDValue Lo, Hi; 15860b57cec5SDimitry Andric 15870b57cec5SDimitry Andric std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG); 15880b57cec5SDimitry Andric std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG); 15890b57cec5SDimitry Andric std::tie(Lo, Hi) = splitVector(Op, SL, LoVT, HiVT, DAG); 15900b57cec5SDimitry Andric 15910b57cec5SDimitry Andric unsigned Size = LoMemVT.getStoreSize(); 15920b57cec5SDimitry Andric unsigned BaseAlign = Load->getAlignment(); 15930b57cec5SDimitry Andric unsigned HiAlign = MinAlign(BaseAlign, Size); 15940b57cec5SDimitry Andric 15950b57cec5SDimitry Andric SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT, 15960b57cec5SDimitry Andric Load->getChain(), BasePtr, SrcValue, LoMemVT, 15970b57cec5SDimitry Andric BaseAlign, Load->getMemOperand()->getFlags()); 1598e8d8bef9SDimitry Andric SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Size)); 15990b57cec5SDimitry Andric SDValue HiLoad = 16000b57cec5SDimitry Andric DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(), 16010b57cec5SDimitry Andric HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()), 16020b57cec5SDimitry Andric HiMemVT, HiAlign, Load->getMemOperand()->getFlags()); 16030b57cec5SDimitry Andric 16040b57cec5SDimitry Andric SDValue Join; 16050b57cec5SDimitry Andric if (LoVT == HiVT) { 16060b57cec5SDimitry Andric // This is the case that the vector is power of two so was evenly split. 16070b57cec5SDimitry Andric Join = DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad); 16080b57cec5SDimitry Andric } else { 16090b57cec5SDimitry Andric Join = DAG.getNode(ISD::INSERT_SUBVECTOR, SL, VT, DAG.getUNDEF(VT), LoLoad, 16105ffd83dbSDimitry Andric DAG.getVectorIdxConstant(0, SL)); 16115ffd83dbSDimitry Andric Join = DAG.getNode( 16125ffd83dbSDimitry Andric HiVT.isVector() ? ISD::INSERT_SUBVECTOR : ISD::INSERT_VECTOR_ELT, SL, 16135ffd83dbSDimitry Andric VT, Join, HiLoad, 16145ffd83dbSDimitry Andric DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), SL)); 16150b57cec5SDimitry Andric } 16160b57cec5SDimitry Andric 16170b57cec5SDimitry Andric SDValue Ops[] = {Join, DAG.getNode(ISD::TokenFactor, SL, MVT::Other, 16180b57cec5SDimitry Andric LoLoad.getValue(1), HiLoad.getValue(1))}; 16190b57cec5SDimitry Andric 16200b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SL); 16210b57cec5SDimitry Andric } 16220b57cec5SDimitry Andric 1623e8d8bef9SDimitry Andric SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op, 16240b57cec5SDimitry Andric SelectionDAG &DAG) const { 16250b57cec5SDimitry Andric LoadSDNode *Load = cast<LoadSDNode>(Op); 16260b57cec5SDimitry Andric EVT VT = Op.getValueType(); 16270b57cec5SDimitry Andric SDValue BasePtr = Load->getBasePtr(); 16280b57cec5SDimitry Andric EVT MemVT = Load->getMemoryVT(); 16290b57cec5SDimitry Andric SDLoc SL(Op); 16300b57cec5SDimitry Andric const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo(); 16310b57cec5SDimitry Andric unsigned BaseAlign = Load->getAlignment(); 1632e8d8bef9SDimitry Andric unsigned NumElements = MemVT.getVectorNumElements(); 1633e8d8bef9SDimitry Andric 1634e8d8bef9SDimitry Andric // Widen from vec3 to vec4 when the load is at least 8-byte aligned 1635e8d8bef9SDimitry Andric // or 16-byte fully dereferenceable. Otherwise, split the vector load. 1636e8d8bef9SDimitry Andric if (NumElements != 3 || 1637e8d8bef9SDimitry Andric (BaseAlign < 8 && 1638e8d8bef9SDimitry Andric !SrcValue.isDereferenceable(16, *DAG.getContext(), DAG.getDataLayout()))) 1639e8d8bef9SDimitry Andric return SplitVectorLoad(Op, DAG); 1640e8d8bef9SDimitry Andric 1641e8d8bef9SDimitry Andric assert(NumElements == 3); 16420b57cec5SDimitry Andric 16430b57cec5SDimitry Andric EVT WideVT = 16440b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4); 16450b57cec5SDimitry Andric EVT WideMemVT = 16460b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(), 4); 16470b57cec5SDimitry Andric SDValue WideLoad = DAG.getExtLoad( 16480b57cec5SDimitry Andric Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue, 16490b57cec5SDimitry Andric WideMemVT, BaseAlign, Load->getMemOperand()->getFlags()); 16500b57cec5SDimitry Andric return DAG.getMergeValues( 16510b57cec5SDimitry Andric {DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, VT, WideLoad, 16525ffd83dbSDimitry Andric DAG.getVectorIdxConstant(0, SL)), 16530b57cec5SDimitry Andric WideLoad.getValue(1)}, 16540b57cec5SDimitry Andric SL); 16550b57cec5SDimitry Andric } 16560b57cec5SDimitry Andric 16570b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, 16580b57cec5SDimitry Andric SelectionDAG &DAG) const { 16590b57cec5SDimitry Andric StoreSDNode *Store = cast<StoreSDNode>(Op); 16600b57cec5SDimitry Andric SDValue Val = Store->getValue(); 16610b57cec5SDimitry Andric EVT VT = Val.getValueType(); 16620b57cec5SDimitry Andric 16630b57cec5SDimitry Andric // If this is a 2 element vector, we really want to scalarize and not create 16640b57cec5SDimitry Andric // weird 1 element vectors. 16650b57cec5SDimitry Andric if (VT.getVectorNumElements() == 2) 16660b57cec5SDimitry Andric return scalarizeVectorStore(Store, DAG); 16670b57cec5SDimitry Andric 16680b57cec5SDimitry Andric EVT MemVT = Store->getMemoryVT(); 16690b57cec5SDimitry Andric SDValue Chain = Store->getChain(); 16700b57cec5SDimitry Andric SDValue BasePtr = Store->getBasePtr(); 16710b57cec5SDimitry Andric SDLoc SL(Op); 16720b57cec5SDimitry Andric 16730b57cec5SDimitry Andric EVT LoVT, HiVT; 16740b57cec5SDimitry Andric EVT LoMemVT, HiMemVT; 16750b57cec5SDimitry Andric SDValue Lo, Hi; 16760b57cec5SDimitry Andric 16770b57cec5SDimitry Andric std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG); 16780b57cec5SDimitry Andric std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG); 16790b57cec5SDimitry Andric std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG); 16800b57cec5SDimitry Andric 16810b57cec5SDimitry Andric SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize()); 16820b57cec5SDimitry Andric 16830b57cec5SDimitry Andric const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo(); 16840b57cec5SDimitry Andric unsigned BaseAlign = Store->getAlignment(); 16850b57cec5SDimitry Andric unsigned Size = LoMemVT.getStoreSize(); 16860b57cec5SDimitry Andric unsigned HiAlign = MinAlign(BaseAlign, Size); 16870b57cec5SDimitry Andric 16880b57cec5SDimitry Andric SDValue LoStore = 16890b57cec5SDimitry Andric DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign, 16900b57cec5SDimitry Andric Store->getMemOperand()->getFlags()); 16910b57cec5SDimitry Andric SDValue HiStore = 16920b57cec5SDimitry Andric DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size), 16930b57cec5SDimitry Andric HiMemVT, HiAlign, Store->getMemOperand()->getFlags()); 16940b57cec5SDimitry Andric 16950b57cec5SDimitry Andric return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore); 16960b57cec5SDimitry Andric } 16970b57cec5SDimitry Andric 16980b57cec5SDimitry Andric // This is a shortcut for integer division because we have fast i32<->f32 16990b57cec5SDimitry Andric // conversions, and fast f32 reciprocal instructions. The fractional part of a 17000b57cec5SDimitry Andric // float is enough to accurately represent up to a 24-bit signed integer. 17010b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, 17020b57cec5SDimitry Andric bool Sign) const { 17030b57cec5SDimitry Andric SDLoc DL(Op); 17040b57cec5SDimitry Andric EVT VT = Op.getValueType(); 17050b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0); 17060b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1); 17070b57cec5SDimitry Andric MVT IntVT = MVT::i32; 17080b57cec5SDimitry Andric MVT FltVT = MVT::f32; 17090b57cec5SDimitry Andric 17100b57cec5SDimitry Andric unsigned LHSSignBits = DAG.ComputeNumSignBits(LHS); 17110b57cec5SDimitry Andric if (LHSSignBits < 9) 17120b57cec5SDimitry Andric return SDValue(); 17130b57cec5SDimitry Andric 17140b57cec5SDimitry Andric unsigned RHSSignBits = DAG.ComputeNumSignBits(RHS); 17150b57cec5SDimitry Andric if (RHSSignBits < 9) 17160b57cec5SDimitry Andric return SDValue(); 17170b57cec5SDimitry Andric 17180b57cec5SDimitry Andric unsigned BitSize = VT.getSizeInBits(); 17190b57cec5SDimitry Andric unsigned SignBits = std::min(LHSSignBits, RHSSignBits); 17200b57cec5SDimitry Andric unsigned DivBits = BitSize - SignBits; 17210b57cec5SDimitry Andric if (Sign) 17220b57cec5SDimitry Andric ++DivBits; 17230b57cec5SDimitry Andric 17240b57cec5SDimitry Andric ISD::NodeType ToFp = Sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP; 17250b57cec5SDimitry Andric ISD::NodeType ToInt = Sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT; 17260b57cec5SDimitry Andric 17270b57cec5SDimitry Andric SDValue jq = DAG.getConstant(1, DL, IntVT); 17280b57cec5SDimitry Andric 17290b57cec5SDimitry Andric if (Sign) { 17300b57cec5SDimitry Andric // char|short jq = ia ^ ib; 17310b57cec5SDimitry Andric jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS); 17320b57cec5SDimitry Andric 17330b57cec5SDimitry Andric // jq = jq >> (bitsize - 2) 17340b57cec5SDimitry Andric jq = DAG.getNode(ISD::SRA, DL, VT, jq, 17350b57cec5SDimitry Andric DAG.getConstant(BitSize - 2, DL, VT)); 17360b57cec5SDimitry Andric 17370b57cec5SDimitry Andric // jq = jq | 0x1 17380b57cec5SDimitry Andric jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT)); 17390b57cec5SDimitry Andric } 17400b57cec5SDimitry Andric 17410b57cec5SDimitry Andric // int ia = (int)LHS; 17420b57cec5SDimitry Andric SDValue ia = LHS; 17430b57cec5SDimitry Andric 17440b57cec5SDimitry Andric // int ib, (int)RHS; 17450b57cec5SDimitry Andric SDValue ib = RHS; 17460b57cec5SDimitry Andric 17470b57cec5SDimitry Andric // float fa = (float)ia; 17480b57cec5SDimitry Andric SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia); 17490b57cec5SDimitry Andric 17500b57cec5SDimitry Andric // float fb = (float)ib; 17510b57cec5SDimitry Andric SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib); 17520b57cec5SDimitry Andric 17530b57cec5SDimitry Andric SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT, 17540b57cec5SDimitry Andric fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb)); 17550b57cec5SDimitry Andric 17560b57cec5SDimitry Andric // fq = trunc(fq); 17570b57cec5SDimitry Andric fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq); 17580b57cec5SDimitry Andric 17590b57cec5SDimitry Andric // float fqneg = -fq; 17600b57cec5SDimitry Andric SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq); 17610b57cec5SDimitry Andric 1762480093f4SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 1763480093f4SDimitry Andric const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>(); 1764480093f4SDimitry Andric 17650b57cec5SDimitry Andric // float fr = mad(fqneg, fb, fa); 17665ffd83dbSDimitry Andric unsigned OpCode = !Subtarget->hasMadMacF32Insts() ? 17675ffd83dbSDimitry Andric (unsigned)ISD::FMA : 17685ffd83dbSDimitry Andric !MFI->getMode().allFP32Denormals() ? 17695ffd83dbSDimitry Andric (unsigned)ISD::FMAD : 17705ffd83dbSDimitry Andric (unsigned)AMDGPUISD::FMAD_FTZ; 17710b57cec5SDimitry Andric SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa); 17720b57cec5SDimitry Andric 17730b57cec5SDimitry Andric // int iq = (int)fq; 17740b57cec5SDimitry Andric SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq); 17750b57cec5SDimitry Andric 17760b57cec5SDimitry Andric // fr = fabs(fr); 17770b57cec5SDimitry Andric fr = DAG.getNode(ISD::FABS, DL, FltVT, fr); 17780b57cec5SDimitry Andric 17790b57cec5SDimitry Andric // fb = fabs(fb); 17800b57cec5SDimitry Andric fb = DAG.getNode(ISD::FABS, DL, FltVT, fb); 17810b57cec5SDimitry Andric 17820b57cec5SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 17830b57cec5SDimitry Andric 17840b57cec5SDimitry Andric // int cv = fr >= fb; 17850b57cec5SDimitry Andric SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE); 17860b57cec5SDimitry Andric 17870b57cec5SDimitry Andric // jq = (cv ? jq : 0); 17880b57cec5SDimitry Andric jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT)); 17890b57cec5SDimitry Andric 17900b57cec5SDimitry Andric // dst = iq + jq; 17910b57cec5SDimitry Andric SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq); 17920b57cec5SDimitry Andric 17930b57cec5SDimitry Andric // Rem needs compensation, it's easier to recompute it 17940b57cec5SDimitry Andric SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS); 17950b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem); 17960b57cec5SDimitry Andric 17970b57cec5SDimitry Andric // Truncate to number of bits this divide really is. 17980b57cec5SDimitry Andric if (Sign) { 17990b57cec5SDimitry Andric SDValue InRegSize 18000b57cec5SDimitry Andric = DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), DivBits)); 18010b57cec5SDimitry Andric Div = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Div, InRegSize); 18020b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Rem, InRegSize); 18030b57cec5SDimitry Andric } else { 18040b57cec5SDimitry Andric SDValue TruncMask = DAG.getConstant((UINT64_C(1) << DivBits) - 1, DL, VT); 18050b57cec5SDimitry Andric Div = DAG.getNode(ISD::AND, DL, VT, Div, TruncMask); 18060b57cec5SDimitry Andric Rem = DAG.getNode(ISD::AND, DL, VT, Rem, TruncMask); 18070b57cec5SDimitry Andric } 18080b57cec5SDimitry Andric 18090b57cec5SDimitry Andric return DAG.getMergeValues({ Div, Rem }, DL); 18100b57cec5SDimitry Andric } 18110b57cec5SDimitry Andric 18120b57cec5SDimitry Andric void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op, 18130b57cec5SDimitry Andric SelectionDAG &DAG, 18140b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results) const { 18150b57cec5SDimitry Andric SDLoc DL(Op); 18160b57cec5SDimitry Andric EVT VT = Op.getValueType(); 18170b57cec5SDimitry Andric 18180b57cec5SDimitry Andric assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64"); 18190b57cec5SDimitry Andric 18200b57cec5SDimitry Andric EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); 18210b57cec5SDimitry Andric 18220b57cec5SDimitry Andric SDValue One = DAG.getConstant(1, DL, HalfVT); 18230b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, HalfVT); 18240b57cec5SDimitry Andric 18250b57cec5SDimitry Andric //HiLo split 18260b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0); 18270b57cec5SDimitry Andric SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero); 18280b57cec5SDimitry Andric SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, One); 18290b57cec5SDimitry Andric 18300b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1); 18310b57cec5SDimitry Andric SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero); 18320b57cec5SDimitry Andric SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, One); 18330b57cec5SDimitry Andric 18340b57cec5SDimitry Andric if (DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) && 18350b57cec5SDimitry Andric DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) { 18360b57cec5SDimitry Andric 18370b57cec5SDimitry Andric SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT), 18380b57cec5SDimitry Andric LHS_Lo, RHS_Lo); 18390b57cec5SDimitry Andric 18400b57cec5SDimitry Andric SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), Zero}); 18410b57cec5SDimitry Andric SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), Zero}); 18420b57cec5SDimitry Andric 18430b57cec5SDimitry Andric Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV)); 18440b57cec5SDimitry Andric Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM)); 18450b57cec5SDimitry Andric return; 18460b57cec5SDimitry Andric } 18470b57cec5SDimitry Andric 18480b57cec5SDimitry Andric if (isTypeLegal(MVT::i64)) { 1849349cc55cSDimitry Andric // The algorithm here is based on ideas from "Software Integer Division", 1850349cc55cSDimitry Andric // Tom Rodeheffer, August 2008. 1851349cc55cSDimitry Andric 1852480093f4SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 1853480093f4SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1854480093f4SDimitry Andric 18550b57cec5SDimitry Andric // Compute denominator reciprocal. 18565ffd83dbSDimitry Andric unsigned FMAD = !Subtarget->hasMadMacF32Insts() ? 18575ffd83dbSDimitry Andric (unsigned)ISD::FMA : 18585ffd83dbSDimitry Andric !MFI->getMode().allFP32Denormals() ? 18595ffd83dbSDimitry Andric (unsigned)ISD::FMAD : 18605ffd83dbSDimitry Andric (unsigned)AMDGPUISD::FMAD_FTZ; 18610b57cec5SDimitry Andric 18620b57cec5SDimitry Andric SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo); 18630b57cec5SDimitry Andric SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi); 18640b57cec5SDimitry Andric SDValue Mad1 = DAG.getNode(FMAD, DL, MVT::f32, Cvt_Hi, 18650b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x4f800000).bitsToFloat(), DL, MVT::f32), 18660b57cec5SDimitry Andric Cvt_Lo); 18670b57cec5SDimitry Andric SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1); 18680b57cec5SDimitry Andric SDValue Mul1 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Rcp, 18690b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x5f7ffffc).bitsToFloat(), DL, MVT::f32)); 18700b57cec5SDimitry Andric SDValue Mul2 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Mul1, 18710b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x2f800000).bitsToFloat(), DL, MVT::f32)); 18720b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2); 18730b57cec5SDimitry Andric SDValue Mad2 = DAG.getNode(FMAD, DL, MVT::f32, Trunc, 18740b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0xcf800000).bitsToFloat(), DL, MVT::f32), 18750b57cec5SDimitry Andric Mul1); 18760b57cec5SDimitry Andric SDValue Rcp_Lo = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Mad2); 18770b57cec5SDimitry Andric SDValue Rcp_Hi = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Trunc); 18780b57cec5SDimitry Andric SDValue Rcp64 = DAG.getBitcast(VT, 18790b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Rcp_Lo, Rcp_Hi})); 18800b57cec5SDimitry Andric 18810b57cec5SDimitry Andric SDValue Zero64 = DAG.getConstant(0, DL, VT); 18820b57cec5SDimitry Andric SDValue One64 = DAG.getConstant(1, DL, VT); 18830b57cec5SDimitry Andric SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1); 18840b57cec5SDimitry Andric SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1); 18850b57cec5SDimitry Andric 1886349cc55cSDimitry Andric // First round of UNR (Unsigned integer Newton-Raphson). 18870b57cec5SDimitry Andric SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS); 18880b57cec5SDimitry Andric SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64); 18890b57cec5SDimitry Andric SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1); 18900b57cec5SDimitry Andric SDValue Mulhi1_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, 18910b57cec5SDimitry Andric Zero); 1892349cc55cSDimitry Andric SDValue Mulhi1_Hi = 1893349cc55cSDimitry Andric DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, One); 18940b57cec5SDimitry Andric SDValue Add1_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Lo, 18950b57cec5SDimitry Andric Mulhi1_Lo, Zero1); 18960b57cec5SDimitry Andric SDValue Add1_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Hi, 18970b57cec5SDimitry Andric Mulhi1_Hi, Add1_Lo.getValue(1)); 18980b57cec5SDimitry Andric SDValue Add1 = DAG.getBitcast(VT, 18990b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi})); 19000b57cec5SDimitry Andric 1901349cc55cSDimitry Andric // Second round of UNR. 19020b57cec5SDimitry Andric SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1); 19030b57cec5SDimitry Andric SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2); 19040b57cec5SDimitry Andric SDValue Mulhi2_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, 19050b57cec5SDimitry Andric Zero); 1906349cc55cSDimitry Andric SDValue Mulhi2_Hi = 1907349cc55cSDimitry Andric DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, One); 19080b57cec5SDimitry Andric SDValue Add2_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Lo, 19090b57cec5SDimitry Andric Mulhi2_Lo, Zero1); 1910349cc55cSDimitry Andric SDValue Add2_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Hi, 1911349cc55cSDimitry Andric Mulhi2_Hi, Add2_Lo.getValue(1)); 19120b57cec5SDimitry Andric SDValue Add2 = DAG.getBitcast(VT, 19130b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi})); 1914349cc55cSDimitry Andric 19150b57cec5SDimitry Andric SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2); 19160b57cec5SDimitry Andric 19170b57cec5SDimitry Andric SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3); 19180b57cec5SDimitry Andric 19190b57cec5SDimitry Andric SDValue Mul3_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, Zero); 19200b57cec5SDimitry Andric SDValue Mul3_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, One); 19210b57cec5SDimitry Andric SDValue Sub1_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Lo, 19220b57cec5SDimitry Andric Mul3_Lo, Zero1); 19230b57cec5SDimitry Andric SDValue Sub1_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Hi, 19240b57cec5SDimitry Andric Mul3_Hi, Sub1_Lo.getValue(1)); 19250b57cec5SDimitry Andric SDValue Sub1_Mi = DAG.getNode(ISD::SUB, DL, HalfVT, LHS_Hi, Mul3_Hi); 19260b57cec5SDimitry Andric SDValue Sub1 = DAG.getBitcast(VT, 19270b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub1_Lo, Sub1_Hi})); 19280b57cec5SDimitry Andric 19290b57cec5SDimitry Andric SDValue MinusOne = DAG.getConstant(0xffffffffu, DL, HalfVT); 19300b57cec5SDimitry Andric SDValue C1 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, MinusOne, Zero, 19310b57cec5SDimitry Andric ISD::SETUGE); 19320b57cec5SDimitry Andric SDValue C2 = DAG.getSelectCC(DL, Sub1_Lo, RHS_Lo, MinusOne, Zero, 19330b57cec5SDimitry Andric ISD::SETUGE); 19340b57cec5SDimitry Andric SDValue C3 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, C2, C1, ISD::SETEQ); 19350b57cec5SDimitry Andric 19360b57cec5SDimitry Andric // TODO: Here and below portions of the code can be enclosed into if/endif. 19370b57cec5SDimitry Andric // Currently control flow is unconditional and we have 4 selects after 19380b57cec5SDimitry Andric // potential endif to substitute PHIs. 19390b57cec5SDimitry Andric 19400b57cec5SDimitry Andric // if C3 != 0 ... 19410b57cec5SDimitry Andric SDValue Sub2_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Lo, 19420b57cec5SDimitry Andric RHS_Lo, Zero1); 19430b57cec5SDimitry Andric SDValue Sub2_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Mi, 19440b57cec5SDimitry Andric RHS_Hi, Sub1_Lo.getValue(1)); 19450b57cec5SDimitry Andric SDValue Sub2_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi, 19460b57cec5SDimitry Andric Zero, Sub2_Lo.getValue(1)); 19470b57cec5SDimitry Andric SDValue Sub2 = DAG.getBitcast(VT, 19480b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi})); 19490b57cec5SDimitry Andric 19500b57cec5SDimitry Andric SDValue Add3 = DAG.getNode(ISD::ADD, DL, VT, Mulhi3, One64); 19510b57cec5SDimitry Andric 19520b57cec5SDimitry Andric SDValue C4 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, MinusOne, Zero, 19530b57cec5SDimitry Andric ISD::SETUGE); 19540b57cec5SDimitry Andric SDValue C5 = DAG.getSelectCC(DL, Sub2_Lo, RHS_Lo, MinusOne, Zero, 19550b57cec5SDimitry Andric ISD::SETUGE); 19560b57cec5SDimitry Andric SDValue C6 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, C5, C4, ISD::SETEQ); 19570b57cec5SDimitry Andric 19580b57cec5SDimitry Andric // if (C6 != 0) 19590b57cec5SDimitry Andric SDValue Add4 = DAG.getNode(ISD::ADD, DL, VT, Add3, One64); 19600b57cec5SDimitry Andric 19610b57cec5SDimitry Andric SDValue Sub3_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Lo, 19620b57cec5SDimitry Andric RHS_Lo, Zero1); 19630b57cec5SDimitry Andric SDValue Sub3_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi, 19640b57cec5SDimitry Andric RHS_Hi, Sub2_Lo.getValue(1)); 19650b57cec5SDimitry Andric SDValue Sub3_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub3_Mi, 19660b57cec5SDimitry Andric Zero, Sub3_Lo.getValue(1)); 19670b57cec5SDimitry Andric SDValue Sub3 = DAG.getBitcast(VT, 19680b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi})); 19690b57cec5SDimitry Andric 19700b57cec5SDimitry Andric // endif C6 19710b57cec5SDimitry Andric // endif C3 19720b57cec5SDimitry Andric 19730b57cec5SDimitry Andric SDValue Sel1 = DAG.getSelectCC(DL, C6, Zero, Add4, Add3, ISD::SETNE); 19740b57cec5SDimitry Andric SDValue Div = DAG.getSelectCC(DL, C3, Zero, Sel1, Mulhi3, ISD::SETNE); 19750b57cec5SDimitry Andric 19760b57cec5SDimitry Andric SDValue Sel2 = DAG.getSelectCC(DL, C6, Zero, Sub3, Sub2, ISD::SETNE); 19770b57cec5SDimitry Andric SDValue Rem = DAG.getSelectCC(DL, C3, Zero, Sel2, Sub1, ISD::SETNE); 19780b57cec5SDimitry Andric 19790b57cec5SDimitry Andric Results.push_back(Div); 19800b57cec5SDimitry Andric Results.push_back(Rem); 19810b57cec5SDimitry Andric 19820b57cec5SDimitry Andric return; 19830b57cec5SDimitry Andric } 19840b57cec5SDimitry Andric 19850b57cec5SDimitry Andric // r600 expandion. 19860b57cec5SDimitry Andric // Get Speculative values 19870b57cec5SDimitry Andric SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo); 19880b57cec5SDimitry Andric SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo); 19890b57cec5SDimitry Andric 19900b57cec5SDimitry Andric SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, Zero, REM_Part, LHS_Hi, ISD::SETEQ); 19910b57cec5SDimitry Andric SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, Zero}); 19920b57cec5SDimitry Andric REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM); 19930b57cec5SDimitry Andric 19940b57cec5SDimitry Andric SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, Zero, DIV_Part, Zero, ISD::SETEQ); 19950b57cec5SDimitry Andric SDValue DIV_Lo = Zero; 19960b57cec5SDimitry Andric 19970b57cec5SDimitry Andric const unsigned halfBitWidth = HalfVT.getSizeInBits(); 19980b57cec5SDimitry Andric 19990b57cec5SDimitry Andric for (unsigned i = 0; i < halfBitWidth; ++i) { 20000b57cec5SDimitry Andric const unsigned bitPos = halfBitWidth - i - 1; 20010b57cec5SDimitry Andric SDValue POS = DAG.getConstant(bitPos, DL, HalfVT); 20020b57cec5SDimitry Andric // Get value of high bit 20030b57cec5SDimitry Andric SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); 20040b57cec5SDimitry Andric HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, One); 20050b57cec5SDimitry Andric HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit); 20060b57cec5SDimitry Andric 20070b57cec5SDimitry Andric // Shift 20080b57cec5SDimitry Andric REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT)); 20090b57cec5SDimitry Andric // Add LHS high bit 20100b57cec5SDimitry Andric REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit); 20110b57cec5SDimitry Andric 20120b57cec5SDimitry Andric SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT); 20130b57cec5SDimitry Andric SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, Zero, ISD::SETUGE); 20140b57cec5SDimitry Andric 20150b57cec5SDimitry Andric DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT); 20160b57cec5SDimitry Andric 20170b57cec5SDimitry Andric // Update REM 20180b57cec5SDimitry Andric SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS); 20190b57cec5SDimitry Andric REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE); 20200b57cec5SDimitry Andric } 20210b57cec5SDimitry Andric 20220b57cec5SDimitry Andric SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {DIV_Lo, DIV_Hi}); 20230b57cec5SDimitry Andric DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV); 20240b57cec5SDimitry Andric Results.push_back(DIV); 20250b57cec5SDimitry Andric Results.push_back(REM); 20260b57cec5SDimitry Andric } 20270b57cec5SDimitry Andric 20280b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 20290b57cec5SDimitry Andric SelectionDAG &DAG) const { 20300b57cec5SDimitry Andric SDLoc DL(Op); 20310b57cec5SDimitry Andric EVT VT = Op.getValueType(); 20320b57cec5SDimitry Andric 20330b57cec5SDimitry Andric if (VT == MVT::i64) { 20340b57cec5SDimitry Andric SmallVector<SDValue, 2> Results; 20350b57cec5SDimitry Andric LowerUDIVREM64(Op, DAG, Results); 20360b57cec5SDimitry Andric return DAG.getMergeValues(Results, DL); 20370b57cec5SDimitry Andric } 20380b57cec5SDimitry Andric 20390b57cec5SDimitry Andric if (VT == MVT::i32) { 20400b57cec5SDimitry Andric if (SDValue Res = LowerDIVREM24(Op, DAG, false)) 20410b57cec5SDimitry Andric return Res; 20420b57cec5SDimitry Andric } 20430b57cec5SDimitry Andric 20445ffd83dbSDimitry Andric SDValue X = Op.getOperand(0); 20455ffd83dbSDimitry Andric SDValue Y = Op.getOperand(1); 20460b57cec5SDimitry Andric 20475ffd83dbSDimitry Andric // See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the 20485ffd83dbSDimitry Andric // algorithm used here. 20490b57cec5SDimitry Andric 20505ffd83dbSDimitry Andric // Initial estimate of inv(y). 20515ffd83dbSDimitry Andric SDValue Z = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Y); 20520b57cec5SDimitry Andric 20535ffd83dbSDimitry Andric // One round of UNR. 20545ffd83dbSDimitry Andric SDValue NegY = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Y); 20555ffd83dbSDimitry Andric SDValue NegYZ = DAG.getNode(ISD::MUL, DL, VT, NegY, Z); 20565ffd83dbSDimitry Andric Z = DAG.getNode(ISD::ADD, DL, VT, Z, 20575ffd83dbSDimitry Andric DAG.getNode(ISD::MULHU, DL, VT, Z, NegYZ)); 20580b57cec5SDimitry Andric 20595ffd83dbSDimitry Andric // Quotient/remainder estimate. 20605ffd83dbSDimitry Andric SDValue Q = DAG.getNode(ISD::MULHU, DL, VT, X, Z); 20615ffd83dbSDimitry Andric SDValue R = 20625ffd83dbSDimitry Andric DAG.getNode(ISD::SUB, DL, VT, X, DAG.getNode(ISD::MUL, DL, VT, Q, Y)); 20630b57cec5SDimitry Andric 20645ffd83dbSDimitry Andric // First quotient/remainder refinement. 20655ffd83dbSDimitry Andric EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 20665ffd83dbSDimitry Andric SDValue One = DAG.getConstant(1, DL, VT); 20675ffd83dbSDimitry Andric SDValue Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE); 20685ffd83dbSDimitry Andric Q = DAG.getNode(ISD::SELECT, DL, VT, Cond, 20695ffd83dbSDimitry Andric DAG.getNode(ISD::ADD, DL, VT, Q, One), Q); 20705ffd83dbSDimitry Andric R = DAG.getNode(ISD::SELECT, DL, VT, Cond, 20715ffd83dbSDimitry Andric DAG.getNode(ISD::SUB, DL, VT, R, Y), R); 20720b57cec5SDimitry Andric 20735ffd83dbSDimitry Andric // Second quotient/remainder refinement. 20745ffd83dbSDimitry Andric Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE); 20755ffd83dbSDimitry Andric Q = DAG.getNode(ISD::SELECT, DL, VT, Cond, 20765ffd83dbSDimitry Andric DAG.getNode(ISD::ADD, DL, VT, Q, One), Q); 20775ffd83dbSDimitry Andric R = DAG.getNode(ISD::SELECT, DL, VT, Cond, 20785ffd83dbSDimitry Andric DAG.getNode(ISD::SUB, DL, VT, R, Y), R); 20790b57cec5SDimitry Andric 20805ffd83dbSDimitry Andric return DAG.getMergeValues({Q, R}, DL); 20810b57cec5SDimitry Andric } 20820b57cec5SDimitry Andric 20830b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, 20840b57cec5SDimitry Andric SelectionDAG &DAG) const { 20850b57cec5SDimitry Andric SDLoc DL(Op); 20860b57cec5SDimitry Andric EVT VT = Op.getValueType(); 20870b57cec5SDimitry Andric 20880b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0); 20890b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1); 20900b57cec5SDimitry Andric 20910b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, VT); 20920b57cec5SDimitry Andric SDValue NegOne = DAG.getConstant(-1, DL, VT); 20930b57cec5SDimitry Andric 20940b57cec5SDimitry Andric if (VT == MVT::i32) { 20950b57cec5SDimitry Andric if (SDValue Res = LowerDIVREM24(Op, DAG, true)) 20960b57cec5SDimitry Andric return Res; 20970b57cec5SDimitry Andric } 20980b57cec5SDimitry Andric 20990b57cec5SDimitry Andric if (VT == MVT::i64 && 21000b57cec5SDimitry Andric DAG.ComputeNumSignBits(LHS) > 32 && 21010b57cec5SDimitry Andric DAG.ComputeNumSignBits(RHS) > 32) { 21020b57cec5SDimitry Andric EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); 21030b57cec5SDimitry Andric 21040b57cec5SDimitry Andric //HiLo split 21050b57cec5SDimitry Andric SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero); 21060b57cec5SDimitry Andric SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero); 21070b57cec5SDimitry Andric SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT), 21080b57cec5SDimitry Andric LHS_Lo, RHS_Lo); 21090b57cec5SDimitry Andric SDValue Res[2] = { 21100b57cec5SDimitry Andric DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)), 21110b57cec5SDimitry Andric DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1)) 21120b57cec5SDimitry Andric }; 21130b57cec5SDimitry Andric return DAG.getMergeValues(Res, DL); 21140b57cec5SDimitry Andric } 21150b57cec5SDimitry Andric 21160b57cec5SDimitry Andric SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT); 21170b57cec5SDimitry Andric SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT); 21180b57cec5SDimitry Andric SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign); 21190b57cec5SDimitry Andric SDValue RSign = LHSign; // Remainder sign is the same as LHS 21200b57cec5SDimitry Andric 21210b57cec5SDimitry Andric LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign); 21220b57cec5SDimitry Andric RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign); 21230b57cec5SDimitry Andric 21240b57cec5SDimitry Andric LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign); 21250b57cec5SDimitry Andric RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign); 21260b57cec5SDimitry Andric 21270b57cec5SDimitry Andric SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS); 21280b57cec5SDimitry Andric SDValue Rem = Div.getValue(1); 21290b57cec5SDimitry Andric 21300b57cec5SDimitry Andric Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign); 21310b57cec5SDimitry Andric Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign); 21320b57cec5SDimitry Andric 21330b57cec5SDimitry Andric Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign); 21340b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign); 21350b57cec5SDimitry Andric 21360b57cec5SDimitry Andric SDValue Res[2] = { 21370b57cec5SDimitry Andric Div, 21380b57cec5SDimitry Andric Rem 21390b57cec5SDimitry Andric }; 21400b57cec5SDimitry Andric return DAG.getMergeValues(Res, DL); 21410b57cec5SDimitry Andric } 21420b57cec5SDimitry Andric 2143e8d8bef9SDimitry Andric // (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x) 21440b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { 21450b57cec5SDimitry Andric SDLoc SL(Op); 21460b57cec5SDimitry Andric EVT VT = Op.getValueType(); 2147e8d8bef9SDimitry Andric auto Flags = Op->getFlags(); 21480b57cec5SDimitry Andric SDValue X = Op.getOperand(0); 21490b57cec5SDimitry Andric SDValue Y = Op.getOperand(1); 21500b57cec5SDimitry Andric 2151e8d8bef9SDimitry Andric SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags); 2152e8d8bef9SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags); 2153e8d8bef9SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags); 2154e8d8bef9SDimitry Andric // TODO: For f32 use FMAD instead if !hasFastFMA32? 2155e8d8bef9SDimitry Andric return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags); 21560b57cec5SDimitry Andric } 21570b57cec5SDimitry Andric 21580b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { 21590b57cec5SDimitry Andric SDLoc SL(Op); 21600b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 21610b57cec5SDimitry Andric 21620b57cec5SDimitry Andric // result = trunc(src) 21630b57cec5SDimitry Andric // if (src > 0.0 && src != result) 21640b57cec5SDimitry Andric // result += 1.0 21650b57cec5SDimitry Andric 21660b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); 21670b57cec5SDimitry Andric 21680b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); 21690b57cec5SDimitry Andric const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64); 21700b57cec5SDimitry Andric 21710b57cec5SDimitry Andric EVT SetCCVT = 21720b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); 21730b57cec5SDimitry Andric 21740b57cec5SDimitry Andric SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT); 21750b57cec5SDimitry Andric SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); 21760b57cec5SDimitry Andric SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); 21770b57cec5SDimitry Andric 21780b57cec5SDimitry Andric SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero); 21790b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 21800b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); 21810b57cec5SDimitry Andric } 21820b57cec5SDimitry Andric 21830b57cec5SDimitry Andric static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL, 21840b57cec5SDimitry Andric SelectionDAG &DAG) { 21850b57cec5SDimitry Andric const unsigned FractBits = 52; 21860b57cec5SDimitry Andric const unsigned ExpBits = 11; 21870b57cec5SDimitry Andric 21880b57cec5SDimitry Andric SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32, 21890b57cec5SDimitry Andric Hi, 21900b57cec5SDimitry Andric DAG.getConstant(FractBits - 32, SL, MVT::i32), 21910b57cec5SDimitry Andric DAG.getConstant(ExpBits, SL, MVT::i32)); 21920b57cec5SDimitry Andric SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart, 21930b57cec5SDimitry Andric DAG.getConstant(1023, SL, MVT::i32)); 21940b57cec5SDimitry Andric 21950b57cec5SDimitry Andric return Exp; 21960b57cec5SDimitry Andric } 21970b57cec5SDimitry Andric 21980b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { 21990b57cec5SDimitry Andric SDLoc SL(Op); 22000b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 22010b57cec5SDimitry Andric 22020b57cec5SDimitry Andric assert(Op.getValueType() == MVT::f64); 22030b57cec5SDimitry Andric 22040b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 22050b57cec5SDimitry Andric 22060b57cec5SDimitry Andric // Extract the upper half, since this is where we will find the sign and 22070b57cec5SDimitry Andric // exponent. 2208349cc55cSDimitry Andric SDValue Hi = getHiHalf64(Src, DAG); 22090b57cec5SDimitry Andric 22100b57cec5SDimitry Andric SDValue Exp = extractF64Exponent(Hi, SL, DAG); 22110b57cec5SDimitry Andric 22120b57cec5SDimitry Andric const unsigned FractBits = 52; 22130b57cec5SDimitry Andric 22140b57cec5SDimitry Andric // Extract the sign bit. 22150b57cec5SDimitry Andric const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32); 22160b57cec5SDimitry Andric SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask); 22170b57cec5SDimitry Andric 22180b57cec5SDimitry Andric // Extend back to 64-bits. 22190b57cec5SDimitry Andric SDValue SignBit64 = DAG.getBuildVector(MVT::v2i32, SL, {Zero, SignBit}); 22200b57cec5SDimitry Andric SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64); 22210b57cec5SDimitry Andric 22220b57cec5SDimitry Andric SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src); 22230b57cec5SDimitry Andric const SDValue FractMask 22240b57cec5SDimitry Andric = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64); 22250b57cec5SDimitry Andric 22260b57cec5SDimitry Andric SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp); 22270b57cec5SDimitry Andric SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); 22280b57cec5SDimitry Andric SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not); 22290b57cec5SDimitry Andric 22300b57cec5SDimitry Andric EVT SetCCVT = 22310b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); 22320b57cec5SDimitry Andric 22330b57cec5SDimitry Andric const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32); 22340b57cec5SDimitry Andric 22350b57cec5SDimitry Andric SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT); 22360b57cec5SDimitry Andric SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT); 22370b57cec5SDimitry Andric 22380b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0); 22390b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1); 22400b57cec5SDimitry Andric 22410b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2); 22420b57cec5SDimitry Andric } 22430b57cec5SDimitry Andric 22440b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { 22450b57cec5SDimitry Andric SDLoc SL(Op); 22460b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 22470b57cec5SDimitry Andric 22480b57cec5SDimitry Andric assert(Op.getValueType() == MVT::f64); 22490b57cec5SDimitry Andric 22500b57cec5SDimitry Andric APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52"); 22510b57cec5SDimitry Andric SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64); 22520b57cec5SDimitry Andric SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src); 22530b57cec5SDimitry Andric 22540b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 22550b57cec5SDimitry Andric 22560b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign); 22570b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign); 22580b57cec5SDimitry Andric 22590b57cec5SDimitry Andric SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src); 22600b57cec5SDimitry Andric 22610b57cec5SDimitry Andric APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51"); 22620b57cec5SDimitry Andric SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64); 22630b57cec5SDimitry Andric 22640b57cec5SDimitry Andric EVT SetCCVT = 22650b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); 22660b57cec5SDimitry Andric SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT); 22670b57cec5SDimitry Andric 22680b57cec5SDimitry Andric return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); 22690b57cec5SDimitry Andric } 22700b57cec5SDimitry Andric 22710b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const { 22720b57cec5SDimitry Andric // FNEARBYINT and FRINT are the same, except in their handling of FP 22730b57cec5SDimitry Andric // exceptions. Those aren't really meaningful for us, and OpenCL only has 22740b57cec5SDimitry Andric // rint, so just treat them as equivalent. 22750b57cec5SDimitry Andric return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0)); 22760b57cec5SDimitry Andric } 22770b57cec5SDimitry Andric 22780b57cec5SDimitry Andric // XXX - May require not supporting f32 denormals? 22790b57cec5SDimitry Andric 22800b57cec5SDimitry Andric // Don't handle v2f16. The extra instructions to scalarize and repack around the 22810b57cec5SDimitry Andric // compare and vselect end up producing worse code than scalarizing the whole 22820b57cec5SDimitry Andric // operation. 22835ffd83dbSDimitry Andric SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const { 22840b57cec5SDimitry Andric SDLoc SL(Op); 22850b57cec5SDimitry Andric SDValue X = Op.getOperand(0); 22860b57cec5SDimitry Andric EVT VT = Op.getValueType(); 22870b57cec5SDimitry Andric 22880b57cec5SDimitry Andric SDValue T = DAG.getNode(ISD::FTRUNC, SL, VT, X); 22890b57cec5SDimitry Andric 22900b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 22910b57cec5SDimitry Andric 22920b57cec5SDimitry Andric SDValue Diff = DAG.getNode(ISD::FSUB, SL, VT, X, T); 22930b57cec5SDimitry Andric 22940b57cec5SDimitry Andric SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff); 22950b57cec5SDimitry Andric 22960b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, VT); 22970b57cec5SDimitry Andric const SDValue One = DAG.getConstantFP(1.0, SL, VT); 22980b57cec5SDimitry Andric const SDValue Half = DAG.getConstantFP(0.5, SL, VT); 22990b57cec5SDimitry Andric 23000b57cec5SDimitry Andric SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, VT, One, X); 23010b57cec5SDimitry Andric 23020b57cec5SDimitry Andric EVT SetCCVT = 23030b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 23040b57cec5SDimitry Andric 23050b57cec5SDimitry Andric SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE); 23060b57cec5SDimitry Andric 23070b57cec5SDimitry Andric SDValue Sel = DAG.getNode(ISD::SELECT, SL, VT, Cmp, SignOne, Zero); 23080b57cec5SDimitry Andric 23090b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, VT, T, Sel); 23100b57cec5SDimitry Andric } 23110b57cec5SDimitry Andric 23120b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { 23130b57cec5SDimitry Andric SDLoc SL(Op); 23140b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 23150b57cec5SDimitry Andric 23160b57cec5SDimitry Andric // result = trunc(src); 23170b57cec5SDimitry Andric // if (src < 0.0 && src != result) 23180b57cec5SDimitry Andric // result += -1.0. 23190b57cec5SDimitry Andric 23200b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); 23210b57cec5SDimitry Andric 23220b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); 23230b57cec5SDimitry Andric const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64); 23240b57cec5SDimitry Andric 23250b57cec5SDimitry Andric EVT SetCCVT = 23260b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); 23270b57cec5SDimitry Andric 23280b57cec5SDimitry Andric SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT); 23290b57cec5SDimitry Andric SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); 23300b57cec5SDimitry Andric SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); 23310b57cec5SDimitry Andric 23320b57cec5SDimitry Andric SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero); 23330b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 23340b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); 23350b57cec5SDimitry Andric } 23360b57cec5SDimitry Andric 23370b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG, 23380b57cec5SDimitry Andric double Log2BaseInverted) const { 23390b57cec5SDimitry Andric EVT VT = Op.getValueType(); 23400b57cec5SDimitry Andric 23410b57cec5SDimitry Andric SDLoc SL(Op); 23420b57cec5SDimitry Andric SDValue Operand = Op.getOperand(0); 23430b57cec5SDimitry Andric SDValue Log2Operand = DAG.getNode(ISD::FLOG2, SL, VT, Operand); 23440b57cec5SDimitry Andric SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT); 23450b57cec5SDimitry Andric 23460b57cec5SDimitry Andric return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand); 23470b57cec5SDimitry Andric } 23480b57cec5SDimitry Andric 23490b57cec5SDimitry Andric // exp2(M_LOG2E_F * f); 23500b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { 23510b57cec5SDimitry Andric EVT VT = Op.getValueType(); 23520b57cec5SDimitry Andric SDLoc SL(Op); 23530b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 23540b57cec5SDimitry Andric 23558bcb0991SDimitry Andric const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT); 23560b57cec5SDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags()); 23570b57cec5SDimitry Andric return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags()); 23580b57cec5SDimitry Andric } 23590b57cec5SDimitry Andric 23600b57cec5SDimitry Andric static bool isCtlzOpc(unsigned Opc) { 23610b57cec5SDimitry Andric return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF; 23620b57cec5SDimitry Andric } 23630b57cec5SDimitry Andric 23640b57cec5SDimitry Andric static bool isCttzOpc(unsigned Opc) { 23650b57cec5SDimitry Andric return Opc == ISD::CTTZ || Opc == ISD::CTTZ_ZERO_UNDEF; 23660b57cec5SDimitry Andric } 23670b57cec5SDimitry Andric 23680b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const { 23690b57cec5SDimitry Andric SDLoc SL(Op); 23700b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 23710b57cec5SDimitry Andric 2372349cc55cSDimitry Andric assert(isCtlzOpc(Op.getOpcode()) || isCttzOpc(Op.getOpcode())); 2373349cc55cSDimitry Andric bool Ctlz = isCtlzOpc(Op.getOpcode()); 2374349cc55cSDimitry Andric unsigned NewOpc = Ctlz ? AMDGPUISD::FFBH_U32 : AMDGPUISD::FFBL_B32; 23750b57cec5SDimitry Andric 2376349cc55cSDimitry Andric bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF || 2377349cc55cSDimitry Andric Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF; 23780b57cec5SDimitry Andric 2379349cc55cSDimitry Andric if (Src.getValueType() == MVT::i32) { 2380349cc55cSDimitry Andric // (ctlz hi:lo) -> (umin (ffbh src), 32) 2381349cc55cSDimitry Andric // (cttz hi:lo) -> (umin (ffbl src), 32) 2382349cc55cSDimitry Andric // (ctlz_zero_undef src) -> (ffbh src) 2383349cc55cSDimitry Andric // (cttz_zero_undef src) -> (ffbl src) 2384349cc55cSDimitry Andric SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src); 2385349cc55cSDimitry Andric if (!ZeroUndef) { 2386349cc55cSDimitry Andric const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32); 2387349cc55cSDimitry Andric NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const32); 2388349cc55cSDimitry Andric } 2389349cc55cSDimitry Andric return NewOpr; 23900b57cec5SDimitry Andric } 23910b57cec5SDimitry Andric 2392349cc55cSDimitry Andric SDValue Lo, Hi; 2393349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Src, DAG); 2394349cc55cSDimitry Andric 2395349cc55cSDimitry Andric SDValue OprLo = DAG.getNode(NewOpc, SL, MVT::i32, Lo); 2396349cc55cSDimitry Andric SDValue OprHi = DAG.getNode(NewOpc, SL, MVT::i32, Hi); 2397349cc55cSDimitry Andric 2398349cc55cSDimitry Andric // (ctlz hi:lo) -> (umin3 (ffbh hi), (uaddsat (ffbh lo), 32), 64) 2399349cc55cSDimitry Andric // (cttz hi:lo) -> (umin3 (uaddsat (ffbl hi), 32), (ffbl lo), 64) 2400349cc55cSDimitry Andric // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32)) 2401349cc55cSDimitry Andric // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo)) 2402349cc55cSDimitry Andric 2403349cc55cSDimitry Andric unsigned AddOpc = ZeroUndef ? ISD::ADD : ISD::UADDSAT; 2404349cc55cSDimitry Andric const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32); 2405349cc55cSDimitry Andric if (Ctlz) 2406349cc55cSDimitry Andric OprLo = DAG.getNode(AddOpc, SL, MVT::i32, OprLo, Const32); 2407349cc55cSDimitry Andric else 2408349cc55cSDimitry Andric OprHi = DAG.getNode(AddOpc, SL, MVT::i32, OprHi, Const32); 2409349cc55cSDimitry Andric 2410349cc55cSDimitry Andric SDValue NewOpr; 2411349cc55cSDimitry Andric NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, OprLo, OprHi); 24120b57cec5SDimitry Andric if (!ZeroUndef) { 2413349cc55cSDimitry Andric const SDValue Const64 = DAG.getConstant(64, SL, MVT::i32); 2414349cc55cSDimitry Andric NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const64); 24150b57cec5SDimitry Andric } 24160b57cec5SDimitry Andric 24170b57cec5SDimitry Andric return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr); 24180b57cec5SDimitry Andric } 24190b57cec5SDimitry Andric 24200b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, 24210b57cec5SDimitry Andric bool Signed) const { 2422349cc55cSDimitry Andric // The regular method converting a 64-bit integer to float roughly consists of 2423349cc55cSDimitry Andric // 2 steps: normalization and rounding. In fact, after normalization, the 2424349cc55cSDimitry Andric // conversion from a 64-bit integer to a float is essentially the same as the 2425349cc55cSDimitry Andric // one from a 32-bit integer. The only difference is that it has more 2426349cc55cSDimitry Andric // trailing bits to be rounded. To leverage the native 32-bit conversion, a 2427349cc55cSDimitry Andric // 64-bit integer could be preprocessed and fit into a 32-bit integer then 2428349cc55cSDimitry Andric // converted into the correct float number. The basic steps for the unsigned 2429349cc55cSDimitry Andric // conversion are illustrated in the following pseudo code: 2430349cc55cSDimitry Andric // 2431349cc55cSDimitry Andric // f32 uitofp(i64 u) { 2432349cc55cSDimitry Andric // i32 hi, lo = split(u); 2433349cc55cSDimitry Andric // // Only count the leading zeros in hi as we have native support of the 2434349cc55cSDimitry Andric // // conversion from i32 to f32. If hi is all 0s, the conversion is 2435349cc55cSDimitry Andric // // reduced to a 32-bit one automatically. 2436349cc55cSDimitry Andric // i32 shamt = clz(hi); // Return 32 if hi is all 0s. 2437349cc55cSDimitry Andric // u <<= shamt; 2438349cc55cSDimitry Andric // hi, lo = split(u); 2439349cc55cSDimitry Andric // hi |= (lo != 0) ? 1 : 0; // Adjust rounding bit in hi based on lo. 2440349cc55cSDimitry Andric // // convert it as a 32-bit integer and scale the result back. 2441349cc55cSDimitry Andric // return uitofp(hi) * 2^(32 - shamt); 24420b57cec5SDimitry Andric // } 2443349cc55cSDimitry Andric // 2444349cc55cSDimitry Andric // The signed one follows the same principle but uses 'ffbh_i32' to count its 2445349cc55cSDimitry Andric // sign bits instead. If 'ffbh_i32' is not available, its absolute value is 2446349cc55cSDimitry Andric // converted instead followed by negation based its sign bit. 24470b57cec5SDimitry Andric 24480b57cec5SDimitry Andric SDLoc SL(Op); 24490b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 24500b57cec5SDimitry Andric 2451349cc55cSDimitry Andric SDValue Lo, Hi; 2452349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Src, DAG); 2453349cc55cSDimitry Andric SDValue Sign; 2454349cc55cSDimitry Andric SDValue ShAmt; 2455349cc55cSDimitry Andric if (Signed && Subtarget->isGCN()) { 2456349cc55cSDimitry Andric // We also need to consider the sign bit in Lo if Hi has just sign bits, 2457349cc55cSDimitry Andric // i.e. Hi is 0 or -1. However, that only needs to take the MSB into 2458349cc55cSDimitry Andric // account. That is, the maximal shift is 2459349cc55cSDimitry Andric // - 32 if Lo and Hi have opposite signs; 2460349cc55cSDimitry Andric // - 33 if Lo and Hi have the same sign. 2461349cc55cSDimitry Andric // 2462349cc55cSDimitry Andric // Or, MaxShAmt = 33 + OppositeSign, where 2463349cc55cSDimitry Andric // 2464349cc55cSDimitry Andric // OppositeSign is defined as ((Lo ^ Hi) >> 31), which is 2465349cc55cSDimitry Andric // - -1 if Lo and Hi have opposite signs; and 2466349cc55cSDimitry Andric // - 0 otherwise. 2467349cc55cSDimitry Andric // 2468349cc55cSDimitry Andric // All in all, ShAmt is calculated as 2469349cc55cSDimitry Andric // 2470349cc55cSDimitry Andric // umin(sffbh(Hi), 33 + (Lo^Hi)>>31) - 1. 2471349cc55cSDimitry Andric // 2472349cc55cSDimitry Andric // or 2473349cc55cSDimitry Andric // 2474349cc55cSDimitry Andric // umin(sffbh(Hi) - 1, 32 + (Lo^Hi)>>31). 2475349cc55cSDimitry Andric // 2476349cc55cSDimitry Andric // to reduce the critical path. 2477349cc55cSDimitry Andric SDValue OppositeSign = DAG.getNode( 2478349cc55cSDimitry Andric ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi), 2479349cc55cSDimitry Andric DAG.getConstant(31, SL, MVT::i32)); 2480349cc55cSDimitry Andric SDValue MaxShAmt = 2481349cc55cSDimitry Andric DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32), 2482349cc55cSDimitry Andric OppositeSign); 2483349cc55cSDimitry Andric // Count the leading sign bits. 2484349cc55cSDimitry Andric ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi); 2485349cc55cSDimitry Andric // Different from unsigned conversion, the shift should be one bit less to 2486349cc55cSDimitry Andric // preserve the sign bit. 2487349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt, 2488349cc55cSDimitry Andric DAG.getConstant(1, SL, MVT::i32)); 2489349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt); 2490349cc55cSDimitry Andric } else { 24910b57cec5SDimitry Andric if (Signed) { 2492349cc55cSDimitry Andric // Without 'ffbh_i32', only leading zeros could be counted. Take the 2493349cc55cSDimitry Andric // absolute value first. 2494349cc55cSDimitry Andric Sign = DAG.getNode(ISD::SRA, SL, MVT::i64, Src, 2495349cc55cSDimitry Andric DAG.getConstant(63, SL, MVT::i64)); 2496349cc55cSDimitry Andric SDValue Abs = 2497349cc55cSDimitry Andric DAG.getNode(ISD::XOR, SL, MVT::i64, 2498349cc55cSDimitry Andric DAG.getNode(ISD::ADD, SL, MVT::i64, Src, Sign), Sign); 2499349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Abs, DAG); 25000b57cec5SDimitry Andric } 2501349cc55cSDimitry Andric // Count the leading zeros. 2502349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::CTLZ, SL, MVT::i32, Hi); 2503349cc55cSDimitry Andric // The shift amount for signed integers is [0, 32]. 2504349cc55cSDimitry Andric } 2505349cc55cSDimitry Andric // Normalize the given 64-bit integer. 2506349cc55cSDimitry Andric SDValue Norm = DAG.getNode(ISD::SHL, SL, MVT::i64, Src, ShAmt); 2507349cc55cSDimitry Andric // Split it again. 2508349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Norm, DAG); 2509349cc55cSDimitry Andric // Calculate the adjust bit for rounding. 2510349cc55cSDimitry Andric // (lo != 0) ? 1 : 0 => (lo >= 1) ? 1 : 0 => umin(1, lo) 2511349cc55cSDimitry Andric SDValue Adjust = DAG.getNode(ISD::UMIN, SL, MVT::i32, 2512349cc55cSDimitry Andric DAG.getConstant(1, SL, MVT::i32), Lo); 2513349cc55cSDimitry Andric // Get the 32-bit normalized integer. 2514349cc55cSDimitry Andric Norm = DAG.getNode(ISD::OR, SL, MVT::i32, Hi, Adjust); 2515349cc55cSDimitry Andric // Convert the normalized 32-bit integer into f32. 2516349cc55cSDimitry Andric unsigned Opc = 2517349cc55cSDimitry Andric (Signed && Subtarget->isGCN()) ? ISD::SINT_TO_FP : ISD::UINT_TO_FP; 2518349cc55cSDimitry Andric SDValue FVal = DAG.getNode(Opc, SL, MVT::f32, Norm); 25190b57cec5SDimitry Andric 2520349cc55cSDimitry Andric // Finally, need to scale back the converted floating number as the original 2521349cc55cSDimitry Andric // 64-bit integer is converted as a 32-bit one. 2522349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32), 2523349cc55cSDimitry Andric ShAmt); 2524349cc55cSDimitry Andric // On GCN, use LDEXP directly. 2525349cc55cSDimitry Andric if (Subtarget->isGCN()) 2526349cc55cSDimitry Andric return DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f32, FVal, ShAmt); 25270b57cec5SDimitry Andric 2528349cc55cSDimitry Andric // Otherwise, align 'ShAmt' to the exponent part and add it into the exponent 2529349cc55cSDimitry Andric // part directly to emulate the multiplication of 2^ShAmt. That 8-bit 2530349cc55cSDimitry Andric // exponent is enough to avoid overflowing into the sign bit. 2531349cc55cSDimitry Andric SDValue Exp = DAG.getNode(ISD::SHL, SL, MVT::i32, ShAmt, 2532349cc55cSDimitry Andric DAG.getConstant(23, SL, MVT::i32)); 2533349cc55cSDimitry Andric SDValue IVal = 2534349cc55cSDimitry Andric DAG.getNode(ISD::ADD, SL, MVT::i32, 2535349cc55cSDimitry Andric DAG.getNode(ISD::BITCAST, SL, MVT::i32, FVal), Exp); 2536349cc55cSDimitry Andric if (Signed) { 2537349cc55cSDimitry Andric // Set the sign bit. 2538349cc55cSDimitry Andric Sign = DAG.getNode(ISD::SHL, SL, MVT::i32, 2539349cc55cSDimitry Andric DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Sign), 2540349cc55cSDimitry Andric DAG.getConstant(31, SL, MVT::i32)); 2541349cc55cSDimitry Andric IVal = DAG.getNode(ISD::OR, SL, MVT::i32, IVal, Sign); 2542349cc55cSDimitry Andric } 2543349cc55cSDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::f32, IVal); 25440b57cec5SDimitry Andric } 25450b57cec5SDimitry Andric 25460b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, 25470b57cec5SDimitry Andric bool Signed) const { 25480b57cec5SDimitry Andric SDLoc SL(Op); 25490b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 25500b57cec5SDimitry Andric 2551349cc55cSDimitry Andric SDValue Lo, Hi; 2552349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Src, DAG); 25530b57cec5SDimitry Andric 25540b57cec5SDimitry Andric SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP, 25550b57cec5SDimitry Andric SL, MVT::f64, Hi); 25560b57cec5SDimitry Andric 25570b57cec5SDimitry Andric SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo); 25580b57cec5SDimitry Andric 25590b57cec5SDimitry Andric SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi, 25600b57cec5SDimitry Andric DAG.getConstant(32, SL, MVT::i32)); 25610b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 25620b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo); 25630b57cec5SDimitry Andric } 25640b57cec5SDimitry Andric 25650b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, 25660b57cec5SDimitry Andric SelectionDAG &DAG) const { 25670b57cec5SDimitry Andric // TODO: Factor out code common with LowerSINT_TO_FP. 25680b57cec5SDimitry Andric EVT DestVT = Op.getValueType(); 2569480093f4SDimitry Andric SDValue Src = Op.getOperand(0); 2570480093f4SDimitry Andric EVT SrcVT = Src.getValueType(); 2571480093f4SDimitry Andric 2572480093f4SDimitry Andric if (SrcVT == MVT::i16) { 2573480093f4SDimitry Andric if (DestVT == MVT::f16) 2574480093f4SDimitry Andric return Op; 2575480093f4SDimitry Andric SDLoc DL(Op); 2576480093f4SDimitry Andric 2577480093f4SDimitry Andric // Promote src to i32 2578480093f4SDimitry Andric SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src); 2579480093f4SDimitry Andric return DAG.getNode(ISD::UINT_TO_FP, DL, DestVT, Ext); 2580480093f4SDimitry Andric } 2581480093f4SDimitry Andric 2582480093f4SDimitry Andric assert(SrcVT == MVT::i64 && "operation should be legal"); 2583480093f4SDimitry Andric 25840b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && DestVT == MVT::f16) { 25850b57cec5SDimitry Andric SDLoc DL(Op); 25860b57cec5SDimitry Andric 25870b57cec5SDimitry Andric SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src); 25880b57cec5SDimitry Andric SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op)); 25890b57cec5SDimitry Andric SDValue FPRound = 25900b57cec5SDimitry Andric DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag); 25910b57cec5SDimitry Andric 25920b57cec5SDimitry Andric return FPRound; 25930b57cec5SDimitry Andric } 25940b57cec5SDimitry Andric 25950b57cec5SDimitry Andric if (DestVT == MVT::f32) 25960b57cec5SDimitry Andric return LowerINT_TO_FP32(Op, DAG, false); 25970b57cec5SDimitry Andric 25980b57cec5SDimitry Andric assert(DestVT == MVT::f64); 25990b57cec5SDimitry Andric return LowerINT_TO_FP64(Op, DAG, false); 26000b57cec5SDimitry Andric } 26010b57cec5SDimitry Andric 26020b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op, 26030b57cec5SDimitry Andric SelectionDAG &DAG) const { 2604480093f4SDimitry Andric EVT DestVT = Op.getValueType(); 2605480093f4SDimitry Andric 2606480093f4SDimitry Andric SDValue Src = Op.getOperand(0); 2607480093f4SDimitry Andric EVT SrcVT = Src.getValueType(); 2608480093f4SDimitry Andric 2609480093f4SDimitry Andric if (SrcVT == MVT::i16) { 2610480093f4SDimitry Andric if (DestVT == MVT::f16) 2611480093f4SDimitry Andric return Op; 2612480093f4SDimitry Andric 2613480093f4SDimitry Andric SDLoc DL(Op); 2614480093f4SDimitry Andric // Promote src to i32 2615480093f4SDimitry Andric SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, Src); 2616480093f4SDimitry Andric return DAG.getNode(ISD::SINT_TO_FP, DL, DestVT, Ext); 2617480093f4SDimitry Andric } 2618480093f4SDimitry Andric 2619480093f4SDimitry Andric assert(SrcVT == MVT::i64 && "operation should be legal"); 26200b57cec5SDimitry Andric 26210b57cec5SDimitry Andric // TODO: Factor out code common with LowerUINT_TO_FP. 26220b57cec5SDimitry Andric 26230b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && DestVT == MVT::f16) { 26240b57cec5SDimitry Andric SDLoc DL(Op); 26250b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 26260b57cec5SDimitry Andric 26270b57cec5SDimitry Andric SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src); 26280b57cec5SDimitry Andric SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op)); 26290b57cec5SDimitry Andric SDValue FPRound = 26300b57cec5SDimitry Andric DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag); 26310b57cec5SDimitry Andric 26320b57cec5SDimitry Andric return FPRound; 26330b57cec5SDimitry Andric } 26340b57cec5SDimitry Andric 26350b57cec5SDimitry Andric if (DestVT == MVT::f32) 26360b57cec5SDimitry Andric return LowerINT_TO_FP32(Op, DAG, true); 26370b57cec5SDimitry Andric 26380b57cec5SDimitry Andric assert(DestVT == MVT::f64); 26390b57cec5SDimitry Andric return LowerINT_TO_FP64(Op, DAG, true); 26400b57cec5SDimitry Andric } 26410b57cec5SDimitry Andric 2642fe6060f1SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, 26430b57cec5SDimitry Andric bool Signed) const { 26440b57cec5SDimitry Andric SDLoc SL(Op); 26450b57cec5SDimitry Andric 26460b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2647fe6060f1SDimitry Andric EVT SrcVT = Src.getValueType(); 26480b57cec5SDimitry Andric 2649fe6060f1SDimitry Andric assert(SrcVT == MVT::f32 || SrcVT == MVT::f64); 26500b57cec5SDimitry Andric 2651fe6060f1SDimitry Andric // The basic idea of converting a floating point number into a pair of 32-bit 2652fe6060f1SDimitry Andric // integers is illustrated as follows: 2653fe6060f1SDimitry Andric // 2654fe6060f1SDimitry Andric // tf := trunc(val); 2655fe6060f1SDimitry Andric // hif := floor(tf * 2^-32); 2656fe6060f1SDimitry Andric // lof := tf - hif * 2^32; // lof is always positive due to floor. 2657fe6060f1SDimitry Andric // hi := fptoi(hif); 2658fe6060f1SDimitry Andric // lo := fptoi(lof); 2659fe6060f1SDimitry Andric // 2660fe6060f1SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, SrcVT, Src); 2661fe6060f1SDimitry Andric SDValue Sign; 2662fe6060f1SDimitry Andric if (Signed && SrcVT == MVT::f32) { 2663fe6060f1SDimitry Andric // However, a 32-bit floating point number has only 23 bits mantissa and 2664fe6060f1SDimitry Andric // it's not enough to hold all the significant bits of `lof` if val is 2665fe6060f1SDimitry Andric // negative. To avoid the loss of precision, We need to take the absolute 2666fe6060f1SDimitry Andric // value after truncating and flip the result back based on the original 2667fe6060f1SDimitry Andric // signedness. 2668fe6060f1SDimitry Andric Sign = DAG.getNode(ISD::SRA, SL, MVT::i32, 2669fe6060f1SDimitry Andric DAG.getNode(ISD::BITCAST, SL, MVT::i32, Trunc), 2670fe6060f1SDimitry Andric DAG.getConstant(31, SL, MVT::i32)); 2671fe6060f1SDimitry Andric Trunc = DAG.getNode(ISD::FABS, SL, SrcVT, Trunc); 2672fe6060f1SDimitry Andric } 2673fe6060f1SDimitry Andric 2674fe6060f1SDimitry Andric SDValue K0, K1; 2675fe6060f1SDimitry Andric if (SrcVT == MVT::f64) { 2676fe6060f1SDimitry Andric K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(/*2^-32*/ 0x3df0000000000000)), 2677fe6060f1SDimitry Andric SL, SrcVT); 2678fe6060f1SDimitry Andric K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(/*-2^32*/ 0xc1f0000000000000)), 2679fe6060f1SDimitry Andric SL, SrcVT); 2680fe6060f1SDimitry Andric } else { 2681fe6060f1SDimitry Andric K0 = DAG.getConstantFP(BitsToFloat(UINT32_C(/*2^-32*/ 0x2f800000)), SL, 2682fe6060f1SDimitry Andric SrcVT); 2683fe6060f1SDimitry Andric K1 = DAG.getConstantFP(BitsToFloat(UINT32_C(/*-2^32*/ 0xcf800000)), SL, 2684fe6060f1SDimitry Andric SrcVT); 2685fe6060f1SDimitry Andric } 26860b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 2687fe6060f1SDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, SrcVT, Trunc, K0); 26880b57cec5SDimitry Andric 2689fe6060f1SDimitry Andric SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, SrcVT, Mul); 26900b57cec5SDimitry Andric 2691fe6060f1SDimitry Andric SDValue Fma = DAG.getNode(ISD::FMA, SL, SrcVT, FloorMul, K1, Trunc); 26920b57cec5SDimitry Andric 2693fe6060f1SDimitry Andric SDValue Hi = DAG.getNode((Signed && SrcVT == MVT::f64) ? ISD::FP_TO_SINT 2694fe6060f1SDimitry Andric : ISD::FP_TO_UINT, 2695fe6060f1SDimitry Andric SL, MVT::i32, FloorMul); 26960b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma); 26970b57cec5SDimitry Andric 2698fe6060f1SDimitry Andric SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i64, 2699fe6060f1SDimitry Andric DAG.getBuildVector(MVT::v2i32, SL, {Lo, Hi})); 27000b57cec5SDimitry Andric 2701fe6060f1SDimitry Andric if (Signed && SrcVT == MVT::f32) { 2702fe6060f1SDimitry Andric assert(Sign); 2703fe6060f1SDimitry Andric // Flip the result based on the signedness, which is either all 0s or 1s. 2704fe6060f1SDimitry Andric Sign = DAG.getNode(ISD::BITCAST, SL, MVT::i64, 2705fe6060f1SDimitry Andric DAG.getBuildVector(MVT::v2i32, SL, {Sign, Sign})); 2706fe6060f1SDimitry Andric // r := xor(r, sign) - sign; 2707fe6060f1SDimitry Andric Result = 2708fe6060f1SDimitry Andric DAG.getNode(ISD::SUB, SL, MVT::i64, 2709fe6060f1SDimitry Andric DAG.getNode(ISD::XOR, SL, MVT::i64, Result, Sign), Sign); 2710fe6060f1SDimitry Andric } 2711fe6060f1SDimitry Andric 2712fe6060f1SDimitry Andric return Result; 27130b57cec5SDimitry Andric } 27140b57cec5SDimitry Andric 27150b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const { 27160b57cec5SDimitry Andric SDLoc DL(Op); 27170b57cec5SDimitry Andric SDValue N0 = Op.getOperand(0); 27180b57cec5SDimitry Andric 27190b57cec5SDimitry Andric // Convert to target node to get known bits 27200b57cec5SDimitry Andric if (N0.getValueType() == MVT::f32) 27210b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0); 27220b57cec5SDimitry Andric 27230b57cec5SDimitry Andric if (getTargetMachine().Options.UnsafeFPMath) { 27240b57cec5SDimitry Andric // There is a generic expand for FP_TO_FP16 with unsafe fast math. 27250b57cec5SDimitry Andric return SDValue(); 27260b57cec5SDimitry Andric } 27270b57cec5SDimitry Andric 27280b57cec5SDimitry Andric assert(N0.getSimpleValueType() == MVT::f64); 27290b57cec5SDimitry Andric 27300b57cec5SDimitry Andric // f64 -> f16 conversion using round-to-nearest-even rounding mode. 27310b57cec5SDimitry Andric const unsigned ExpMask = 0x7ff; 27320b57cec5SDimitry Andric const unsigned ExpBiasf64 = 1023; 27330b57cec5SDimitry Andric const unsigned ExpBiasf16 = 15; 27340b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, MVT::i32); 27350b57cec5SDimitry Andric SDValue One = DAG.getConstant(1, DL, MVT::i32); 27360b57cec5SDimitry Andric SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0); 27370b57cec5SDimitry Andric SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U, 27380b57cec5SDimitry Andric DAG.getConstant(32, DL, MVT::i64)); 27390b57cec5SDimitry Andric UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32); 27400b57cec5SDimitry Andric U = DAG.getZExtOrTrunc(U, DL, MVT::i32); 27410b57cec5SDimitry Andric SDValue E = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, 27420b57cec5SDimitry Andric DAG.getConstant(20, DL, MVT::i64)); 27430b57cec5SDimitry Andric E = DAG.getNode(ISD::AND, DL, MVT::i32, E, 27440b57cec5SDimitry Andric DAG.getConstant(ExpMask, DL, MVT::i32)); 27450b57cec5SDimitry Andric // Subtract the fp64 exponent bias (1023) to get the real exponent and 27460b57cec5SDimitry Andric // add the f16 bias (15) to get the biased exponent for the f16 format. 27470b57cec5SDimitry Andric E = DAG.getNode(ISD::ADD, DL, MVT::i32, E, 27480b57cec5SDimitry Andric DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32)); 27490b57cec5SDimitry Andric 27500b57cec5SDimitry Andric SDValue M = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, 27510b57cec5SDimitry Andric DAG.getConstant(8, DL, MVT::i32)); 27520b57cec5SDimitry Andric M = DAG.getNode(ISD::AND, DL, MVT::i32, M, 27530b57cec5SDimitry Andric DAG.getConstant(0xffe, DL, MVT::i32)); 27540b57cec5SDimitry Andric 27550b57cec5SDimitry Andric SDValue MaskedSig = DAG.getNode(ISD::AND, DL, MVT::i32, UH, 27560b57cec5SDimitry Andric DAG.getConstant(0x1ff, DL, MVT::i32)); 27570b57cec5SDimitry Andric MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U); 27580b57cec5SDimitry Andric 27590b57cec5SDimitry Andric SDValue Lo40Set = DAG.getSelectCC(DL, MaskedSig, Zero, Zero, One, ISD::SETEQ); 27600b57cec5SDimitry Andric M = DAG.getNode(ISD::OR, DL, MVT::i32, M, Lo40Set); 27610b57cec5SDimitry Andric 27620b57cec5SDimitry Andric // (M != 0 ? 0x0200 : 0) | 0x7c00; 27630b57cec5SDimitry Andric SDValue I = DAG.getNode(ISD::OR, DL, MVT::i32, 27640b57cec5SDimitry Andric DAG.getSelectCC(DL, M, Zero, DAG.getConstant(0x0200, DL, MVT::i32), 27650b57cec5SDimitry Andric Zero, ISD::SETNE), DAG.getConstant(0x7c00, DL, MVT::i32)); 27660b57cec5SDimitry Andric 27670b57cec5SDimitry Andric // N = M | (E << 12); 27680b57cec5SDimitry Andric SDValue N = DAG.getNode(ISD::OR, DL, MVT::i32, M, 27690b57cec5SDimitry Andric DAG.getNode(ISD::SHL, DL, MVT::i32, E, 27700b57cec5SDimitry Andric DAG.getConstant(12, DL, MVT::i32))); 27710b57cec5SDimitry Andric 27720b57cec5SDimitry Andric // B = clamp(1-E, 0, 13); 27730b57cec5SDimitry Andric SDValue OneSubExp = DAG.getNode(ISD::SUB, DL, MVT::i32, 27740b57cec5SDimitry Andric One, E); 27750b57cec5SDimitry Andric SDValue B = DAG.getNode(ISD::SMAX, DL, MVT::i32, OneSubExp, Zero); 27760b57cec5SDimitry Andric B = DAG.getNode(ISD::SMIN, DL, MVT::i32, B, 27770b57cec5SDimitry Andric DAG.getConstant(13, DL, MVT::i32)); 27780b57cec5SDimitry Andric 27790b57cec5SDimitry Andric SDValue SigSetHigh = DAG.getNode(ISD::OR, DL, MVT::i32, M, 27800b57cec5SDimitry Andric DAG.getConstant(0x1000, DL, MVT::i32)); 27810b57cec5SDimitry Andric 27820b57cec5SDimitry Andric SDValue D = DAG.getNode(ISD::SRL, DL, MVT::i32, SigSetHigh, B); 27830b57cec5SDimitry Andric SDValue D0 = DAG.getNode(ISD::SHL, DL, MVT::i32, D, B); 27840b57cec5SDimitry Andric SDValue D1 = DAG.getSelectCC(DL, D0, SigSetHigh, One, Zero, ISD::SETNE); 27850b57cec5SDimitry Andric D = DAG.getNode(ISD::OR, DL, MVT::i32, D, D1); 27860b57cec5SDimitry Andric 27870b57cec5SDimitry Andric SDValue V = DAG.getSelectCC(DL, E, One, D, N, ISD::SETLT); 27880b57cec5SDimitry Andric SDValue VLow3 = DAG.getNode(ISD::AND, DL, MVT::i32, V, 27890b57cec5SDimitry Andric DAG.getConstant(0x7, DL, MVT::i32)); 27900b57cec5SDimitry Andric V = DAG.getNode(ISD::SRL, DL, MVT::i32, V, 27910b57cec5SDimitry Andric DAG.getConstant(2, DL, MVT::i32)); 27920b57cec5SDimitry Andric SDValue V0 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(3, DL, MVT::i32), 27930b57cec5SDimitry Andric One, Zero, ISD::SETEQ); 27940b57cec5SDimitry Andric SDValue V1 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(5, DL, MVT::i32), 27950b57cec5SDimitry Andric One, Zero, ISD::SETGT); 27960b57cec5SDimitry Andric V1 = DAG.getNode(ISD::OR, DL, MVT::i32, V0, V1); 27970b57cec5SDimitry Andric V = DAG.getNode(ISD::ADD, DL, MVT::i32, V, V1); 27980b57cec5SDimitry Andric 27990b57cec5SDimitry Andric V = DAG.getSelectCC(DL, E, DAG.getConstant(30, DL, MVT::i32), 28000b57cec5SDimitry Andric DAG.getConstant(0x7c00, DL, MVT::i32), V, ISD::SETGT); 28010b57cec5SDimitry Andric V = DAG.getSelectCC(DL, E, DAG.getConstant(1039, DL, MVT::i32), 28020b57cec5SDimitry Andric I, V, ISD::SETEQ); 28030b57cec5SDimitry Andric 28040b57cec5SDimitry Andric // Extract the sign bit. 28050b57cec5SDimitry Andric SDValue Sign = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, 28060b57cec5SDimitry Andric DAG.getConstant(16, DL, MVT::i32)); 28070b57cec5SDimitry Andric Sign = DAG.getNode(ISD::AND, DL, MVT::i32, Sign, 28080b57cec5SDimitry Andric DAG.getConstant(0x8000, DL, MVT::i32)); 28090b57cec5SDimitry Andric 28100b57cec5SDimitry Andric V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V); 28110b57cec5SDimitry Andric return DAG.getZExtOrTrunc(V, DL, Op.getValueType()); 28120b57cec5SDimitry Andric } 28130b57cec5SDimitry Andric 2814fe6060f1SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_INT(SDValue Op, 28150b57cec5SDimitry Andric SelectionDAG &DAG) const { 28160b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 2817fe6060f1SDimitry Andric unsigned OpOpcode = Op.getOpcode(); 28180b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 2819fe6060f1SDimitry Andric EVT DestVT = Op.getValueType(); 2820fe6060f1SDimitry Andric 2821fe6060f1SDimitry Andric // Will be selected natively 2822fe6060f1SDimitry Andric if (SrcVT == MVT::f16 && DestVT == MVT::i16) 2823fe6060f1SDimitry Andric return Op; 2824fe6060f1SDimitry Andric 2825fe6060f1SDimitry Andric // Promote i16 to i32 2826fe6060f1SDimitry Andric if (DestVT == MVT::i16 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) { 2827fe6060f1SDimitry Andric SDLoc DL(Op); 2828fe6060f1SDimitry Andric 2829fe6060f1SDimitry Andric SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src); 2830fe6060f1SDimitry Andric return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToInt32); 2831fe6060f1SDimitry Andric } 2832fe6060f1SDimitry Andric 2833e8d8bef9SDimitry Andric if (SrcVT == MVT::f16 || 2834e8d8bef9SDimitry Andric (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) { 28350b57cec5SDimitry Andric SDLoc DL(Op); 28360b57cec5SDimitry Andric 2837fe6060f1SDimitry Andric SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src); 2838fe6060f1SDimitry Andric unsigned Ext = 2839fe6060f1SDimitry Andric OpOpcode == ISD::FP_TO_SINT ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 2840fe6060f1SDimitry Andric return DAG.getNode(Ext, DL, MVT::i64, FpToInt32); 28410b57cec5SDimitry Andric } 28420b57cec5SDimitry Andric 2843fe6060f1SDimitry Andric if (DestVT == MVT::i64 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) 2844fe6060f1SDimitry Andric return LowerFP_TO_INT64(Op, DAG, OpOpcode == ISD::FP_TO_SINT); 28450b57cec5SDimitry Andric 28460b57cec5SDimitry Andric return SDValue(); 28470b57cec5SDimitry Andric } 28480b57cec5SDimitry Andric 28490b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, 28500b57cec5SDimitry Andric SelectionDAG &DAG) const { 28510b57cec5SDimitry Andric EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 28520b57cec5SDimitry Andric MVT VT = Op.getSimpleValueType(); 28530b57cec5SDimitry Andric MVT ScalarVT = VT.getScalarType(); 28540b57cec5SDimitry Andric 28550b57cec5SDimitry Andric assert(VT.isVector()); 28560b57cec5SDimitry Andric 28570b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 28580b57cec5SDimitry Andric SDLoc DL(Op); 28590b57cec5SDimitry Andric 28600b57cec5SDimitry Andric // TODO: Don't scalarize on Evergreen? 28610b57cec5SDimitry Andric unsigned NElts = VT.getVectorNumElements(); 28620b57cec5SDimitry Andric SmallVector<SDValue, 8> Args; 28630b57cec5SDimitry Andric DAG.ExtractVectorElements(Src, Args, 0, NElts); 28640b57cec5SDimitry Andric 28650b57cec5SDimitry Andric SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); 28660b57cec5SDimitry Andric for (unsigned I = 0; I < NElts; ++I) 28670b57cec5SDimitry Andric Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); 28680b57cec5SDimitry Andric 28690b57cec5SDimitry Andric return DAG.getBuildVector(VT, DL, Args); 28700b57cec5SDimitry Andric } 28710b57cec5SDimitry Andric 28720b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 28730b57cec5SDimitry Andric // Custom DAG optimizations 28740b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 28750b57cec5SDimitry Andric 28760b57cec5SDimitry Andric static bool isU24(SDValue Op, SelectionDAG &DAG) { 28770b57cec5SDimitry Andric return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24; 28780b57cec5SDimitry Andric } 28790b57cec5SDimitry Andric 28800b57cec5SDimitry Andric static bool isI24(SDValue Op, SelectionDAG &DAG) { 28810b57cec5SDimitry Andric EVT VT = Op.getValueType(); 28820b57cec5SDimitry Andric return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated 28830b57cec5SDimitry Andric // as unsigned 24-bit values. 2884349cc55cSDimitry Andric AMDGPUTargetLowering::numBitsSigned(Op, DAG) <= 24; 28850b57cec5SDimitry Andric } 28860b57cec5SDimitry Andric 2887fe6060f1SDimitry Andric static SDValue simplifyMul24(SDNode *Node24, 28880b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 28890b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 28905ffd83dbSDimitry Andric const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 28918bcb0991SDimitry Andric bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN; 28928bcb0991SDimitry Andric 28938bcb0991SDimitry Andric SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0); 28948bcb0991SDimitry Andric SDValue RHS = IsIntrin ? Node24->getOperand(2) : Node24->getOperand(1); 28958bcb0991SDimitry Andric unsigned NewOpcode = Node24->getOpcode(); 28968bcb0991SDimitry Andric if (IsIntrin) { 28978bcb0991SDimitry Andric unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue(); 2898349cc55cSDimitry Andric switch (IID) { 2899349cc55cSDimitry Andric case Intrinsic::amdgcn_mul_i24: 2900349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MUL_I24; 2901349cc55cSDimitry Andric break; 2902349cc55cSDimitry Andric case Intrinsic::amdgcn_mul_u24: 2903349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MUL_U24; 2904349cc55cSDimitry Andric break; 2905349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_i24: 2906349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MULHI_I24; 2907349cc55cSDimitry Andric break; 2908349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_u24: 2909349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MULHI_U24; 2910349cc55cSDimitry Andric break; 2911349cc55cSDimitry Andric default: 2912349cc55cSDimitry Andric llvm_unreachable("Expected 24-bit mul intrinsic"); 2913349cc55cSDimitry Andric } 29148bcb0991SDimitry Andric } 29150b57cec5SDimitry Andric 29160b57cec5SDimitry Andric APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24); 29170b57cec5SDimitry Andric 29185ffd83dbSDimitry Andric // First try to simplify using SimplifyMultipleUseDemandedBits which allows 29195ffd83dbSDimitry Andric // the operands to have other uses, but will only perform simplifications that 29205ffd83dbSDimitry Andric // involve bypassing some nodes for this user. 29215ffd83dbSDimitry Andric SDValue DemandedLHS = TLI.SimplifyMultipleUseDemandedBits(LHS, Demanded, DAG); 29225ffd83dbSDimitry Andric SDValue DemandedRHS = TLI.SimplifyMultipleUseDemandedBits(RHS, Demanded, DAG); 29230b57cec5SDimitry Andric if (DemandedLHS || DemandedRHS) 29248bcb0991SDimitry Andric return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(), 29250b57cec5SDimitry Andric DemandedLHS ? DemandedLHS : LHS, 29260b57cec5SDimitry Andric DemandedRHS ? DemandedRHS : RHS); 29270b57cec5SDimitry Andric 29280b57cec5SDimitry Andric // Now try SimplifyDemandedBits which can simplify the nodes used by our 29290b57cec5SDimitry Andric // operands if this node is the only user. 29300b57cec5SDimitry Andric if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI)) 29310b57cec5SDimitry Andric return SDValue(Node24, 0); 29320b57cec5SDimitry Andric if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI)) 29330b57cec5SDimitry Andric return SDValue(Node24, 0); 29340b57cec5SDimitry Andric 29350b57cec5SDimitry Andric return SDValue(); 29360b57cec5SDimitry Andric } 29370b57cec5SDimitry Andric 29380b57cec5SDimitry Andric template <typename IntTy> 29390b57cec5SDimitry Andric static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, 29400b57cec5SDimitry Andric uint32_t Width, const SDLoc &DL) { 29410b57cec5SDimitry Andric if (Width + Offset < 32) { 29420b57cec5SDimitry Andric uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width); 29430b57cec5SDimitry Andric IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width); 29440b57cec5SDimitry Andric return DAG.getConstant(Result, DL, MVT::i32); 29450b57cec5SDimitry Andric } 29460b57cec5SDimitry Andric 29470b57cec5SDimitry Andric return DAG.getConstant(Src0 >> Offset, DL, MVT::i32); 29480b57cec5SDimitry Andric } 29490b57cec5SDimitry Andric 29500b57cec5SDimitry Andric static bool hasVolatileUser(SDNode *Val) { 29510b57cec5SDimitry Andric for (SDNode *U : Val->uses()) { 29520b57cec5SDimitry Andric if (MemSDNode *M = dyn_cast<MemSDNode>(U)) { 29530b57cec5SDimitry Andric if (M->isVolatile()) 29540b57cec5SDimitry Andric return true; 29550b57cec5SDimitry Andric } 29560b57cec5SDimitry Andric } 29570b57cec5SDimitry Andric 29580b57cec5SDimitry Andric return false; 29590b57cec5SDimitry Andric } 29600b57cec5SDimitry Andric 29610b57cec5SDimitry Andric bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const { 29620b57cec5SDimitry Andric // i32 vectors are the canonical memory type. 29630b57cec5SDimitry Andric if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT)) 29640b57cec5SDimitry Andric return false; 29650b57cec5SDimitry Andric 29660b57cec5SDimitry Andric if (!VT.isByteSized()) 29670b57cec5SDimitry Andric return false; 29680b57cec5SDimitry Andric 29690b57cec5SDimitry Andric unsigned Size = VT.getStoreSize(); 29700b57cec5SDimitry Andric 29710b57cec5SDimitry Andric if ((Size == 1 || Size == 2 || Size == 4) && !VT.isVector()) 29720b57cec5SDimitry Andric return false; 29730b57cec5SDimitry Andric 29740b57cec5SDimitry Andric if (Size == 3 || (Size > 4 && (Size % 4 != 0))) 29750b57cec5SDimitry Andric return false; 29760b57cec5SDimitry Andric 29770b57cec5SDimitry Andric return true; 29780b57cec5SDimitry Andric } 29790b57cec5SDimitry Andric 29800b57cec5SDimitry Andric // Replace load of an illegal type with a store of a bitcast to a friendlier 29810b57cec5SDimitry Andric // type. 29820b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N, 29830b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 29840b57cec5SDimitry Andric if (!DCI.isBeforeLegalize()) 29850b57cec5SDimitry Andric return SDValue(); 29860b57cec5SDimitry Andric 29870b57cec5SDimitry Andric LoadSDNode *LN = cast<LoadSDNode>(N); 29885ffd83dbSDimitry Andric if (!LN->isSimple() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN)) 29890b57cec5SDimitry Andric return SDValue(); 29900b57cec5SDimitry Andric 29910b57cec5SDimitry Andric SDLoc SL(N); 29920b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 29930b57cec5SDimitry Andric EVT VT = LN->getMemoryVT(); 29940b57cec5SDimitry Andric 29950b57cec5SDimitry Andric unsigned Size = VT.getStoreSize(); 29965ffd83dbSDimitry Andric Align Alignment = LN->getAlign(); 29975ffd83dbSDimitry Andric if (Alignment < Size && isTypeLegal(VT)) { 29980b57cec5SDimitry Andric bool IsFast; 29990b57cec5SDimitry Andric unsigned AS = LN->getAddressSpace(); 30000b57cec5SDimitry Andric 30010b57cec5SDimitry Andric // Expand unaligned loads earlier than legalization. Due to visitation order 30020b57cec5SDimitry Andric // problems during legalization, the emitted instructions to pack and unpack 30030b57cec5SDimitry Andric // the bytes again are not eliminated in the case of an unaligned copy. 3004fe6060f1SDimitry Andric if (!allowsMisalignedMemoryAccesses( 3005fe6060f1SDimitry Andric VT, AS, Alignment, LN->getMemOperand()->getFlags(), &IsFast)) { 30060b57cec5SDimitry Andric SDValue Ops[2]; 3007480093f4SDimitry Andric 3008480093f4SDimitry Andric if (VT.isVector()) 3009480093f4SDimitry Andric std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LN, DAG); 3010480093f4SDimitry Andric else 30110b57cec5SDimitry Andric std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG); 3012480093f4SDimitry Andric 30130b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SDLoc(N)); 30140b57cec5SDimitry Andric } 30150b57cec5SDimitry Andric 30160b57cec5SDimitry Andric if (!IsFast) 30170b57cec5SDimitry Andric return SDValue(); 30180b57cec5SDimitry Andric } 30190b57cec5SDimitry Andric 30200b57cec5SDimitry Andric if (!shouldCombineMemoryType(VT)) 30210b57cec5SDimitry Andric return SDValue(); 30220b57cec5SDimitry Andric 30230b57cec5SDimitry Andric EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT); 30240b57cec5SDimitry Andric 30250b57cec5SDimitry Andric SDValue NewLoad 30260b57cec5SDimitry Andric = DAG.getLoad(NewVT, SL, LN->getChain(), 30270b57cec5SDimitry Andric LN->getBasePtr(), LN->getMemOperand()); 30280b57cec5SDimitry Andric 30290b57cec5SDimitry Andric SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad); 30300b57cec5SDimitry Andric DCI.CombineTo(N, BC, NewLoad.getValue(1)); 30310b57cec5SDimitry Andric return SDValue(N, 0); 30320b57cec5SDimitry Andric } 30330b57cec5SDimitry Andric 30340b57cec5SDimitry Andric // Replace store of an illegal type with a store of a bitcast to a friendlier 30350b57cec5SDimitry Andric // type. 30360b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, 30370b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 30380b57cec5SDimitry Andric if (!DCI.isBeforeLegalize()) 30390b57cec5SDimitry Andric return SDValue(); 30400b57cec5SDimitry Andric 30410b57cec5SDimitry Andric StoreSDNode *SN = cast<StoreSDNode>(N); 30425ffd83dbSDimitry Andric if (!SN->isSimple() || !ISD::isNormalStore(SN)) 30430b57cec5SDimitry Andric return SDValue(); 30440b57cec5SDimitry Andric 30450b57cec5SDimitry Andric EVT VT = SN->getMemoryVT(); 30460b57cec5SDimitry Andric unsigned Size = VT.getStoreSize(); 30470b57cec5SDimitry Andric 30480b57cec5SDimitry Andric SDLoc SL(N); 30490b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 30505ffd83dbSDimitry Andric Align Alignment = SN->getAlign(); 30515ffd83dbSDimitry Andric if (Alignment < Size && isTypeLegal(VT)) { 30520b57cec5SDimitry Andric bool IsFast; 30530b57cec5SDimitry Andric unsigned AS = SN->getAddressSpace(); 30540b57cec5SDimitry Andric 30550b57cec5SDimitry Andric // Expand unaligned stores earlier than legalization. Due to visitation 30560b57cec5SDimitry Andric // order problems during legalization, the emitted instructions to pack and 30570b57cec5SDimitry Andric // unpack the bytes again are not eliminated in the case of an unaligned 30580b57cec5SDimitry Andric // copy. 3059fe6060f1SDimitry Andric if (!allowsMisalignedMemoryAccesses( 3060fe6060f1SDimitry Andric VT, AS, Alignment, SN->getMemOperand()->getFlags(), &IsFast)) { 30610b57cec5SDimitry Andric if (VT.isVector()) 30620b57cec5SDimitry Andric return scalarizeVectorStore(SN, DAG); 30630b57cec5SDimitry Andric 30640b57cec5SDimitry Andric return expandUnalignedStore(SN, DAG); 30650b57cec5SDimitry Andric } 30660b57cec5SDimitry Andric 30670b57cec5SDimitry Andric if (!IsFast) 30680b57cec5SDimitry Andric return SDValue(); 30690b57cec5SDimitry Andric } 30700b57cec5SDimitry Andric 30710b57cec5SDimitry Andric if (!shouldCombineMemoryType(VT)) 30720b57cec5SDimitry Andric return SDValue(); 30730b57cec5SDimitry Andric 30740b57cec5SDimitry Andric EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT); 30750b57cec5SDimitry Andric SDValue Val = SN->getValue(); 30760b57cec5SDimitry Andric 30770b57cec5SDimitry Andric //DCI.AddToWorklist(Val.getNode()); 30780b57cec5SDimitry Andric 30790b57cec5SDimitry Andric bool OtherUses = !Val.hasOneUse(); 30800b57cec5SDimitry Andric SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val); 30810b57cec5SDimitry Andric if (OtherUses) { 30820b57cec5SDimitry Andric SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal); 30830b57cec5SDimitry Andric DAG.ReplaceAllUsesOfValueWith(Val, CastBack); 30840b57cec5SDimitry Andric } 30850b57cec5SDimitry Andric 30860b57cec5SDimitry Andric return DAG.getStore(SN->getChain(), SL, CastVal, 30870b57cec5SDimitry Andric SN->getBasePtr(), SN->getMemOperand()); 30880b57cec5SDimitry Andric } 30890b57cec5SDimitry Andric 30900b57cec5SDimitry Andric // FIXME: This should go in generic DAG combiner with an isTruncateFree check, 30910b57cec5SDimitry Andric // but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU 30920b57cec5SDimitry Andric // issues. 30930b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N, 30940b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 30950b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 30960b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 30970b57cec5SDimitry Andric 30980b57cec5SDimitry Andric // (vt2 (assertzext (truncate vt0:x), vt1)) -> 30990b57cec5SDimitry Andric // (vt2 (truncate (assertzext vt0:x, vt1))) 31000b57cec5SDimitry Andric if (N0.getOpcode() == ISD::TRUNCATE) { 31010b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 31020b57cec5SDimitry Andric EVT ExtVT = cast<VTSDNode>(N1)->getVT(); 31030b57cec5SDimitry Andric SDLoc SL(N); 31040b57cec5SDimitry Andric 31050b57cec5SDimitry Andric SDValue Src = N0.getOperand(0); 31060b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 31070b57cec5SDimitry Andric if (SrcVT.bitsGE(ExtVT)) { 31080b57cec5SDimitry Andric SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1); 31090b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg); 31100b57cec5SDimitry Andric } 31110b57cec5SDimitry Andric } 31120b57cec5SDimitry Andric 31130b57cec5SDimitry Andric return SDValue(); 31140b57cec5SDimitry Andric } 31158bcb0991SDimitry Andric 31168bcb0991SDimitry Andric SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine( 31178bcb0991SDimitry Andric SDNode *N, DAGCombinerInfo &DCI) const { 31188bcb0991SDimitry Andric unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 31198bcb0991SDimitry Andric switch (IID) { 31208bcb0991SDimitry Andric case Intrinsic::amdgcn_mul_i24: 31218bcb0991SDimitry Andric case Intrinsic::amdgcn_mul_u24: 3122349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_i24: 3123349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_u24: 3124fe6060f1SDimitry Andric return simplifyMul24(N, DCI); 31255ffd83dbSDimitry Andric case Intrinsic::amdgcn_fract: 31265ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq: 31275ffd83dbSDimitry Andric case Intrinsic::amdgcn_rcp_legacy: 31285ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq_legacy: 31295ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq_clamp: 31305ffd83dbSDimitry Andric case Intrinsic::amdgcn_ldexp: { 31315ffd83dbSDimitry Andric // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted 31325ffd83dbSDimitry Andric SDValue Src = N->getOperand(1); 31335ffd83dbSDimitry Andric return Src.isUndef() ? Src : SDValue(); 31345ffd83dbSDimitry Andric } 31358bcb0991SDimitry Andric default: 31368bcb0991SDimitry Andric return SDValue(); 31378bcb0991SDimitry Andric } 31388bcb0991SDimitry Andric } 31398bcb0991SDimitry Andric 31400b57cec5SDimitry Andric /// Split the 64-bit value \p LHS into two 32-bit components, and perform the 31410b57cec5SDimitry Andric /// binary operation \p Opc to it with the corresponding constant operands. 31420b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( 31430b57cec5SDimitry Andric DAGCombinerInfo &DCI, const SDLoc &SL, 31440b57cec5SDimitry Andric unsigned Opc, SDValue LHS, 31450b57cec5SDimitry Andric uint32_t ValLo, uint32_t ValHi) const { 31460b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 31470b57cec5SDimitry Andric SDValue Lo, Hi; 31480b57cec5SDimitry Andric std::tie(Lo, Hi) = split64BitValue(LHS, DAG); 31490b57cec5SDimitry Andric 31500b57cec5SDimitry Andric SDValue LoRHS = DAG.getConstant(ValLo, SL, MVT::i32); 31510b57cec5SDimitry Andric SDValue HiRHS = DAG.getConstant(ValHi, SL, MVT::i32); 31520b57cec5SDimitry Andric 31530b57cec5SDimitry Andric SDValue LoAnd = DAG.getNode(Opc, SL, MVT::i32, Lo, LoRHS); 31540b57cec5SDimitry Andric SDValue HiAnd = DAG.getNode(Opc, SL, MVT::i32, Hi, HiRHS); 31550b57cec5SDimitry Andric 31560b57cec5SDimitry Andric // Re-visit the ands. It's possible we eliminated one of them and it could 31570b57cec5SDimitry Andric // simplify the vector. 31580b57cec5SDimitry Andric DCI.AddToWorklist(Lo.getNode()); 31590b57cec5SDimitry Andric DCI.AddToWorklist(Hi.getNode()); 31600b57cec5SDimitry Andric 31610b57cec5SDimitry Andric SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {LoAnd, HiAnd}); 31620b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); 31630b57cec5SDimitry Andric } 31640b57cec5SDimitry Andric 31650b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, 31660b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 31670b57cec5SDimitry Andric EVT VT = N->getValueType(0); 31680b57cec5SDimitry Andric 31690b57cec5SDimitry Andric ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); 31700b57cec5SDimitry Andric if (!RHS) 31710b57cec5SDimitry Andric return SDValue(); 31720b57cec5SDimitry Andric 31730b57cec5SDimitry Andric SDValue LHS = N->getOperand(0); 31740b57cec5SDimitry Andric unsigned RHSVal = RHS->getZExtValue(); 31750b57cec5SDimitry Andric if (!RHSVal) 31760b57cec5SDimitry Andric return LHS; 31770b57cec5SDimitry Andric 31780b57cec5SDimitry Andric SDLoc SL(N); 31790b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 31800b57cec5SDimitry Andric 31810b57cec5SDimitry Andric switch (LHS->getOpcode()) { 31820b57cec5SDimitry Andric default: 31830b57cec5SDimitry Andric break; 31840b57cec5SDimitry Andric case ISD::ZERO_EXTEND: 31850b57cec5SDimitry Andric case ISD::SIGN_EXTEND: 31860b57cec5SDimitry Andric case ISD::ANY_EXTEND: { 31870b57cec5SDimitry Andric SDValue X = LHS->getOperand(0); 31880b57cec5SDimitry Andric 31890b57cec5SDimitry Andric if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 && 31900b57cec5SDimitry Andric isOperationLegal(ISD::BUILD_VECTOR, MVT::v2i16)) { 31910b57cec5SDimitry Andric // Prefer build_vector as the canonical form if packed types are legal. 31920b57cec5SDimitry Andric // (shl ([asz]ext i16:x), 16 -> build_vector 0, x 31930b57cec5SDimitry Andric SDValue Vec = DAG.getBuildVector(MVT::v2i16, SL, 31940b57cec5SDimitry Andric { DAG.getConstant(0, SL, MVT::i16), LHS->getOperand(0) }); 31950b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec); 31960b57cec5SDimitry Andric } 31970b57cec5SDimitry Andric 31980b57cec5SDimitry Andric // shl (ext x) => zext (shl x), if shift does not overflow int 31990b57cec5SDimitry Andric if (VT != MVT::i64) 32000b57cec5SDimitry Andric break; 32010b57cec5SDimitry Andric KnownBits Known = DAG.computeKnownBits(X); 32020b57cec5SDimitry Andric unsigned LZ = Known.countMinLeadingZeros(); 32030b57cec5SDimitry Andric if (LZ < RHSVal) 32040b57cec5SDimitry Andric break; 32050b57cec5SDimitry Andric EVT XVT = X.getValueType(); 32060b57cec5SDimitry Andric SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0)); 32070b57cec5SDimitry Andric return DAG.getZExtOrTrunc(Shl, SL, VT); 32080b57cec5SDimitry Andric } 32090b57cec5SDimitry Andric } 32100b57cec5SDimitry Andric 32110b57cec5SDimitry Andric if (VT != MVT::i64) 32120b57cec5SDimitry Andric return SDValue(); 32130b57cec5SDimitry Andric 32140b57cec5SDimitry Andric // i64 (shl x, C) -> (build_pair 0, (shl x, C -32)) 32150b57cec5SDimitry Andric 32160b57cec5SDimitry Andric // On some subtargets, 64-bit shift is a quarter rate instruction. In the 32170b57cec5SDimitry Andric // common case, splitting this into a move and a 32-bit shift is faster and 32180b57cec5SDimitry Andric // the same code size. 32190b57cec5SDimitry Andric if (RHSVal < 32) 32200b57cec5SDimitry Andric return SDValue(); 32210b57cec5SDimitry Andric 32220b57cec5SDimitry Andric SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32); 32230b57cec5SDimitry Andric 32240b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS); 32250b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt); 32260b57cec5SDimitry Andric 32270b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 32280b57cec5SDimitry Andric 32290b57cec5SDimitry Andric SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {Zero, NewShift}); 32300b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); 32310b57cec5SDimitry Andric } 32320b57cec5SDimitry Andric 32330b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N, 32340b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 32350b57cec5SDimitry Andric if (N->getValueType(0) != MVT::i64) 32360b57cec5SDimitry Andric return SDValue(); 32370b57cec5SDimitry Andric 32380b57cec5SDimitry Andric const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); 32390b57cec5SDimitry Andric if (!RHS) 32400b57cec5SDimitry Andric return SDValue(); 32410b57cec5SDimitry Andric 32420b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 32430b57cec5SDimitry Andric SDLoc SL(N); 32440b57cec5SDimitry Andric unsigned RHSVal = RHS->getZExtValue(); 32450b57cec5SDimitry Andric 32460b57cec5SDimitry Andric // (sra i64:x, 32) -> build_pair x, (sra hi_32(x), 31) 32470b57cec5SDimitry Andric if (RHSVal == 32) { 32480b57cec5SDimitry Andric SDValue Hi = getHiHalf64(N->getOperand(0), DAG); 32490b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi, 32500b57cec5SDimitry Andric DAG.getConstant(31, SL, MVT::i32)); 32510b57cec5SDimitry Andric 32520b57cec5SDimitry Andric SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift}); 32530b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec); 32540b57cec5SDimitry Andric } 32550b57cec5SDimitry Andric 32560b57cec5SDimitry Andric // (sra i64:x, 63) -> build_pair (sra hi_32(x), 31), (sra hi_32(x), 31) 32570b57cec5SDimitry Andric if (RHSVal == 63) { 32580b57cec5SDimitry Andric SDValue Hi = getHiHalf64(N->getOperand(0), DAG); 32590b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi, 32600b57cec5SDimitry Andric DAG.getConstant(31, SL, MVT::i32)); 32610b57cec5SDimitry Andric SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, NewShift}); 32620b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec); 32630b57cec5SDimitry Andric } 32640b57cec5SDimitry Andric 32650b57cec5SDimitry Andric return SDValue(); 32660b57cec5SDimitry Andric } 32670b57cec5SDimitry Andric 32680b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N, 32690b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 32700b57cec5SDimitry Andric auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); 32710b57cec5SDimitry Andric if (!RHS) 32720b57cec5SDimitry Andric return SDValue(); 32730b57cec5SDimitry Andric 32740b57cec5SDimitry Andric EVT VT = N->getValueType(0); 32750b57cec5SDimitry Andric SDValue LHS = N->getOperand(0); 32760b57cec5SDimitry Andric unsigned ShiftAmt = RHS->getZExtValue(); 32770b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 32780b57cec5SDimitry Andric SDLoc SL(N); 32790b57cec5SDimitry Andric 32800b57cec5SDimitry Andric // fold (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1) 32810b57cec5SDimitry Andric // this improves the ability to match BFE patterns in isel. 32820b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::AND) { 32830b57cec5SDimitry Andric if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) { 32840b57cec5SDimitry Andric if (Mask->getAPIntValue().isShiftedMask() && 32850b57cec5SDimitry Andric Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) { 32860b57cec5SDimitry Andric return DAG.getNode( 32870b57cec5SDimitry Andric ISD::AND, SL, VT, 32880b57cec5SDimitry Andric DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)), 32890b57cec5SDimitry Andric DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1))); 32900b57cec5SDimitry Andric } 32910b57cec5SDimitry Andric } 32920b57cec5SDimitry Andric } 32930b57cec5SDimitry Andric 32940b57cec5SDimitry Andric if (VT != MVT::i64) 32950b57cec5SDimitry Andric return SDValue(); 32960b57cec5SDimitry Andric 32970b57cec5SDimitry Andric if (ShiftAmt < 32) 32980b57cec5SDimitry Andric return SDValue(); 32990b57cec5SDimitry Andric 33000b57cec5SDimitry Andric // srl i64:x, C for C >= 32 33010b57cec5SDimitry Andric // => 33020b57cec5SDimitry Andric // build_pair (srl hi_32(x), C - 32), 0 33030b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 33040b57cec5SDimitry Andric 3305349cc55cSDimitry Andric SDValue Hi = getHiHalf64(LHS, DAG); 33060b57cec5SDimitry Andric 33070b57cec5SDimitry Andric SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32); 33080b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst); 33090b57cec5SDimitry Andric 33100b57cec5SDimitry Andric SDValue BuildPair = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, Zero}); 33110b57cec5SDimitry Andric 33120b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair); 33130b57cec5SDimitry Andric } 33140b57cec5SDimitry Andric 33150b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performTruncateCombine( 33160b57cec5SDimitry Andric SDNode *N, DAGCombinerInfo &DCI) const { 33170b57cec5SDimitry Andric SDLoc SL(N); 33180b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 33190b57cec5SDimitry Andric EVT VT = N->getValueType(0); 33200b57cec5SDimitry Andric SDValue Src = N->getOperand(0); 33210b57cec5SDimitry Andric 33220b57cec5SDimitry Andric // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x) 33230b57cec5SDimitry Andric if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) { 33240b57cec5SDimitry Andric SDValue Vec = Src.getOperand(0); 33250b57cec5SDimitry Andric if (Vec.getOpcode() == ISD::BUILD_VECTOR) { 33260b57cec5SDimitry Andric SDValue Elt0 = Vec.getOperand(0); 33270b57cec5SDimitry Andric EVT EltVT = Elt0.getValueType(); 3328e8d8bef9SDimitry Andric if (VT.getFixedSizeInBits() <= EltVT.getFixedSizeInBits()) { 33290b57cec5SDimitry Andric if (EltVT.isFloatingPoint()) { 33300b57cec5SDimitry Andric Elt0 = DAG.getNode(ISD::BITCAST, SL, 33310b57cec5SDimitry Andric EltVT.changeTypeToInteger(), Elt0); 33320b57cec5SDimitry Andric } 33330b57cec5SDimitry Andric 33340b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, VT, Elt0); 33350b57cec5SDimitry Andric } 33360b57cec5SDimitry Andric } 33370b57cec5SDimitry Andric } 33380b57cec5SDimitry Andric 33390b57cec5SDimitry Andric // Equivalent of above for accessing the high element of a vector as an 33400b57cec5SDimitry Andric // integer operation. 33410b57cec5SDimitry Andric // trunc (srl (bitcast (build_vector x, y))), 16 -> trunc (bitcast y) 33420b57cec5SDimitry Andric if (Src.getOpcode() == ISD::SRL && !VT.isVector()) { 33430b57cec5SDimitry Andric if (auto K = isConstOrConstSplat(Src.getOperand(1))) { 33440b57cec5SDimitry Andric if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) { 33450b57cec5SDimitry Andric SDValue BV = stripBitcast(Src.getOperand(0)); 33460b57cec5SDimitry Andric if (BV.getOpcode() == ISD::BUILD_VECTOR && 33470b57cec5SDimitry Andric BV.getValueType().getVectorNumElements() == 2) { 33480b57cec5SDimitry Andric SDValue SrcElt = BV.getOperand(1); 33490b57cec5SDimitry Andric EVT SrcEltVT = SrcElt.getValueType(); 33500b57cec5SDimitry Andric if (SrcEltVT.isFloatingPoint()) { 33510b57cec5SDimitry Andric SrcElt = DAG.getNode(ISD::BITCAST, SL, 33520b57cec5SDimitry Andric SrcEltVT.changeTypeToInteger(), SrcElt); 33530b57cec5SDimitry Andric } 33540b57cec5SDimitry Andric 33550b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, VT, SrcElt); 33560b57cec5SDimitry Andric } 33570b57cec5SDimitry Andric } 33580b57cec5SDimitry Andric } 33590b57cec5SDimitry Andric } 33600b57cec5SDimitry Andric 33610b57cec5SDimitry Andric // Partially shrink 64-bit shifts to 32-bit if reduced to 16-bit. 33620b57cec5SDimitry Andric // 33630b57cec5SDimitry Andric // i16 (trunc (srl i64:x, K)), K <= 16 -> 33640b57cec5SDimitry Andric // i16 (trunc (srl (i32 (trunc x), K))) 33650b57cec5SDimitry Andric if (VT.getScalarSizeInBits() < 32) { 33660b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 33670b57cec5SDimitry Andric if (SrcVT.getScalarSizeInBits() > 32 && 33680b57cec5SDimitry Andric (Src.getOpcode() == ISD::SRL || 33690b57cec5SDimitry Andric Src.getOpcode() == ISD::SRA || 33700b57cec5SDimitry Andric Src.getOpcode() == ISD::SHL)) { 33710b57cec5SDimitry Andric SDValue Amt = Src.getOperand(1); 33720b57cec5SDimitry Andric KnownBits Known = DAG.computeKnownBits(Amt); 33730b57cec5SDimitry Andric unsigned Size = VT.getScalarSizeInBits(); 33740b57cec5SDimitry Andric if ((Known.isConstant() && Known.getConstant().ule(Size)) || 3375349cc55cSDimitry Andric (Known.countMaxActiveBits() <= Log2_32(Size))) { 33760b57cec5SDimitry Andric EVT MidVT = VT.isVector() ? 33770b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), MVT::i32, 33780b57cec5SDimitry Andric VT.getVectorNumElements()) : MVT::i32; 33790b57cec5SDimitry Andric 33800b57cec5SDimitry Andric EVT NewShiftVT = getShiftAmountTy(MidVT, DAG.getDataLayout()); 33810b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT, 33820b57cec5SDimitry Andric Src.getOperand(0)); 33830b57cec5SDimitry Andric DCI.AddToWorklist(Trunc.getNode()); 33840b57cec5SDimitry Andric 33850b57cec5SDimitry Andric if (Amt.getValueType() != NewShiftVT) { 33860b57cec5SDimitry Andric Amt = DAG.getZExtOrTrunc(Amt, SL, NewShiftVT); 33870b57cec5SDimitry Andric DCI.AddToWorklist(Amt.getNode()); 33880b57cec5SDimitry Andric } 33890b57cec5SDimitry Andric 33900b57cec5SDimitry Andric SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT, 33910b57cec5SDimitry Andric Trunc, Amt); 33920b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift); 33930b57cec5SDimitry Andric } 33940b57cec5SDimitry Andric } 33950b57cec5SDimitry Andric } 33960b57cec5SDimitry Andric 33970b57cec5SDimitry Andric return SDValue(); 33980b57cec5SDimitry Andric } 33990b57cec5SDimitry Andric 34000b57cec5SDimitry Andric // We need to specifically handle i64 mul here to avoid unnecessary conversion 34010b57cec5SDimitry Andric // instructions. If we only match on the legalized i64 mul expansion, 34020b57cec5SDimitry Andric // SimplifyDemandedBits will be unable to remove them because there will be 34030b57cec5SDimitry Andric // multiple uses due to the separate mul + mulh[su]. 34040b57cec5SDimitry Andric static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL, 34050b57cec5SDimitry Andric SDValue N0, SDValue N1, unsigned Size, bool Signed) { 34060b57cec5SDimitry Andric if (Size <= 32) { 34070b57cec5SDimitry Andric unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24; 34080b57cec5SDimitry Andric return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1); 34090b57cec5SDimitry Andric } 34100b57cec5SDimitry Andric 3411e8d8bef9SDimitry Andric unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24; 3412e8d8bef9SDimitry Andric unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24; 34130b57cec5SDimitry Andric 3414e8d8bef9SDimitry Andric SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1); 3415e8d8bef9SDimitry Andric SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1); 34160b57cec5SDimitry Andric 3417e8d8bef9SDimitry Andric return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, MulLo, MulHi); 34180b57cec5SDimitry Andric } 34190b57cec5SDimitry Andric 34200b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N, 34210b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 34220b57cec5SDimitry Andric EVT VT = N->getValueType(0); 34230b57cec5SDimitry Andric 3424fe6060f1SDimitry Andric // Don't generate 24-bit multiplies on values that are in SGPRs, since 3425fe6060f1SDimitry Andric // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs 3426fe6060f1SDimitry Andric // unnecessarily). isDivergent() is used as an approximation of whether the 3427fe6060f1SDimitry Andric // value is in an SGPR. 3428fe6060f1SDimitry Andric if (!N->isDivergent()) 3429fe6060f1SDimitry Andric return SDValue(); 3430fe6060f1SDimitry Andric 34310b57cec5SDimitry Andric unsigned Size = VT.getSizeInBits(); 34320b57cec5SDimitry Andric if (VT.isVector() || Size > 64) 34330b57cec5SDimitry Andric return SDValue(); 34340b57cec5SDimitry Andric 34350b57cec5SDimitry Andric // There are i16 integer mul/mad. 34360b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16)) 34370b57cec5SDimitry Andric return SDValue(); 34380b57cec5SDimitry Andric 34390b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 34400b57cec5SDimitry Andric SDLoc DL(N); 34410b57cec5SDimitry Andric 34420b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 34430b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 34440b57cec5SDimitry Andric 34450b57cec5SDimitry Andric // SimplifyDemandedBits has the annoying habit of turning useful zero_extends 34460b57cec5SDimitry Andric // in the source into any_extends if the result of the mul is truncated. Since 34470b57cec5SDimitry Andric // we can assume the high bits are whatever we want, use the underlying value 34480b57cec5SDimitry Andric // to avoid the unknown high bits from interfering. 34490b57cec5SDimitry Andric if (N0.getOpcode() == ISD::ANY_EXTEND) 34500b57cec5SDimitry Andric N0 = N0.getOperand(0); 34510b57cec5SDimitry Andric 34520b57cec5SDimitry Andric if (N1.getOpcode() == ISD::ANY_EXTEND) 34530b57cec5SDimitry Andric N1 = N1.getOperand(0); 34540b57cec5SDimitry Andric 34550b57cec5SDimitry Andric SDValue Mul; 34560b57cec5SDimitry Andric 34570b57cec5SDimitry Andric if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { 34580b57cec5SDimitry Andric N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); 34590b57cec5SDimitry Andric N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); 34600b57cec5SDimitry Andric Mul = getMul24(DAG, DL, N0, N1, Size, false); 34610b57cec5SDimitry Andric } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { 34620b57cec5SDimitry Andric N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); 34630b57cec5SDimitry Andric N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); 34640b57cec5SDimitry Andric Mul = getMul24(DAG, DL, N0, N1, Size, true); 34650b57cec5SDimitry Andric } else { 34660b57cec5SDimitry Andric return SDValue(); 34670b57cec5SDimitry Andric } 34680b57cec5SDimitry Andric 34690b57cec5SDimitry Andric // We need to use sext even for MUL_U24, because MUL_U24 is used 34700b57cec5SDimitry Andric // for signed multiply of 8 and 16-bit types. 34710b57cec5SDimitry Andric return DAG.getSExtOrTrunc(Mul, DL, VT); 34720b57cec5SDimitry Andric } 34730b57cec5SDimitry Andric 34744824e7fdSDimitry Andric SDValue 34754824e7fdSDimitry Andric AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N, 34764824e7fdSDimitry Andric DAGCombinerInfo &DCI) const { 34774824e7fdSDimitry Andric if (N->getValueType(0) != MVT::i32) 34784824e7fdSDimitry Andric return SDValue(); 34794824e7fdSDimitry Andric 34804824e7fdSDimitry Andric SelectionDAG &DAG = DCI.DAG; 34814824e7fdSDimitry Andric SDLoc DL(N); 34824824e7fdSDimitry Andric 34834824e7fdSDimitry Andric SDValue N0 = N->getOperand(0); 34844824e7fdSDimitry Andric SDValue N1 = N->getOperand(1); 34854824e7fdSDimitry Andric 34864824e7fdSDimitry Andric // SimplifyDemandedBits has the annoying habit of turning useful zero_extends 34874824e7fdSDimitry Andric // in the source into any_extends if the result of the mul is truncated. Since 34884824e7fdSDimitry Andric // we can assume the high bits are whatever we want, use the underlying value 34894824e7fdSDimitry Andric // to avoid the unknown high bits from interfering. 34904824e7fdSDimitry Andric if (N0.getOpcode() == ISD::ANY_EXTEND) 34914824e7fdSDimitry Andric N0 = N0.getOperand(0); 34924824e7fdSDimitry Andric if (N1.getOpcode() == ISD::ANY_EXTEND) 34934824e7fdSDimitry Andric N1 = N1.getOperand(0); 34944824e7fdSDimitry Andric 34954824e7fdSDimitry Andric // Try to use two fast 24-bit multiplies (one for each half of the result) 34964824e7fdSDimitry Andric // instead of one slow extending multiply. 34974824e7fdSDimitry Andric unsigned LoOpcode, HiOpcode; 34984824e7fdSDimitry Andric if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { 34994824e7fdSDimitry Andric N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); 35004824e7fdSDimitry Andric N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); 35014824e7fdSDimitry Andric LoOpcode = AMDGPUISD::MUL_U24; 35024824e7fdSDimitry Andric HiOpcode = AMDGPUISD::MULHI_U24; 35034824e7fdSDimitry Andric } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { 35044824e7fdSDimitry Andric N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); 35054824e7fdSDimitry Andric N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); 35064824e7fdSDimitry Andric LoOpcode = AMDGPUISD::MUL_I24; 35074824e7fdSDimitry Andric HiOpcode = AMDGPUISD::MULHI_I24; 35084824e7fdSDimitry Andric } else { 35094824e7fdSDimitry Andric return SDValue(); 35104824e7fdSDimitry Andric } 35114824e7fdSDimitry Andric 35124824e7fdSDimitry Andric SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1); 35134824e7fdSDimitry Andric SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1); 35144824e7fdSDimitry Andric DCI.CombineTo(N, Lo, Hi); 35154824e7fdSDimitry Andric return SDValue(N, 0); 35164824e7fdSDimitry Andric } 35174824e7fdSDimitry Andric 35180b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N, 35190b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 35200b57cec5SDimitry Andric EVT VT = N->getValueType(0); 35210b57cec5SDimitry Andric 35220b57cec5SDimitry Andric if (!Subtarget->hasMulI24() || VT.isVector()) 35230b57cec5SDimitry Andric return SDValue(); 35240b57cec5SDimitry Andric 3525fe6060f1SDimitry Andric // Don't generate 24-bit multiplies on values that are in SGPRs, since 3526fe6060f1SDimitry Andric // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs 3527fe6060f1SDimitry Andric // unnecessarily). isDivergent() is used as an approximation of whether the 3528fe6060f1SDimitry Andric // value is in an SGPR. 3529fe6060f1SDimitry Andric // This doesn't apply if no s_mul_hi is available (since we'll end up with a 3530fe6060f1SDimitry Andric // valu op anyway) 3531fe6060f1SDimitry Andric if (Subtarget->hasSMulHi() && !N->isDivergent()) 3532fe6060f1SDimitry Andric return SDValue(); 3533fe6060f1SDimitry Andric 35340b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 35350b57cec5SDimitry Andric SDLoc DL(N); 35360b57cec5SDimitry Andric 35370b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 35380b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 35390b57cec5SDimitry Andric 35400b57cec5SDimitry Andric if (!isI24(N0, DAG) || !isI24(N1, DAG)) 35410b57cec5SDimitry Andric return SDValue(); 35420b57cec5SDimitry Andric 35430b57cec5SDimitry Andric N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); 35440b57cec5SDimitry Andric N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); 35450b57cec5SDimitry Andric 35460b57cec5SDimitry Andric SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1); 35470b57cec5SDimitry Andric DCI.AddToWorklist(Mulhi.getNode()); 35480b57cec5SDimitry Andric return DAG.getSExtOrTrunc(Mulhi, DL, VT); 35490b57cec5SDimitry Andric } 35500b57cec5SDimitry Andric 35510b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N, 35520b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 35530b57cec5SDimitry Andric EVT VT = N->getValueType(0); 35540b57cec5SDimitry Andric 35550b57cec5SDimitry Andric if (!Subtarget->hasMulU24() || VT.isVector() || VT.getSizeInBits() > 32) 35560b57cec5SDimitry Andric return SDValue(); 35570b57cec5SDimitry Andric 3558fe6060f1SDimitry Andric // Don't generate 24-bit multiplies on values that are in SGPRs, since 3559fe6060f1SDimitry Andric // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs 3560fe6060f1SDimitry Andric // unnecessarily). isDivergent() is used as an approximation of whether the 3561fe6060f1SDimitry Andric // value is in an SGPR. 3562fe6060f1SDimitry Andric // This doesn't apply if no s_mul_hi is available (since we'll end up with a 3563fe6060f1SDimitry Andric // valu op anyway) 3564fe6060f1SDimitry Andric if (Subtarget->hasSMulHi() && !N->isDivergent()) 3565fe6060f1SDimitry Andric return SDValue(); 3566fe6060f1SDimitry Andric 35670b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 35680b57cec5SDimitry Andric SDLoc DL(N); 35690b57cec5SDimitry Andric 35700b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 35710b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 35720b57cec5SDimitry Andric 35730b57cec5SDimitry Andric if (!isU24(N0, DAG) || !isU24(N1, DAG)) 35740b57cec5SDimitry Andric return SDValue(); 35750b57cec5SDimitry Andric 35760b57cec5SDimitry Andric N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); 35770b57cec5SDimitry Andric N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); 35780b57cec5SDimitry Andric 35790b57cec5SDimitry Andric SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1); 35800b57cec5SDimitry Andric DCI.AddToWorklist(Mulhi.getNode()); 35810b57cec5SDimitry Andric return DAG.getZExtOrTrunc(Mulhi, DL, VT); 35820b57cec5SDimitry Andric } 35830b57cec5SDimitry Andric 35840b57cec5SDimitry Andric static bool isNegativeOne(SDValue Val) { 35850b57cec5SDimitry Andric if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) 3586349cc55cSDimitry Andric return C->isAllOnes(); 35870b57cec5SDimitry Andric return false; 35880b57cec5SDimitry Andric } 35890b57cec5SDimitry Andric 35900b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG, 35910b57cec5SDimitry Andric SDValue Op, 35920b57cec5SDimitry Andric const SDLoc &DL, 35930b57cec5SDimitry Andric unsigned Opc) const { 35940b57cec5SDimitry Andric EVT VT = Op.getValueType(); 35950b57cec5SDimitry Andric EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT); 35960b57cec5SDimitry Andric if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() && 35970b57cec5SDimitry Andric LegalVT != MVT::i16)) 35980b57cec5SDimitry Andric return SDValue(); 35990b57cec5SDimitry Andric 36000b57cec5SDimitry Andric if (VT != MVT::i32) 36010b57cec5SDimitry Andric Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op); 36020b57cec5SDimitry Andric 36030b57cec5SDimitry Andric SDValue FFBX = DAG.getNode(Opc, DL, MVT::i32, Op); 36040b57cec5SDimitry Andric if (VT != MVT::i32) 36050b57cec5SDimitry Andric FFBX = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBX); 36060b57cec5SDimitry Andric 36070b57cec5SDimitry Andric return FFBX; 36080b57cec5SDimitry Andric } 36090b57cec5SDimitry Andric 36100b57cec5SDimitry Andric // The native instructions return -1 on 0 input. Optimize out a select that 36110b57cec5SDimitry Andric // produces -1 on 0. 36120b57cec5SDimitry Andric // 36130b57cec5SDimitry Andric // TODO: If zero is not undef, we could also do this if the output is compared 36140b57cec5SDimitry Andric // against the bitwidth. 36150b57cec5SDimitry Andric // 36160b57cec5SDimitry Andric // TODO: Should probably combine against FFBH_U32 instead of ctlz directly. 36170b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, 36180b57cec5SDimitry Andric SDValue LHS, SDValue RHS, 36190b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 36200b57cec5SDimitry Andric ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 3621349cc55cSDimitry Andric if (!CmpRhs || !CmpRhs->isZero()) 36220b57cec5SDimitry Andric return SDValue(); 36230b57cec5SDimitry Andric 36240b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 36250b57cec5SDimitry Andric ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 36260b57cec5SDimitry Andric SDValue CmpLHS = Cond.getOperand(0); 36270b57cec5SDimitry Andric 36280b57cec5SDimitry Andric // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x 36290b57cec5SDimitry Andric // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x 36300b57cec5SDimitry Andric if (CCOpcode == ISD::SETEQ && 36310b57cec5SDimitry Andric (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) && 36325ffd83dbSDimitry Andric RHS.getOperand(0) == CmpLHS && isNegativeOne(LHS)) { 36335ffd83dbSDimitry Andric unsigned Opc = 36345ffd83dbSDimitry Andric isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32; 36350b57cec5SDimitry Andric return getFFBX_U32(DAG, CmpLHS, SL, Opc); 36360b57cec5SDimitry Andric } 36370b57cec5SDimitry Andric 36380b57cec5SDimitry Andric // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x 36390b57cec5SDimitry Andric // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x 36400b57cec5SDimitry Andric if (CCOpcode == ISD::SETNE && 36415ffd83dbSDimitry Andric (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) && 36425ffd83dbSDimitry Andric LHS.getOperand(0) == CmpLHS && isNegativeOne(RHS)) { 36435ffd83dbSDimitry Andric unsigned Opc = 36445ffd83dbSDimitry Andric isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32; 36455ffd83dbSDimitry Andric 36460b57cec5SDimitry Andric return getFFBX_U32(DAG, CmpLHS, SL, Opc); 36470b57cec5SDimitry Andric } 36480b57cec5SDimitry Andric 36490b57cec5SDimitry Andric return SDValue(); 36500b57cec5SDimitry Andric } 36510b57cec5SDimitry Andric 36520b57cec5SDimitry Andric static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI, 36530b57cec5SDimitry Andric unsigned Op, 36540b57cec5SDimitry Andric const SDLoc &SL, 36550b57cec5SDimitry Andric SDValue Cond, 36560b57cec5SDimitry Andric SDValue N1, 36570b57cec5SDimitry Andric SDValue N2) { 36580b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 36590b57cec5SDimitry Andric EVT VT = N1.getValueType(); 36600b57cec5SDimitry Andric 36610b57cec5SDimitry Andric SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond, 36620b57cec5SDimitry Andric N1.getOperand(0), N2.getOperand(0)); 36630b57cec5SDimitry Andric DCI.AddToWorklist(NewSelect.getNode()); 36640b57cec5SDimitry Andric return DAG.getNode(Op, SL, VT, NewSelect); 36650b57cec5SDimitry Andric } 36660b57cec5SDimitry Andric 36670b57cec5SDimitry Andric // Pull a free FP operation out of a select so it may fold into uses. 36680b57cec5SDimitry Andric // 36690b57cec5SDimitry Andric // select c, (fneg x), (fneg y) -> fneg (select c, x, y) 36700b57cec5SDimitry Andric // select c, (fneg x), k -> fneg (select c, x, (fneg k)) 36710b57cec5SDimitry Andric // 36720b57cec5SDimitry Andric // select c, (fabs x), (fabs y) -> fabs (select c, x, y) 36730b57cec5SDimitry Andric // select c, (fabs x), +k -> fabs (select c, x, k) 36740b57cec5SDimitry Andric static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, 36750b57cec5SDimitry Andric SDValue N) { 36760b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 36770b57cec5SDimitry Andric SDValue Cond = N.getOperand(0); 36780b57cec5SDimitry Andric SDValue LHS = N.getOperand(1); 36790b57cec5SDimitry Andric SDValue RHS = N.getOperand(2); 36800b57cec5SDimitry Andric 36810b57cec5SDimitry Andric EVT VT = N.getValueType(); 36820b57cec5SDimitry Andric if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) || 36830b57cec5SDimitry Andric (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) { 36840b57cec5SDimitry Andric return distributeOpThroughSelect(DCI, LHS.getOpcode(), 36850b57cec5SDimitry Andric SDLoc(N), Cond, LHS, RHS); 36860b57cec5SDimitry Andric } 36870b57cec5SDimitry Andric 36880b57cec5SDimitry Andric bool Inv = false; 36890b57cec5SDimitry Andric if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) { 36900b57cec5SDimitry Andric std::swap(LHS, RHS); 36910b57cec5SDimitry Andric Inv = true; 36920b57cec5SDimitry Andric } 36930b57cec5SDimitry Andric 36940b57cec5SDimitry Andric // TODO: Support vector constants. 36950b57cec5SDimitry Andric ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS); 36960b57cec5SDimitry Andric if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) { 36970b57cec5SDimitry Andric SDLoc SL(N); 36980b57cec5SDimitry Andric // If one side is an fneg/fabs and the other is a constant, we can push the 36990b57cec5SDimitry Andric // fneg/fabs down. If it's an fabs, the constant needs to be non-negative. 37000b57cec5SDimitry Andric SDValue NewLHS = LHS.getOperand(0); 37010b57cec5SDimitry Andric SDValue NewRHS = RHS; 37020b57cec5SDimitry Andric 37030b57cec5SDimitry Andric // Careful: if the neg can be folded up, don't try to pull it back down. 37040b57cec5SDimitry Andric bool ShouldFoldNeg = true; 37050b57cec5SDimitry Andric 37060b57cec5SDimitry Andric if (NewLHS.hasOneUse()) { 37070b57cec5SDimitry Andric unsigned Opc = NewLHS.getOpcode(); 37080b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc)) 37090b57cec5SDimitry Andric ShouldFoldNeg = false; 37100b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL) 37110b57cec5SDimitry Andric ShouldFoldNeg = false; 37120b57cec5SDimitry Andric } 37130b57cec5SDimitry Andric 37140b57cec5SDimitry Andric if (ShouldFoldNeg) { 37150b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG) 37160b57cec5SDimitry Andric NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 37170b57cec5SDimitry Andric else if (CRHS->isNegative()) 37180b57cec5SDimitry Andric return SDValue(); 37190b57cec5SDimitry Andric 37200b57cec5SDimitry Andric if (Inv) 37210b57cec5SDimitry Andric std::swap(NewLHS, NewRHS); 37220b57cec5SDimitry Andric 37230b57cec5SDimitry Andric SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, 37240b57cec5SDimitry Andric Cond, NewLHS, NewRHS); 37250b57cec5SDimitry Andric DCI.AddToWorklist(NewSelect.getNode()); 37260b57cec5SDimitry Andric return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect); 37270b57cec5SDimitry Andric } 37280b57cec5SDimitry Andric } 37290b57cec5SDimitry Andric 37300b57cec5SDimitry Andric return SDValue(); 37310b57cec5SDimitry Andric } 37320b57cec5SDimitry Andric 37330b57cec5SDimitry Andric 37340b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N, 37350b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 37360b57cec5SDimitry Andric if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0))) 37370b57cec5SDimitry Andric return Folded; 37380b57cec5SDimitry Andric 37390b57cec5SDimitry Andric SDValue Cond = N->getOperand(0); 37400b57cec5SDimitry Andric if (Cond.getOpcode() != ISD::SETCC) 37410b57cec5SDimitry Andric return SDValue(); 37420b57cec5SDimitry Andric 37430b57cec5SDimitry Andric EVT VT = N->getValueType(0); 37440b57cec5SDimitry Andric SDValue LHS = Cond.getOperand(0); 37450b57cec5SDimitry Andric SDValue RHS = Cond.getOperand(1); 37460b57cec5SDimitry Andric SDValue CC = Cond.getOperand(2); 37470b57cec5SDimitry Andric 37480b57cec5SDimitry Andric SDValue True = N->getOperand(1); 37490b57cec5SDimitry Andric SDValue False = N->getOperand(2); 37500b57cec5SDimitry Andric 37510b57cec5SDimitry Andric if (Cond.hasOneUse()) { // TODO: Look for multiple select uses. 37520b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 37530b57cec5SDimitry Andric if (DAG.isConstantValueOfAnyType(True) && 37540b57cec5SDimitry Andric !DAG.isConstantValueOfAnyType(False)) { 37550b57cec5SDimitry Andric // Swap cmp + select pair to move constant to false input. 37560b57cec5SDimitry Andric // This will allow using VOPC cndmasks more often. 37570b57cec5SDimitry Andric // select (setcc x, y), k, x -> select (setccinv x, y), x, k 37580b57cec5SDimitry Andric 37590b57cec5SDimitry Andric SDLoc SL(N); 3760480093f4SDimitry Andric ISD::CondCode NewCC = 3761480093f4SDimitry Andric getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType()); 37620b57cec5SDimitry Andric 37630b57cec5SDimitry Andric SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC); 37640b57cec5SDimitry Andric return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True); 37650b57cec5SDimitry Andric } 37660b57cec5SDimitry Andric 37670b57cec5SDimitry Andric if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) { 37680b57cec5SDimitry Andric SDValue MinMax 37690b57cec5SDimitry Andric = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI); 37700b57cec5SDimitry Andric // Revisit this node so we can catch min3/max3/med3 patterns. 37710b57cec5SDimitry Andric //DCI.AddToWorklist(MinMax.getNode()); 37720b57cec5SDimitry Andric return MinMax; 37730b57cec5SDimitry Andric } 37740b57cec5SDimitry Andric } 37750b57cec5SDimitry Andric 37760b57cec5SDimitry Andric // There's no reason to not do this if the condition has other uses. 37770b57cec5SDimitry Andric return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI); 37780b57cec5SDimitry Andric } 37790b57cec5SDimitry Andric 37800b57cec5SDimitry Andric static bool isInv2Pi(const APFloat &APF) { 37810b57cec5SDimitry Andric static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118)); 37820b57cec5SDimitry Andric static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983)); 37830b57cec5SDimitry Andric static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882)); 37840b57cec5SDimitry Andric 37850b57cec5SDimitry Andric return APF.bitwiseIsEqual(KF16) || 37860b57cec5SDimitry Andric APF.bitwiseIsEqual(KF32) || 37870b57cec5SDimitry Andric APF.bitwiseIsEqual(KF64); 37880b57cec5SDimitry Andric } 37890b57cec5SDimitry Andric 37900b57cec5SDimitry Andric // 0 and 1.0 / (0.5 * pi) do not have inline immmediates, so there is an 37910b57cec5SDimitry Andric // additional cost to negate them. 37920b57cec5SDimitry Andric bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const { 37930b57cec5SDimitry Andric if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N)) { 37940b57cec5SDimitry Andric if (C->isZero() && !C->isNegative()) 37950b57cec5SDimitry Andric return true; 37960b57cec5SDimitry Andric 37970b57cec5SDimitry Andric if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF())) 37980b57cec5SDimitry Andric return true; 37990b57cec5SDimitry Andric } 38000b57cec5SDimitry Andric 38010b57cec5SDimitry Andric return false; 38020b57cec5SDimitry Andric } 38030b57cec5SDimitry Andric 38040b57cec5SDimitry Andric static unsigned inverseMinMax(unsigned Opc) { 38050b57cec5SDimitry Andric switch (Opc) { 38060b57cec5SDimitry Andric case ISD::FMAXNUM: 38070b57cec5SDimitry Andric return ISD::FMINNUM; 38080b57cec5SDimitry Andric case ISD::FMINNUM: 38090b57cec5SDimitry Andric return ISD::FMAXNUM; 38100b57cec5SDimitry Andric case ISD::FMAXNUM_IEEE: 38110b57cec5SDimitry Andric return ISD::FMINNUM_IEEE; 38120b57cec5SDimitry Andric case ISD::FMINNUM_IEEE: 38130b57cec5SDimitry Andric return ISD::FMAXNUM_IEEE; 38140b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: 38150b57cec5SDimitry Andric return AMDGPUISD::FMIN_LEGACY; 38160b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: 38170b57cec5SDimitry Andric return AMDGPUISD::FMAX_LEGACY; 38180b57cec5SDimitry Andric default: 38190b57cec5SDimitry Andric llvm_unreachable("invalid min/max opcode"); 38200b57cec5SDimitry Andric } 38210b57cec5SDimitry Andric } 38220b57cec5SDimitry Andric 38230b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, 38240b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 38250b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 38260b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 38270b57cec5SDimitry Andric EVT VT = N->getValueType(0); 38280b57cec5SDimitry Andric 38290b57cec5SDimitry Andric unsigned Opc = N0.getOpcode(); 38300b57cec5SDimitry Andric 38310b57cec5SDimitry Andric // If the input has multiple uses and we can either fold the negate down, or 38320b57cec5SDimitry Andric // the other uses cannot, give up. This both prevents unprofitable 38330b57cec5SDimitry Andric // transformations and infinite loops: we won't repeatedly try to fold around 38340b57cec5SDimitry Andric // a negate that has no 'good' form. 38350b57cec5SDimitry Andric if (N0.hasOneUse()) { 38360b57cec5SDimitry Andric // This may be able to fold into the source, but at a code size cost. Don't 38370b57cec5SDimitry Andric // fold if the fold into the user is free. 38380b57cec5SDimitry Andric if (allUsesHaveSourceMods(N, 0)) 38390b57cec5SDimitry Andric return SDValue(); 38400b57cec5SDimitry Andric } else { 38410b57cec5SDimitry Andric if (fnegFoldsIntoOp(Opc) && 38420b57cec5SDimitry Andric (allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode()))) 38430b57cec5SDimitry Andric return SDValue(); 38440b57cec5SDimitry Andric } 38450b57cec5SDimitry Andric 38460b57cec5SDimitry Andric SDLoc SL(N); 38470b57cec5SDimitry Andric switch (Opc) { 38480b57cec5SDimitry Andric case ISD::FADD: { 38490b57cec5SDimitry Andric if (!mayIgnoreSignedZero(N0)) 38500b57cec5SDimitry Andric return SDValue(); 38510b57cec5SDimitry Andric 38520b57cec5SDimitry Andric // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y)) 38530b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 38540b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1); 38550b57cec5SDimitry Andric 38560b57cec5SDimitry Andric if (LHS.getOpcode() != ISD::FNEG) 38570b57cec5SDimitry Andric LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS); 38580b57cec5SDimitry Andric else 38590b57cec5SDimitry Andric LHS = LHS.getOperand(0); 38600b57cec5SDimitry Andric 38610b57cec5SDimitry Andric if (RHS.getOpcode() != ISD::FNEG) 38620b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 38630b57cec5SDimitry Andric else 38640b57cec5SDimitry Andric RHS = RHS.getOperand(0); 38650b57cec5SDimitry Andric 38660b57cec5SDimitry Andric SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags()); 38670b57cec5SDimitry Andric if (Res.getOpcode() != ISD::FADD) 38680b57cec5SDimitry Andric return SDValue(); // Op got folded away. 38690b57cec5SDimitry Andric if (!N0.hasOneUse()) 38700b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 38710b57cec5SDimitry Andric return Res; 38720b57cec5SDimitry Andric } 38730b57cec5SDimitry Andric case ISD::FMUL: 38740b57cec5SDimitry Andric case AMDGPUISD::FMUL_LEGACY: { 38750b57cec5SDimitry Andric // (fneg (fmul x, y)) -> (fmul x, (fneg y)) 38760b57cec5SDimitry Andric // (fneg (fmul_legacy x, y)) -> (fmul_legacy x, (fneg y)) 38770b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 38780b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1); 38790b57cec5SDimitry Andric 38800b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG) 38810b57cec5SDimitry Andric LHS = LHS.getOperand(0); 38820b57cec5SDimitry Andric else if (RHS.getOpcode() == ISD::FNEG) 38830b57cec5SDimitry Andric RHS = RHS.getOperand(0); 38840b57cec5SDimitry Andric else 38850b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 38860b57cec5SDimitry Andric 38870b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags()); 38880b57cec5SDimitry Andric if (Res.getOpcode() != Opc) 38890b57cec5SDimitry Andric return SDValue(); // Op got folded away. 38900b57cec5SDimitry Andric if (!N0.hasOneUse()) 38910b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 38920b57cec5SDimitry Andric return Res; 38930b57cec5SDimitry Andric } 38940b57cec5SDimitry Andric case ISD::FMA: 38950b57cec5SDimitry Andric case ISD::FMAD: { 3896e8d8bef9SDimitry Andric // TODO: handle llvm.amdgcn.fma.legacy 38970b57cec5SDimitry Andric if (!mayIgnoreSignedZero(N0)) 38980b57cec5SDimitry Andric return SDValue(); 38990b57cec5SDimitry Andric 39000b57cec5SDimitry Andric // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z)) 39010b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 39020b57cec5SDimitry Andric SDValue MHS = N0.getOperand(1); 39030b57cec5SDimitry Andric SDValue RHS = N0.getOperand(2); 39040b57cec5SDimitry Andric 39050b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG) 39060b57cec5SDimitry Andric LHS = LHS.getOperand(0); 39070b57cec5SDimitry Andric else if (MHS.getOpcode() == ISD::FNEG) 39080b57cec5SDimitry Andric MHS = MHS.getOperand(0); 39090b57cec5SDimitry Andric else 39100b57cec5SDimitry Andric MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS); 39110b57cec5SDimitry Andric 39120b57cec5SDimitry Andric if (RHS.getOpcode() != ISD::FNEG) 39130b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 39140b57cec5SDimitry Andric else 39150b57cec5SDimitry Andric RHS = RHS.getOperand(0); 39160b57cec5SDimitry Andric 39170b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS); 39180b57cec5SDimitry Andric if (Res.getOpcode() != Opc) 39190b57cec5SDimitry Andric return SDValue(); // Op got folded away. 39200b57cec5SDimitry Andric if (!N0.hasOneUse()) 39210b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 39220b57cec5SDimitry Andric return Res; 39230b57cec5SDimitry Andric } 39240b57cec5SDimitry Andric case ISD::FMAXNUM: 39250b57cec5SDimitry Andric case ISD::FMINNUM: 39260b57cec5SDimitry Andric case ISD::FMAXNUM_IEEE: 39270b57cec5SDimitry Andric case ISD::FMINNUM_IEEE: 39280b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: 39290b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: { 39300b57cec5SDimitry Andric // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y) 39310b57cec5SDimitry Andric // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y) 39320b57cec5SDimitry Andric // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y) 39330b57cec5SDimitry Andric // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y) 39340b57cec5SDimitry Andric 39350b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 39360b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1); 39370b57cec5SDimitry Andric 39380b57cec5SDimitry Andric // 0 doesn't have a negated inline immediate. 39390b57cec5SDimitry Andric // TODO: This constant check should be generalized to other operations. 39400b57cec5SDimitry Andric if (isConstantCostlierToNegate(RHS)) 39410b57cec5SDimitry Andric return SDValue(); 39420b57cec5SDimitry Andric 39430b57cec5SDimitry Andric SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS); 39440b57cec5SDimitry Andric SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 39450b57cec5SDimitry Andric unsigned Opposite = inverseMinMax(Opc); 39460b57cec5SDimitry Andric 39470b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags()); 39480b57cec5SDimitry Andric if (Res.getOpcode() != Opposite) 39490b57cec5SDimitry Andric return SDValue(); // Op got folded away. 39500b57cec5SDimitry Andric if (!N0.hasOneUse()) 39510b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 39520b57cec5SDimitry Andric return Res; 39530b57cec5SDimitry Andric } 39540b57cec5SDimitry Andric case AMDGPUISD::FMED3: { 39550b57cec5SDimitry Andric SDValue Ops[3]; 39560b57cec5SDimitry Andric for (unsigned I = 0; I < 3; ++I) 39570b57cec5SDimitry Andric Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags()); 39580b57cec5SDimitry Andric 39590b57cec5SDimitry Andric SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags()); 39600b57cec5SDimitry Andric if (Res.getOpcode() != AMDGPUISD::FMED3) 39610b57cec5SDimitry Andric return SDValue(); // Op got folded away. 3962e8d8bef9SDimitry Andric 3963e8d8bef9SDimitry Andric if (!N0.hasOneUse()) { 3964e8d8bef9SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Res); 3965e8d8bef9SDimitry Andric DAG.ReplaceAllUsesWith(N0, Neg); 3966e8d8bef9SDimitry Andric 3967e8d8bef9SDimitry Andric for (SDNode *U : Neg->uses()) 3968e8d8bef9SDimitry Andric DCI.AddToWorklist(U); 3969e8d8bef9SDimitry Andric } 3970e8d8bef9SDimitry Andric 39710b57cec5SDimitry Andric return Res; 39720b57cec5SDimitry Andric } 39730b57cec5SDimitry Andric case ISD::FP_EXTEND: 39740b57cec5SDimitry Andric case ISD::FTRUNC: 39750b57cec5SDimitry Andric case ISD::FRINT: 39760b57cec5SDimitry Andric case ISD::FNEARBYINT: // XXX - Should fround be handled? 39770b57cec5SDimitry Andric case ISD::FSIN: 39780b57cec5SDimitry Andric case ISD::FCANONICALIZE: 39790b57cec5SDimitry Andric case AMDGPUISD::RCP: 39800b57cec5SDimitry Andric case AMDGPUISD::RCP_LEGACY: 39810b57cec5SDimitry Andric case AMDGPUISD::RCP_IFLAG: 39820b57cec5SDimitry Andric case AMDGPUISD::SIN_HW: { 39830b57cec5SDimitry Andric SDValue CvtSrc = N0.getOperand(0); 39840b57cec5SDimitry Andric if (CvtSrc.getOpcode() == ISD::FNEG) { 39850b57cec5SDimitry Andric // (fneg (fp_extend (fneg x))) -> (fp_extend x) 39860b57cec5SDimitry Andric // (fneg (rcp (fneg x))) -> (rcp x) 39870b57cec5SDimitry Andric return DAG.getNode(Opc, SL, VT, CvtSrc.getOperand(0)); 39880b57cec5SDimitry Andric } 39890b57cec5SDimitry Andric 39900b57cec5SDimitry Andric if (!N0.hasOneUse()) 39910b57cec5SDimitry Andric return SDValue(); 39920b57cec5SDimitry Andric 39930b57cec5SDimitry Andric // (fneg (fp_extend x)) -> (fp_extend (fneg x)) 39940b57cec5SDimitry Andric // (fneg (rcp x)) -> (rcp (fneg x)) 39950b57cec5SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc); 39960b57cec5SDimitry Andric return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags()); 39970b57cec5SDimitry Andric } 39980b57cec5SDimitry Andric case ISD::FP_ROUND: { 39990b57cec5SDimitry Andric SDValue CvtSrc = N0.getOperand(0); 40000b57cec5SDimitry Andric 40010b57cec5SDimitry Andric if (CvtSrc.getOpcode() == ISD::FNEG) { 40020b57cec5SDimitry Andric // (fneg (fp_round (fneg x))) -> (fp_round x) 40030b57cec5SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, 40040b57cec5SDimitry Andric CvtSrc.getOperand(0), N0.getOperand(1)); 40050b57cec5SDimitry Andric } 40060b57cec5SDimitry Andric 40070b57cec5SDimitry Andric if (!N0.hasOneUse()) 40080b57cec5SDimitry Andric return SDValue(); 40090b57cec5SDimitry Andric 40100b57cec5SDimitry Andric // (fneg (fp_round x)) -> (fp_round (fneg x)) 40110b57cec5SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc); 40120b57cec5SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1)); 40130b57cec5SDimitry Andric } 40140b57cec5SDimitry Andric case ISD::FP16_TO_FP: { 40150b57cec5SDimitry Andric // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal 40160b57cec5SDimitry Andric // f16, but legalization of f16 fneg ends up pulling it out of the source. 40170b57cec5SDimitry Andric // Put the fneg back as a legal source operation that can be matched later. 40180b57cec5SDimitry Andric SDLoc SL(N); 40190b57cec5SDimitry Andric 40200b57cec5SDimitry Andric SDValue Src = N0.getOperand(0); 40210b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 40220b57cec5SDimitry Andric 40230b57cec5SDimitry Andric // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000) 40240b57cec5SDimitry Andric SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src, 40250b57cec5SDimitry Andric DAG.getConstant(0x8000, SL, SrcVT)); 40260b57cec5SDimitry Andric return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg); 40270b57cec5SDimitry Andric } 40280b57cec5SDimitry Andric default: 40290b57cec5SDimitry Andric return SDValue(); 40300b57cec5SDimitry Andric } 40310b57cec5SDimitry Andric } 40320b57cec5SDimitry Andric 40330b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N, 40340b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 40350b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 40360b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 40370b57cec5SDimitry Andric 40380b57cec5SDimitry Andric if (!N0.hasOneUse()) 40390b57cec5SDimitry Andric return SDValue(); 40400b57cec5SDimitry Andric 40410b57cec5SDimitry Andric switch (N0.getOpcode()) { 40420b57cec5SDimitry Andric case ISD::FP16_TO_FP: { 40430b57cec5SDimitry Andric assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal"); 40440b57cec5SDimitry Andric SDLoc SL(N); 40450b57cec5SDimitry Andric SDValue Src = N0.getOperand(0); 40460b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 40470b57cec5SDimitry Andric 40480b57cec5SDimitry Andric // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff) 40490b57cec5SDimitry Andric SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src, 40500b57cec5SDimitry Andric DAG.getConstant(0x7fff, SL, SrcVT)); 40510b57cec5SDimitry Andric return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs); 40520b57cec5SDimitry Andric } 40530b57cec5SDimitry Andric default: 40540b57cec5SDimitry Andric return SDValue(); 40550b57cec5SDimitry Andric } 40560b57cec5SDimitry Andric } 40570b57cec5SDimitry Andric 40580b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N, 40590b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 40600b57cec5SDimitry Andric const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)); 40610b57cec5SDimitry Andric if (!CFP) 40620b57cec5SDimitry Andric return SDValue(); 40630b57cec5SDimitry Andric 40640b57cec5SDimitry Andric // XXX - Should this flush denormals? 40650b57cec5SDimitry Andric const APFloat &Val = CFP->getValueAPF(); 40660b57cec5SDimitry Andric APFloat One(Val.getSemantics(), "1.0"); 40670b57cec5SDimitry Andric return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0)); 40680b57cec5SDimitry Andric } 40690b57cec5SDimitry Andric 40700b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, 40710b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 40720b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 40730b57cec5SDimitry Andric SDLoc DL(N); 40740b57cec5SDimitry Andric 40750b57cec5SDimitry Andric switch(N->getOpcode()) { 40760b57cec5SDimitry Andric default: 40770b57cec5SDimitry Andric break; 40780b57cec5SDimitry Andric case ISD::BITCAST: { 40790b57cec5SDimitry Andric EVT DestVT = N->getValueType(0); 40800b57cec5SDimitry Andric 40810b57cec5SDimitry Andric // Push casts through vector builds. This helps avoid emitting a large 40820b57cec5SDimitry Andric // number of copies when materializing floating point vector constants. 40830b57cec5SDimitry Andric // 40840b57cec5SDimitry Andric // vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) => 40850b57cec5SDimitry Andric // vnt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y)) 40860b57cec5SDimitry Andric if (DestVT.isVector()) { 40870b57cec5SDimitry Andric SDValue Src = N->getOperand(0); 40880b57cec5SDimitry Andric if (Src.getOpcode() == ISD::BUILD_VECTOR) { 40890b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 40900b57cec5SDimitry Andric unsigned NElts = DestVT.getVectorNumElements(); 40910b57cec5SDimitry Andric 40920b57cec5SDimitry Andric if (SrcVT.getVectorNumElements() == NElts) { 40930b57cec5SDimitry Andric EVT DestEltVT = DestVT.getVectorElementType(); 40940b57cec5SDimitry Andric 40950b57cec5SDimitry Andric SmallVector<SDValue, 8> CastedElts; 40960b57cec5SDimitry Andric SDLoc SL(N); 40970b57cec5SDimitry Andric for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) { 40980b57cec5SDimitry Andric SDValue Elt = Src.getOperand(I); 40990b57cec5SDimitry Andric CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt)); 41000b57cec5SDimitry Andric } 41010b57cec5SDimitry Andric 41020b57cec5SDimitry Andric return DAG.getBuildVector(DestVT, SL, CastedElts); 41030b57cec5SDimitry Andric } 41040b57cec5SDimitry Andric } 41050b57cec5SDimitry Andric } 41060b57cec5SDimitry Andric 4107e8d8bef9SDimitry Andric if (DestVT.getSizeInBits() != 64 || !DestVT.isVector()) 41080b57cec5SDimitry Andric break; 41090b57cec5SDimitry Andric 41100b57cec5SDimitry Andric // Fold bitcasts of constants. 41110b57cec5SDimitry Andric // 41120b57cec5SDimitry Andric // v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k) 41130b57cec5SDimitry Andric // TODO: Generalize and move to DAGCombiner 41140b57cec5SDimitry Andric SDValue Src = N->getOperand(0); 41150b57cec5SDimitry Andric if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) { 41160b57cec5SDimitry Andric SDLoc SL(N); 41170b57cec5SDimitry Andric uint64_t CVal = C->getZExtValue(); 41180b57cec5SDimitry Andric SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, 41190b57cec5SDimitry Andric DAG.getConstant(Lo_32(CVal), SL, MVT::i32), 41200b57cec5SDimitry Andric DAG.getConstant(Hi_32(CVal), SL, MVT::i32)); 41210b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, DestVT, BV); 41220b57cec5SDimitry Andric } 41230b57cec5SDimitry Andric 41240b57cec5SDimitry Andric if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) { 41250b57cec5SDimitry Andric const APInt &Val = C->getValueAPF().bitcastToAPInt(); 41260b57cec5SDimitry Andric SDLoc SL(N); 41270b57cec5SDimitry Andric uint64_t CVal = Val.getZExtValue(); 41280b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, 41290b57cec5SDimitry Andric DAG.getConstant(Lo_32(CVal), SL, MVT::i32), 41300b57cec5SDimitry Andric DAG.getConstant(Hi_32(CVal), SL, MVT::i32)); 41310b57cec5SDimitry Andric 41320b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec); 41330b57cec5SDimitry Andric } 41340b57cec5SDimitry Andric 41350b57cec5SDimitry Andric break; 41360b57cec5SDimitry Andric } 41370b57cec5SDimitry Andric case ISD::SHL: { 41380b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) 41390b57cec5SDimitry Andric break; 41400b57cec5SDimitry Andric 41410b57cec5SDimitry Andric return performShlCombine(N, DCI); 41420b57cec5SDimitry Andric } 41430b57cec5SDimitry Andric case ISD::SRL: { 41440b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) 41450b57cec5SDimitry Andric break; 41460b57cec5SDimitry Andric 41470b57cec5SDimitry Andric return performSrlCombine(N, DCI); 41480b57cec5SDimitry Andric } 41490b57cec5SDimitry Andric case ISD::SRA: { 41500b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) 41510b57cec5SDimitry Andric break; 41520b57cec5SDimitry Andric 41530b57cec5SDimitry Andric return performSraCombine(N, DCI); 41540b57cec5SDimitry Andric } 41550b57cec5SDimitry Andric case ISD::TRUNCATE: 41560b57cec5SDimitry Andric return performTruncateCombine(N, DCI); 41570b57cec5SDimitry Andric case ISD::MUL: 41580b57cec5SDimitry Andric return performMulCombine(N, DCI); 41594824e7fdSDimitry Andric case ISD::SMUL_LOHI: 41604824e7fdSDimitry Andric case ISD::UMUL_LOHI: 41614824e7fdSDimitry Andric return performMulLoHiCombine(N, DCI); 41620b57cec5SDimitry Andric case ISD::MULHS: 41630b57cec5SDimitry Andric return performMulhsCombine(N, DCI); 41640b57cec5SDimitry Andric case ISD::MULHU: 41650b57cec5SDimitry Andric return performMulhuCombine(N, DCI); 41660b57cec5SDimitry Andric case AMDGPUISD::MUL_I24: 41670b57cec5SDimitry Andric case AMDGPUISD::MUL_U24: 41680b57cec5SDimitry Andric case AMDGPUISD::MULHI_I24: 4169fe6060f1SDimitry Andric case AMDGPUISD::MULHI_U24: 4170fe6060f1SDimitry Andric return simplifyMul24(N, DCI); 41710b57cec5SDimitry Andric case ISD::SELECT: 41720b57cec5SDimitry Andric return performSelectCombine(N, DCI); 41730b57cec5SDimitry Andric case ISD::FNEG: 41740b57cec5SDimitry Andric return performFNegCombine(N, DCI); 41750b57cec5SDimitry Andric case ISD::FABS: 41760b57cec5SDimitry Andric return performFAbsCombine(N, DCI); 41770b57cec5SDimitry Andric case AMDGPUISD::BFE_I32: 41780b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: { 41790b57cec5SDimitry Andric assert(!N->getValueType(0).isVector() && 41800b57cec5SDimitry Andric "Vector handling of BFE not implemented"); 41810b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 41820b57cec5SDimitry Andric if (!Width) 41830b57cec5SDimitry Andric break; 41840b57cec5SDimitry Andric 41850b57cec5SDimitry Andric uint32_t WidthVal = Width->getZExtValue() & 0x1f; 41860b57cec5SDimitry Andric if (WidthVal == 0) 41870b57cec5SDimitry Andric return DAG.getConstant(0, DL, MVT::i32); 41880b57cec5SDimitry Andric 41890b57cec5SDimitry Andric ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 41900b57cec5SDimitry Andric if (!Offset) 41910b57cec5SDimitry Andric break; 41920b57cec5SDimitry Andric 41930b57cec5SDimitry Andric SDValue BitsFrom = N->getOperand(0); 41940b57cec5SDimitry Andric uint32_t OffsetVal = Offset->getZExtValue() & 0x1f; 41950b57cec5SDimitry Andric 41960b57cec5SDimitry Andric bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32; 41970b57cec5SDimitry Andric 41980b57cec5SDimitry Andric if (OffsetVal == 0) { 41990b57cec5SDimitry Andric // This is already sign / zero extended, so try to fold away extra BFEs. 42000b57cec5SDimitry Andric unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal); 42010b57cec5SDimitry Andric 42020b57cec5SDimitry Andric unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom); 42030b57cec5SDimitry Andric if (OpSignBits >= SignBits) 42040b57cec5SDimitry Andric return BitsFrom; 42050b57cec5SDimitry Andric 42060b57cec5SDimitry Andric EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal); 42070b57cec5SDimitry Andric if (Signed) { 42080b57cec5SDimitry Andric // This is a sign_extend_inreg. Replace it to take advantage of existing 42090b57cec5SDimitry Andric // DAG Combines. If not eliminated, we will match back to BFE during 42100b57cec5SDimitry Andric // selection. 42110b57cec5SDimitry Andric 42120b57cec5SDimitry Andric // TODO: The sext_inreg of extended types ends, although we can could 42130b57cec5SDimitry Andric // handle them in a single BFE. 42140b57cec5SDimitry Andric return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom, 42150b57cec5SDimitry Andric DAG.getValueType(SmallVT)); 42160b57cec5SDimitry Andric } 42170b57cec5SDimitry Andric 42180b57cec5SDimitry Andric return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT); 42190b57cec5SDimitry Andric } 42200b57cec5SDimitry Andric 42210b57cec5SDimitry Andric if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) { 42220b57cec5SDimitry Andric if (Signed) { 42230b57cec5SDimitry Andric return constantFoldBFE<int32_t>(DAG, 42240b57cec5SDimitry Andric CVal->getSExtValue(), 42250b57cec5SDimitry Andric OffsetVal, 42260b57cec5SDimitry Andric WidthVal, 42270b57cec5SDimitry Andric DL); 42280b57cec5SDimitry Andric } 42290b57cec5SDimitry Andric 42300b57cec5SDimitry Andric return constantFoldBFE<uint32_t>(DAG, 42310b57cec5SDimitry Andric CVal->getZExtValue(), 42320b57cec5SDimitry Andric OffsetVal, 42330b57cec5SDimitry Andric WidthVal, 42340b57cec5SDimitry Andric DL); 42350b57cec5SDimitry Andric } 42360b57cec5SDimitry Andric 42370b57cec5SDimitry Andric if ((OffsetVal + WidthVal) >= 32 && 42380b57cec5SDimitry Andric !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) { 42390b57cec5SDimitry Andric SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32); 42400b57cec5SDimitry Andric return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32, 42410b57cec5SDimitry Andric BitsFrom, ShiftVal); 42420b57cec5SDimitry Andric } 42430b57cec5SDimitry Andric 42440b57cec5SDimitry Andric if (BitsFrom.hasOneUse()) { 42450b57cec5SDimitry Andric APInt Demanded = APInt::getBitsSet(32, 42460b57cec5SDimitry Andric OffsetVal, 42470b57cec5SDimitry Andric OffsetVal + WidthVal); 42480b57cec5SDimitry Andric 42490b57cec5SDimitry Andric KnownBits Known; 42500b57cec5SDimitry Andric TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), 42510b57cec5SDimitry Andric !DCI.isBeforeLegalizeOps()); 42520b57cec5SDimitry Andric const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 42530b57cec5SDimitry Andric if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) || 42540b57cec5SDimitry Andric TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) { 42550b57cec5SDimitry Andric DCI.CommitTargetLoweringOpt(TLO); 42560b57cec5SDimitry Andric } 42570b57cec5SDimitry Andric } 42580b57cec5SDimitry Andric 42590b57cec5SDimitry Andric break; 42600b57cec5SDimitry Andric } 42610b57cec5SDimitry Andric case ISD::LOAD: 42620b57cec5SDimitry Andric return performLoadCombine(N, DCI); 42630b57cec5SDimitry Andric case ISD::STORE: 42640b57cec5SDimitry Andric return performStoreCombine(N, DCI); 42650b57cec5SDimitry Andric case AMDGPUISD::RCP: 42660b57cec5SDimitry Andric case AMDGPUISD::RCP_IFLAG: 42670b57cec5SDimitry Andric return performRcpCombine(N, DCI); 42680b57cec5SDimitry Andric case ISD::AssertZext: 42690b57cec5SDimitry Andric case ISD::AssertSext: 42700b57cec5SDimitry Andric return performAssertSZExtCombine(N, DCI); 42718bcb0991SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: 42728bcb0991SDimitry Andric return performIntrinsicWOChainCombine(N, DCI); 42730b57cec5SDimitry Andric } 42740b57cec5SDimitry Andric return SDValue(); 42750b57cec5SDimitry Andric } 42760b57cec5SDimitry Andric 42770b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 42780b57cec5SDimitry Andric // Helper functions 42790b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 42800b57cec5SDimitry Andric 42810b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 42820b57cec5SDimitry Andric const TargetRegisterClass *RC, 42835ffd83dbSDimitry Andric Register Reg, EVT VT, 42840b57cec5SDimitry Andric const SDLoc &SL, 42850b57cec5SDimitry Andric bool RawReg) const { 42860b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 42870b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 42885ffd83dbSDimitry Andric Register VReg; 42890b57cec5SDimitry Andric 42900b57cec5SDimitry Andric if (!MRI.isLiveIn(Reg)) { 42910b57cec5SDimitry Andric VReg = MRI.createVirtualRegister(RC); 42920b57cec5SDimitry Andric MRI.addLiveIn(Reg, VReg); 42930b57cec5SDimitry Andric } else { 42940b57cec5SDimitry Andric VReg = MRI.getLiveInVirtReg(Reg); 42950b57cec5SDimitry Andric } 42960b57cec5SDimitry Andric 42970b57cec5SDimitry Andric if (RawReg) 42980b57cec5SDimitry Andric return DAG.getRegister(VReg, VT); 42990b57cec5SDimitry Andric 43000b57cec5SDimitry Andric return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT); 43010b57cec5SDimitry Andric } 43020b57cec5SDimitry Andric 43038bcb0991SDimitry Andric // This may be called multiple times, and nothing prevents creating multiple 43048bcb0991SDimitry Andric // objects at the same offset. See if we already defined this object. 43058bcb0991SDimitry Andric static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size, 43068bcb0991SDimitry Andric int64_t Offset) { 43078bcb0991SDimitry Andric for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { 43088bcb0991SDimitry Andric if (MFI.getObjectOffset(I) == Offset) { 43098bcb0991SDimitry Andric assert(MFI.getObjectSize(I) == Size); 43108bcb0991SDimitry Andric return I; 43118bcb0991SDimitry Andric } 43128bcb0991SDimitry Andric } 43138bcb0991SDimitry Andric 43148bcb0991SDimitry Andric return MFI.CreateFixedObject(Size, Offset, true); 43158bcb0991SDimitry Andric } 43168bcb0991SDimitry Andric 43170b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG, 43180b57cec5SDimitry Andric EVT VT, 43190b57cec5SDimitry Andric const SDLoc &SL, 43200b57cec5SDimitry Andric int64_t Offset) const { 43210b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 43220b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 43238bcb0991SDimitry Andric int FI = getOrCreateFixedStackObject(MFI, VT.getStoreSize(), Offset); 43240b57cec5SDimitry Andric 43250b57cec5SDimitry Andric auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset); 43260b57cec5SDimitry Andric SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32); 43270b57cec5SDimitry Andric 4328e8d8bef9SDimitry Andric return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, Align(4), 43290b57cec5SDimitry Andric MachineMemOperand::MODereferenceable | 43300b57cec5SDimitry Andric MachineMemOperand::MOInvariant); 43310b57cec5SDimitry Andric } 43320b57cec5SDimitry Andric 43330b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG, 43340b57cec5SDimitry Andric const SDLoc &SL, 43350b57cec5SDimitry Andric SDValue Chain, 43360b57cec5SDimitry Andric SDValue ArgVal, 43370b57cec5SDimitry Andric int64_t Offset) const { 43380b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 43390b57cec5SDimitry Andric MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset); 4340fe6060f1SDimitry Andric const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 43410b57cec5SDimitry Andric 43420b57cec5SDimitry Andric SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32); 4343fe6060f1SDimitry Andric // Stores to the argument stack area are relative to the stack pointer. 4344fe6060f1SDimitry Andric SDValue SP = 4345fe6060f1SDimitry Andric DAG.getCopyFromReg(Chain, SL, Info->getStackPtrOffsetReg(), MVT::i32); 4346fe6060f1SDimitry Andric Ptr = DAG.getNode(ISD::ADD, SL, MVT::i32, SP, Ptr); 4347e8d8bef9SDimitry Andric SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, Align(4), 43480b57cec5SDimitry Andric MachineMemOperand::MODereferenceable); 43490b57cec5SDimitry Andric return Store; 43500b57cec5SDimitry Andric } 43510b57cec5SDimitry Andric 43520b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG, 43530b57cec5SDimitry Andric const TargetRegisterClass *RC, 43540b57cec5SDimitry Andric EVT VT, const SDLoc &SL, 43550b57cec5SDimitry Andric const ArgDescriptor &Arg) const { 43560b57cec5SDimitry Andric assert(Arg && "Attempting to load missing argument"); 43570b57cec5SDimitry Andric 43580b57cec5SDimitry Andric SDValue V = Arg.isRegister() ? 43590b57cec5SDimitry Andric CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL) : 43600b57cec5SDimitry Andric loadStackInputValue(DAG, VT, SL, Arg.getStackOffset()); 43610b57cec5SDimitry Andric 43620b57cec5SDimitry Andric if (!Arg.isMasked()) 43630b57cec5SDimitry Andric return V; 43640b57cec5SDimitry Andric 43650b57cec5SDimitry Andric unsigned Mask = Arg.getMask(); 43660b57cec5SDimitry Andric unsigned Shift = countTrailingZeros<unsigned>(Mask); 43670b57cec5SDimitry Andric V = DAG.getNode(ISD::SRL, SL, VT, V, 43680b57cec5SDimitry Andric DAG.getShiftAmountConstant(Shift, VT, SL)); 43690b57cec5SDimitry Andric return DAG.getNode(ISD::AND, SL, VT, V, 43700b57cec5SDimitry Andric DAG.getConstant(Mask >> Shift, SL, VT)); 43710b57cec5SDimitry Andric } 43720b57cec5SDimitry Andric 43730b57cec5SDimitry Andric uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( 43740b57cec5SDimitry Andric const MachineFunction &MF, const ImplicitParameter Param) const { 43750b57cec5SDimitry Andric const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>(); 43760b57cec5SDimitry Andric const AMDGPUSubtarget &ST = 43770b57cec5SDimitry Andric AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction()); 43780b57cec5SDimitry Andric unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction()); 43798bcb0991SDimitry Andric const Align Alignment = ST.getAlignmentForImplicitArgPtr(); 43800b57cec5SDimitry Andric uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) + 43810b57cec5SDimitry Andric ExplicitArgOffset; 43820b57cec5SDimitry Andric switch (Param) { 43830b57cec5SDimitry Andric case GRID_DIM: 43840b57cec5SDimitry Andric return ArgOffset; 43850b57cec5SDimitry Andric case GRID_OFFSET: 43860b57cec5SDimitry Andric return ArgOffset + 4; 43870b57cec5SDimitry Andric } 43880b57cec5SDimitry Andric llvm_unreachable("unexpected implicit parameter type"); 43890b57cec5SDimitry Andric } 43900b57cec5SDimitry Andric 43910b57cec5SDimitry Andric #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 43920b57cec5SDimitry Andric 43930b57cec5SDimitry Andric const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { 43940b57cec5SDimitry Andric switch ((AMDGPUISD::NodeType)Opcode) { 43950b57cec5SDimitry Andric case AMDGPUISD::FIRST_NUMBER: break; 43960b57cec5SDimitry Andric // AMDIL DAG nodes 43970b57cec5SDimitry Andric NODE_NAME_CASE(UMUL); 43980b57cec5SDimitry Andric NODE_NAME_CASE(BRANCH_COND); 43990b57cec5SDimitry Andric 44000b57cec5SDimitry Andric // AMDGPU DAG nodes 44010b57cec5SDimitry Andric NODE_NAME_CASE(IF) 44020b57cec5SDimitry Andric NODE_NAME_CASE(ELSE) 44030b57cec5SDimitry Andric NODE_NAME_CASE(LOOP) 44040b57cec5SDimitry Andric NODE_NAME_CASE(CALL) 44050b57cec5SDimitry Andric NODE_NAME_CASE(TC_RETURN) 44060b57cec5SDimitry Andric NODE_NAME_CASE(TRAP) 44070b57cec5SDimitry Andric NODE_NAME_CASE(RET_FLAG) 4408349cc55cSDimitry Andric NODE_NAME_CASE(RET_GFX_FLAG) 44090b57cec5SDimitry Andric NODE_NAME_CASE(RETURN_TO_EPILOG) 44100b57cec5SDimitry Andric NODE_NAME_CASE(ENDPGM) 44110b57cec5SDimitry Andric NODE_NAME_CASE(DWORDADDR) 44120b57cec5SDimitry Andric NODE_NAME_CASE(FRACT) 44130b57cec5SDimitry Andric NODE_NAME_CASE(SETCC) 44140b57cec5SDimitry Andric NODE_NAME_CASE(SETREG) 44158bcb0991SDimitry Andric NODE_NAME_CASE(DENORM_MODE) 44160b57cec5SDimitry Andric NODE_NAME_CASE(FMA_W_CHAIN) 44170b57cec5SDimitry Andric NODE_NAME_CASE(FMUL_W_CHAIN) 44180b57cec5SDimitry Andric NODE_NAME_CASE(CLAMP) 44190b57cec5SDimitry Andric NODE_NAME_CASE(COS_HW) 44200b57cec5SDimitry Andric NODE_NAME_CASE(SIN_HW) 44210b57cec5SDimitry Andric NODE_NAME_CASE(FMAX_LEGACY) 44220b57cec5SDimitry Andric NODE_NAME_CASE(FMIN_LEGACY) 44230b57cec5SDimitry Andric NODE_NAME_CASE(FMAX3) 44240b57cec5SDimitry Andric NODE_NAME_CASE(SMAX3) 44250b57cec5SDimitry Andric NODE_NAME_CASE(UMAX3) 44260b57cec5SDimitry Andric NODE_NAME_CASE(FMIN3) 44270b57cec5SDimitry Andric NODE_NAME_CASE(SMIN3) 44280b57cec5SDimitry Andric NODE_NAME_CASE(UMIN3) 44290b57cec5SDimitry Andric NODE_NAME_CASE(FMED3) 44300b57cec5SDimitry Andric NODE_NAME_CASE(SMED3) 44310b57cec5SDimitry Andric NODE_NAME_CASE(UMED3) 44320b57cec5SDimitry Andric NODE_NAME_CASE(FDOT2) 44330b57cec5SDimitry Andric NODE_NAME_CASE(URECIP) 44340b57cec5SDimitry Andric NODE_NAME_CASE(DIV_SCALE) 44350b57cec5SDimitry Andric NODE_NAME_CASE(DIV_FMAS) 44360b57cec5SDimitry Andric NODE_NAME_CASE(DIV_FIXUP) 44370b57cec5SDimitry Andric NODE_NAME_CASE(FMAD_FTZ) 44380b57cec5SDimitry Andric NODE_NAME_CASE(RCP) 44390b57cec5SDimitry Andric NODE_NAME_CASE(RSQ) 44400b57cec5SDimitry Andric NODE_NAME_CASE(RCP_LEGACY) 44410b57cec5SDimitry Andric NODE_NAME_CASE(RCP_IFLAG) 44420b57cec5SDimitry Andric NODE_NAME_CASE(FMUL_LEGACY) 44430b57cec5SDimitry Andric NODE_NAME_CASE(RSQ_CLAMP) 44440b57cec5SDimitry Andric NODE_NAME_CASE(LDEXP) 44450b57cec5SDimitry Andric NODE_NAME_CASE(FP_CLASS) 44460b57cec5SDimitry Andric NODE_NAME_CASE(DOT4) 44470b57cec5SDimitry Andric NODE_NAME_CASE(CARRY) 44480b57cec5SDimitry Andric NODE_NAME_CASE(BORROW) 44490b57cec5SDimitry Andric NODE_NAME_CASE(BFE_U32) 44500b57cec5SDimitry Andric NODE_NAME_CASE(BFE_I32) 44510b57cec5SDimitry Andric NODE_NAME_CASE(BFI) 44520b57cec5SDimitry Andric NODE_NAME_CASE(BFM) 44530b57cec5SDimitry Andric NODE_NAME_CASE(FFBH_U32) 44540b57cec5SDimitry Andric NODE_NAME_CASE(FFBH_I32) 44550b57cec5SDimitry Andric NODE_NAME_CASE(FFBL_B32) 44560b57cec5SDimitry Andric NODE_NAME_CASE(MUL_U24) 44570b57cec5SDimitry Andric NODE_NAME_CASE(MUL_I24) 44580b57cec5SDimitry Andric NODE_NAME_CASE(MULHI_U24) 44590b57cec5SDimitry Andric NODE_NAME_CASE(MULHI_I24) 44600b57cec5SDimitry Andric NODE_NAME_CASE(MAD_U24) 44610b57cec5SDimitry Andric NODE_NAME_CASE(MAD_I24) 44620b57cec5SDimitry Andric NODE_NAME_CASE(MAD_I64_I32) 44630b57cec5SDimitry Andric NODE_NAME_CASE(MAD_U64_U32) 44640b57cec5SDimitry Andric NODE_NAME_CASE(PERM) 44650b57cec5SDimitry Andric NODE_NAME_CASE(TEXTURE_FETCH) 44660b57cec5SDimitry Andric NODE_NAME_CASE(R600_EXPORT) 44670b57cec5SDimitry Andric NODE_NAME_CASE(CONST_ADDRESS) 44680b57cec5SDimitry Andric NODE_NAME_CASE(REGISTER_LOAD) 44690b57cec5SDimitry Andric NODE_NAME_CASE(REGISTER_STORE) 44700b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLE) 44710b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLEB) 44720b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLED) 44730b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLEL) 44740b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE0) 44750b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE1) 44760b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE2) 44770b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE3) 44780b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PKRTZ_F16_F32) 44790b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PKNORM_I16_F32) 44800b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PKNORM_U16_F32) 44810b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PK_I16_I32) 44820b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PK_U16_U32) 44830b57cec5SDimitry Andric NODE_NAME_CASE(FP_TO_FP16) 44840b57cec5SDimitry Andric NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) 44850b57cec5SDimitry Andric NODE_NAME_CASE(CONST_DATA_PTR) 44860b57cec5SDimitry Andric NODE_NAME_CASE(PC_ADD_REL_OFFSET) 44870b57cec5SDimitry Andric NODE_NAME_CASE(LDS) 44880b57cec5SDimitry Andric NODE_NAME_CASE(DUMMY_CHAIN) 44890b57cec5SDimitry Andric case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; 44900b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI) 44910b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO) 44920b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI_I8) 44930b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI_U8) 44940b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO_I8) 44950b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO_U8) 44960b57cec5SDimitry Andric NODE_NAME_CASE(STORE_MSKOR) 44970b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_CONSTANT) 44980b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_STORE_FORMAT) 44990b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16) 45000b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_LOAD_FORMAT) 45010b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16) 45020b57cec5SDimitry Andric NODE_NAME_CASE(DS_ORDERED_COUNT) 45030b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_CMP_SWAP) 45040b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_INC) 45050b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_DEC) 45060b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_LOAD_FMIN) 45070b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_LOAD_FMAX) 45080b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD) 45090b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_UBYTE) 45100b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_USHORT) 45110b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_BYTE) 45120b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_SHORT) 45130b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_FORMAT) 45140b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16) 45150b57cec5SDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD) 45160b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE) 45170b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_BYTE) 45180b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_SHORT) 45190b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_FORMAT) 45200b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16) 45210b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SWAP) 45220b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_ADD) 45230b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SUB) 45240b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SMIN) 45250b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_UMIN) 45260b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SMAX) 45270b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_UMAX) 45280b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_AND) 45290b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_OR) 45300b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_XOR) 45318bcb0991SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_INC) 45328bcb0991SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_DEC) 45330b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) 45345ffd83dbSDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_CSUB) 45350b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_FADD) 4536fe6060f1SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_FMIN) 4537fe6060f1SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_FMAX) 45380b57cec5SDimitry Andric 45390b57cec5SDimitry Andric case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break; 45400b57cec5SDimitry Andric } 45410b57cec5SDimitry Andric return nullptr; 45420b57cec5SDimitry Andric } 45430b57cec5SDimitry Andric 45440b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand, 45450b57cec5SDimitry Andric SelectionDAG &DAG, int Enabled, 45460b57cec5SDimitry Andric int &RefinementSteps, 45470b57cec5SDimitry Andric bool &UseOneConstNR, 45480b57cec5SDimitry Andric bool Reciprocal) const { 45490b57cec5SDimitry Andric EVT VT = Operand.getValueType(); 45500b57cec5SDimitry Andric 45510b57cec5SDimitry Andric if (VT == MVT::f32) { 45520b57cec5SDimitry Andric RefinementSteps = 0; 45530b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand); 45540b57cec5SDimitry Andric } 45550b57cec5SDimitry Andric 45560b57cec5SDimitry Andric // TODO: There is also f64 rsq instruction, but the documentation is less 45570b57cec5SDimitry Andric // clear on its precision. 45580b57cec5SDimitry Andric 45590b57cec5SDimitry Andric return SDValue(); 45600b57cec5SDimitry Andric } 45610b57cec5SDimitry Andric 45620b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand, 45630b57cec5SDimitry Andric SelectionDAG &DAG, int Enabled, 45640b57cec5SDimitry Andric int &RefinementSteps) const { 45650b57cec5SDimitry Andric EVT VT = Operand.getValueType(); 45660b57cec5SDimitry Andric 45670b57cec5SDimitry Andric if (VT == MVT::f32) { 45680b57cec5SDimitry Andric // Reciprocal, < 1 ulp error. 45690b57cec5SDimitry Andric // 45700b57cec5SDimitry Andric // This reciprocal approximation converges to < 0.5 ulp error with one 45710b57cec5SDimitry Andric // newton rhapson performed with two fused multiple adds (FMAs). 45720b57cec5SDimitry Andric 45730b57cec5SDimitry Andric RefinementSteps = 0; 45740b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand); 45750b57cec5SDimitry Andric } 45760b57cec5SDimitry Andric 45770b57cec5SDimitry Andric // TODO: There is also f64 rcp instruction, but the documentation is less 45780b57cec5SDimitry Andric // clear on its precision. 45790b57cec5SDimitry Andric 45800b57cec5SDimitry Andric return SDValue(); 45810b57cec5SDimitry Andric } 45820b57cec5SDimitry Andric 45830b57cec5SDimitry Andric void AMDGPUTargetLowering::computeKnownBitsForTargetNode( 45840b57cec5SDimitry Andric const SDValue Op, KnownBits &Known, 45850b57cec5SDimitry Andric const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { 45860b57cec5SDimitry Andric 45870b57cec5SDimitry Andric Known.resetAll(); // Don't know anything. 45880b57cec5SDimitry Andric 45890b57cec5SDimitry Andric unsigned Opc = Op.getOpcode(); 45900b57cec5SDimitry Andric 45910b57cec5SDimitry Andric switch (Opc) { 45920b57cec5SDimitry Andric default: 45930b57cec5SDimitry Andric break; 45940b57cec5SDimitry Andric case AMDGPUISD::CARRY: 45950b57cec5SDimitry Andric case AMDGPUISD::BORROW: { 45960b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(32, 31); 45970b57cec5SDimitry Andric break; 45980b57cec5SDimitry Andric } 45990b57cec5SDimitry Andric 46000b57cec5SDimitry Andric case AMDGPUISD::BFE_I32: 46010b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: { 46020b57cec5SDimitry Andric ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 46030b57cec5SDimitry Andric if (!CWidth) 46040b57cec5SDimitry Andric return; 46050b57cec5SDimitry Andric 46060b57cec5SDimitry Andric uint32_t Width = CWidth->getZExtValue() & 0x1f; 46070b57cec5SDimitry Andric 46080b57cec5SDimitry Andric if (Opc == AMDGPUISD::BFE_U32) 46090b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(32, 32 - Width); 46100b57cec5SDimitry Andric 46110b57cec5SDimitry Andric break; 46120b57cec5SDimitry Andric } 4613fe6060f1SDimitry Andric case AMDGPUISD::FP_TO_FP16: { 46140b57cec5SDimitry Andric unsigned BitWidth = Known.getBitWidth(); 46150b57cec5SDimitry Andric 46160b57cec5SDimitry Andric // High bits are zero. 46170b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16); 46180b57cec5SDimitry Andric break; 46190b57cec5SDimitry Andric } 46200b57cec5SDimitry Andric case AMDGPUISD::MUL_U24: 46210b57cec5SDimitry Andric case AMDGPUISD::MUL_I24: { 46220b57cec5SDimitry Andric KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 46230b57cec5SDimitry Andric KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); 46240b57cec5SDimitry Andric unsigned TrailZ = LHSKnown.countMinTrailingZeros() + 46250b57cec5SDimitry Andric RHSKnown.countMinTrailingZeros(); 46260b57cec5SDimitry Andric Known.Zero.setLowBits(std::min(TrailZ, 32u)); 4627480093f4SDimitry Andric // Skip extra check if all bits are known zeros. 4628480093f4SDimitry Andric if (TrailZ >= 32) 4629480093f4SDimitry Andric break; 46300b57cec5SDimitry Andric 46310b57cec5SDimitry Andric // Truncate to 24 bits. 46320b57cec5SDimitry Andric LHSKnown = LHSKnown.trunc(24); 46330b57cec5SDimitry Andric RHSKnown = RHSKnown.trunc(24); 46340b57cec5SDimitry Andric 46350b57cec5SDimitry Andric if (Opc == AMDGPUISD::MUL_I24) { 4636*04eeddc0SDimitry Andric unsigned LHSValBits = LHSKnown.countMaxSignificantBits(); 4637*04eeddc0SDimitry Andric unsigned RHSValBits = RHSKnown.countMaxSignificantBits(); 4638*04eeddc0SDimitry Andric unsigned MaxValBits = LHSValBits + RHSValBits; 4639*04eeddc0SDimitry Andric if (MaxValBits > 32) 46400b57cec5SDimitry Andric break; 4641*04eeddc0SDimitry Andric unsigned SignBits = 32 - MaxValBits + 1; 46420b57cec5SDimitry Andric bool LHSNegative = LHSKnown.isNegative(); 4643480093f4SDimitry Andric bool LHSNonNegative = LHSKnown.isNonNegative(); 4644480093f4SDimitry Andric bool LHSPositive = LHSKnown.isStrictlyPositive(); 46450b57cec5SDimitry Andric bool RHSNegative = RHSKnown.isNegative(); 4646480093f4SDimitry Andric bool RHSNonNegative = RHSKnown.isNonNegative(); 4647480093f4SDimitry Andric bool RHSPositive = RHSKnown.isStrictlyPositive(); 4648480093f4SDimitry Andric 4649480093f4SDimitry Andric if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative)) 4650*04eeddc0SDimitry Andric Known.Zero.setHighBits(SignBits); 4651480093f4SDimitry Andric else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative)) 4652*04eeddc0SDimitry Andric Known.One.setHighBits(SignBits); 46530b57cec5SDimitry Andric } else { 4654*04eeddc0SDimitry Andric unsigned LHSValBits = LHSKnown.countMaxActiveBits(); 4655*04eeddc0SDimitry Andric unsigned RHSValBits = RHSKnown.countMaxActiveBits(); 4656*04eeddc0SDimitry Andric unsigned MaxValBits = LHSValBits + RHSValBits; 46570b57cec5SDimitry Andric if (MaxValBits >= 32) 46580b57cec5SDimitry Andric break; 4659*04eeddc0SDimitry Andric Known.Zero.setBitsFrom(MaxValBits); 46600b57cec5SDimitry Andric } 46610b57cec5SDimitry Andric break; 46620b57cec5SDimitry Andric } 46630b57cec5SDimitry Andric case AMDGPUISD::PERM: { 46640b57cec5SDimitry Andric ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 46650b57cec5SDimitry Andric if (!CMask) 46660b57cec5SDimitry Andric return; 46670b57cec5SDimitry Andric 46680b57cec5SDimitry Andric KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 46690b57cec5SDimitry Andric KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); 46700b57cec5SDimitry Andric unsigned Sel = CMask->getZExtValue(); 46710b57cec5SDimitry Andric 46720b57cec5SDimitry Andric for (unsigned I = 0; I < 32; I += 8) { 46730b57cec5SDimitry Andric unsigned SelBits = Sel & 0xff; 46740b57cec5SDimitry Andric if (SelBits < 4) { 46750b57cec5SDimitry Andric SelBits *= 8; 46760b57cec5SDimitry Andric Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; 46770b57cec5SDimitry Andric Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; 46780b57cec5SDimitry Andric } else if (SelBits < 7) { 46790b57cec5SDimitry Andric SelBits = (SelBits & 3) * 8; 46800b57cec5SDimitry Andric Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; 46810b57cec5SDimitry Andric Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; 46820b57cec5SDimitry Andric } else if (SelBits == 0x0c) { 46838bcb0991SDimitry Andric Known.Zero |= 0xFFull << I; 46840b57cec5SDimitry Andric } else if (SelBits > 0x0c) { 46858bcb0991SDimitry Andric Known.One |= 0xFFull << I; 46860b57cec5SDimitry Andric } 46870b57cec5SDimitry Andric Sel >>= 8; 46880b57cec5SDimitry Andric } 46890b57cec5SDimitry Andric break; 46900b57cec5SDimitry Andric } 46910b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_UBYTE: { 46920b57cec5SDimitry Andric Known.Zero.setHighBits(24); 46930b57cec5SDimitry Andric break; 46940b57cec5SDimitry Andric } 46950b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_USHORT: { 46960b57cec5SDimitry Andric Known.Zero.setHighBits(16); 46970b57cec5SDimitry Andric break; 46980b57cec5SDimitry Andric } 46990b57cec5SDimitry Andric case AMDGPUISD::LDS: { 47000b57cec5SDimitry Andric auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode()); 47015ffd83dbSDimitry Andric Align Alignment = GA->getGlobal()->getPointerAlignment(DAG.getDataLayout()); 47020b57cec5SDimitry Andric 47030b57cec5SDimitry Andric Known.Zero.setHighBits(16); 47045ffd83dbSDimitry Andric Known.Zero.setLowBits(Log2(Alignment)); 47050b57cec5SDimitry Andric break; 47060b57cec5SDimitry Andric } 47070b57cec5SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 47080b57cec5SDimitry Andric unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 47090b57cec5SDimitry Andric switch (IID) { 47100b57cec5SDimitry Andric case Intrinsic::amdgcn_mbcnt_lo: 47110b57cec5SDimitry Andric case Intrinsic::amdgcn_mbcnt_hi: { 47120b57cec5SDimitry Andric const GCNSubtarget &ST = 47130b57cec5SDimitry Andric DAG.getMachineFunction().getSubtarget<GCNSubtarget>(); 47140b57cec5SDimitry Andric // These return at most the wavefront size - 1. 47150b57cec5SDimitry Andric unsigned Size = Op.getValueType().getSizeInBits(); 47160b57cec5SDimitry Andric Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2()); 47170b57cec5SDimitry Andric break; 47180b57cec5SDimitry Andric } 47190b57cec5SDimitry Andric default: 47200b57cec5SDimitry Andric break; 47210b57cec5SDimitry Andric } 47220b57cec5SDimitry Andric } 47230b57cec5SDimitry Andric } 47240b57cec5SDimitry Andric } 47250b57cec5SDimitry Andric 47260b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( 47270b57cec5SDimitry Andric SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 47280b57cec5SDimitry Andric unsigned Depth) const { 47290b57cec5SDimitry Andric switch (Op.getOpcode()) { 47300b57cec5SDimitry Andric case AMDGPUISD::BFE_I32: { 47310b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 47320b57cec5SDimitry Andric if (!Width) 47330b57cec5SDimitry Andric return 1; 47340b57cec5SDimitry Andric 47350b57cec5SDimitry Andric unsigned SignBits = 32 - Width->getZExtValue() + 1; 47360b57cec5SDimitry Andric if (!isNullConstant(Op.getOperand(1))) 47370b57cec5SDimitry Andric return SignBits; 47380b57cec5SDimitry Andric 47390b57cec5SDimitry Andric // TODO: Could probably figure something out with non-0 offsets. 47400b57cec5SDimitry Andric unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); 47410b57cec5SDimitry Andric return std::max(SignBits, Op0SignBits); 47420b57cec5SDimitry Andric } 47430b57cec5SDimitry Andric 47440b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: { 47450b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 47460b57cec5SDimitry Andric return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1; 47470b57cec5SDimitry Andric } 47480b57cec5SDimitry Andric 47490b57cec5SDimitry Andric case AMDGPUISD::CARRY: 47500b57cec5SDimitry Andric case AMDGPUISD::BORROW: 47510b57cec5SDimitry Andric return 31; 47520b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_BYTE: 47530b57cec5SDimitry Andric return 25; 47540b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_SHORT: 47550b57cec5SDimitry Andric return 17; 47560b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_UBYTE: 47570b57cec5SDimitry Andric return 24; 47580b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_USHORT: 47590b57cec5SDimitry Andric return 16; 47600b57cec5SDimitry Andric case AMDGPUISD::FP_TO_FP16: 47610b57cec5SDimitry Andric return 16; 47620b57cec5SDimitry Andric default: 47630b57cec5SDimitry Andric return 1; 47640b57cec5SDimitry Andric } 47650b57cec5SDimitry Andric } 47660b57cec5SDimitry Andric 47675ffd83dbSDimitry Andric unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr( 47685ffd83dbSDimitry Andric GISelKnownBits &Analysis, Register R, 47695ffd83dbSDimitry Andric const APInt &DemandedElts, const MachineRegisterInfo &MRI, 47705ffd83dbSDimitry Andric unsigned Depth) const { 47715ffd83dbSDimitry Andric const MachineInstr *MI = MRI.getVRegDef(R); 47725ffd83dbSDimitry Andric if (!MI) 47735ffd83dbSDimitry Andric return 1; 47745ffd83dbSDimitry Andric 47755ffd83dbSDimitry Andric // TODO: Check range metadata on MMO. 47765ffd83dbSDimitry Andric switch (MI->getOpcode()) { 47775ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE: 47785ffd83dbSDimitry Andric return 25; 47795ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT: 47805ffd83dbSDimitry Andric return 17; 47815ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE: 47825ffd83dbSDimitry Andric return 24; 47835ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT: 47845ffd83dbSDimitry Andric return 16; 47855ffd83dbSDimitry Andric default: 47865ffd83dbSDimitry Andric return 1; 47875ffd83dbSDimitry Andric } 47885ffd83dbSDimitry Andric } 47895ffd83dbSDimitry Andric 47900b57cec5SDimitry Andric bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, 47910b57cec5SDimitry Andric const SelectionDAG &DAG, 47920b57cec5SDimitry Andric bool SNaN, 47930b57cec5SDimitry Andric unsigned Depth) const { 47940b57cec5SDimitry Andric unsigned Opcode = Op.getOpcode(); 47950b57cec5SDimitry Andric switch (Opcode) { 47960b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: 47970b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: { 47980b57cec5SDimitry Andric if (SNaN) 47990b57cec5SDimitry Andric return true; 48000b57cec5SDimitry Andric 48010b57cec5SDimitry Andric // TODO: Can check no nans on one of the operands for each one, but which 48020b57cec5SDimitry Andric // one? 48030b57cec5SDimitry Andric return false; 48040b57cec5SDimitry Andric } 48050b57cec5SDimitry Andric case AMDGPUISD::FMUL_LEGACY: 48060b57cec5SDimitry Andric case AMDGPUISD::CVT_PKRTZ_F16_F32: { 48070b57cec5SDimitry Andric if (SNaN) 48080b57cec5SDimitry Andric return true; 48090b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && 48100b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); 48110b57cec5SDimitry Andric } 48120b57cec5SDimitry Andric case AMDGPUISD::FMED3: 48130b57cec5SDimitry Andric case AMDGPUISD::FMIN3: 48140b57cec5SDimitry Andric case AMDGPUISD::FMAX3: 48150b57cec5SDimitry Andric case AMDGPUISD::FMAD_FTZ: { 48160b57cec5SDimitry Andric if (SNaN) 48170b57cec5SDimitry Andric return true; 48180b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && 48190b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && 48200b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); 48210b57cec5SDimitry Andric } 48220b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE0: 48230b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE1: 48240b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE2: 48250b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE3: 48260b57cec5SDimitry Andric return true; 48270b57cec5SDimitry Andric 48280b57cec5SDimitry Andric case AMDGPUISD::RCP: 48290b57cec5SDimitry Andric case AMDGPUISD::RSQ: 48300b57cec5SDimitry Andric case AMDGPUISD::RCP_LEGACY: 48310b57cec5SDimitry Andric case AMDGPUISD::RSQ_CLAMP: { 48320b57cec5SDimitry Andric if (SNaN) 48330b57cec5SDimitry Andric return true; 48340b57cec5SDimitry Andric 48350b57cec5SDimitry Andric // TODO: Need is known positive check. 48360b57cec5SDimitry Andric return false; 48370b57cec5SDimitry Andric } 48380b57cec5SDimitry Andric case AMDGPUISD::LDEXP: 48390b57cec5SDimitry Andric case AMDGPUISD::FRACT: { 48400b57cec5SDimitry Andric if (SNaN) 48410b57cec5SDimitry Andric return true; 48420b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); 48430b57cec5SDimitry Andric } 48440b57cec5SDimitry Andric case AMDGPUISD::DIV_SCALE: 48450b57cec5SDimitry Andric case AMDGPUISD::DIV_FMAS: 48460b57cec5SDimitry Andric case AMDGPUISD::DIV_FIXUP: 48470b57cec5SDimitry Andric // TODO: Refine on operands. 48480b57cec5SDimitry Andric return SNaN; 48490b57cec5SDimitry Andric case AMDGPUISD::SIN_HW: 48500b57cec5SDimitry Andric case AMDGPUISD::COS_HW: { 48510b57cec5SDimitry Andric // TODO: Need check for infinity 48520b57cec5SDimitry Andric return SNaN; 48530b57cec5SDimitry Andric } 48540b57cec5SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 48550b57cec5SDimitry Andric unsigned IntrinsicID 48560b57cec5SDimitry Andric = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 48570b57cec5SDimitry Andric // TODO: Handle more intrinsics 48580b57cec5SDimitry Andric switch (IntrinsicID) { 48590b57cec5SDimitry Andric case Intrinsic::amdgcn_cubeid: 48600b57cec5SDimitry Andric return true; 48610b57cec5SDimitry Andric 48620b57cec5SDimitry Andric case Intrinsic::amdgcn_frexp_mant: { 48630b57cec5SDimitry Andric if (SNaN) 48640b57cec5SDimitry Andric return true; 48650b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); 48660b57cec5SDimitry Andric } 48670b57cec5SDimitry Andric case Intrinsic::amdgcn_cvt_pkrtz: { 48680b57cec5SDimitry Andric if (SNaN) 48690b57cec5SDimitry Andric return true; 48700b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && 48710b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); 48720b57cec5SDimitry Andric } 48735ffd83dbSDimitry Andric case Intrinsic::amdgcn_rcp: 48745ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq: 48755ffd83dbSDimitry Andric case Intrinsic::amdgcn_rcp_legacy: 48765ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq_legacy: 48775ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq_clamp: { 48785ffd83dbSDimitry Andric if (SNaN) 48795ffd83dbSDimitry Andric return true; 48805ffd83dbSDimitry Andric 48815ffd83dbSDimitry Andric // TODO: Need is known positive check. 48825ffd83dbSDimitry Andric return false; 48835ffd83dbSDimitry Andric } 48845ffd83dbSDimitry Andric case Intrinsic::amdgcn_trig_preop: 48850b57cec5SDimitry Andric case Intrinsic::amdgcn_fdot2: 48860b57cec5SDimitry Andric // TODO: Refine on operand 48870b57cec5SDimitry Andric return SNaN; 4888e8d8bef9SDimitry Andric case Intrinsic::amdgcn_fma_legacy: 4889e8d8bef9SDimitry Andric if (SNaN) 4890e8d8bef9SDimitry Andric return true; 4891e8d8bef9SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && 4892e8d8bef9SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1) && 4893e8d8bef9SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(3), SNaN, Depth + 1); 48940b57cec5SDimitry Andric default: 48950b57cec5SDimitry Andric return false; 48960b57cec5SDimitry Andric } 48970b57cec5SDimitry Andric } 48980b57cec5SDimitry Andric default: 48990b57cec5SDimitry Andric return false; 49000b57cec5SDimitry Andric } 49010b57cec5SDimitry Andric } 49020b57cec5SDimitry Andric 49030b57cec5SDimitry Andric TargetLowering::AtomicExpansionKind 49040b57cec5SDimitry Andric AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { 49050b57cec5SDimitry Andric switch (RMW->getOperation()) { 49060b57cec5SDimitry Andric case AtomicRMWInst::Nand: 49070b57cec5SDimitry Andric case AtomicRMWInst::FAdd: 49080b57cec5SDimitry Andric case AtomicRMWInst::FSub: 49090b57cec5SDimitry Andric return AtomicExpansionKind::CmpXChg; 49100b57cec5SDimitry Andric default: 49110b57cec5SDimitry Andric return AtomicExpansionKind::None; 49120b57cec5SDimitry Andric } 49130b57cec5SDimitry Andric } 4914fe6060f1SDimitry Andric 4915*04eeddc0SDimitry Andric bool AMDGPUTargetLowering::isConstantUnsignedBitfieldExtractLegal( 4916fe6060f1SDimitry Andric unsigned Opc, LLT Ty1, LLT Ty2) const { 4917*04eeddc0SDimitry Andric return (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64)) && 4918*04eeddc0SDimitry Andric Ty2 == LLT::scalar(32); 4919fe6060f1SDimitry Andric } 4920