xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (revision 04eeddc0aa8e0a417a16eaf9d7d095207f4a8623)
10b57cec5SDimitry Andric //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// This is the parent TargetLowering class for hardware code gen
110b57cec5SDimitry Andric /// targets.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "AMDGPUISelLowering.h"
160b57cec5SDimitry Andric #include "AMDGPU.h"
17e8d8bef9SDimitry Andric #include "AMDGPUInstrInfo.h"
18e8d8bef9SDimitry Andric #include "AMDGPUMachineFunction.h"
19e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
200b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
210b57cec5SDimitry Andric #include "llvm/CodeGen/Analysis.h"
220b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
23e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
24e8d8bef9SDimitry Andric #include "llvm/Support/CommandLine.h"
250b57cec5SDimitry Andric #include "llvm/Support/KnownBits.h"
26e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
27e8d8bef9SDimitry Andric 
280b57cec5SDimitry Andric using namespace llvm;
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric #include "AMDGPUGenCallingConv.inc"
310b57cec5SDimitry Andric 
325ffd83dbSDimitry Andric static cl::opt<bool> AMDGPUBypassSlowDiv(
335ffd83dbSDimitry Andric   "amdgpu-bypass-slow-div",
345ffd83dbSDimitry Andric   cl::desc("Skip 64-bit divide for dynamic 32-bit values"),
355ffd83dbSDimitry Andric   cl::init(true));
365ffd83dbSDimitry Andric 
370b57cec5SDimitry Andric // Find a larger type to do a load / store of a vector with.
380b57cec5SDimitry Andric EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
390b57cec5SDimitry Andric   unsigned StoreSize = VT.getStoreSizeInBits();
400b57cec5SDimitry Andric   if (StoreSize <= 32)
410b57cec5SDimitry Andric     return EVT::getIntegerVT(Ctx, StoreSize);
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric   assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
440b57cec5SDimitry Andric   return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
450b57cec5SDimitry Andric }
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
48349cc55cSDimitry Andric   return DAG.computeKnownBits(Op).countMaxActiveBits();
490b57cec5SDimitry Andric }
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
520b57cec5SDimitry Andric   // In order for this to be a signed 24-bit value, bit 23, must
530b57cec5SDimitry Andric   // be a sign bit.
54*04eeddc0SDimitry Andric   return DAG.ComputeMaxSignificantBits(Op);
550b57cec5SDimitry Andric }
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
580b57cec5SDimitry Andric                                            const AMDGPUSubtarget &STI)
590b57cec5SDimitry Andric     : TargetLowering(TM), Subtarget(&STI) {
600b57cec5SDimitry Andric   // Lower floating point store/load to integer store/load to reduce the number
610b57cec5SDimitry Andric   // of patterns in tablegen.
620b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::f32, Promote);
630b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
660b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v3f32, Promote);
690b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v3f32, MVT::v3i32);
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
720b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v5f32, Promote);
750b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32);
760b57cec5SDimitry Andric 
77fe6060f1SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v6f32, Promote);
78fe6060f1SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v6f32, MVT::v6i32);
79fe6060f1SDimitry Andric 
80fe6060f1SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v7f32, Promote);
81fe6060f1SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v7f32, MVT::v7i32);
82fe6060f1SDimitry Andric 
830b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
840b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
870b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v32f32, Promote);
900b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32);
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::i64, Promote);
930b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
960b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32);
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::f64, Promote);
990b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32);
1000b57cec5SDimitry Andric 
1010b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v2f64, Promote);
1020b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v4i32);
1030b57cec5SDimitry Andric 
104fe6060f1SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v3i64, Promote);
105fe6060f1SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v3i64, MVT::v6i32);
106fe6060f1SDimitry Andric 
1075ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v4i64, Promote);
1085ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v4i64, MVT::v8i32);
1095ffd83dbSDimitry Andric 
110fe6060f1SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v3f64, Promote);
111fe6060f1SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v3f64, MVT::v6i32);
112fe6060f1SDimitry Andric 
1135ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v4f64, Promote);
1145ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v4f64, MVT::v8i32);
1155ffd83dbSDimitry Andric 
1165ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v8i64, Promote);
1175ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v8i64, MVT::v16i32);
1185ffd83dbSDimitry Andric 
1195ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v8f64, Promote);
1205ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v8f64, MVT::v16i32);
1215ffd83dbSDimitry Andric 
1225ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v16i64, Promote);
1235ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v16i64, MVT::v32i32);
1245ffd83dbSDimitry Andric 
1255ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v16f64, Promote);
1265ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v16f64, MVT::v32i32);
1275ffd83dbSDimitry Andric 
1280b57cec5SDimitry Andric   // There are no 64-bit extloads. These should be done as a 32-bit extload and
1290b57cec5SDimitry Andric   // an extension to 64-bit.
1300b57cec5SDimitry Andric   for (MVT VT : MVT::integer_valuetypes()) {
1310b57cec5SDimitry Andric     setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand);
1320b57cec5SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand);
1330b57cec5SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand);
1340b57cec5SDimitry Andric   }
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric   for (MVT VT : MVT::integer_valuetypes()) {
1370b57cec5SDimitry Andric     if (VT == MVT::i64)
1380b57cec5SDimitry Andric       continue;
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1410b57cec5SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal);
1420b57cec5SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal);
1430b57cec5SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
1460b57cec5SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal);
1470b57cec5SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal);
1480b57cec5SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
1490b57cec5SDimitry Andric 
1500b57cec5SDimitry Andric     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
1510b57cec5SDimitry Andric     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal);
1520b57cec5SDimitry Andric     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal);
1530b57cec5SDimitry Andric     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
1540b57cec5SDimitry Andric   }
1550b57cec5SDimitry Andric 
1568bcb0991SDimitry Andric   for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1570b57cec5SDimitry Andric     setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
1580b57cec5SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand);
1590b57cec5SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand);
1600b57cec5SDimitry Andric     setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand);
1610b57cec5SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand);
1620b57cec5SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand);
1630b57cec5SDimitry Andric     setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand);
1640b57cec5SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand);
1650b57cec5SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand);
1668bcb0991SDimitry Andric     setLoadExtAction(ISD::EXTLOAD, VT, MVT::v3i16, Expand);
1678bcb0991SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v3i16, Expand);
1688bcb0991SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v3i16, Expand);
1690b57cec5SDimitry Andric     setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand);
1700b57cec5SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand);
1710b57cec5SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand);
1720b57cec5SDimitry Andric   }
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
1750b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
1768bcb0991SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
1770b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
1780b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
1798bcb0991SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand);
1808bcb0991SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand);
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
1830b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
184fe6060f1SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f32, Expand);
1850b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
1860b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f32, Expand);
1875ffd83dbSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f32, Expand);
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
1900b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
191fe6060f1SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f16, Expand);
1920b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
1930b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
1945ffd83dbSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand);
1950b57cec5SDimitry Andric 
1960b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::f32, Promote);
1970b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v2f32, Promote);
2000b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v3f32, Promote);
2030b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v3f32, MVT::v3i32);
2040b57cec5SDimitry Andric 
2050b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
2060b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
2070b57cec5SDimitry Andric 
2080b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v5f32, Promote);
2090b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32);
2100b57cec5SDimitry Andric 
211fe6060f1SDimitry Andric   setOperationAction(ISD::STORE, MVT::v6f32, Promote);
212fe6060f1SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v6f32, MVT::v6i32);
213fe6060f1SDimitry Andric 
214fe6060f1SDimitry Andric   setOperationAction(ISD::STORE, MVT::v7f32, Promote);
215fe6060f1SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v7f32, MVT::v7i32);
216fe6060f1SDimitry Andric 
2170b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v8f32, Promote);
2180b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v16f32, Promote);
2210b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v32f32, Promote);
2240b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32);
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::i64, Promote);
2270b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
2280b57cec5SDimitry Andric 
2290b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v2i64, Promote);
2300b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32);
2310b57cec5SDimitry Andric 
2320b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::f64, Promote);
2330b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32);
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v2f64, Promote);
2360b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32);
2370b57cec5SDimitry Andric 
238fe6060f1SDimitry Andric   setOperationAction(ISD::STORE, MVT::v3i64, Promote);
239fe6060f1SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v3i64, MVT::v6i32);
240fe6060f1SDimitry Andric 
241fe6060f1SDimitry Andric   setOperationAction(ISD::STORE, MVT::v3f64, Promote);
242fe6060f1SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v3f64, MVT::v6i32);
243fe6060f1SDimitry Andric 
2445ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v4i64, Promote);
2455ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v4i64, MVT::v8i32);
2465ffd83dbSDimitry Andric 
2475ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v4f64, Promote);
2485ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v4f64, MVT::v8i32);
2495ffd83dbSDimitry Andric 
2505ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v8i64, Promote);
2515ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v8i64, MVT::v16i32);
2525ffd83dbSDimitry Andric 
2535ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v8f64, Promote);
2545ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v8f64, MVT::v16i32);
2555ffd83dbSDimitry Andric 
2565ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v16i64, Promote);
2575ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v16i64, MVT::v32i32);
2585ffd83dbSDimitry Andric 
2595ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v16f64, Promote);
2605ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v16f64, MVT::v32i32);
2615ffd83dbSDimitry Andric 
2620b57cec5SDimitry Andric   setTruncStoreAction(MVT::i64, MVT::i1, Expand);
2630b57cec5SDimitry Andric   setTruncStoreAction(MVT::i64, MVT::i8, Expand);
2640b57cec5SDimitry Andric   setTruncStoreAction(MVT::i64, MVT::i16, Expand);
2650b57cec5SDimitry Andric   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
2660b57cec5SDimitry Andric 
2670b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand);
2680b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2i64, MVT::v2i8, Expand);
2690b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2i64, MVT::v2i16, Expand);
2700b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
2710b57cec5SDimitry Andric 
2720b57cec5SDimitry Andric   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
2730b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
2748bcb0991SDimitry Andric   setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
2750b57cec5SDimitry Andric   setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
2760b57cec5SDimitry Andric   setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
2778bcb0991SDimitry Andric   setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand);
2788bcb0991SDimitry Andric   setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand);
2790b57cec5SDimitry Andric 
2800b57cec5SDimitry Andric   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
2810b57cec5SDimitry Andric   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
2840b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand);
2850b57cec5SDimitry Andric 
286fe6060f1SDimitry Andric   setTruncStoreAction(MVT::v3i64, MVT::v3i32, Expand);
287fe6060f1SDimitry Andric   setTruncStoreAction(MVT::v3i64, MVT::v3i16, Expand);
288fe6060f1SDimitry Andric   setTruncStoreAction(MVT::v3f64, MVT::v3f32, Expand);
289fe6060f1SDimitry Andric   setTruncStoreAction(MVT::v3f64, MVT::v3f16, Expand);
290fe6060f1SDimitry Andric 
2915ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v4i64, MVT::v4i32, Expand);
2925ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v4i64, MVT::v4i16, Expand);
2930b57cec5SDimitry Andric   setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand);
2940b57cec5SDimitry Andric   setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand);
2950b57cec5SDimitry Andric 
2960b57cec5SDimitry Andric   setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand);
2970b57cec5SDimitry Andric   setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand);
2980b57cec5SDimitry Andric 
2995ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16f64, MVT::v16f32, Expand);
3005ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16f64, MVT::v16f16, Expand);
3015ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand);
3025ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand);
3035ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand);
3045ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand);
3055ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16i64, MVT::v16i1, Expand);
3060b57cec5SDimitry Andric 
3070b57cec5SDimitry Andric   setOperationAction(ISD::Constant, MVT::i32, Legal);
3080b57cec5SDimitry Andric   setOperationAction(ISD::Constant, MVT::i64, Legal);
3090b57cec5SDimitry Andric   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
3100b57cec5SDimitry Andric   setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
3130b57cec5SDimitry Andric   setOperationAction(ISD::BRIND, MVT::Other, Expand);
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric   // This is totally unsupported, just custom lower to produce an error.
3160b57cec5SDimitry Andric   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric   // Library functions.  These default to Expand, but we have instructions
3190b57cec5SDimitry Andric   // for them.
3200b57cec5SDimitry Andric   setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
3210b57cec5SDimitry Andric   setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
3220b57cec5SDimitry Andric   setOperationAction(ISD::FPOW,   MVT::f32, Legal);
3230b57cec5SDimitry Andric   setOperationAction(ISD::FLOG2,  MVT::f32, Legal);
3240b57cec5SDimitry Andric   setOperationAction(ISD::FABS,   MVT::f32, Legal);
3250b57cec5SDimitry Andric   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
3260b57cec5SDimitry Andric   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
3270b57cec5SDimitry Andric   setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
3280b57cec5SDimitry Andric   setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
3290b57cec5SDimitry Andric   setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
3300b57cec5SDimitry Andric 
3310b57cec5SDimitry Andric   setOperationAction(ISD::FROUND, MVT::f32, Custom);
3320b57cec5SDimitry Andric   setOperationAction(ISD::FROUND, MVT::f64, Custom);
3330b57cec5SDimitry Andric 
3340b57cec5SDimitry Andric   setOperationAction(ISD::FLOG, MVT::f32, Custom);
3350b57cec5SDimitry Andric   setOperationAction(ISD::FLOG10, MVT::f32, Custom);
3360b57cec5SDimitry Andric   setOperationAction(ISD::FEXP, MVT::f32, Custom);
3370b57cec5SDimitry Andric 
3380b57cec5SDimitry Andric 
3390b57cec5SDimitry Andric   setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
3400b57cec5SDimitry Andric   setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
3410b57cec5SDimitry Andric 
342e8d8bef9SDimitry Andric   setOperationAction(ISD::FREM, MVT::f16, Custom);
3430b57cec5SDimitry Andric   setOperationAction(ISD::FREM, MVT::f32, Custom);
3440b57cec5SDimitry Andric   setOperationAction(ISD::FREM, MVT::f64, Custom);
3450b57cec5SDimitry Andric 
3460b57cec5SDimitry Andric   // Expand to fneg + fadd.
3470b57cec5SDimitry Andric   setOperationAction(ISD::FSUB, MVT::f64, Expand);
3480b57cec5SDimitry Andric 
3490b57cec5SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v3i32, Custom);
3500b57cec5SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v3f32, Custom);
3510b57cec5SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
3520b57cec5SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
3530b57cec5SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v5i32, Custom);
3540b57cec5SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v5f32, Custom);
355fe6060f1SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v6i32, Custom);
356fe6060f1SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v6f32, Custom);
357fe6060f1SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v7i32, Custom);
358fe6060f1SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v7f32, Custom);
3590b57cec5SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
3600b57cec5SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
361fe6060f1SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f16, Custom);
362fe6060f1SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom);
363*04eeddc0SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f16, Custom);
364*04eeddc0SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom);
3650b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
3660b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
3670b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f32, Custom);
3680b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i32, Custom);
3690b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
3700b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
3710b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5f32, Custom);
3720b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom);
373fe6060f1SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v6f32, Custom);
374fe6060f1SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v6i32, Custom);
375fe6060f1SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v7f32, Custom);
376fe6060f1SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v7i32, Custom);
3770b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
3780b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
3790b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f32, Custom);
3800b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom);
3810b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
3820b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom);
3835ffd83dbSDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom);
3845ffd83dbSDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom);
385fe6060f1SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f64, Custom);
386fe6060f1SDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i64, Custom);
3875ffd83dbSDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Custom);
3885ffd83dbSDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i64, Custom);
3895ffd83dbSDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f64, Custom);
3905ffd83dbSDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i64, Custom);
3915ffd83dbSDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f64, Custom);
3925ffd83dbSDimitry Andric   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i64, Custom);
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
3950b57cec5SDimitry Andric   setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
3960b57cec5SDimitry Andric   setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
3970b57cec5SDimitry Andric 
3980b57cec5SDimitry Andric   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
3990b57cec5SDimitry Andric   for (MVT VT : ScalarIntVTs) {
4000b57cec5SDimitry Andric     // These should use [SU]DIVREM, so set them to expand
4010b57cec5SDimitry Andric     setOperationAction(ISD::SDIV, VT, Expand);
4020b57cec5SDimitry Andric     setOperationAction(ISD::UDIV, VT, Expand);
4030b57cec5SDimitry Andric     setOperationAction(ISD::SREM, VT, Expand);
4040b57cec5SDimitry Andric     setOperationAction(ISD::UREM, VT, Expand);
4050b57cec5SDimitry Andric 
4060b57cec5SDimitry Andric     // GPU does not have divrem function for signed or unsigned.
4070b57cec5SDimitry Andric     setOperationAction(ISD::SDIVREM, VT, Custom);
4080b57cec5SDimitry Andric     setOperationAction(ISD::UDIVREM, VT, Custom);
4090b57cec5SDimitry Andric 
4100b57cec5SDimitry Andric     // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
4110b57cec5SDimitry Andric     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
4120b57cec5SDimitry Andric     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
4130b57cec5SDimitry Andric 
4140b57cec5SDimitry Andric     setOperationAction(ISD::BSWAP, VT, Expand);
4150b57cec5SDimitry Andric     setOperationAction(ISD::CTTZ, VT, Expand);
4160b57cec5SDimitry Andric     setOperationAction(ISD::CTLZ, VT, Expand);
4170b57cec5SDimitry Andric 
4180b57cec5SDimitry Andric     // AMDGPU uses ADDC/SUBC/ADDE/SUBE
4190b57cec5SDimitry Andric     setOperationAction(ISD::ADDC, VT, Legal);
4200b57cec5SDimitry Andric     setOperationAction(ISD::SUBC, VT, Legal);
4210b57cec5SDimitry Andric     setOperationAction(ISD::ADDE, VT, Legal);
4220b57cec5SDimitry Andric     setOperationAction(ISD::SUBE, VT, Legal);
4230b57cec5SDimitry Andric   }
4240b57cec5SDimitry Andric 
4255ffd83dbSDimitry Andric   // The hardware supports 32-bit FSHR, but not FSHL.
4265ffd83dbSDimitry Andric   setOperationAction(ISD::FSHR, MVT::i32, Legal);
4275ffd83dbSDimitry Andric 
4280b57cec5SDimitry Andric   // The hardware supports 32-bit ROTR, but not ROTL.
4290b57cec5SDimitry Andric   setOperationAction(ISD::ROTL, MVT::i32, Expand);
4300b57cec5SDimitry Andric   setOperationAction(ISD::ROTL, MVT::i64, Expand);
4310b57cec5SDimitry Andric   setOperationAction(ISD::ROTR, MVT::i64, Expand);
4320b57cec5SDimitry Andric 
433e8d8bef9SDimitry Andric   setOperationAction(ISD::MULHU, MVT::i16, Expand);
434e8d8bef9SDimitry Andric   setOperationAction(ISD::MULHS, MVT::i16, Expand);
435e8d8bef9SDimitry Andric 
4360b57cec5SDimitry Andric   setOperationAction(ISD::MUL, MVT::i64, Expand);
4370b57cec5SDimitry Andric   setOperationAction(ISD::MULHU, MVT::i64, Expand);
4380b57cec5SDimitry Andric   setOperationAction(ISD::MULHS, MVT::i64, Expand);
4390b57cec5SDimitry Andric   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
4400b57cec5SDimitry Andric   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
4410b57cec5SDimitry Andric   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
4420b57cec5SDimitry Andric   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
4430b57cec5SDimitry Andric   setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric   setOperationAction(ISD::SMIN, MVT::i32, Legal);
4460b57cec5SDimitry Andric   setOperationAction(ISD::UMIN, MVT::i32, Legal);
4470b57cec5SDimitry Andric   setOperationAction(ISD::SMAX, MVT::i32, Legal);
4480b57cec5SDimitry Andric   setOperationAction(ISD::UMAX, MVT::i32, Legal);
4490b57cec5SDimitry Andric 
4500b57cec5SDimitry Andric   setOperationAction(ISD::CTTZ, MVT::i64, Custom);
4510b57cec5SDimitry Andric   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
4520b57cec5SDimitry Andric   setOperationAction(ISD::CTLZ, MVT::i64, Custom);
4530b57cec5SDimitry Andric   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
4540b57cec5SDimitry Andric 
4550b57cec5SDimitry Andric   static const MVT::SimpleValueType VectorIntTypes[] = {
456fe6060f1SDimitry Andric       MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32};
4570b57cec5SDimitry Andric 
4580b57cec5SDimitry Andric   for (MVT VT : VectorIntTypes) {
4590b57cec5SDimitry Andric     // Expand the following operations for the current type by default.
4600b57cec5SDimitry Andric     setOperationAction(ISD::ADD,  VT, Expand);
4610b57cec5SDimitry Andric     setOperationAction(ISD::AND,  VT, Expand);
4620b57cec5SDimitry Andric     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
4630b57cec5SDimitry Andric     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
4640b57cec5SDimitry Andric     setOperationAction(ISD::MUL,  VT, Expand);
4650b57cec5SDimitry Andric     setOperationAction(ISD::MULHU, VT, Expand);
4660b57cec5SDimitry Andric     setOperationAction(ISD::MULHS, VT, Expand);
4670b57cec5SDimitry Andric     setOperationAction(ISD::OR,   VT, Expand);
4680b57cec5SDimitry Andric     setOperationAction(ISD::SHL,  VT, Expand);
4690b57cec5SDimitry Andric     setOperationAction(ISD::SRA,  VT, Expand);
4700b57cec5SDimitry Andric     setOperationAction(ISD::SRL,  VT, Expand);
4710b57cec5SDimitry Andric     setOperationAction(ISD::ROTL, VT, Expand);
4720b57cec5SDimitry Andric     setOperationAction(ISD::ROTR, VT, Expand);
4730b57cec5SDimitry Andric     setOperationAction(ISD::SUB,  VT, Expand);
4740b57cec5SDimitry Andric     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
4750b57cec5SDimitry Andric     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
4760b57cec5SDimitry Andric     setOperationAction(ISD::SDIV, VT, Expand);
4770b57cec5SDimitry Andric     setOperationAction(ISD::UDIV, VT, Expand);
4780b57cec5SDimitry Andric     setOperationAction(ISD::SREM, VT, Expand);
4790b57cec5SDimitry Andric     setOperationAction(ISD::UREM, VT, Expand);
4800b57cec5SDimitry Andric     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
4810b57cec5SDimitry Andric     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
4825ffd83dbSDimitry Andric     setOperationAction(ISD::SDIVREM, VT, Expand);
4830b57cec5SDimitry Andric     setOperationAction(ISD::UDIVREM, VT, Expand);
4840b57cec5SDimitry Andric     setOperationAction(ISD::SELECT, VT, Expand);
4850b57cec5SDimitry Andric     setOperationAction(ISD::VSELECT, VT, Expand);
4860b57cec5SDimitry Andric     setOperationAction(ISD::SELECT_CC, VT, Expand);
4870b57cec5SDimitry Andric     setOperationAction(ISD::XOR,  VT, Expand);
4880b57cec5SDimitry Andric     setOperationAction(ISD::BSWAP, VT, Expand);
4890b57cec5SDimitry Andric     setOperationAction(ISD::CTPOP, VT, Expand);
4900b57cec5SDimitry Andric     setOperationAction(ISD::CTTZ, VT, Expand);
4910b57cec5SDimitry Andric     setOperationAction(ISD::CTLZ, VT, Expand);
4920b57cec5SDimitry Andric     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
4930b57cec5SDimitry Andric     setOperationAction(ISD::SETCC, VT, Expand);
4940b57cec5SDimitry Andric   }
4950b57cec5SDimitry Andric 
4960b57cec5SDimitry Andric   static const MVT::SimpleValueType FloatVectorTypes[] = {
497fe6060f1SDimitry Andric       MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32};
4980b57cec5SDimitry Andric 
4990b57cec5SDimitry Andric   for (MVT VT : FloatVectorTypes) {
5000b57cec5SDimitry Andric     setOperationAction(ISD::FABS, VT, Expand);
5010b57cec5SDimitry Andric     setOperationAction(ISD::FMINNUM, VT, Expand);
5020b57cec5SDimitry Andric     setOperationAction(ISD::FMAXNUM, VT, Expand);
5030b57cec5SDimitry Andric     setOperationAction(ISD::FADD, VT, Expand);
5040b57cec5SDimitry Andric     setOperationAction(ISD::FCEIL, VT, Expand);
5050b57cec5SDimitry Andric     setOperationAction(ISD::FCOS, VT, Expand);
5060b57cec5SDimitry Andric     setOperationAction(ISD::FDIV, VT, Expand);
5070b57cec5SDimitry Andric     setOperationAction(ISD::FEXP2, VT, Expand);
5080b57cec5SDimitry Andric     setOperationAction(ISD::FEXP, VT, Expand);
5090b57cec5SDimitry Andric     setOperationAction(ISD::FLOG2, VT, Expand);
5100b57cec5SDimitry Andric     setOperationAction(ISD::FREM, VT, Expand);
5110b57cec5SDimitry Andric     setOperationAction(ISD::FLOG, VT, Expand);
5120b57cec5SDimitry Andric     setOperationAction(ISD::FLOG10, VT, Expand);
5130b57cec5SDimitry Andric     setOperationAction(ISD::FPOW, VT, Expand);
5140b57cec5SDimitry Andric     setOperationAction(ISD::FFLOOR, VT, Expand);
5150b57cec5SDimitry Andric     setOperationAction(ISD::FTRUNC, VT, Expand);
5160b57cec5SDimitry Andric     setOperationAction(ISD::FMUL, VT, Expand);
5170b57cec5SDimitry Andric     setOperationAction(ISD::FMA, VT, Expand);
5180b57cec5SDimitry Andric     setOperationAction(ISD::FRINT, VT, Expand);
5190b57cec5SDimitry Andric     setOperationAction(ISD::FNEARBYINT, VT, Expand);
5200b57cec5SDimitry Andric     setOperationAction(ISD::FSQRT, VT, Expand);
5210b57cec5SDimitry Andric     setOperationAction(ISD::FSIN, VT, Expand);
5220b57cec5SDimitry Andric     setOperationAction(ISD::FSUB, VT, Expand);
5230b57cec5SDimitry Andric     setOperationAction(ISD::FNEG, VT, Expand);
5240b57cec5SDimitry Andric     setOperationAction(ISD::VSELECT, VT, Expand);
5250b57cec5SDimitry Andric     setOperationAction(ISD::SELECT_CC, VT, Expand);
5260b57cec5SDimitry Andric     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
5270b57cec5SDimitry Andric     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
5280b57cec5SDimitry Andric     setOperationAction(ISD::SETCC, VT, Expand);
5290b57cec5SDimitry Andric     setOperationAction(ISD::FCANONICALIZE, VT, Expand);
5300b57cec5SDimitry Andric   }
5310b57cec5SDimitry Andric 
5320b57cec5SDimitry Andric   // This causes using an unrolled select operation rather than expansion with
5330b57cec5SDimitry Andric   // bit operations. This is in general better, but the alternative using BFI
5340b57cec5SDimitry Andric   // instructions may be better if the select sources are SGPRs.
5350b57cec5SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v2f32, Promote);
5360b57cec5SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v2f32, MVT::v2i32);
5370b57cec5SDimitry Andric 
5380b57cec5SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v3f32, Promote);
5390b57cec5SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v3f32, MVT::v3i32);
5400b57cec5SDimitry Andric 
5410b57cec5SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
5420b57cec5SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);
5430b57cec5SDimitry Andric 
5440b57cec5SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v5f32, Promote);
5450b57cec5SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);
5460b57cec5SDimitry Andric 
547fe6060f1SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v6f32, Promote);
548fe6060f1SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v6f32, MVT::v6i32);
549fe6060f1SDimitry Andric 
550fe6060f1SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v7f32, Promote);
551fe6060f1SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v7f32, MVT::v7i32);
552fe6060f1SDimitry Andric 
5530b57cec5SDimitry Andric   // There are no libcalls of any kind.
5540b57cec5SDimitry Andric   for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
5550b57cec5SDimitry Andric     setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
5560b57cec5SDimitry Andric 
5570b57cec5SDimitry Andric   setSchedulingPreference(Sched::RegPressure);
5580b57cec5SDimitry Andric   setJumpIsExpensive(true);
5590b57cec5SDimitry Andric 
5600b57cec5SDimitry Andric   // FIXME: This is only partially true. If we have to do vector compares, any
5610b57cec5SDimitry Andric   // SGPR pair can be a condition register. If we have a uniform condition, we
5620b57cec5SDimitry Andric   // are better off doing SALU operations, where there is only one SCC. For now,
5630b57cec5SDimitry Andric   // we don't have a way of knowing during instruction selection if a condition
5640b57cec5SDimitry Andric   // will be uniform and we always use vector compares. Assume we are using
5650b57cec5SDimitry Andric   // vector compares until that is fixed.
5660b57cec5SDimitry Andric   setHasMultipleConditionRegisters(true);
5670b57cec5SDimitry Andric 
5680b57cec5SDimitry Andric   setMinCmpXchgSizeInBits(32);
5690b57cec5SDimitry Andric   setSupportsUnalignedAtomics(false);
5700b57cec5SDimitry Andric 
5710b57cec5SDimitry Andric   PredictableSelectIsExpensive = false;
5720b57cec5SDimitry Andric 
5730b57cec5SDimitry Andric   // We want to find all load dependencies for long chains of stores to enable
5740b57cec5SDimitry Andric   // merging into very wide vectors. The problem is with vectors with > 4
5750b57cec5SDimitry Andric   // elements. MergeConsecutiveStores will attempt to merge these because x8/x16
5760b57cec5SDimitry Andric   // vectors are a legal type, even though we have to split the loads
5770b57cec5SDimitry Andric   // usually. When we can more precisely specify load legality per address
5780b57cec5SDimitry Andric   // space, we should be able to make FindBetterChain/MergeConsecutiveStores
5790b57cec5SDimitry Andric   // smarter so that they can figure out what to do in 2 iterations without all
5800b57cec5SDimitry Andric   // N > 4 stores on the same chain.
5810b57cec5SDimitry Andric   GatherAllAliasesMaxDepth = 16;
5820b57cec5SDimitry Andric 
5830b57cec5SDimitry Andric   // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry
5840b57cec5SDimitry Andric   // about these during lowering.
5850b57cec5SDimitry Andric   MaxStoresPerMemcpy  = 0xffffffff;
5860b57cec5SDimitry Andric   MaxStoresPerMemmove = 0xffffffff;
5870b57cec5SDimitry Andric   MaxStoresPerMemset  = 0xffffffff;
5880b57cec5SDimitry Andric 
5895ffd83dbSDimitry Andric   // The expansion for 64-bit division is enormous.
5905ffd83dbSDimitry Andric   if (AMDGPUBypassSlowDiv)
5915ffd83dbSDimitry Andric     addBypassSlowDiv(64, 32);
5925ffd83dbSDimitry Andric 
5930b57cec5SDimitry Andric   setTargetDAGCombine(ISD::BITCAST);
5940b57cec5SDimitry Andric   setTargetDAGCombine(ISD::SHL);
5950b57cec5SDimitry Andric   setTargetDAGCombine(ISD::SRA);
5960b57cec5SDimitry Andric   setTargetDAGCombine(ISD::SRL);
5970b57cec5SDimitry Andric   setTargetDAGCombine(ISD::TRUNCATE);
5980b57cec5SDimitry Andric   setTargetDAGCombine(ISD::MUL);
5994824e7fdSDimitry Andric   setTargetDAGCombine(ISD::SMUL_LOHI);
6004824e7fdSDimitry Andric   setTargetDAGCombine(ISD::UMUL_LOHI);
6010b57cec5SDimitry Andric   setTargetDAGCombine(ISD::MULHU);
6020b57cec5SDimitry Andric   setTargetDAGCombine(ISD::MULHS);
6030b57cec5SDimitry Andric   setTargetDAGCombine(ISD::SELECT);
6040b57cec5SDimitry Andric   setTargetDAGCombine(ISD::SELECT_CC);
6050b57cec5SDimitry Andric   setTargetDAGCombine(ISD::STORE);
6060b57cec5SDimitry Andric   setTargetDAGCombine(ISD::FADD);
6070b57cec5SDimitry Andric   setTargetDAGCombine(ISD::FSUB);
6080b57cec5SDimitry Andric   setTargetDAGCombine(ISD::FNEG);
6090b57cec5SDimitry Andric   setTargetDAGCombine(ISD::FABS);
6100b57cec5SDimitry Andric   setTargetDAGCombine(ISD::AssertZext);
6110b57cec5SDimitry Andric   setTargetDAGCombine(ISD::AssertSext);
6128bcb0991SDimitry Andric   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
6130b57cec5SDimitry Andric }
6140b57cec5SDimitry Andric 
615e8d8bef9SDimitry Andric bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
616e8d8bef9SDimitry Andric   if (getTargetMachine().Options.NoSignedZerosFPMath)
617e8d8bef9SDimitry Andric     return true;
618e8d8bef9SDimitry Andric 
619e8d8bef9SDimitry Andric   const auto Flags = Op.getNode()->getFlags();
620e8d8bef9SDimitry Andric   if (Flags.hasNoSignedZeros())
621e8d8bef9SDimitry Andric     return true;
622e8d8bef9SDimitry Andric 
623e8d8bef9SDimitry Andric   return false;
624e8d8bef9SDimitry Andric }
625e8d8bef9SDimitry Andric 
6260b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
6270b57cec5SDimitry Andric // Target Information
6280b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
6290b57cec5SDimitry Andric 
6300b57cec5SDimitry Andric LLVM_READNONE
6310b57cec5SDimitry Andric static bool fnegFoldsIntoOp(unsigned Opc) {
6320b57cec5SDimitry Andric   switch (Opc) {
6330b57cec5SDimitry Andric   case ISD::FADD:
6340b57cec5SDimitry Andric   case ISD::FSUB:
6350b57cec5SDimitry Andric   case ISD::FMUL:
6360b57cec5SDimitry Andric   case ISD::FMA:
6370b57cec5SDimitry Andric   case ISD::FMAD:
6380b57cec5SDimitry Andric   case ISD::FMINNUM:
6390b57cec5SDimitry Andric   case ISD::FMAXNUM:
6400b57cec5SDimitry Andric   case ISD::FMINNUM_IEEE:
6410b57cec5SDimitry Andric   case ISD::FMAXNUM_IEEE:
6420b57cec5SDimitry Andric   case ISD::FSIN:
6430b57cec5SDimitry Andric   case ISD::FTRUNC:
6440b57cec5SDimitry Andric   case ISD::FRINT:
6450b57cec5SDimitry Andric   case ISD::FNEARBYINT:
6460b57cec5SDimitry Andric   case ISD::FCANONICALIZE:
6470b57cec5SDimitry Andric   case AMDGPUISD::RCP:
6480b57cec5SDimitry Andric   case AMDGPUISD::RCP_LEGACY:
6490b57cec5SDimitry Andric   case AMDGPUISD::RCP_IFLAG:
6500b57cec5SDimitry Andric   case AMDGPUISD::SIN_HW:
6510b57cec5SDimitry Andric   case AMDGPUISD::FMUL_LEGACY:
6520b57cec5SDimitry Andric   case AMDGPUISD::FMIN_LEGACY:
6530b57cec5SDimitry Andric   case AMDGPUISD::FMAX_LEGACY:
6540b57cec5SDimitry Andric   case AMDGPUISD::FMED3:
655e8d8bef9SDimitry Andric     // TODO: handle llvm.amdgcn.fma.legacy
6560b57cec5SDimitry Andric     return true;
6570b57cec5SDimitry Andric   default:
6580b57cec5SDimitry Andric     return false;
6590b57cec5SDimitry Andric   }
6600b57cec5SDimitry Andric }
6610b57cec5SDimitry Andric 
6620b57cec5SDimitry Andric /// \p returns true if the operation will definitely need to use a 64-bit
6630b57cec5SDimitry Andric /// encoding, and thus will use a VOP3 encoding regardless of the source
6640b57cec5SDimitry Andric /// modifiers.
6650b57cec5SDimitry Andric LLVM_READONLY
6660b57cec5SDimitry Andric static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) {
6670b57cec5SDimitry Andric   return N->getNumOperands() > 2 || VT == MVT::f64;
6680b57cec5SDimitry Andric }
6690b57cec5SDimitry Andric 
6700b57cec5SDimitry Andric // Most FP instructions support source modifiers, but this could be refined
6710b57cec5SDimitry Andric // slightly.
6720b57cec5SDimitry Andric LLVM_READONLY
6730b57cec5SDimitry Andric static bool hasSourceMods(const SDNode *N) {
6740b57cec5SDimitry Andric   if (isa<MemSDNode>(N))
6750b57cec5SDimitry Andric     return false;
6760b57cec5SDimitry Andric 
6770b57cec5SDimitry Andric   switch (N->getOpcode()) {
6780b57cec5SDimitry Andric   case ISD::CopyToReg:
6790b57cec5SDimitry Andric   case ISD::SELECT:
6800b57cec5SDimitry Andric   case ISD::FDIV:
6810b57cec5SDimitry Andric   case ISD::FREM:
6820b57cec5SDimitry Andric   case ISD::INLINEASM:
6830b57cec5SDimitry Andric   case ISD::INLINEASM_BR:
6840b57cec5SDimitry Andric   case AMDGPUISD::DIV_SCALE:
6858bcb0991SDimitry Andric   case ISD::INTRINSIC_W_CHAIN:
6860b57cec5SDimitry Andric 
6870b57cec5SDimitry Andric   // TODO: Should really be looking at the users of the bitcast. These are
6880b57cec5SDimitry Andric   // problematic because bitcasts are used to legalize all stores to integer
6890b57cec5SDimitry Andric   // types.
6900b57cec5SDimitry Andric   case ISD::BITCAST:
6910b57cec5SDimitry Andric     return false;
6928bcb0991SDimitry Andric   case ISD::INTRINSIC_WO_CHAIN: {
6938bcb0991SDimitry Andric     switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
6948bcb0991SDimitry Andric     case Intrinsic::amdgcn_interp_p1:
6958bcb0991SDimitry Andric     case Intrinsic::amdgcn_interp_p2:
6968bcb0991SDimitry Andric     case Intrinsic::amdgcn_interp_mov:
6978bcb0991SDimitry Andric     case Intrinsic::amdgcn_interp_p1_f16:
6988bcb0991SDimitry Andric     case Intrinsic::amdgcn_interp_p2_f16:
6998bcb0991SDimitry Andric       return false;
7008bcb0991SDimitry Andric     default:
7018bcb0991SDimitry Andric       return true;
7028bcb0991SDimitry Andric     }
7038bcb0991SDimitry Andric   }
7040b57cec5SDimitry Andric   default:
7050b57cec5SDimitry Andric     return true;
7060b57cec5SDimitry Andric   }
7070b57cec5SDimitry Andric }
7080b57cec5SDimitry Andric 
7090b57cec5SDimitry Andric bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N,
7100b57cec5SDimitry Andric                                                  unsigned CostThreshold) {
7110b57cec5SDimitry Andric   // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
7120b57cec5SDimitry Andric   // it is truly free to use a source modifier in all cases. If there are
7130b57cec5SDimitry Andric   // multiple users but for each one will necessitate using VOP3, there will be
7140b57cec5SDimitry Andric   // a code size increase. Try to avoid increasing code size unless we know it
7150b57cec5SDimitry Andric   // will save on the instruction count.
7160b57cec5SDimitry Andric   unsigned NumMayIncreaseSize = 0;
7170b57cec5SDimitry Andric   MVT VT = N->getValueType(0).getScalarType().getSimpleVT();
7180b57cec5SDimitry Andric 
7190b57cec5SDimitry Andric   // XXX - Should this limit number of uses to check?
7200b57cec5SDimitry Andric   for (const SDNode *U : N->uses()) {
7210b57cec5SDimitry Andric     if (!hasSourceMods(U))
7220b57cec5SDimitry Andric       return false;
7230b57cec5SDimitry Andric 
7240b57cec5SDimitry Andric     if (!opMustUseVOP3Encoding(U, VT)) {
7250b57cec5SDimitry Andric       if (++NumMayIncreaseSize > CostThreshold)
7260b57cec5SDimitry Andric         return false;
7270b57cec5SDimitry Andric     }
7280b57cec5SDimitry Andric   }
7290b57cec5SDimitry Andric 
7300b57cec5SDimitry Andric   return true;
7310b57cec5SDimitry Andric }
7320b57cec5SDimitry Andric 
7335ffd83dbSDimitry Andric EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
7345ffd83dbSDimitry Andric                                               ISD::NodeType ExtendKind) const {
7355ffd83dbSDimitry Andric   assert(!VT.isVector() && "only scalar expected");
7365ffd83dbSDimitry Andric 
7375ffd83dbSDimitry Andric   // Round to the next multiple of 32-bits.
7385ffd83dbSDimitry Andric   unsigned Size = VT.getSizeInBits();
7395ffd83dbSDimitry Andric   if (Size <= 32)
7405ffd83dbSDimitry Andric     return MVT::i32;
7415ffd83dbSDimitry Andric   return EVT::getIntegerVT(Context, 32 * ((Size + 31) / 32));
7425ffd83dbSDimitry Andric }
7435ffd83dbSDimitry Andric 
7440b57cec5SDimitry Andric MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
7450b57cec5SDimitry Andric   return MVT::i32;
7460b57cec5SDimitry Andric }
7470b57cec5SDimitry Andric 
7480b57cec5SDimitry Andric bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
7490b57cec5SDimitry Andric   return true;
7500b57cec5SDimitry Andric }
7510b57cec5SDimitry Andric 
7520b57cec5SDimitry Andric // The backend supports 32 and 64 bit floating point immediates.
7530b57cec5SDimitry Andric // FIXME: Why are we reporting vectors of FP immediates as legal?
7540b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
7550b57cec5SDimitry Andric                                         bool ForCodeSize) const {
7560b57cec5SDimitry Andric   EVT ScalarVT = VT.getScalarType();
7570b57cec5SDimitry Andric   return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
7580b57cec5SDimitry Andric          (ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));
7590b57cec5SDimitry Andric }
7600b57cec5SDimitry Andric 
7610b57cec5SDimitry Andric // We don't want to shrink f64 / f32 constants.
7620b57cec5SDimitry Andric bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
7630b57cec5SDimitry Andric   EVT ScalarVT = VT.getScalarType();
7640b57cec5SDimitry Andric   return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
7650b57cec5SDimitry Andric }
7660b57cec5SDimitry Andric 
7670b57cec5SDimitry Andric bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
7680b57cec5SDimitry Andric                                                  ISD::LoadExtType ExtTy,
7690b57cec5SDimitry Andric                                                  EVT NewVT) const {
7700b57cec5SDimitry Andric   // TODO: This may be worth removing. Check regression tests for diffs.
7710b57cec5SDimitry Andric   if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
7720b57cec5SDimitry Andric     return false;
7730b57cec5SDimitry Andric 
7740b57cec5SDimitry Andric   unsigned NewSize = NewVT.getStoreSizeInBits();
7750b57cec5SDimitry Andric 
7765ffd83dbSDimitry Andric   // If we are reducing to a 32-bit load or a smaller multi-dword load,
7775ffd83dbSDimitry Andric   // this is always better.
7785ffd83dbSDimitry Andric   if (NewSize >= 32)
7790b57cec5SDimitry Andric     return true;
7800b57cec5SDimitry Andric 
7810b57cec5SDimitry Andric   EVT OldVT = N->getValueType(0);
7820b57cec5SDimitry Andric   unsigned OldSize = OldVT.getStoreSizeInBits();
7830b57cec5SDimitry Andric 
7840b57cec5SDimitry Andric   MemSDNode *MN = cast<MemSDNode>(N);
7850b57cec5SDimitry Andric   unsigned AS = MN->getAddressSpace();
7860b57cec5SDimitry Andric   // Do not shrink an aligned scalar load to sub-dword.
7870b57cec5SDimitry Andric   // Scalar engine cannot do sub-dword loads.
7880b57cec5SDimitry Andric   if (OldSize >= 32 && NewSize < 32 && MN->getAlignment() >= 4 &&
7890b57cec5SDimitry Andric       (AS == AMDGPUAS::CONSTANT_ADDRESS ||
7900b57cec5SDimitry Andric        AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
7910b57cec5SDimitry Andric        (isa<LoadSDNode>(N) &&
7920b57cec5SDimitry Andric         AS == AMDGPUAS::GLOBAL_ADDRESS && MN->isInvariant())) &&
7930b57cec5SDimitry Andric       AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand()))
7940b57cec5SDimitry Andric     return false;
7950b57cec5SDimitry Andric 
7960b57cec5SDimitry Andric   // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
7970b57cec5SDimitry Andric   // extloads, so doing one requires using a buffer_load. In cases where we
7980b57cec5SDimitry Andric   // still couldn't use a scalar load, using the wider load shouldn't really
7990b57cec5SDimitry Andric   // hurt anything.
8000b57cec5SDimitry Andric 
8010b57cec5SDimitry Andric   // If the old size already had to be an extload, there's no harm in continuing
8020b57cec5SDimitry Andric   // to reduce the width.
8030b57cec5SDimitry Andric   return (OldSize < 32);
8040b57cec5SDimitry Andric }
8050b57cec5SDimitry Andric 
8060b57cec5SDimitry Andric bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
8070b57cec5SDimitry Andric                                                    const SelectionDAG &DAG,
8080b57cec5SDimitry Andric                                                    const MachineMemOperand &MMO) const {
8090b57cec5SDimitry Andric 
8100b57cec5SDimitry Andric   assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits());
8110b57cec5SDimitry Andric 
8120b57cec5SDimitry Andric   if (LoadTy.getScalarType() == MVT::i32)
8130b57cec5SDimitry Andric     return false;
8140b57cec5SDimitry Andric 
8150b57cec5SDimitry Andric   unsigned LScalarSize = LoadTy.getScalarSizeInBits();
8160b57cec5SDimitry Andric   unsigned CastScalarSize = CastTy.getScalarSizeInBits();
8170b57cec5SDimitry Andric 
8180b57cec5SDimitry Andric   if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
8190b57cec5SDimitry Andric     return false;
8200b57cec5SDimitry Andric 
8210b57cec5SDimitry Andric   bool Fast = false;
8228bcb0991SDimitry Andric   return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
8238bcb0991SDimitry Andric                                         CastTy, MMO, &Fast) &&
8248bcb0991SDimitry Andric          Fast;
8250b57cec5SDimitry Andric }
8260b57cec5SDimitry Andric 
8270b57cec5SDimitry Andric // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
8280b57cec5SDimitry Andric // profitable with the expansion for 64-bit since it's generally good to
8290b57cec5SDimitry Andric // speculate things.
8300b57cec5SDimitry Andric // FIXME: These should really have the size as a parameter.
8310b57cec5SDimitry Andric bool AMDGPUTargetLowering::isCheapToSpeculateCttz() const {
8320b57cec5SDimitry Andric   return true;
8330b57cec5SDimitry Andric }
8340b57cec5SDimitry Andric 
8350b57cec5SDimitry Andric bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
8360b57cec5SDimitry Andric   return true;
8370b57cec5SDimitry Andric }
8380b57cec5SDimitry Andric 
8390b57cec5SDimitry Andric bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
8400b57cec5SDimitry Andric   switch (N->getOpcode()) {
8410b57cec5SDimitry Andric   case ISD::EntryToken:
8420b57cec5SDimitry Andric   case ISD::TokenFactor:
8430b57cec5SDimitry Andric     return true;
844e8d8bef9SDimitry Andric   case ISD::INTRINSIC_WO_CHAIN: {
8450b57cec5SDimitry Andric     unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
8460b57cec5SDimitry Andric     switch (IntrID) {
8470b57cec5SDimitry Andric     case Intrinsic::amdgcn_readfirstlane:
8480b57cec5SDimitry Andric     case Intrinsic::amdgcn_readlane:
8490b57cec5SDimitry Andric       return true;
8500b57cec5SDimitry Andric     }
851e8d8bef9SDimitry Andric     return false;
8520b57cec5SDimitry Andric   }
8530b57cec5SDimitry Andric   case ISD::LOAD:
8548bcb0991SDimitry Andric     if (cast<LoadSDNode>(N)->getMemOperand()->getAddrSpace() ==
8558bcb0991SDimitry Andric         AMDGPUAS::CONSTANT_ADDRESS_32BIT)
8560b57cec5SDimitry Andric       return true;
8570b57cec5SDimitry Andric     return false;
8580b57cec5SDimitry Andric   }
859e8d8bef9SDimitry Andric   return false;
8600b57cec5SDimitry Andric }
8610b57cec5SDimitry Andric 
8625ffd83dbSDimitry Andric SDValue AMDGPUTargetLowering::getNegatedExpression(
8635ffd83dbSDimitry Andric     SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize,
8645ffd83dbSDimitry Andric     NegatibleCost &Cost, unsigned Depth) const {
8655ffd83dbSDimitry Andric 
8665ffd83dbSDimitry Andric   switch (Op.getOpcode()) {
8675ffd83dbSDimitry Andric   case ISD::FMA:
8685ffd83dbSDimitry Andric   case ISD::FMAD: {
8695ffd83dbSDimitry Andric     // Negating a fma is not free if it has users without source mods.
8705ffd83dbSDimitry Andric     if (!allUsesHaveSourceMods(Op.getNode()))
8715ffd83dbSDimitry Andric       return SDValue();
8725ffd83dbSDimitry Andric     break;
8735ffd83dbSDimitry Andric   }
8745ffd83dbSDimitry Andric   default:
8755ffd83dbSDimitry Andric     break;
8765ffd83dbSDimitry Andric   }
8775ffd83dbSDimitry Andric 
8785ffd83dbSDimitry Andric   return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations,
8795ffd83dbSDimitry Andric                                               ForCodeSize, Cost, Depth);
8805ffd83dbSDimitry Andric }
8815ffd83dbSDimitry Andric 
8820b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
8830b57cec5SDimitry Andric // Target Properties
8840b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
8850b57cec5SDimitry Andric 
8860b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
8870b57cec5SDimitry Andric   assert(VT.isFloatingPoint());
8880b57cec5SDimitry Andric 
8890b57cec5SDimitry Andric   // Packed operations do not have a fabs modifier.
8900b57cec5SDimitry Andric   return VT == MVT::f32 || VT == MVT::f64 ||
8910b57cec5SDimitry Andric          (Subtarget->has16BitInsts() && VT == MVT::f16);
8920b57cec5SDimitry Andric }
8930b57cec5SDimitry Andric 
8940b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
8950b57cec5SDimitry Andric   assert(VT.isFloatingPoint());
896fe6060f1SDimitry Andric   // Report this based on the end legalized type.
897fe6060f1SDimitry Andric   VT = VT.getScalarType();
898fe6060f1SDimitry Andric   return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16;
8990b57cec5SDimitry Andric }
9000b57cec5SDimitry Andric 
9010b57cec5SDimitry Andric bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT,
9020b57cec5SDimitry Andric                                                          unsigned NumElem,
9030b57cec5SDimitry Andric                                                          unsigned AS) const {
9040b57cec5SDimitry Andric   return true;
9050b57cec5SDimitry Andric }
9060b57cec5SDimitry Andric 
9070b57cec5SDimitry Andric bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const {
9080b57cec5SDimitry Andric   // There are few operations which truly have vector input operands. Any vector
9090b57cec5SDimitry Andric   // operation is going to involve operations on each component, and a
9100b57cec5SDimitry Andric   // build_vector will be a copy per element, so it always makes sense to use a
9110b57cec5SDimitry Andric   // build_vector input in place of the extracted element to avoid a copy into a
9120b57cec5SDimitry Andric   // super register.
9130b57cec5SDimitry Andric   //
9140b57cec5SDimitry Andric   // We should probably only do this if all users are extracts only, but this
9150b57cec5SDimitry Andric   // should be the common case.
9160b57cec5SDimitry Andric   return true;
9170b57cec5SDimitry Andric }
9180b57cec5SDimitry Andric 
9190b57cec5SDimitry Andric bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
9200b57cec5SDimitry Andric   // Truncate is just accessing a subregister.
9210b57cec5SDimitry Andric 
9220b57cec5SDimitry Andric   unsigned SrcSize = Source.getSizeInBits();
9230b57cec5SDimitry Andric   unsigned DestSize = Dest.getSizeInBits();
9240b57cec5SDimitry Andric 
9250b57cec5SDimitry Andric   return DestSize < SrcSize && DestSize % 32 == 0 ;
9260b57cec5SDimitry Andric }
9270b57cec5SDimitry Andric 
9280b57cec5SDimitry Andric bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
9290b57cec5SDimitry Andric   // Truncate is just accessing a subregister.
9300b57cec5SDimitry Andric 
9310b57cec5SDimitry Andric   unsigned SrcSize = Source->getScalarSizeInBits();
9320b57cec5SDimitry Andric   unsigned DestSize = Dest->getScalarSizeInBits();
9330b57cec5SDimitry Andric 
9340b57cec5SDimitry Andric   if (DestSize== 16 && Subtarget->has16BitInsts())
9350b57cec5SDimitry Andric     return SrcSize >= 32;
9360b57cec5SDimitry Andric 
9370b57cec5SDimitry Andric   return DestSize < SrcSize && DestSize % 32 == 0;
9380b57cec5SDimitry Andric }
9390b57cec5SDimitry Andric 
9400b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
9410b57cec5SDimitry Andric   unsigned SrcSize = Src->getScalarSizeInBits();
9420b57cec5SDimitry Andric   unsigned DestSize = Dest->getScalarSizeInBits();
9430b57cec5SDimitry Andric 
9440b57cec5SDimitry Andric   if (SrcSize == 16 && Subtarget->has16BitInsts())
9450b57cec5SDimitry Andric     return DestSize >= 32;
9460b57cec5SDimitry Andric 
9470b57cec5SDimitry Andric   return SrcSize == 32 && DestSize == 64;
9480b57cec5SDimitry Andric }
9490b57cec5SDimitry Andric 
9500b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
9510b57cec5SDimitry Andric   // Any register load of a 64-bit value really requires 2 32-bit moves. For all
9520b57cec5SDimitry Andric   // practical purposes, the extra mov 0 to load a 64-bit is free.  As used,
9530b57cec5SDimitry Andric   // this will enable reducing 64-bit operations the 32-bit, which is always
9540b57cec5SDimitry Andric   // good.
9550b57cec5SDimitry Andric 
9560b57cec5SDimitry Andric   if (Src == MVT::i16)
9570b57cec5SDimitry Andric     return Dest == MVT::i32 ||Dest == MVT::i64 ;
9580b57cec5SDimitry Andric 
9590b57cec5SDimitry Andric   return Src == MVT::i32 && Dest == MVT::i64;
9600b57cec5SDimitry Andric }
9610b57cec5SDimitry Andric 
9620b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
9630b57cec5SDimitry Andric   return isZExtFree(Val.getValueType(), VT2);
9640b57cec5SDimitry Andric }
9650b57cec5SDimitry Andric 
9660b57cec5SDimitry Andric bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
9670b57cec5SDimitry Andric   // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
9680b57cec5SDimitry Andric   // limited number of native 64-bit operations. Shrinking an operation to fit
9690b57cec5SDimitry Andric   // in a single 32-bit register should always be helpful. As currently used,
9700b57cec5SDimitry Andric   // this is much less general than the name suggests, and is only used in
9710b57cec5SDimitry Andric   // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is
9720b57cec5SDimitry Andric   // not profitable, and may actually be harmful.
9730b57cec5SDimitry Andric   return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
9740b57cec5SDimitry Andric }
9750b57cec5SDimitry Andric 
9760b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
9770b57cec5SDimitry Andric // TargetLowering Callbacks
9780b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
9790b57cec5SDimitry Andric 
9800b57cec5SDimitry Andric CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
9810b57cec5SDimitry Andric                                                   bool IsVarArg) {
9820b57cec5SDimitry Andric   switch (CC) {
9830b57cec5SDimitry Andric   case CallingConv::AMDGPU_VS:
9840b57cec5SDimitry Andric   case CallingConv::AMDGPU_GS:
9850b57cec5SDimitry Andric   case CallingConv::AMDGPU_PS:
9860b57cec5SDimitry Andric   case CallingConv::AMDGPU_CS:
9870b57cec5SDimitry Andric   case CallingConv::AMDGPU_HS:
9880b57cec5SDimitry Andric   case CallingConv::AMDGPU_ES:
9890b57cec5SDimitry Andric   case CallingConv::AMDGPU_LS:
9900b57cec5SDimitry Andric     return CC_AMDGPU;
9910b57cec5SDimitry Andric   case CallingConv::C:
9920b57cec5SDimitry Andric   case CallingConv::Fast:
9930b57cec5SDimitry Andric   case CallingConv::Cold:
9940b57cec5SDimitry Andric     return CC_AMDGPU_Func;
995e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_Gfx:
996e8d8bef9SDimitry Andric     return CC_SI_Gfx;
9970b57cec5SDimitry Andric   case CallingConv::AMDGPU_KERNEL:
9980b57cec5SDimitry Andric   case CallingConv::SPIR_KERNEL:
9990b57cec5SDimitry Andric   default:
10000b57cec5SDimitry Andric     report_fatal_error("Unsupported calling convention for call");
10010b57cec5SDimitry Andric   }
10020b57cec5SDimitry Andric }
10030b57cec5SDimitry Andric 
10040b57cec5SDimitry Andric CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
10050b57cec5SDimitry Andric                                                     bool IsVarArg) {
10060b57cec5SDimitry Andric   switch (CC) {
10070b57cec5SDimitry Andric   case CallingConv::AMDGPU_KERNEL:
10080b57cec5SDimitry Andric   case CallingConv::SPIR_KERNEL:
10090b57cec5SDimitry Andric     llvm_unreachable("kernels should not be handled here");
10100b57cec5SDimitry Andric   case CallingConv::AMDGPU_VS:
10110b57cec5SDimitry Andric   case CallingConv::AMDGPU_GS:
10120b57cec5SDimitry Andric   case CallingConv::AMDGPU_PS:
10130b57cec5SDimitry Andric   case CallingConv::AMDGPU_CS:
10140b57cec5SDimitry Andric   case CallingConv::AMDGPU_HS:
10150b57cec5SDimitry Andric   case CallingConv::AMDGPU_ES:
10160b57cec5SDimitry Andric   case CallingConv::AMDGPU_LS:
10170b57cec5SDimitry Andric     return RetCC_SI_Shader;
1018e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_Gfx:
1019e8d8bef9SDimitry Andric     return RetCC_SI_Gfx;
10200b57cec5SDimitry Andric   case CallingConv::C:
10210b57cec5SDimitry Andric   case CallingConv::Fast:
10220b57cec5SDimitry Andric   case CallingConv::Cold:
10230b57cec5SDimitry Andric     return RetCC_AMDGPU_Func;
10240b57cec5SDimitry Andric   default:
10250b57cec5SDimitry Andric     report_fatal_error("Unsupported calling convention.");
10260b57cec5SDimitry Andric   }
10270b57cec5SDimitry Andric }
10280b57cec5SDimitry Andric 
10290b57cec5SDimitry Andric /// The SelectionDAGBuilder will automatically promote function arguments
10300b57cec5SDimitry Andric /// with illegal types.  However, this does not work for the AMDGPU targets
10310b57cec5SDimitry Andric /// since the function arguments are stored in memory as these illegal types.
/// In order to handle this properly we need to get the original type sizes
/// from the LLVM IR Function and fix up the ISD::InputArg values before
/// passing them to AnalyzeFormalArguments().
10350b57cec5SDimitry Andric 
10360b57cec5SDimitry Andric /// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
10370b57cec5SDimitry Andric /// input values across multiple registers.  Each item in the Ins array
10380b57cec5SDimitry Andric /// represents a single value that will be stored in registers.  Ins[x].VT is
10390b57cec5SDimitry Andric /// the value type of the value that will be stored in the register, so
10400b57cec5SDimitry Andric /// whatever SDNode we lower the argument to needs to be this type.
10410b57cec5SDimitry Andric ///
10420b57cec5SDimitry Andric /// In order to correctly lower the arguments we need to know the size of each
10430b57cec5SDimitry Andric /// argument.  Since Ins[x].VT gives us the size of the register that will
10440b57cec5SDimitry Andric /// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
1045349cc55cSDimitry Andric /// for the original function argument so that we can deduce the correct memory
10460b57cec5SDimitry Andric /// type to use for Ins[x].  In most cases the correct memory type will be
10470b57cec5SDimitry Andric /// Ins[x].ArgVT.  However, this will not always be the case.  If, for example,
10480b57cec5SDimitry Andric /// we have a kernel argument of type v8i8, this argument will be split into
10490b57cec5SDimitry Andric /// 8 parts and each part will be represented by its own item in the Ins array.
10500b57cec5SDimitry Andric /// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of
10510b57cec5SDimitry Andric /// the argument before it was split.  From this, we deduce that the memory type
10520b57cec5SDimitry Andric /// for each individual part is i8.  We pass the memory type as LocVT to the
10530b57cec5SDimitry Andric /// calling convention analysis function and the register type (Ins[x].VT) as
10540b57cec5SDimitry Andric /// the ValVT.
10550b57cec5SDimitry Andric void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
10560b57cec5SDimitry Andric   CCState &State,
10570b57cec5SDimitry Andric   const SmallVectorImpl<ISD::InputArg> &Ins) const {
10580b57cec5SDimitry Andric   const MachineFunction &MF = State.getMachineFunction();
10590b57cec5SDimitry Andric   const Function &Fn = MF.getFunction();
10600b57cec5SDimitry Andric   LLVMContext &Ctx = Fn.getParent()->getContext();
10610b57cec5SDimitry Andric   const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
10620b57cec5SDimitry Andric   const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn);
10630b57cec5SDimitry Andric   CallingConv::ID CC = Fn.getCallingConv();
10640b57cec5SDimitry Andric 
10655ffd83dbSDimitry Andric   Align MaxAlign = Align(1);
10660b57cec5SDimitry Andric   uint64_t ExplicitArgOffset = 0;
10670b57cec5SDimitry Andric   const DataLayout &DL = Fn.getParent()->getDataLayout();
10680b57cec5SDimitry Andric 
10690b57cec5SDimitry Andric   unsigned InIndex = 0;
10700b57cec5SDimitry Andric 
10710b57cec5SDimitry Andric   for (const Argument &Arg : Fn.args()) {
1072e8d8bef9SDimitry Andric     const bool IsByRef = Arg.hasByRefAttr();
10730b57cec5SDimitry Andric     Type *BaseArgTy = Arg.getType();
1074e8d8bef9SDimitry Andric     Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy;
1075e8d8bef9SDimitry Andric     MaybeAlign Alignment = IsByRef ? Arg.getParamAlign() : None;
1076e8d8bef9SDimitry Andric     if (!Alignment)
1077e8d8bef9SDimitry Andric       Alignment = DL.getABITypeAlign(MemArgTy);
1078e8d8bef9SDimitry Andric     MaxAlign = max(Alignment, MaxAlign);
1079e8d8bef9SDimitry Andric     uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy);
10800b57cec5SDimitry Andric 
10815ffd83dbSDimitry Andric     uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
10825ffd83dbSDimitry Andric     ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize;
10830b57cec5SDimitry Andric 
10840b57cec5SDimitry Andric     // We're basically throwing away everything passed into us and starting over
10850b57cec5SDimitry Andric     // to get accurate in-memory offsets. The "PartOffset" is completely useless
10860b57cec5SDimitry Andric     // to us as computed in Ins.
10870b57cec5SDimitry Andric     //
10880b57cec5SDimitry Andric     // We also need to figure out what type legalization is trying to do to get
10890b57cec5SDimitry Andric     // the correct memory offsets.
10900b57cec5SDimitry Andric 
10910b57cec5SDimitry Andric     SmallVector<EVT, 16> ValueVTs;
10920b57cec5SDimitry Andric     SmallVector<uint64_t, 16> Offsets;
10930b57cec5SDimitry Andric     ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset);
10940b57cec5SDimitry Andric 
10950b57cec5SDimitry Andric     for (unsigned Value = 0, NumValues = ValueVTs.size();
10960b57cec5SDimitry Andric          Value != NumValues; ++Value) {
10970b57cec5SDimitry Andric       uint64_t BasePartOffset = Offsets[Value];
10980b57cec5SDimitry Andric 
10990b57cec5SDimitry Andric       EVT ArgVT = ValueVTs[Value];
11000b57cec5SDimitry Andric       EVT MemVT = ArgVT;
11010b57cec5SDimitry Andric       MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT);
11020b57cec5SDimitry Andric       unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT);
11030b57cec5SDimitry Andric 
11040b57cec5SDimitry Andric       if (NumRegs == 1) {
11050b57cec5SDimitry Andric         // This argument is not split, so the IR type is the memory type.
11060b57cec5SDimitry Andric         if (ArgVT.isExtended()) {
11070b57cec5SDimitry Andric           // We have an extended type, like i24, so we should just use the
11080b57cec5SDimitry Andric           // register type.
11090b57cec5SDimitry Andric           MemVT = RegisterVT;
11100b57cec5SDimitry Andric         } else {
11110b57cec5SDimitry Andric           MemVT = ArgVT;
11120b57cec5SDimitry Andric         }
11130b57cec5SDimitry Andric       } else if (ArgVT.isVector() && RegisterVT.isVector() &&
11140b57cec5SDimitry Andric                  ArgVT.getScalarType() == RegisterVT.getScalarType()) {
11150b57cec5SDimitry Andric         assert(ArgVT.getVectorNumElements() > RegisterVT.getVectorNumElements());
11160b57cec5SDimitry Andric         // We have a vector value which has been split into a vector with
11170b57cec5SDimitry Andric         // the same scalar type, but fewer elements.  This should handle
11180b57cec5SDimitry Andric         // all the floating-point vector types.
11190b57cec5SDimitry Andric         MemVT = RegisterVT;
11200b57cec5SDimitry Andric       } else if (ArgVT.isVector() &&
11210b57cec5SDimitry Andric                  ArgVT.getVectorNumElements() == NumRegs) {
11220b57cec5SDimitry Andric         // This arg has been split so that each element is stored in a separate
11230b57cec5SDimitry Andric         // register.
11240b57cec5SDimitry Andric         MemVT = ArgVT.getScalarType();
11250b57cec5SDimitry Andric       } else if (ArgVT.isExtended()) {
11260b57cec5SDimitry Andric         // We have an extended type, like i65.
11270b57cec5SDimitry Andric         MemVT = RegisterVT;
11280b57cec5SDimitry Andric       } else {
11290b57cec5SDimitry Andric         unsigned MemoryBits = ArgVT.getStoreSizeInBits() / NumRegs;
11300b57cec5SDimitry Andric         assert(ArgVT.getStoreSizeInBits() % NumRegs == 0);
11310b57cec5SDimitry Andric         if (RegisterVT.isInteger()) {
11320b57cec5SDimitry Andric           MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
11330b57cec5SDimitry Andric         } else if (RegisterVT.isVector()) {
11340b57cec5SDimitry Andric           assert(!RegisterVT.getScalarType().isFloatingPoint());
11350b57cec5SDimitry Andric           unsigned NumElements = RegisterVT.getVectorNumElements();
11360b57cec5SDimitry Andric           assert(MemoryBits % NumElements == 0);
11370b57cec5SDimitry Andric           // This vector type has been split into another vector type with
11380b57cec5SDimitry Andric           // a different elements size.
11390b57cec5SDimitry Andric           EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
11400b57cec5SDimitry Andric                                            MemoryBits / NumElements);
11410b57cec5SDimitry Andric           MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
11420b57cec5SDimitry Andric         } else {
11430b57cec5SDimitry Andric           llvm_unreachable("cannot deduce memory type.");
11440b57cec5SDimitry Andric         }
11450b57cec5SDimitry Andric       }
11460b57cec5SDimitry Andric 
11470b57cec5SDimitry Andric       // Convert one element vectors to scalar.
11480b57cec5SDimitry Andric       if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
11490b57cec5SDimitry Andric         MemVT = MemVT.getScalarType();
11500b57cec5SDimitry Andric 
11510b57cec5SDimitry Andric       // Round up vec3/vec5 argument.
11520b57cec5SDimitry Andric       if (MemVT.isVector() && !MemVT.isPow2VectorType()) {
11530b57cec5SDimitry Andric         assert(MemVT.getVectorNumElements() == 3 ||
11540b57cec5SDimitry Andric                MemVT.getVectorNumElements() == 5);
11550b57cec5SDimitry Andric         MemVT = MemVT.getPow2VectorType(State.getContext());
11565ffd83dbSDimitry Andric       } else if (!MemVT.isSimple() && !MemVT.isVector()) {
11575ffd83dbSDimitry Andric         MemVT = MemVT.getRoundIntegerType(State.getContext());
11580b57cec5SDimitry Andric       }
11590b57cec5SDimitry Andric 
11600b57cec5SDimitry Andric       unsigned PartOffset = 0;
11610b57cec5SDimitry Andric       for (unsigned i = 0; i != NumRegs; ++i) {
11620b57cec5SDimitry Andric         State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT,
11630b57cec5SDimitry Andric                                                BasePartOffset + PartOffset,
11640b57cec5SDimitry Andric                                                MemVT.getSimpleVT(),
11650b57cec5SDimitry Andric                                                CCValAssign::Full));
11660b57cec5SDimitry Andric         PartOffset += MemVT.getStoreSize();
11670b57cec5SDimitry Andric       }
11680b57cec5SDimitry Andric     }
11690b57cec5SDimitry Andric   }
11700b57cec5SDimitry Andric }
11710b57cec5SDimitry Andric 
11720b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerReturn(
11730b57cec5SDimitry Andric   SDValue Chain, CallingConv::ID CallConv,
11740b57cec5SDimitry Andric   bool isVarArg,
11750b57cec5SDimitry Andric   const SmallVectorImpl<ISD::OutputArg> &Outs,
11760b57cec5SDimitry Andric   const SmallVectorImpl<SDValue> &OutVals,
11770b57cec5SDimitry Andric   const SDLoc &DL, SelectionDAG &DAG) const {
11780b57cec5SDimitry Andric   // FIXME: Fails for r600 tests
11790b57cec5SDimitry Andric   //assert(!isVarArg && Outs.empty() && OutVals.empty() &&
11800b57cec5SDimitry Andric   // "wave terminate should not have return values");
11810b57cec5SDimitry Andric   return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain);
11820b57cec5SDimitry Andric }
11830b57cec5SDimitry Andric 
11840b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
11850b57cec5SDimitry Andric // Target specific lowering
11860b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
11870b57cec5SDimitry Andric 
11880b57cec5SDimitry Andric /// Selects the correct CCAssignFn for a given CallingConvention value.
11890b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
11900b57cec5SDimitry Andric                                                     bool IsVarArg) {
11910b57cec5SDimitry Andric   return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg);
11920b57cec5SDimitry Andric }
11930b57cec5SDimitry Andric 
11940b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
11950b57cec5SDimitry Andric                                                       bool IsVarArg) {
11960b57cec5SDimitry Andric   return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg);
11970b57cec5SDimitry Andric }
11980b57cec5SDimitry Andric 
11990b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain,
12000b57cec5SDimitry Andric                                                   SelectionDAG &DAG,
12010b57cec5SDimitry Andric                                                   MachineFrameInfo &MFI,
12020b57cec5SDimitry Andric                                                   int ClobberedFI) const {
12030b57cec5SDimitry Andric   SmallVector<SDValue, 8> ArgChains;
12040b57cec5SDimitry Andric   int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
12050b57cec5SDimitry Andric   int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
12060b57cec5SDimitry Andric 
12070b57cec5SDimitry Andric   // Include the original chain at the beginning of the list. When this is
12080b57cec5SDimitry Andric   // used by target LowerCall hooks, this helps legalize find the
12090b57cec5SDimitry Andric   // CALLSEQ_BEGIN node.
12100b57cec5SDimitry Andric   ArgChains.push_back(Chain);
12110b57cec5SDimitry Andric 
12120b57cec5SDimitry Andric   // Add a chain value for each stack argument corresponding
1213349cc55cSDimitry Andric   for (SDNode *U : DAG.getEntryNode().getNode()->uses()) {
1214349cc55cSDimitry Andric     if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) {
12150b57cec5SDimitry Andric       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) {
12160b57cec5SDimitry Andric         if (FI->getIndex() < 0) {
12170b57cec5SDimitry Andric           int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
12180b57cec5SDimitry Andric           int64_t InLastByte = InFirstByte;
12190b57cec5SDimitry Andric           InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
12200b57cec5SDimitry Andric 
12210b57cec5SDimitry Andric           if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
12220b57cec5SDimitry Andric               (FirstByte <= InFirstByte && InFirstByte <= LastByte))
12230b57cec5SDimitry Andric             ArgChains.push_back(SDValue(L, 1));
12240b57cec5SDimitry Andric         }
12250b57cec5SDimitry Andric       }
12260b57cec5SDimitry Andric     }
12270b57cec5SDimitry Andric   }
12280b57cec5SDimitry Andric 
12290b57cec5SDimitry Andric   // Build a tokenfactor for all the chains.
12300b57cec5SDimitry Andric   return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
12310b57cec5SDimitry Andric }
12320b57cec5SDimitry Andric 
12330b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI,
12340b57cec5SDimitry Andric                                                  SmallVectorImpl<SDValue> &InVals,
12350b57cec5SDimitry Andric                                                  StringRef Reason) const {
12360b57cec5SDimitry Andric   SDValue Callee = CLI.Callee;
12370b57cec5SDimitry Andric   SelectionDAG &DAG = CLI.DAG;
12380b57cec5SDimitry Andric 
12390b57cec5SDimitry Andric   const Function &Fn = DAG.getMachineFunction().getFunction();
12400b57cec5SDimitry Andric 
12410b57cec5SDimitry Andric   StringRef FuncName("<unknown>");
12420b57cec5SDimitry Andric 
12430b57cec5SDimitry Andric   if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
12440b57cec5SDimitry Andric     FuncName = G->getSymbol();
12450b57cec5SDimitry Andric   else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
12460b57cec5SDimitry Andric     FuncName = G->getGlobal()->getName();
12470b57cec5SDimitry Andric 
12480b57cec5SDimitry Andric   DiagnosticInfoUnsupported NoCalls(
12490b57cec5SDimitry Andric     Fn, Reason + FuncName, CLI.DL.getDebugLoc());
12500b57cec5SDimitry Andric   DAG.getContext()->diagnose(NoCalls);
12510b57cec5SDimitry Andric 
12520b57cec5SDimitry Andric   if (!CLI.IsTailCall) {
12530b57cec5SDimitry Andric     for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I)
12540b57cec5SDimitry Andric       InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT));
12550b57cec5SDimitry Andric   }
12560b57cec5SDimitry Andric 
12570b57cec5SDimitry Andric   return DAG.getEntryNode();
12580b57cec5SDimitry Andric }
12590b57cec5SDimitry Andric 
12600b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
12610b57cec5SDimitry Andric                                         SmallVectorImpl<SDValue> &InVals) const {
12620b57cec5SDimitry Andric   return lowerUnhandledCall(CLI, InVals, "unsupported call to function ");
12630b57cec5SDimitry Andric }
12640b57cec5SDimitry Andric 
12650b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
12660b57cec5SDimitry Andric                                                       SelectionDAG &DAG) const {
12670b57cec5SDimitry Andric   const Function &Fn = DAG.getMachineFunction().getFunction();
12680b57cec5SDimitry Andric 
12690b57cec5SDimitry Andric   DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca",
12700b57cec5SDimitry Andric                                             SDLoc(Op).getDebugLoc());
12710b57cec5SDimitry Andric   DAG.getContext()->diagnose(NoDynamicAlloca);
12720b57cec5SDimitry Andric   auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
12730b57cec5SDimitry Andric   return DAG.getMergeValues(Ops, SDLoc());
12740b57cec5SDimitry Andric }
12750b57cec5SDimitry Andric 
12760b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
12770b57cec5SDimitry Andric                                              SelectionDAG &DAG) const {
12780b57cec5SDimitry Andric   switch (Op.getOpcode()) {
12790b57cec5SDimitry Andric   default:
12800b57cec5SDimitry Andric     Op->print(errs(), &DAG);
12810b57cec5SDimitry Andric     llvm_unreachable("Custom lowering code for this "
12820b57cec5SDimitry Andric                      "instruction is not implemented yet!");
12830b57cec5SDimitry Andric     break;
12840b57cec5SDimitry Andric   case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
12850b57cec5SDimitry Andric   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
12860b57cec5SDimitry Andric   case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
12870b57cec5SDimitry Andric   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
12880b57cec5SDimitry Andric   case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
12890b57cec5SDimitry Andric   case ISD::FREM: return LowerFREM(Op, DAG);
12900b57cec5SDimitry Andric   case ISD::FCEIL: return LowerFCEIL(Op, DAG);
12910b57cec5SDimitry Andric   case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
12920b57cec5SDimitry Andric   case ISD::FRINT: return LowerFRINT(Op, DAG);
12930b57cec5SDimitry Andric   case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
12940b57cec5SDimitry Andric   case ISD::FROUND: return LowerFROUND(Op, DAG);
12950b57cec5SDimitry Andric   case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
12960b57cec5SDimitry Andric   case ISD::FLOG:
12975ffd83dbSDimitry Andric     return LowerFLOG(Op, DAG, numbers::ln2f);
12980b57cec5SDimitry Andric   case ISD::FLOG10:
12998bcb0991SDimitry Andric     return LowerFLOG(Op, DAG, numbers::ln2f / numbers::ln10f);
13000b57cec5SDimitry Andric   case ISD::FEXP:
13010b57cec5SDimitry Andric     return lowerFEXP(Op, DAG);
13020b57cec5SDimitry Andric   case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
13030b57cec5SDimitry Andric   case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
13040b57cec5SDimitry Andric   case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
1305fe6060f1SDimitry Andric   case ISD::FP_TO_SINT:
1306fe6060f1SDimitry Andric   case ISD::FP_TO_UINT:
1307fe6060f1SDimitry Andric     return LowerFP_TO_INT(Op, DAG);
13080b57cec5SDimitry Andric   case ISD::CTTZ:
13090b57cec5SDimitry Andric   case ISD::CTTZ_ZERO_UNDEF:
13100b57cec5SDimitry Andric   case ISD::CTLZ:
13110b57cec5SDimitry Andric   case ISD::CTLZ_ZERO_UNDEF:
13120b57cec5SDimitry Andric     return LowerCTLZ_CTTZ(Op, DAG);
13130b57cec5SDimitry Andric   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
13140b57cec5SDimitry Andric   }
13150b57cec5SDimitry Andric   return Op;
13160b57cec5SDimitry Andric }
13170b57cec5SDimitry Andric 
13180b57cec5SDimitry Andric void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
13190b57cec5SDimitry Andric                                               SmallVectorImpl<SDValue> &Results,
13200b57cec5SDimitry Andric                                               SelectionDAG &DAG) const {
13210b57cec5SDimitry Andric   switch (N->getOpcode()) {
13220b57cec5SDimitry Andric   case ISD::SIGN_EXTEND_INREG:
13230b57cec5SDimitry Andric     // Different parts of legalization seem to interpret which type of
13240b57cec5SDimitry Andric     // sign_extend_inreg is the one to check for custom lowering. The extended
13250b57cec5SDimitry Andric     // from type is what really matters, but some places check for custom
13260b57cec5SDimitry Andric     // lowering of the result type. This results in trying to use
13270b57cec5SDimitry Andric     // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
13280b57cec5SDimitry Andric     // nothing here and let the illegal result integer be handled normally.
13290b57cec5SDimitry Andric     return;
13300b57cec5SDimitry Andric   default:
13310b57cec5SDimitry Andric     return;
13320b57cec5SDimitry Andric   }
13330b57cec5SDimitry Andric }
13340b57cec5SDimitry Andric 
13350b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
13360b57cec5SDimitry Andric                                                  SDValue Op,
13370b57cec5SDimitry Andric                                                  SelectionDAG &DAG) const {
13380b57cec5SDimitry Andric 
13390b57cec5SDimitry Andric   const DataLayout &DL = DAG.getDataLayout();
13400b57cec5SDimitry Andric   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
13410b57cec5SDimitry Andric   const GlobalValue *GV = G->getGlobal();
13420b57cec5SDimitry Andric 
13430b57cec5SDimitry Andric   if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
13440b57cec5SDimitry Andric       G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
1345fe6060f1SDimitry Andric     if (!MFI->isModuleEntryFunction() &&
1346fe6060f1SDimitry Andric         !GV->getName().equals("llvm.amdgcn.module.lds")) {
13475ffd83dbSDimitry Andric       SDLoc DL(Op);
13480b57cec5SDimitry Andric       const Function &Fn = DAG.getMachineFunction().getFunction();
13490b57cec5SDimitry Andric       DiagnosticInfoUnsupported BadLDSDecl(
13505ffd83dbSDimitry Andric         Fn, "local memory global used by non-kernel function",
13515ffd83dbSDimitry Andric         DL.getDebugLoc(), DS_Warning);
13520b57cec5SDimitry Andric       DAG.getContext()->diagnose(BadLDSDecl);
13535ffd83dbSDimitry Andric 
13545ffd83dbSDimitry Andric       // We currently don't have a way to correctly allocate LDS objects that
13555ffd83dbSDimitry Andric       // aren't directly associated with a kernel. We do force inlining of
13565ffd83dbSDimitry Andric       // functions that use local objects. However, if these dead functions are
13575ffd83dbSDimitry Andric       // not eliminated, we don't want a compile time error. Just emit a warning
13585ffd83dbSDimitry Andric       // and a trap, since there should be no callable path here.
13595ffd83dbSDimitry Andric       SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode());
13605ffd83dbSDimitry Andric       SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13615ffd83dbSDimitry Andric                                         Trap, DAG.getRoot());
13625ffd83dbSDimitry Andric       DAG.setRoot(OutputChain);
13635ffd83dbSDimitry Andric       return DAG.getUNDEF(Op.getValueType());
13640b57cec5SDimitry Andric     }
13650b57cec5SDimitry Andric 
13660b57cec5SDimitry Andric     // XXX: What does the value of G->getOffset() mean?
13670b57cec5SDimitry Andric     assert(G->getOffset() == 0 &&
13680b57cec5SDimitry Andric          "Do not know what to do with an non-zero offset");
13690b57cec5SDimitry Andric 
13700b57cec5SDimitry Andric     // TODO: We could emit code to handle the initialization somewhere.
1371349cc55cSDimitry Andric     // We ignore the initializer for now and legalize it to allow selection.
1372349cc55cSDimitry Andric     // The initializer will anyway get errored out during assembly emission.
13735ffd83dbSDimitry Andric     unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
13740b57cec5SDimitry Andric     return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
13750b57cec5SDimitry Andric   }
13760b57cec5SDimitry Andric   return SDValue();
13770b57cec5SDimitry Andric }
13780b57cec5SDimitry Andric 
13790b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
13800b57cec5SDimitry Andric                                                   SelectionDAG &DAG) const {
13810b57cec5SDimitry Andric   SmallVector<SDValue, 8> Args;
13820b57cec5SDimitry Andric 
13830b57cec5SDimitry Andric   EVT VT = Op.getValueType();
13840b57cec5SDimitry Andric   if (VT == MVT::v4i16 || VT == MVT::v4f16) {
13850b57cec5SDimitry Andric     SDLoc SL(Op);
13860b57cec5SDimitry Andric     SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(0));
13870b57cec5SDimitry Andric     SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(1));
13880b57cec5SDimitry Andric 
13890b57cec5SDimitry Andric     SDValue BV = DAG.getBuildVector(MVT::v2i32, SL, { Lo, Hi });
13900b57cec5SDimitry Andric     return DAG.getNode(ISD::BITCAST, SL, VT, BV);
13910b57cec5SDimitry Andric   }
13920b57cec5SDimitry Andric 
13930b57cec5SDimitry Andric   for (const SDUse &U : Op->ops())
13940b57cec5SDimitry Andric     DAG.ExtractVectorElements(U.get(), Args);
13950b57cec5SDimitry Andric 
13960b57cec5SDimitry Andric   return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args);
13970b57cec5SDimitry Andric }
13980b57cec5SDimitry Andric 
13990b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
14000b57cec5SDimitry Andric                                                      SelectionDAG &DAG) const {
14010b57cec5SDimitry Andric 
14020b57cec5SDimitry Andric   SmallVector<SDValue, 8> Args;
14030b57cec5SDimitry Andric   unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
14040b57cec5SDimitry Andric   EVT VT = Op.getValueType();
1405fe6060f1SDimitry Andric   EVT SrcVT = Op.getOperand(0).getValueType();
1406fe6060f1SDimitry Andric 
1407fe6060f1SDimitry Andric   // For these types, we have some TableGen patterns except if the index is 1
1408fe6060f1SDimitry Andric   if (((SrcVT == MVT::v4f16 && VT == MVT::v2f16) ||
1409fe6060f1SDimitry Andric        (SrcVT == MVT::v4i16 && VT == MVT::v2i16)) &&
1410fe6060f1SDimitry Andric       Start != 1)
1411fe6060f1SDimitry Andric     return Op;
1412fe6060f1SDimitry Andric 
1413*04eeddc0SDimitry Andric   if (((SrcVT == MVT::v8f16 && VT == MVT::v4f16) ||
1414*04eeddc0SDimitry Andric        (SrcVT == MVT::v8i16 && VT == MVT::v4i16)) &&
1415*04eeddc0SDimitry Andric       (Start == 0 || Start == 4))
1416*04eeddc0SDimitry Andric     return Op;
1417*04eeddc0SDimitry Andric 
14180b57cec5SDimitry Andric   DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
14190b57cec5SDimitry Andric                             VT.getVectorNumElements());
14200b57cec5SDimitry Andric 
14210b57cec5SDimitry Andric   return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args);
14220b57cec5SDimitry Andric }
14230b57cec5SDimitry Andric 
14240b57cec5SDimitry Andric /// Generate Min/Max node
14250b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
14260b57cec5SDimitry Andric                                                    SDValue LHS, SDValue RHS,
14270b57cec5SDimitry Andric                                                    SDValue True, SDValue False,
14280b57cec5SDimitry Andric                                                    SDValue CC,
14290b57cec5SDimitry Andric                                                    DAGCombinerInfo &DCI) const {
14300b57cec5SDimitry Andric   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
14310b57cec5SDimitry Andric     return SDValue();
14320b57cec5SDimitry Andric 
14330b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
14340b57cec5SDimitry Andric   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
14350b57cec5SDimitry Andric   switch (CCOpcode) {
14360b57cec5SDimitry Andric   case ISD::SETOEQ:
14370b57cec5SDimitry Andric   case ISD::SETONE:
14380b57cec5SDimitry Andric   case ISD::SETUNE:
14390b57cec5SDimitry Andric   case ISD::SETNE:
14400b57cec5SDimitry Andric   case ISD::SETUEQ:
14410b57cec5SDimitry Andric   case ISD::SETEQ:
14420b57cec5SDimitry Andric   case ISD::SETFALSE:
14430b57cec5SDimitry Andric   case ISD::SETFALSE2:
14440b57cec5SDimitry Andric   case ISD::SETTRUE:
14450b57cec5SDimitry Andric   case ISD::SETTRUE2:
14460b57cec5SDimitry Andric   case ISD::SETUO:
14470b57cec5SDimitry Andric   case ISD::SETO:
14480b57cec5SDimitry Andric     break;
14490b57cec5SDimitry Andric   case ISD::SETULE:
14500b57cec5SDimitry Andric   case ISD::SETULT: {
14510b57cec5SDimitry Andric     if (LHS == True)
14520b57cec5SDimitry Andric       return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
14530b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
14540b57cec5SDimitry Andric   }
14550b57cec5SDimitry Andric   case ISD::SETOLE:
14560b57cec5SDimitry Andric   case ISD::SETOLT:
14570b57cec5SDimitry Andric   case ISD::SETLE:
14580b57cec5SDimitry Andric   case ISD::SETLT: {
14590b57cec5SDimitry Andric     // Ordered. Assume ordered for undefined.
14600b57cec5SDimitry Andric 
14610b57cec5SDimitry Andric     // Only do this after legalization to avoid interfering with other combines
14620b57cec5SDimitry Andric     // which might occur.
14630b57cec5SDimitry Andric     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
14640b57cec5SDimitry Andric         !DCI.isCalledByLegalizer())
14650b57cec5SDimitry Andric       return SDValue();
14660b57cec5SDimitry Andric 
14670b57cec5SDimitry Andric     // We need to permute the operands to get the correct NaN behavior. The
14680b57cec5SDimitry Andric     // selected operand is the second one based on the failing compare with NaN,
14690b57cec5SDimitry Andric     // so permute it based on the compare type the hardware uses.
14700b57cec5SDimitry Andric     if (LHS == True)
14710b57cec5SDimitry Andric       return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
14720b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
14730b57cec5SDimitry Andric   }
14740b57cec5SDimitry Andric   case ISD::SETUGE:
14750b57cec5SDimitry Andric   case ISD::SETUGT: {
14760b57cec5SDimitry Andric     if (LHS == True)
14770b57cec5SDimitry Andric       return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
14780b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
14790b57cec5SDimitry Andric   }
14800b57cec5SDimitry Andric   case ISD::SETGT:
14810b57cec5SDimitry Andric   case ISD::SETGE:
14820b57cec5SDimitry Andric   case ISD::SETOGE:
14830b57cec5SDimitry Andric   case ISD::SETOGT: {
14840b57cec5SDimitry Andric     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
14850b57cec5SDimitry Andric         !DCI.isCalledByLegalizer())
14860b57cec5SDimitry Andric       return SDValue();
14870b57cec5SDimitry Andric 
14880b57cec5SDimitry Andric     if (LHS == True)
14890b57cec5SDimitry Andric       return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
14900b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
14910b57cec5SDimitry Andric   }
14920b57cec5SDimitry Andric   case ISD::SETCC_INVALID:
14930b57cec5SDimitry Andric     llvm_unreachable("Invalid setcc condcode!");
14940b57cec5SDimitry Andric   }
14950b57cec5SDimitry Andric   return SDValue();
14960b57cec5SDimitry Andric }
14970b57cec5SDimitry Andric 
14980b57cec5SDimitry Andric std::pair<SDValue, SDValue>
14990b57cec5SDimitry Andric AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const {
15000b57cec5SDimitry Andric   SDLoc SL(Op);
15010b57cec5SDimitry Andric 
15020b57cec5SDimitry Andric   SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
15030b57cec5SDimitry Andric 
15040b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
15050b57cec5SDimitry Andric   const SDValue One = DAG.getConstant(1, SL, MVT::i32);
15060b57cec5SDimitry Andric 
15070b57cec5SDimitry Andric   SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
15080b57cec5SDimitry Andric   SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
15090b57cec5SDimitry Andric 
15100b57cec5SDimitry Andric   return std::make_pair(Lo, Hi);
15110b57cec5SDimitry Andric }
15120b57cec5SDimitry Andric 
15130b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getLoHalf64(SDValue Op, SelectionDAG &DAG) const {
15140b57cec5SDimitry Andric   SDLoc SL(Op);
15150b57cec5SDimitry Andric 
15160b57cec5SDimitry Andric   SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
15170b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
15180b57cec5SDimitry Andric   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
15190b57cec5SDimitry Andric }
15200b57cec5SDimitry Andric 
15210b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const {
15220b57cec5SDimitry Andric   SDLoc SL(Op);
15230b57cec5SDimitry Andric 
15240b57cec5SDimitry Andric   SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
15250b57cec5SDimitry Andric   const SDValue One = DAG.getConstant(1, SL, MVT::i32);
15260b57cec5SDimitry Andric   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
15270b57cec5SDimitry Andric }
15280b57cec5SDimitry Andric 
15290b57cec5SDimitry Andric // Split a vector type into two parts. The first part is a power of two vector.
15300b57cec5SDimitry Andric // The second part is whatever is left over, and is a scalar if it would
15310b57cec5SDimitry Andric // otherwise be a 1-vector.
15320b57cec5SDimitry Andric std::pair<EVT, EVT>
15330b57cec5SDimitry Andric AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const {
15340b57cec5SDimitry Andric   EVT LoVT, HiVT;
15350b57cec5SDimitry Andric   EVT EltVT = VT.getVectorElementType();
15360b57cec5SDimitry Andric   unsigned NumElts = VT.getVectorNumElements();
15370b57cec5SDimitry Andric   unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2);
15380b57cec5SDimitry Andric   LoVT = EVT::getVectorVT(*DAG.getContext(), EltVT, LoNumElts);
15390b57cec5SDimitry Andric   HiVT = NumElts - LoNumElts == 1
15400b57cec5SDimitry Andric              ? EltVT
15410b57cec5SDimitry Andric              : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts - LoNumElts);
15420b57cec5SDimitry Andric   return std::make_pair(LoVT, HiVT);
15430b57cec5SDimitry Andric }
15440b57cec5SDimitry Andric 
15450b57cec5SDimitry Andric // Split a vector value into two parts of types LoVT and HiVT. HiVT could be
15460b57cec5SDimitry Andric // scalar.
15470b57cec5SDimitry Andric std::pair<SDValue, SDValue>
15480b57cec5SDimitry Andric AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL,
15490b57cec5SDimitry Andric                                   const EVT &LoVT, const EVT &HiVT,
15500b57cec5SDimitry Andric                                   SelectionDAG &DAG) const {
15510b57cec5SDimitry Andric   assert(LoVT.getVectorNumElements() +
15520b57cec5SDimitry Andric                  (HiVT.isVector() ? HiVT.getVectorNumElements() : 1) <=
15530b57cec5SDimitry Andric              N.getValueType().getVectorNumElements() &&
15540b57cec5SDimitry Andric          "More vector elements requested than available!");
15550b57cec5SDimitry Andric   SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
15565ffd83dbSDimitry Andric                            DAG.getVectorIdxConstant(0, DL));
15570b57cec5SDimitry Andric   SDValue Hi = DAG.getNode(
15580b57cec5SDimitry Andric       HiVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT, DL,
15595ffd83dbSDimitry Andric       HiVT, N, DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), DL));
15600b57cec5SDimitry Andric   return std::make_pair(Lo, Hi);
15610b57cec5SDimitry Andric }
15620b57cec5SDimitry Andric 
15630b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
15640b57cec5SDimitry Andric                                               SelectionDAG &DAG) const {
15650b57cec5SDimitry Andric   LoadSDNode *Load = cast<LoadSDNode>(Op);
15660b57cec5SDimitry Andric   EVT VT = Op.getValueType();
1567480093f4SDimitry Andric   SDLoc SL(Op);
15680b57cec5SDimitry Andric 
15690b57cec5SDimitry Andric 
15700b57cec5SDimitry Andric   // If this is a 2 element vector, we really want to scalarize and not create
15710b57cec5SDimitry Andric   // weird 1 element vectors.
1572480093f4SDimitry Andric   if (VT.getVectorNumElements() == 2) {
1573480093f4SDimitry Andric     SDValue Ops[2];
1574480093f4SDimitry Andric     std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG);
1575480093f4SDimitry Andric     return DAG.getMergeValues(Ops, SL);
1576480093f4SDimitry Andric   }
15770b57cec5SDimitry Andric 
15780b57cec5SDimitry Andric   SDValue BasePtr = Load->getBasePtr();
15790b57cec5SDimitry Andric   EVT MemVT = Load->getMemoryVT();
15800b57cec5SDimitry Andric 
15810b57cec5SDimitry Andric   const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
15820b57cec5SDimitry Andric 
15830b57cec5SDimitry Andric   EVT LoVT, HiVT;
15840b57cec5SDimitry Andric   EVT LoMemVT, HiMemVT;
15850b57cec5SDimitry Andric   SDValue Lo, Hi;
15860b57cec5SDimitry Andric 
15870b57cec5SDimitry Andric   std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG);
15880b57cec5SDimitry Andric   std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
15890b57cec5SDimitry Andric   std::tie(Lo, Hi) = splitVector(Op, SL, LoVT, HiVT, DAG);
15900b57cec5SDimitry Andric 
15910b57cec5SDimitry Andric   unsigned Size = LoMemVT.getStoreSize();
15920b57cec5SDimitry Andric   unsigned BaseAlign = Load->getAlignment();
15930b57cec5SDimitry Andric   unsigned HiAlign = MinAlign(BaseAlign, Size);
15940b57cec5SDimitry Andric 
15950b57cec5SDimitry Andric   SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
15960b57cec5SDimitry Andric                                   Load->getChain(), BasePtr, SrcValue, LoMemVT,
15970b57cec5SDimitry Andric                                   BaseAlign, Load->getMemOperand()->getFlags());
1598e8d8bef9SDimitry Andric   SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Size));
15990b57cec5SDimitry Andric   SDValue HiLoad =
16000b57cec5SDimitry Andric       DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(),
16010b57cec5SDimitry Andric                      HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()),
16020b57cec5SDimitry Andric                      HiMemVT, HiAlign, Load->getMemOperand()->getFlags());
16030b57cec5SDimitry Andric 
16040b57cec5SDimitry Andric   SDValue Join;
16050b57cec5SDimitry Andric   if (LoVT == HiVT) {
16060b57cec5SDimitry Andric     // This is the case that the vector is power of two so was evenly split.
16070b57cec5SDimitry Andric     Join = DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad);
16080b57cec5SDimitry Andric   } else {
16090b57cec5SDimitry Andric     Join = DAG.getNode(ISD::INSERT_SUBVECTOR, SL, VT, DAG.getUNDEF(VT), LoLoad,
16105ffd83dbSDimitry Andric                        DAG.getVectorIdxConstant(0, SL));
16115ffd83dbSDimitry Andric     Join = DAG.getNode(
16125ffd83dbSDimitry Andric         HiVT.isVector() ? ISD::INSERT_SUBVECTOR : ISD::INSERT_VECTOR_ELT, SL,
16135ffd83dbSDimitry Andric         VT, Join, HiLoad,
16145ffd83dbSDimitry Andric         DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), SL));
16150b57cec5SDimitry Andric   }
16160b57cec5SDimitry Andric 
16170b57cec5SDimitry Andric   SDValue Ops[] = {Join, DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
16180b57cec5SDimitry Andric                                      LoLoad.getValue(1), HiLoad.getValue(1))};
16190b57cec5SDimitry Andric 
16200b57cec5SDimitry Andric   return DAG.getMergeValues(Ops, SL);
16210b57cec5SDimitry Andric }
16220b57cec5SDimitry Andric 
1623e8d8bef9SDimitry Andric SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op,
16240b57cec5SDimitry Andric                                                      SelectionDAG &DAG) const {
16250b57cec5SDimitry Andric   LoadSDNode *Load = cast<LoadSDNode>(Op);
16260b57cec5SDimitry Andric   EVT VT = Op.getValueType();
16270b57cec5SDimitry Andric   SDValue BasePtr = Load->getBasePtr();
16280b57cec5SDimitry Andric   EVT MemVT = Load->getMemoryVT();
16290b57cec5SDimitry Andric   SDLoc SL(Op);
16300b57cec5SDimitry Andric   const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
16310b57cec5SDimitry Andric   unsigned BaseAlign = Load->getAlignment();
1632e8d8bef9SDimitry Andric   unsigned NumElements = MemVT.getVectorNumElements();
1633e8d8bef9SDimitry Andric 
1634e8d8bef9SDimitry Andric   // Widen from vec3 to vec4 when the load is at least 8-byte aligned
1635e8d8bef9SDimitry Andric   // or 16-byte fully dereferenceable. Otherwise, split the vector load.
1636e8d8bef9SDimitry Andric   if (NumElements != 3 ||
1637e8d8bef9SDimitry Andric       (BaseAlign < 8 &&
1638e8d8bef9SDimitry Andric        !SrcValue.isDereferenceable(16, *DAG.getContext(), DAG.getDataLayout())))
1639e8d8bef9SDimitry Andric     return SplitVectorLoad(Op, DAG);
1640e8d8bef9SDimitry Andric 
1641e8d8bef9SDimitry Andric   assert(NumElements == 3);
16420b57cec5SDimitry Andric 
16430b57cec5SDimitry Andric   EVT WideVT =
16440b57cec5SDimitry Andric       EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
16450b57cec5SDimitry Andric   EVT WideMemVT =
16460b57cec5SDimitry Andric       EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(), 4);
16470b57cec5SDimitry Andric   SDValue WideLoad = DAG.getExtLoad(
16480b57cec5SDimitry Andric       Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue,
16490b57cec5SDimitry Andric       WideMemVT, BaseAlign, Load->getMemOperand()->getFlags());
16500b57cec5SDimitry Andric   return DAG.getMergeValues(
16510b57cec5SDimitry Andric       {DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, VT, WideLoad,
16525ffd83dbSDimitry Andric                    DAG.getVectorIdxConstant(0, SL)),
16530b57cec5SDimitry Andric        WideLoad.getValue(1)},
16540b57cec5SDimitry Andric       SL);
16550b57cec5SDimitry Andric }
16560b57cec5SDimitry Andric 
16570b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
16580b57cec5SDimitry Andric                                                SelectionDAG &DAG) const {
16590b57cec5SDimitry Andric   StoreSDNode *Store = cast<StoreSDNode>(Op);
16600b57cec5SDimitry Andric   SDValue Val = Store->getValue();
16610b57cec5SDimitry Andric   EVT VT = Val.getValueType();
16620b57cec5SDimitry Andric 
16630b57cec5SDimitry Andric   // If this is a 2 element vector, we really want to scalarize and not create
16640b57cec5SDimitry Andric   // weird 1 element vectors.
16650b57cec5SDimitry Andric   if (VT.getVectorNumElements() == 2)
16660b57cec5SDimitry Andric     return scalarizeVectorStore(Store, DAG);
16670b57cec5SDimitry Andric 
16680b57cec5SDimitry Andric   EVT MemVT = Store->getMemoryVT();
16690b57cec5SDimitry Andric   SDValue Chain = Store->getChain();
16700b57cec5SDimitry Andric   SDValue BasePtr = Store->getBasePtr();
16710b57cec5SDimitry Andric   SDLoc SL(Op);
16720b57cec5SDimitry Andric 
16730b57cec5SDimitry Andric   EVT LoVT, HiVT;
16740b57cec5SDimitry Andric   EVT LoMemVT, HiMemVT;
16750b57cec5SDimitry Andric   SDValue Lo, Hi;
16760b57cec5SDimitry Andric 
16770b57cec5SDimitry Andric   std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG);
16780b57cec5SDimitry Andric   std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
16790b57cec5SDimitry Andric   std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG);
16800b57cec5SDimitry Andric 
16810b57cec5SDimitry Andric   SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize());
16820b57cec5SDimitry Andric 
16830b57cec5SDimitry Andric   const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo();
16840b57cec5SDimitry Andric   unsigned BaseAlign = Store->getAlignment();
16850b57cec5SDimitry Andric   unsigned Size = LoMemVT.getStoreSize();
16860b57cec5SDimitry Andric   unsigned HiAlign = MinAlign(BaseAlign, Size);
16870b57cec5SDimitry Andric 
16880b57cec5SDimitry Andric   SDValue LoStore =
16890b57cec5SDimitry Andric       DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
16900b57cec5SDimitry Andric                         Store->getMemOperand()->getFlags());
16910b57cec5SDimitry Andric   SDValue HiStore =
16920b57cec5SDimitry Andric       DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size),
16930b57cec5SDimitry Andric                         HiMemVT, HiAlign, Store->getMemOperand()->getFlags());
16940b57cec5SDimitry Andric 
16950b57cec5SDimitry Andric   return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
16960b57cec5SDimitry Andric }
16970b57cec5SDimitry Andric 
16980b57cec5SDimitry Andric // This is a shortcut for integer division because we have fast i32<->f32
16990b57cec5SDimitry Andric // conversions, and fast f32 reciprocal instructions. The fractional part of a
17000b57cec5SDimitry Andric // float is enough to accurately represent up to a 24-bit signed integer.
17010b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
17020b57cec5SDimitry Andric                                             bool Sign) const {
17030b57cec5SDimitry Andric   SDLoc DL(Op);
17040b57cec5SDimitry Andric   EVT VT = Op.getValueType();
17050b57cec5SDimitry Andric   SDValue LHS = Op.getOperand(0);
17060b57cec5SDimitry Andric   SDValue RHS = Op.getOperand(1);
17070b57cec5SDimitry Andric   MVT IntVT = MVT::i32;
17080b57cec5SDimitry Andric   MVT FltVT = MVT::f32;
17090b57cec5SDimitry Andric 
17100b57cec5SDimitry Andric   unsigned LHSSignBits = DAG.ComputeNumSignBits(LHS);
17110b57cec5SDimitry Andric   if (LHSSignBits < 9)
17120b57cec5SDimitry Andric     return SDValue();
17130b57cec5SDimitry Andric 
17140b57cec5SDimitry Andric   unsigned RHSSignBits = DAG.ComputeNumSignBits(RHS);
17150b57cec5SDimitry Andric   if (RHSSignBits < 9)
17160b57cec5SDimitry Andric     return SDValue();
17170b57cec5SDimitry Andric 
17180b57cec5SDimitry Andric   unsigned BitSize = VT.getSizeInBits();
17190b57cec5SDimitry Andric   unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
17200b57cec5SDimitry Andric   unsigned DivBits = BitSize - SignBits;
17210b57cec5SDimitry Andric   if (Sign)
17220b57cec5SDimitry Andric     ++DivBits;
17230b57cec5SDimitry Andric 
17240b57cec5SDimitry Andric   ISD::NodeType ToFp = Sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
17250b57cec5SDimitry Andric   ISD::NodeType ToInt = Sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
17260b57cec5SDimitry Andric 
17270b57cec5SDimitry Andric   SDValue jq = DAG.getConstant(1, DL, IntVT);
17280b57cec5SDimitry Andric 
17290b57cec5SDimitry Andric   if (Sign) {
17300b57cec5SDimitry Andric     // char|short jq = ia ^ ib;
17310b57cec5SDimitry Andric     jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
17320b57cec5SDimitry Andric 
17330b57cec5SDimitry Andric     // jq = jq >> (bitsize - 2)
17340b57cec5SDimitry Andric     jq = DAG.getNode(ISD::SRA, DL, VT, jq,
17350b57cec5SDimitry Andric                      DAG.getConstant(BitSize - 2, DL, VT));
17360b57cec5SDimitry Andric 
17370b57cec5SDimitry Andric     // jq = jq | 0x1
17380b57cec5SDimitry Andric     jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
17390b57cec5SDimitry Andric   }
17400b57cec5SDimitry Andric 
17410b57cec5SDimitry Andric   // int ia = (int)LHS;
17420b57cec5SDimitry Andric   SDValue ia = LHS;
17430b57cec5SDimitry Andric 
17440b57cec5SDimitry Andric   // int ib, (int)RHS;
17450b57cec5SDimitry Andric   SDValue ib = RHS;
17460b57cec5SDimitry Andric 
17470b57cec5SDimitry Andric   // float fa = (float)ia;
17480b57cec5SDimitry Andric   SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
17490b57cec5SDimitry Andric 
17500b57cec5SDimitry Andric   // float fb = (float)ib;
17510b57cec5SDimitry Andric   SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
17520b57cec5SDimitry Andric 
17530b57cec5SDimitry Andric   SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
17540b57cec5SDimitry Andric                            fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
17550b57cec5SDimitry Andric 
17560b57cec5SDimitry Andric   // fq = trunc(fq);
17570b57cec5SDimitry Andric   fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
17580b57cec5SDimitry Andric 
17590b57cec5SDimitry Andric   // float fqneg = -fq;
17600b57cec5SDimitry Andric   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
17610b57cec5SDimitry Andric 
1762480093f4SDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
1763480093f4SDimitry Andric   const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
1764480093f4SDimitry Andric 
17650b57cec5SDimitry Andric   // float fr = mad(fqneg, fb, fa);
17665ffd83dbSDimitry Andric   unsigned OpCode = !Subtarget->hasMadMacF32Insts() ?
17675ffd83dbSDimitry Andric                     (unsigned)ISD::FMA :
17685ffd83dbSDimitry Andric                     !MFI->getMode().allFP32Denormals() ?
17695ffd83dbSDimitry Andric                     (unsigned)ISD::FMAD :
17705ffd83dbSDimitry Andric                     (unsigned)AMDGPUISD::FMAD_FTZ;
17710b57cec5SDimitry Andric   SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
17720b57cec5SDimitry Andric 
17730b57cec5SDimitry Andric   // int iq = (int)fq;
17740b57cec5SDimitry Andric   SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
17750b57cec5SDimitry Andric 
17760b57cec5SDimitry Andric   // fr = fabs(fr);
17770b57cec5SDimitry Andric   fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
17780b57cec5SDimitry Andric 
17790b57cec5SDimitry Andric   // fb = fabs(fb);
17800b57cec5SDimitry Andric   fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
17810b57cec5SDimitry Andric 
17820b57cec5SDimitry Andric   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
17830b57cec5SDimitry Andric 
17840b57cec5SDimitry Andric   // int cv = fr >= fb;
17850b57cec5SDimitry Andric   SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
17860b57cec5SDimitry Andric 
17870b57cec5SDimitry Andric   // jq = (cv ? jq : 0);
17880b57cec5SDimitry Andric   jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
17890b57cec5SDimitry Andric 
17900b57cec5SDimitry Andric   // dst = iq + jq;
17910b57cec5SDimitry Andric   SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
17920b57cec5SDimitry Andric 
17930b57cec5SDimitry Andric   // Rem needs compensation, it's easier to recompute it
17940b57cec5SDimitry Andric   SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
17950b57cec5SDimitry Andric   Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
17960b57cec5SDimitry Andric 
17970b57cec5SDimitry Andric   // Truncate to number of bits this divide really is.
17980b57cec5SDimitry Andric   if (Sign) {
17990b57cec5SDimitry Andric     SDValue InRegSize
18000b57cec5SDimitry Andric       = DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), DivBits));
18010b57cec5SDimitry Andric     Div = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Div, InRegSize);
18020b57cec5SDimitry Andric     Rem = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Rem, InRegSize);
18030b57cec5SDimitry Andric   } else {
18040b57cec5SDimitry Andric     SDValue TruncMask = DAG.getConstant((UINT64_C(1) << DivBits) - 1, DL, VT);
18050b57cec5SDimitry Andric     Div = DAG.getNode(ISD::AND, DL, VT, Div, TruncMask);
18060b57cec5SDimitry Andric     Rem = DAG.getNode(ISD::AND, DL, VT, Rem, TruncMask);
18070b57cec5SDimitry Andric   }
18080b57cec5SDimitry Andric 
18090b57cec5SDimitry Andric   return DAG.getMergeValues({ Div, Rem }, DL);
18100b57cec5SDimitry Andric }
18110b57cec5SDimitry Andric 
18120b57cec5SDimitry Andric void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
18130b57cec5SDimitry Andric                                       SelectionDAG &DAG,
18140b57cec5SDimitry Andric                                       SmallVectorImpl<SDValue> &Results) const {
18150b57cec5SDimitry Andric   SDLoc DL(Op);
18160b57cec5SDimitry Andric   EVT VT = Op.getValueType();
18170b57cec5SDimitry Andric 
18180b57cec5SDimitry Andric   assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64");
18190b57cec5SDimitry Andric 
18200b57cec5SDimitry Andric   EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
18210b57cec5SDimitry Andric 
18220b57cec5SDimitry Andric   SDValue One = DAG.getConstant(1, DL, HalfVT);
18230b57cec5SDimitry Andric   SDValue Zero = DAG.getConstant(0, DL, HalfVT);
18240b57cec5SDimitry Andric 
18250b57cec5SDimitry Andric   //HiLo split
18260b57cec5SDimitry Andric   SDValue LHS = Op.getOperand(0);
18270b57cec5SDimitry Andric   SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
18280b57cec5SDimitry Andric   SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, One);
18290b57cec5SDimitry Andric 
18300b57cec5SDimitry Andric   SDValue RHS = Op.getOperand(1);
18310b57cec5SDimitry Andric   SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
18320b57cec5SDimitry Andric   SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, One);
18330b57cec5SDimitry Andric 
18340b57cec5SDimitry Andric   if (DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
18350b57cec5SDimitry Andric       DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
18360b57cec5SDimitry Andric 
18370b57cec5SDimitry Andric     SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
18380b57cec5SDimitry Andric                               LHS_Lo, RHS_Lo);
18390b57cec5SDimitry Andric 
18400b57cec5SDimitry Andric     SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), Zero});
18410b57cec5SDimitry Andric     SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), Zero});
18420b57cec5SDimitry Andric 
18430b57cec5SDimitry Andric     Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV));
18440b57cec5SDimitry Andric     Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM));
18450b57cec5SDimitry Andric     return;
18460b57cec5SDimitry Andric   }
18470b57cec5SDimitry Andric 
18480b57cec5SDimitry Andric   if (isTypeLegal(MVT::i64)) {
1849349cc55cSDimitry Andric     // The algorithm here is based on ideas from "Software Integer Division",
1850349cc55cSDimitry Andric     // Tom Rodeheffer, August 2008.
1851349cc55cSDimitry Andric 
1852480093f4SDimitry Andric     MachineFunction &MF = DAG.getMachineFunction();
1853480093f4SDimitry Andric     const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1854480093f4SDimitry Andric 
18550b57cec5SDimitry Andric     // Compute denominator reciprocal.
18565ffd83dbSDimitry Andric     unsigned FMAD = !Subtarget->hasMadMacF32Insts() ?
18575ffd83dbSDimitry Andric                     (unsigned)ISD::FMA :
18585ffd83dbSDimitry Andric                     !MFI->getMode().allFP32Denormals() ?
18595ffd83dbSDimitry Andric                     (unsigned)ISD::FMAD :
18605ffd83dbSDimitry Andric                     (unsigned)AMDGPUISD::FMAD_FTZ;
18610b57cec5SDimitry Andric 
18620b57cec5SDimitry Andric     SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
18630b57cec5SDimitry Andric     SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
18640b57cec5SDimitry Andric     SDValue Mad1 = DAG.getNode(FMAD, DL, MVT::f32, Cvt_Hi,
18650b57cec5SDimitry Andric       DAG.getConstantFP(APInt(32, 0x4f800000).bitsToFloat(), DL, MVT::f32),
18660b57cec5SDimitry Andric       Cvt_Lo);
18670b57cec5SDimitry Andric     SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1);
18680b57cec5SDimitry Andric     SDValue Mul1 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Rcp,
18690b57cec5SDimitry Andric       DAG.getConstantFP(APInt(32, 0x5f7ffffc).bitsToFloat(), DL, MVT::f32));
18700b57cec5SDimitry Andric     SDValue Mul2 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Mul1,
18710b57cec5SDimitry Andric       DAG.getConstantFP(APInt(32, 0x2f800000).bitsToFloat(), DL, MVT::f32));
18720b57cec5SDimitry Andric     SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2);
18730b57cec5SDimitry Andric     SDValue Mad2 = DAG.getNode(FMAD, DL, MVT::f32, Trunc,
18740b57cec5SDimitry Andric       DAG.getConstantFP(APInt(32, 0xcf800000).bitsToFloat(), DL, MVT::f32),
18750b57cec5SDimitry Andric       Mul1);
18760b57cec5SDimitry Andric     SDValue Rcp_Lo = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Mad2);
18770b57cec5SDimitry Andric     SDValue Rcp_Hi = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Trunc);
18780b57cec5SDimitry Andric     SDValue Rcp64 = DAG.getBitcast(VT,
18790b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Rcp_Lo, Rcp_Hi}));
18800b57cec5SDimitry Andric 
18810b57cec5SDimitry Andric     SDValue Zero64 = DAG.getConstant(0, DL, VT);
18820b57cec5SDimitry Andric     SDValue One64  = DAG.getConstant(1, DL, VT);
18830b57cec5SDimitry Andric     SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1);
18840b57cec5SDimitry Andric     SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1);
18850b57cec5SDimitry Andric 
1886349cc55cSDimitry Andric     // First round of UNR (Unsigned integer Newton-Raphson).
18870b57cec5SDimitry Andric     SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS);
18880b57cec5SDimitry Andric     SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64);
18890b57cec5SDimitry Andric     SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1);
18900b57cec5SDimitry Andric     SDValue Mulhi1_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1,
18910b57cec5SDimitry Andric                                     Zero);
1892349cc55cSDimitry Andric     SDValue Mulhi1_Hi =
1893349cc55cSDimitry Andric         DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, One);
18940b57cec5SDimitry Andric     SDValue Add1_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Lo,
18950b57cec5SDimitry Andric                                   Mulhi1_Lo, Zero1);
18960b57cec5SDimitry Andric     SDValue Add1_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Hi,
18970b57cec5SDimitry Andric                                   Mulhi1_Hi, Add1_Lo.getValue(1));
18980b57cec5SDimitry Andric     SDValue Add1 = DAG.getBitcast(VT,
18990b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi}));
19000b57cec5SDimitry Andric 
1901349cc55cSDimitry Andric     // Second round of UNR.
19020b57cec5SDimitry Andric     SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1);
19030b57cec5SDimitry Andric     SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2);
19040b57cec5SDimitry Andric     SDValue Mulhi2_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2,
19050b57cec5SDimitry Andric                                     Zero);
1906349cc55cSDimitry Andric     SDValue Mulhi2_Hi =
1907349cc55cSDimitry Andric         DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, One);
19080b57cec5SDimitry Andric     SDValue Add2_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Lo,
19090b57cec5SDimitry Andric                                   Mulhi2_Lo, Zero1);
1910349cc55cSDimitry Andric     SDValue Add2_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Hi,
1911349cc55cSDimitry Andric                                   Mulhi2_Hi, Add2_Lo.getValue(1));
19120b57cec5SDimitry Andric     SDValue Add2 = DAG.getBitcast(VT,
19130b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi}));
1914349cc55cSDimitry Andric 
19150b57cec5SDimitry Andric     SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2);
19160b57cec5SDimitry Andric 
19170b57cec5SDimitry Andric     SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3);
19180b57cec5SDimitry Andric 
19190b57cec5SDimitry Andric     SDValue Mul3_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, Zero);
19200b57cec5SDimitry Andric     SDValue Mul3_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, One);
19210b57cec5SDimitry Andric     SDValue Sub1_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Lo,
19220b57cec5SDimitry Andric                                   Mul3_Lo, Zero1);
19230b57cec5SDimitry Andric     SDValue Sub1_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Hi,
19240b57cec5SDimitry Andric                                   Mul3_Hi, Sub1_Lo.getValue(1));
19250b57cec5SDimitry Andric     SDValue Sub1_Mi = DAG.getNode(ISD::SUB, DL, HalfVT, LHS_Hi, Mul3_Hi);
19260b57cec5SDimitry Andric     SDValue Sub1 = DAG.getBitcast(VT,
19270b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Sub1_Lo, Sub1_Hi}));
19280b57cec5SDimitry Andric 
19290b57cec5SDimitry Andric     SDValue MinusOne = DAG.getConstant(0xffffffffu, DL, HalfVT);
19300b57cec5SDimitry Andric     SDValue C1 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, MinusOne, Zero,
19310b57cec5SDimitry Andric                                  ISD::SETUGE);
19320b57cec5SDimitry Andric     SDValue C2 = DAG.getSelectCC(DL, Sub1_Lo, RHS_Lo, MinusOne, Zero,
19330b57cec5SDimitry Andric                                  ISD::SETUGE);
19340b57cec5SDimitry Andric     SDValue C3 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, C2, C1, ISD::SETEQ);
19350b57cec5SDimitry Andric 
19360b57cec5SDimitry Andric     // TODO: Here and below portions of the code can be enclosed into if/endif.
19370b57cec5SDimitry Andric     // Currently control flow is unconditional and we have 4 selects after
19380b57cec5SDimitry Andric     // potential endif to substitute PHIs.
19390b57cec5SDimitry Andric 
19400b57cec5SDimitry Andric     // if C3 != 0 ...
19410b57cec5SDimitry Andric     SDValue Sub2_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Lo,
19420b57cec5SDimitry Andric                                   RHS_Lo, Zero1);
19430b57cec5SDimitry Andric     SDValue Sub2_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Mi,
19440b57cec5SDimitry Andric                                   RHS_Hi, Sub1_Lo.getValue(1));
19450b57cec5SDimitry Andric     SDValue Sub2_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi,
19460b57cec5SDimitry Andric                                   Zero, Sub2_Lo.getValue(1));
19470b57cec5SDimitry Andric     SDValue Sub2 = DAG.getBitcast(VT,
19480b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi}));
19490b57cec5SDimitry Andric 
19500b57cec5SDimitry Andric     SDValue Add3 = DAG.getNode(ISD::ADD, DL, VT, Mulhi3, One64);
19510b57cec5SDimitry Andric 
19520b57cec5SDimitry Andric     SDValue C4 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, MinusOne, Zero,
19530b57cec5SDimitry Andric                                  ISD::SETUGE);
19540b57cec5SDimitry Andric     SDValue C5 = DAG.getSelectCC(DL, Sub2_Lo, RHS_Lo, MinusOne, Zero,
19550b57cec5SDimitry Andric                                  ISD::SETUGE);
19560b57cec5SDimitry Andric     SDValue C6 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, C5, C4, ISD::SETEQ);
19570b57cec5SDimitry Andric 
19580b57cec5SDimitry Andric     // if (C6 != 0)
19590b57cec5SDimitry Andric     SDValue Add4 = DAG.getNode(ISD::ADD, DL, VT, Add3, One64);
19600b57cec5SDimitry Andric 
19610b57cec5SDimitry Andric     SDValue Sub3_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Lo,
19620b57cec5SDimitry Andric                                   RHS_Lo, Zero1);
19630b57cec5SDimitry Andric     SDValue Sub3_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi,
19640b57cec5SDimitry Andric                                   RHS_Hi, Sub2_Lo.getValue(1));
19650b57cec5SDimitry Andric     SDValue Sub3_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub3_Mi,
19660b57cec5SDimitry Andric                                   Zero, Sub3_Lo.getValue(1));
19670b57cec5SDimitry Andric     SDValue Sub3 = DAG.getBitcast(VT,
19680b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi}));
19690b57cec5SDimitry Andric 
19700b57cec5SDimitry Andric     // endif C6
19710b57cec5SDimitry Andric     // endif C3
19720b57cec5SDimitry Andric 
19730b57cec5SDimitry Andric     SDValue Sel1 = DAG.getSelectCC(DL, C6, Zero, Add4, Add3, ISD::SETNE);
19740b57cec5SDimitry Andric     SDValue Div  = DAG.getSelectCC(DL, C3, Zero, Sel1, Mulhi3, ISD::SETNE);
19750b57cec5SDimitry Andric 
19760b57cec5SDimitry Andric     SDValue Sel2 = DAG.getSelectCC(DL, C6, Zero, Sub3, Sub2, ISD::SETNE);
19770b57cec5SDimitry Andric     SDValue Rem  = DAG.getSelectCC(DL, C3, Zero, Sel2, Sub1, ISD::SETNE);
19780b57cec5SDimitry Andric 
19790b57cec5SDimitry Andric     Results.push_back(Div);
19800b57cec5SDimitry Andric     Results.push_back(Rem);
19810b57cec5SDimitry Andric 
19820b57cec5SDimitry Andric     return;
19830b57cec5SDimitry Andric   }
19840b57cec5SDimitry Andric 
19850b57cec5SDimitry Andric   // r600 expansion.
19860b57cec5SDimitry Andric   // Get Speculative values
19870b57cec5SDimitry Andric   SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
19880b57cec5SDimitry Andric   SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
19890b57cec5SDimitry Andric 
19900b57cec5SDimitry Andric   SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, Zero, REM_Part, LHS_Hi, ISD::SETEQ);
19910b57cec5SDimitry Andric   SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, Zero});
19920b57cec5SDimitry Andric   REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM);
19930b57cec5SDimitry Andric 
19940b57cec5SDimitry Andric   SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, Zero, DIV_Part, Zero, ISD::SETEQ);
19950b57cec5SDimitry Andric   SDValue DIV_Lo = Zero;
19960b57cec5SDimitry Andric 
19970b57cec5SDimitry Andric   const unsigned halfBitWidth = HalfVT.getSizeInBits();
19980b57cec5SDimitry Andric 
19990b57cec5SDimitry Andric   for (unsigned i = 0; i < halfBitWidth; ++i) {
20000b57cec5SDimitry Andric     const unsigned bitPos = halfBitWidth - i - 1;
20010b57cec5SDimitry Andric     SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
20020b57cec5SDimitry Andric     // Get value of high bit
20030b57cec5SDimitry Andric     SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
20040b57cec5SDimitry Andric     HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, One);
20050b57cec5SDimitry Andric     HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
20060b57cec5SDimitry Andric 
20070b57cec5SDimitry Andric     // Shift
20080b57cec5SDimitry Andric     REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT));
20090b57cec5SDimitry Andric     // Add LHS high bit
20100b57cec5SDimitry Andric     REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit);
20110b57cec5SDimitry Andric 
20120b57cec5SDimitry Andric     SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT);
20130b57cec5SDimitry Andric     SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, Zero, ISD::SETUGE);
20140b57cec5SDimitry Andric 
20150b57cec5SDimitry Andric     DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
20160b57cec5SDimitry Andric 
20170b57cec5SDimitry Andric     // Update REM
20180b57cec5SDimitry Andric     SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
20190b57cec5SDimitry Andric     REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
20200b57cec5SDimitry Andric   }
20210b57cec5SDimitry Andric 
20220b57cec5SDimitry Andric   SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {DIV_Lo, DIV_Hi});
20230b57cec5SDimitry Andric   DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV);
20240b57cec5SDimitry Andric   Results.push_back(DIV);
20250b57cec5SDimitry Andric   Results.push_back(REM);
20260b57cec5SDimitry Andric }
20270b57cec5SDimitry Andric 
20280b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
20290b57cec5SDimitry Andric                                            SelectionDAG &DAG) const {
20300b57cec5SDimitry Andric   SDLoc DL(Op);
20310b57cec5SDimitry Andric   EVT VT = Op.getValueType();
20320b57cec5SDimitry Andric 
20330b57cec5SDimitry Andric   if (VT == MVT::i64) {
20340b57cec5SDimitry Andric     SmallVector<SDValue, 2> Results;
20350b57cec5SDimitry Andric     LowerUDIVREM64(Op, DAG, Results);
20360b57cec5SDimitry Andric     return DAG.getMergeValues(Results, DL);
20370b57cec5SDimitry Andric   }
20380b57cec5SDimitry Andric 
20390b57cec5SDimitry Andric   if (VT == MVT::i32) {
20400b57cec5SDimitry Andric     if (SDValue Res = LowerDIVREM24(Op, DAG, false))
20410b57cec5SDimitry Andric       return Res;
20420b57cec5SDimitry Andric   }
20430b57cec5SDimitry Andric 
20445ffd83dbSDimitry Andric   SDValue X = Op.getOperand(0);
20455ffd83dbSDimitry Andric   SDValue Y = Op.getOperand(1);
20460b57cec5SDimitry Andric 
20475ffd83dbSDimitry Andric   // See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the
20485ffd83dbSDimitry Andric   // algorithm used here.
20490b57cec5SDimitry Andric 
20505ffd83dbSDimitry Andric   // Initial estimate of inv(y).
20515ffd83dbSDimitry Andric   SDValue Z = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Y);
20520b57cec5SDimitry Andric 
20535ffd83dbSDimitry Andric   // One round of UNR.
20545ffd83dbSDimitry Andric   SDValue NegY = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Y);
20555ffd83dbSDimitry Andric   SDValue NegYZ = DAG.getNode(ISD::MUL, DL, VT, NegY, Z);
20565ffd83dbSDimitry Andric   Z = DAG.getNode(ISD::ADD, DL, VT, Z,
20575ffd83dbSDimitry Andric                   DAG.getNode(ISD::MULHU, DL, VT, Z, NegYZ));
20580b57cec5SDimitry Andric 
20595ffd83dbSDimitry Andric   // Quotient/remainder estimate.
20605ffd83dbSDimitry Andric   SDValue Q = DAG.getNode(ISD::MULHU, DL, VT, X, Z);
20615ffd83dbSDimitry Andric   SDValue R =
20625ffd83dbSDimitry Andric       DAG.getNode(ISD::SUB, DL, VT, X, DAG.getNode(ISD::MUL, DL, VT, Q, Y));
20630b57cec5SDimitry Andric 
20645ffd83dbSDimitry Andric   // First quotient/remainder refinement.
20655ffd83dbSDimitry Andric   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
20665ffd83dbSDimitry Andric   SDValue One = DAG.getConstant(1, DL, VT);
20675ffd83dbSDimitry Andric   SDValue Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE);
20685ffd83dbSDimitry Andric   Q = DAG.getNode(ISD::SELECT, DL, VT, Cond,
20695ffd83dbSDimitry Andric                   DAG.getNode(ISD::ADD, DL, VT, Q, One), Q);
20705ffd83dbSDimitry Andric   R = DAG.getNode(ISD::SELECT, DL, VT, Cond,
20715ffd83dbSDimitry Andric                   DAG.getNode(ISD::SUB, DL, VT, R, Y), R);
20720b57cec5SDimitry Andric 
20735ffd83dbSDimitry Andric   // Second quotient/remainder refinement.
20745ffd83dbSDimitry Andric   Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE);
20755ffd83dbSDimitry Andric   Q = DAG.getNode(ISD::SELECT, DL, VT, Cond,
20765ffd83dbSDimitry Andric                   DAG.getNode(ISD::ADD, DL, VT, Q, One), Q);
20775ffd83dbSDimitry Andric   R = DAG.getNode(ISD::SELECT, DL, VT, Cond,
20785ffd83dbSDimitry Andric                   DAG.getNode(ISD::SUB, DL, VT, R, Y), R);
20790b57cec5SDimitry Andric 
20805ffd83dbSDimitry Andric   return DAG.getMergeValues({Q, R}, DL);
20810b57cec5SDimitry Andric }
20820b57cec5SDimitry Andric 
20830b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
20840b57cec5SDimitry Andric                                            SelectionDAG &DAG) const {
20850b57cec5SDimitry Andric   SDLoc DL(Op);
20860b57cec5SDimitry Andric   EVT VT = Op.getValueType();
20870b57cec5SDimitry Andric 
20880b57cec5SDimitry Andric   SDValue LHS = Op.getOperand(0);
20890b57cec5SDimitry Andric   SDValue RHS = Op.getOperand(1);
20900b57cec5SDimitry Andric 
20910b57cec5SDimitry Andric   SDValue Zero = DAG.getConstant(0, DL, VT);
20920b57cec5SDimitry Andric   SDValue NegOne = DAG.getConstant(-1, DL, VT);
20930b57cec5SDimitry Andric 
20940b57cec5SDimitry Andric   if (VT == MVT::i32) {
20950b57cec5SDimitry Andric     if (SDValue Res = LowerDIVREM24(Op, DAG, true))
20960b57cec5SDimitry Andric       return Res;
20970b57cec5SDimitry Andric   }
20980b57cec5SDimitry Andric 
20990b57cec5SDimitry Andric   if (VT == MVT::i64 &&
21000b57cec5SDimitry Andric       DAG.ComputeNumSignBits(LHS) > 32 &&
21010b57cec5SDimitry Andric       DAG.ComputeNumSignBits(RHS) > 32) {
21020b57cec5SDimitry Andric     EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
21030b57cec5SDimitry Andric 
21040b57cec5SDimitry Andric     //HiLo split
21050b57cec5SDimitry Andric     SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
21060b57cec5SDimitry Andric     SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
21070b57cec5SDimitry Andric     SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
21080b57cec5SDimitry Andric                                  LHS_Lo, RHS_Lo);
21090b57cec5SDimitry Andric     SDValue Res[2] = {
21100b57cec5SDimitry Andric       DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
21110b57cec5SDimitry Andric       DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
21120b57cec5SDimitry Andric     };
21130b57cec5SDimitry Andric     return DAG.getMergeValues(Res, DL);
21140b57cec5SDimitry Andric   }
21150b57cec5SDimitry Andric 
21160b57cec5SDimitry Andric   SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
21170b57cec5SDimitry Andric   SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
21180b57cec5SDimitry Andric   SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
21190b57cec5SDimitry Andric   SDValue RSign = LHSign; // Remainder sign is the same as LHS
21200b57cec5SDimitry Andric 
21210b57cec5SDimitry Andric   LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
21220b57cec5SDimitry Andric   RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
21230b57cec5SDimitry Andric 
21240b57cec5SDimitry Andric   LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
21250b57cec5SDimitry Andric   RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
21260b57cec5SDimitry Andric 
21270b57cec5SDimitry Andric   SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
21280b57cec5SDimitry Andric   SDValue Rem = Div.getValue(1);
21290b57cec5SDimitry Andric 
21300b57cec5SDimitry Andric   Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
21310b57cec5SDimitry Andric   Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
21320b57cec5SDimitry Andric 
21330b57cec5SDimitry Andric   Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
21340b57cec5SDimitry Andric   Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
21350b57cec5SDimitry Andric 
21360b57cec5SDimitry Andric   SDValue Res[2] = {
21370b57cec5SDimitry Andric     Div,
21380b57cec5SDimitry Andric     Rem
21390b57cec5SDimitry Andric   };
21400b57cec5SDimitry Andric   return DAG.getMergeValues(Res, DL);
21410b57cec5SDimitry Andric }
21420b57cec5SDimitry Andric 
2143e8d8bef9SDimitry Andric // (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x)
21440b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
21450b57cec5SDimitry Andric   SDLoc SL(Op);
21460b57cec5SDimitry Andric   EVT VT = Op.getValueType();
2147e8d8bef9SDimitry Andric   auto Flags = Op->getFlags();
21480b57cec5SDimitry Andric   SDValue X = Op.getOperand(0);
21490b57cec5SDimitry Andric   SDValue Y = Op.getOperand(1);
21500b57cec5SDimitry Andric 
2151e8d8bef9SDimitry Andric   SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags);
2152e8d8bef9SDimitry Andric   SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags);
2153e8d8bef9SDimitry Andric   SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags);
2154e8d8bef9SDimitry Andric   // TODO: For f32 use FMAD instead if !hasFastFMA32?
2155e8d8bef9SDimitry Andric   return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags);
21560b57cec5SDimitry Andric }
21570b57cec5SDimitry Andric 
21580b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
21590b57cec5SDimitry Andric   SDLoc SL(Op);
21600b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
21610b57cec5SDimitry Andric 
21620b57cec5SDimitry Andric   // result = trunc(src)
21630b57cec5SDimitry Andric   // if (src > 0.0 && src != result)
21640b57cec5SDimitry Andric   //   result += 1.0
21650b57cec5SDimitry Andric 
21660b57cec5SDimitry Andric   SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
21670b57cec5SDimitry Andric 
21680b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
21690b57cec5SDimitry Andric   const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
21700b57cec5SDimitry Andric 
21710b57cec5SDimitry Andric   EVT SetCCVT =
21720b57cec5SDimitry Andric       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
21730b57cec5SDimitry Andric 
21740b57cec5SDimitry Andric   SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
21750b57cec5SDimitry Andric   SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
21760b57cec5SDimitry Andric   SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
21770b57cec5SDimitry Andric 
21780b57cec5SDimitry Andric   SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
21790b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
21800b57cec5SDimitry Andric   return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
21810b57cec5SDimitry Andric }
21820b57cec5SDimitry Andric 
21830b57cec5SDimitry Andric static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL,
21840b57cec5SDimitry Andric                                   SelectionDAG &DAG) {
21850b57cec5SDimitry Andric   const unsigned FractBits = 52;
21860b57cec5SDimitry Andric   const unsigned ExpBits = 11;
21870b57cec5SDimitry Andric 
21880b57cec5SDimitry Andric   SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
21890b57cec5SDimitry Andric                                 Hi,
21900b57cec5SDimitry Andric                                 DAG.getConstant(FractBits - 32, SL, MVT::i32),
21910b57cec5SDimitry Andric                                 DAG.getConstant(ExpBits, SL, MVT::i32));
21920b57cec5SDimitry Andric   SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
21930b57cec5SDimitry Andric                             DAG.getConstant(1023, SL, MVT::i32));
21940b57cec5SDimitry Andric 
21950b57cec5SDimitry Andric   return Exp;
21960b57cec5SDimitry Andric }
21970b57cec5SDimitry Andric 
21980b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
21990b57cec5SDimitry Andric   SDLoc SL(Op);
22000b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
22010b57cec5SDimitry Andric 
22020b57cec5SDimitry Andric   assert(Op.getValueType() == MVT::f64);
22030b57cec5SDimitry Andric 
22040b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
22050b57cec5SDimitry Andric 
22060b57cec5SDimitry Andric   // Extract the upper half, since this is where we will find the sign and
22070b57cec5SDimitry Andric   // exponent.
2208349cc55cSDimitry Andric   SDValue Hi = getHiHalf64(Src, DAG);
22090b57cec5SDimitry Andric 
22100b57cec5SDimitry Andric   SDValue Exp = extractF64Exponent(Hi, SL, DAG);
22110b57cec5SDimitry Andric 
22120b57cec5SDimitry Andric   const unsigned FractBits = 52;
22130b57cec5SDimitry Andric 
22140b57cec5SDimitry Andric   // Extract the sign bit.
22150b57cec5SDimitry Andric   const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);
22160b57cec5SDimitry Andric   SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
22170b57cec5SDimitry Andric 
22180b57cec5SDimitry Andric   // Extend back to 64-bits.
22190b57cec5SDimitry Andric   SDValue SignBit64 = DAG.getBuildVector(MVT::v2i32, SL, {Zero, SignBit});
22200b57cec5SDimitry Andric   SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
22210b57cec5SDimitry Andric 
22220b57cec5SDimitry Andric   SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
22230b57cec5SDimitry Andric   const SDValue FractMask
22240b57cec5SDimitry Andric     = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
22250b57cec5SDimitry Andric 
22260b57cec5SDimitry Andric   SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
22270b57cec5SDimitry Andric   SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
22280b57cec5SDimitry Andric   SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
22290b57cec5SDimitry Andric 
22300b57cec5SDimitry Andric   EVT SetCCVT =
22310b57cec5SDimitry Andric       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
22320b57cec5SDimitry Andric 
22330b57cec5SDimitry Andric   const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
22340b57cec5SDimitry Andric 
22350b57cec5SDimitry Andric   SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
22360b57cec5SDimitry Andric   SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
22370b57cec5SDimitry Andric 
22380b57cec5SDimitry Andric   SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
22390b57cec5SDimitry Andric   SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
22400b57cec5SDimitry Andric 
22410b57cec5SDimitry Andric   return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
22420b57cec5SDimitry Andric }
22430b57cec5SDimitry Andric 
22440b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
22450b57cec5SDimitry Andric   SDLoc SL(Op);
22460b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
22470b57cec5SDimitry Andric 
22480b57cec5SDimitry Andric   assert(Op.getValueType() == MVT::f64);
22490b57cec5SDimitry Andric 
22500b57cec5SDimitry Andric   APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
22510b57cec5SDimitry Andric   SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
22520b57cec5SDimitry Andric   SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
22530b57cec5SDimitry Andric 
22540b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
22550b57cec5SDimitry Andric 
22560b57cec5SDimitry Andric   SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
22570b57cec5SDimitry Andric   SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
22580b57cec5SDimitry Andric 
22590b57cec5SDimitry Andric   SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
22600b57cec5SDimitry Andric 
22610b57cec5SDimitry Andric   APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
22620b57cec5SDimitry Andric   SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
22630b57cec5SDimitry Andric 
22640b57cec5SDimitry Andric   EVT SetCCVT =
22650b57cec5SDimitry Andric       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
22660b57cec5SDimitry Andric   SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
22670b57cec5SDimitry Andric 
22680b57cec5SDimitry Andric   return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
22690b57cec5SDimitry Andric }
22700b57cec5SDimitry Andric 
22710b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
22720b57cec5SDimitry Andric   // FNEARBYINT and FRINT are the same, except in their handling of FP
22730b57cec5SDimitry Andric   // exceptions. Those aren't really meaningful for us, and OpenCL only has
22740b57cec5SDimitry Andric   // rint, so just treat them as equivalent.
22750b57cec5SDimitry Andric   return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
22760b57cec5SDimitry Andric }
22770b57cec5SDimitry Andric 
22780b57cec5SDimitry Andric // XXX - May require not supporting f32 denormals?
22790b57cec5SDimitry Andric 
22800b57cec5SDimitry Andric // Don't handle v2f16. The extra instructions to scalarize and repack around the
22810b57cec5SDimitry Andric // compare and vselect end up producing worse code than scalarizing the whole
22820b57cec5SDimitry Andric // operation.
22835ffd83dbSDimitry Andric SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
22840b57cec5SDimitry Andric   SDLoc SL(Op);
22850b57cec5SDimitry Andric   SDValue X = Op.getOperand(0);
22860b57cec5SDimitry Andric   EVT VT = Op.getValueType();
22870b57cec5SDimitry Andric 
22880b57cec5SDimitry Andric   SDValue T = DAG.getNode(ISD::FTRUNC, SL, VT, X);
22890b57cec5SDimitry Andric 
22900b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
22910b57cec5SDimitry Andric 
22920b57cec5SDimitry Andric   SDValue Diff = DAG.getNode(ISD::FSUB, SL, VT, X, T);
22930b57cec5SDimitry Andric 
22940b57cec5SDimitry Andric   SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff);
22950b57cec5SDimitry Andric 
22960b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
22970b57cec5SDimitry Andric   const SDValue One = DAG.getConstantFP(1.0, SL, VT);
22980b57cec5SDimitry Andric   const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
22990b57cec5SDimitry Andric 
23000b57cec5SDimitry Andric   SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, VT, One, X);
23010b57cec5SDimitry Andric 
23020b57cec5SDimitry Andric   EVT SetCCVT =
23030b57cec5SDimitry Andric       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
23040b57cec5SDimitry Andric 
23050b57cec5SDimitry Andric   SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
23060b57cec5SDimitry Andric 
23070b57cec5SDimitry Andric   SDValue Sel = DAG.getNode(ISD::SELECT, SL, VT, Cmp, SignOne, Zero);
23080b57cec5SDimitry Andric 
23090b57cec5SDimitry Andric   return DAG.getNode(ISD::FADD, SL, VT, T, Sel);
23100b57cec5SDimitry Andric }
23110b57cec5SDimitry Andric 
23120b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
23130b57cec5SDimitry Andric   SDLoc SL(Op);
23140b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
23150b57cec5SDimitry Andric 
23160b57cec5SDimitry Andric   // result = trunc(src);
23170b57cec5SDimitry Andric   // if (src < 0.0 && src != result)
23180b57cec5SDimitry Andric   //   result += -1.0.
23190b57cec5SDimitry Andric 
23200b57cec5SDimitry Andric   SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
23210b57cec5SDimitry Andric 
23220b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
23230b57cec5SDimitry Andric   const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
23240b57cec5SDimitry Andric 
23250b57cec5SDimitry Andric   EVT SetCCVT =
23260b57cec5SDimitry Andric       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
23270b57cec5SDimitry Andric 
23280b57cec5SDimitry Andric   SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
23290b57cec5SDimitry Andric   SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
23300b57cec5SDimitry Andric   SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
23310b57cec5SDimitry Andric 
23320b57cec5SDimitry Andric   SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
23330b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
23340b57cec5SDimitry Andric   return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
23350b57cec5SDimitry Andric }
23360b57cec5SDimitry Andric 
23370b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG,
23380b57cec5SDimitry Andric                                         double Log2BaseInverted) const {
23390b57cec5SDimitry Andric   EVT VT = Op.getValueType();
23400b57cec5SDimitry Andric 
23410b57cec5SDimitry Andric   SDLoc SL(Op);
23420b57cec5SDimitry Andric   SDValue Operand = Op.getOperand(0);
23430b57cec5SDimitry Andric   SDValue Log2Operand = DAG.getNode(ISD::FLOG2, SL, VT, Operand);
23440b57cec5SDimitry Andric   SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT);
23450b57cec5SDimitry Andric 
23460b57cec5SDimitry Andric   return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand);
23470b57cec5SDimitry Andric }
23480b57cec5SDimitry Andric 
23490b57cec5SDimitry Andric // exp2(M_LOG2E_F * f);
23500b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
23510b57cec5SDimitry Andric   EVT VT = Op.getValueType();
23520b57cec5SDimitry Andric   SDLoc SL(Op);
23530b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
23540b57cec5SDimitry Andric 
23558bcb0991SDimitry Andric   const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT);
23560b57cec5SDimitry Andric   SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags());
23570b57cec5SDimitry Andric   return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags());
23580b57cec5SDimitry Andric }
23590b57cec5SDimitry Andric 
23600b57cec5SDimitry Andric static bool isCtlzOpc(unsigned Opc) {
23610b57cec5SDimitry Andric   return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
23620b57cec5SDimitry Andric }
23630b57cec5SDimitry Andric 
23640b57cec5SDimitry Andric static bool isCttzOpc(unsigned Opc) {
23650b57cec5SDimitry Andric   return Opc == ISD::CTTZ || Opc == ISD::CTTZ_ZERO_UNDEF;
23660b57cec5SDimitry Andric }
23670b57cec5SDimitry Andric 
23680b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const {
23690b57cec5SDimitry Andric   SDLoc SL(Op);
23700b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
23710b57cec5SDimitry Andric 
2372349cc55cSDimitry Andric   assert(isCtlzOpc(Op.getOpcode()) || isCttzOpc(Op.getOpcode()));
2373349cc55cSDimitry Andric   bool Ctlz = isCtlzOpc(Op.getOpcode());
2374349cc55cSDimitry Andric   unsigned NewOpc = Ctlz ? AMDGPUISD::FFBH_U32 : AMDGPUISD::FFBL_B32;
23750b57cec5SDimitry Andric 
2376349cc55cSDimitry Andric   bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ||
2377349cc55cSDimitry Andric                    Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF;
23780b57cec5SDimitry Andric 
2379349cc55cSDimitry Andric   if (Src.getValueType() == MVT::i32) {
2380349cc55cSDimitry Andric     // (ctlz hi:lo) -> (umin (ffbh src), 32)
2381349cc55cSDimitry Andric     // (cttz hi:lo) -> (umin (ffbl src), 32)
2382349cc55cSDimitry Andric     // (ctlz_zero_undef src) -> (ffbh src)
2383349cc55cSDimitry Andric     // (cttz_zero_undef src) -> (ffbl src)
2384349cc55cSDimitry Andric     SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
2385349cc55cSDimitry Andric     if (!ZeroUndef) {
2386349cc55cSDimitry Andric       const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
2387349cc55cSDimitry Andric       NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const32);
2388349cc55cSDimitry Andric     }
2389349cc55cSDimitry Andric     return NewOpr;
23900b57cec5SDimitry Andric   }
23910b57cec5SDimitry Andric 
2392349cc55cSDimitry Andric   SDValue Lo, Hi;
2393349cc55cSDimitry Andric   std::tie(Lo, Hi) = split64BitValue(Src, DAG);
2394349cc55cSDimitry Andric 
2395349cc55cSDimitry Andric   SDValue OprLo = DAG.getNode(NewOpc, SL, MVT::i32, Lo);
2396349cc55cSDimitry Andric   SDValue OprHi = DAG.getNode(NewOpc, SL, MVT::i32, Hi);
2397349cc55cSDimitry Andric 
2398349cc55cSDimitry Andric   // (ctlz hi:lo) -> (umin3 (ffbh hi), (uaddsat (ffbh lo), 32), 64)
2399349cc55cSDimitry Andric   // (cttz hi:lo) -> (umin3 (uaddsat (ffbl hi), 32), (ffbl lo), 64)
2400349cc55cSDimitry Andric   // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32))
2401349cc55cSDimitry Andric   // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
2402349cc55cSDimitry Andric 
2403349cc55cSDimitry Andric   unsigned AddOpc = ZeroUndef ? ISD::ADD : ISD::UADDSAT;
2404349cc55cSDimitry Andric   const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
2405349cc55cSDimitry Andric   if (Ctlz)
2406349cc55cSDimitry Andric     OprLo = DAG.getNode(AddOpc, SL, MVT::i32, OprLo, Const32);
2407349cc55cSDimitry Andric   else
2408349cc55cSDimitry Andric     OprHi = DAG.getNode(AddOpc, SL, MVT::i32, OprHi, Const32);
2409349cc55cSDimitry Andric 
2410349cc55cSDimitry Andric   SDValue NewOpr;
2411349cc55cSDimitry Andric   NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, OprLo, OprHi);
24120b57cec5SDimitry Andric   if (!ZeroUndef) {
2413349cc55cSDimitry Andric     const SDValue Const64 = DAG.getConstant(64, SL, MVT::i32);
2414349cc55cSDimitry Andric     NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const64);
24150b57cec5SDimitry Andric   }
24160b57cec5SDimitry Andric 
24170b57cec5SDimitry Andric   return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
24180b57cec5SDimitry Andric }
24190b57cec5SDimitry Andric 
24200b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
24210b57cec5SDimitry Andric                                                bool Signed) const {
2422349cc55cSDimitry Andric   // The regular method converting a 64-bit integer to float roughly consists of
2423349cc55cSDimitry Andric   // 2 steps: normalization and rounding. In fact, after normalization, the
2424349cc55cSDimitry Andric   // conversion from a 64-bit integer to a float is essentially the same as the
2425349cc55cSDimitry Andric   // one from a 32-bit integer. The only difference is that it has more
2426349cc55cSDimitry Andric   // trailing bits to be rounded. To leverage the native 32-bit conversion, a
2427349cc55cSDimitry Andric   // 64-bit integer could be preprocessed and fit into a 32-bit integer then
2428349cc55cSDimitry Andric   // converted into the correct float number. The basic steps for the unsigned
2429349cc55cSDimitry Andric   // conversion are illustrated in the following pseudo code:
2430349cc55cSDimitry Andric   //
2431349cc55cSDimitry Andric   // f32 uitofp(i64 u) {
2432349cc55cSDimitry Andric   //   i32 hi, lo = split(u);
2433349cc55cSDimitry Andric   //   // Only count the leading zeros in hi as we have native support of the
2434349cc55cSDimitry Andric   //   // conversion from i32 to f32. If hi is all 0s, the conversion is
2435349cc55cSDimitry Andric   //   // reduced to a 32-bit one automatically.
2436349cc55cSDimitry Andric   //   i32 shamt = clz(hi); // Return 32 if hi is all 0s.
2437349cc55cSDimitry Andric   //   u <<= shamt;
2438349cc55cSDimitry Andric   //   hi, lo = split(u);
2439349cc55cSDimitry Andric   //   hi |= (lo != 0) ? 1 : 0; // Adjust rounding bit in hi based on lo.
2440349cc55cSDimitry Andric   //   // convert it as a 32-bit integer and scale the result back.
2441349cc55cSDimitry Andric   //   return uitofp(hi) * 2^(32 - shamt);
24420b57cec5SDimitry Andric   // }
2443349cc55cSDimitry Andric   //
2444349cc55cSDimitry Andric   // The signed one follows the same principle but uses 'ffbh_i32' to count its
2445349cc55cSDimitry Andric   // sign bits instead. If 'ffbh_i32' is not available, its absolute value is
2446349cc55cSDimitry Andric   // converted instead followed by negation based its sign bit.
24470b57cec5SDimitry Andric 
24480b57cec5SDimitry Andric   SDLoc SL(Op);
24490b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
24500b57cec5SDimitry Andric 
2451349cc55cSDimitry Andric   SDValue Lo, Hi;
2452349cc55cSDimitry Andric   std::tie(Lo, Hi) = split64BitValue(Src, DAG);
2453349cc55cSDimitry Andric   SDValue Sign;
2454349cc55cSDimitry Andric   SDValue ShAmt;
2455349cc55cSDimitry Andric   if (Signed && Subtarget->isGCN()) {
2456349cc55cSDimitry Andric     // We also need to consider the sign bit in Lo if Hi has just sign bits,
2457349cc55cSDimitry Andric     // i.e. Hi is 0 or -1. However, that only needs to take the MSB into
2458349cc55cSDimitry Andric     // account. That is, the maximal shift is
2459349cc55cSDimitry Andric     // - 32 if Lo and Hi have opposite signs;
2460349cc55cSDimitry Andric     // - 33 if Lo and Hi have the same sign.
2461349cc55cSDimitry Andric     //
2462349cc55cSDimitry Andric     // Or, MaxShAmt = 33 + OppositeSign, where
2463349cc55cSDimitry Andric     //
2464349cc55cSDimitry Andric     // OppositeSign is defined as ((Lo ^ Hi) >> 31), which is
2465349cc55cSDimitry Andric     // - -1 if Lo and Hi have opposite signs; and
2466349cc55cSDimitry Andric     // -  0 otherwise.
2467349cc55cSDimitry Andric     //
2468349cc55cSDimitry Andric     // All in all, ShAmt is calculated as
2469349cc55cSDimitry Andric     //
2470349cc55cSDimitry Andric     //  umin(sffbh(Hi), 33 + (Lo^Hi)>>31) - 1.
2471349cc55cSDimitry Andric     //
2472349cc55cSDimitry Andric     // or
2473349cc55cSDimitry Andric     //
2474349cc55cSDimitry Andric     //  umin(sffbh(Hi) - 1, 32 + (Lo^Hi)>>31).
2475349cc55cSDimitry Andric     //
2476349cc55cSDimitry Andric     // to reduce the critical path.
2477349cc55cSDimitry Andric     SDValue OppositeSign = DAG.getNode(
2478349cc55cSDimitry Andric         ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi),
2479349cc55cSDimitry Andric         DAG.getConstant(31, SL, MVT::i32));
2480349cc55cSDimitry Andric     SDValue MaxShAmt =
2481349cc55cSDimitry Andric         DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
2482349cc55cSDimitry Andric                     OppositeSign);
2483349cc55cSDimitry Andric     // Count the leading sign bits.
2484349cc55cSDimitry Andric     ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi);
2485349cc55cSDimitry Andric     // Different from unsigned conversion, the shift should be one bit less to
2486349cc55cSDimitry Andric     // preserve the sign bit.
2487349cc55cSDimitry Andric     ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt,
2488349cc55cSDimitry Andric                         DAG.getConstant(1, SL, MVT::i32));
2489349cc55cSDimitry Andric     ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt);
2490349cc55cSDimitry Andric   } else {
24910b57cec5SDimitry Andric     if (Signed) {
2492349cc55cSDimitry Andric       // Without 'ffbh_i32', only leading zeros could be counted. Take the
2493349cc55cSDimitry Andric       // absolute value first.
2494349cc55cSDimitry Andric       Sign = DAG.getNode(ISD::SRA, SL, MVT::i64, Src,
2495349cc55cSDimitry Andric                          DAG.getConstant(63, SL, MVT::i64));
2496349cc55cSDimitry Andric       SDValue Abs =
2497349cc55cSDimitry Andric           DAG.getNode(ISD::XOR, SL, MVT::i64,
2498349cc55cSDimitry Andric                       DAG.getNode(ISD::ADD, SL, MVT::i64, Src, Sign), Sign);
2499349cc55cSDimitry Andric       std::tie(Lo, Hi) = split64BitValue(Abs, DAG);
25000b57cec5SDimitry Andric     }
2501349cc55cSDimitry Andric     // Count the leading zeros.
2502349cc55cSDimitry Andric     ShAmt = DAG.getNode(ISD::CTLZ, SL, MVT::i32, Hi);
2503349cc55cSDimitry Andric     // The shift amount for signed integers is [0, 32].
2504349cc55cSDimitry Andric   }
2505349cc55cSDimitry Andric   // Normalize the given 64-bit integer.
2506349cc55cSDimitry Andric   SDValue Norm = DAG.getNode(ISD::SHL, SL, MVT::i64, Src, ShAmt);
2507349cc55cSDimitry Andric   // Split it again.
2508349cc55cSDimitry Andric   std::tie(Lo, Hi) = split64BitValue(Norm, DAG);
2509349cc55cSDimitry Andric   // Calculate the adjust bit for rounding.
2510349cc55cSDimitry Andric   // (lo != 0) ? 1 : 0 => (lo >= 1) ? 1 : 0 => umin(1, lo)
2511349cc55cSDimitry Andric   SDValue Adjust = DAG.getNode(ISD::UMIN, SL, MVT::i32,
2512349cc55cSDimitry Andric                                DAG.getConstant(1, SL, MVT::i32), Lo);
2513349cc55cSDimitry Andric   // Get the 32-bit normalized integer.
2514349cc55cSDimitry Andric   Norm = DAG.getNode(ISD::OR, SL, MVT::i32, Hi, Adjust);
2515349cc55cSDimitry Andric   // Convert the normalized 32-bit integer into f32.
2516349cc55cSDimitry Andric   unsigned Opc =
2517349cc55cSDimitry Andric       (Signed && Subtarget->isGCN()) ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
2518349cc55cSDimitry Andric   SDValue FVal = DAG.getNode(Opc, SL, MVT::f32, Norm);
25190b57cec5SDimitry Andric 
2520349cc55cSDimitry Andric   // Finally, need to scale back the converted floating number as the original
2521349cc55cSDimitry Andric   // 64-bit integer is converted as a 32-bit one.
2522349cc55cSDimitry Andric   ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
2523349cc55cSDimitry Andric                       ShAmt);
2524349cc55cSDimitry Andric   // On GCN, use LDEXP directly.
2525349cc55cSDimitry Andric   if (Subtarget->isGCN())
2526349cc55cSDimitry Andric     return DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f32, FVal, ShAmt);
25270b57cec5SDimitry Andric 
2528349cc55cSDimitry Andric   // Otherwise, align 'ShAmt' to the exponent part and add it into the exponent
2529349cc55cSDimitry Andric   // part directly to emulate the multiplication of 2^ShAmt. That 8-bit
2530349cc55cSDimitry Andric   // exponent is enough to avoid overflowing into the sign bit.
2531349cc55cSDimitry Andric   SDValue Exp = DAG.getNode(ISD::SHL, SL, MVT::i32, ShAmt,
2532349cc55cSDimitry Andric                             DAG.getConstant(23, SL, MVT::i32));
2533349cc55cSDimitry Andric   SDValue IVal =
2534349cc55cSDimitry Andric       DAG.getNode(ISD::ADD, SL, MVT::i32,
2535349cc55cSDimitry Andric                   DAG.getNode(ISD::BITCAST, SL, MVT::i32, FVal), Exp);
2536349cc55cSDimitry Andric   if (Signed) {
2537349cc55cSDimitry Andric     // Set the sign bit.
2538349cc55cSDimitry Andric     Sign = DAG.getNode(ISD::SHL, SL, MVT::i32,
2539349cc55cSDimitry Andric                        DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Sign),
2540349cc55cSDimitry Andric                        DAG.getConstant(31, SL, MVT::i32));
2541349cc55cSDimitry Andric     IVal = DAG.getNode(ISD::OR, SL, MVT::i32, IVal, Sign);
2542349cc55cSDimitry Andric   }
2543349cc55cSDimitry Andric   return DAG.getNode(ISD::BITCAST, SL, MVT::f32, IVal);
25440b57cec5SDimitry Andric }
25450b57cec5SDimitry Andric 
25460b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
25470b57cec5SDimitry Andric                                                bool Signed) const {
25480b57cec5SDimitry Andric   SDLoc SL(Op);
25490b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
25500b57cec5SDimitry Andric 
2551349cc55cSDimitry Andric   SDValue Lo, Hi;
2552349cc55cSDimitry Andric   std::tie(Lo, Hi) = split64BitValue(Src, DAG);
25530b57cec5SDimitry Andric 
25540b57cec5SDimitry Andric   SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
25550b57cec5SDimitry Andric                               SL, MVT::f64, Hi);
25560b57cec5SDimitry Andric 
25570b57cec5SDimitry Andric   SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
25580b57cec5SDimitry Andric 
25590b57cec5SDimitry Andric   SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
25600b57cec5SDimitry Andric                               DAG.getConstant(32, SL, MVT::i32));
25610b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
25620b57cec5SDimitry Andric   return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
25630b57cec5SDimitry Andric }
25640b57cec5SDimitry Andric 
25650b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
25660b57cec5SDimitry Andric                                                SelectionDAG &DAG) const {
25670b57cec5SDimitry Andric   // TODO: Factor out code common with LowerSINT_TO_FP.
25680b57cec5SDimitry Andric   EVT DestVT = Op.getValueType();
2569480093f4SDimitry Andric   SDValue Src = Op.getOperand(0);
2570480093f4SDimitry Andric   EVT SrcVT = Src.getValueType();
2571480093f4SDimitry Andric 
2572480093f4SDimitry Andric   if (SrcVT == MVT::i16) {
2573480093f4SDimitry Andric     if (DestVT == MVT::f16)
2574480093f4SDimitry Andric       return Op;
2575480093f4SDimitry Andric     SDLoc DL(Op);
2576480093f4SDimitry Andric 
2577480093f4SDimitry Andric     // Promote src to i32
2578480093f4SDimitry Andric     SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src);
2579480093f4SDimitry Andric     return DAG.getNode(ISD::UINT_TO_FP, DL, DestVT, Ext);
2580480093f4SDimitry Andric   }
2581480093f4SDimitry Andric 
2582480093f4SDimitry Andric   assert(SrcVT == MVT::i64 && "operation should be legal");
2583480093f4SDimitry Andric 
25840b57cec5SDimitry Andric   if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
25850b57cec5SDimitry Andric     SDLoc DL(Op);
25860b57cec5SDimitry Andric 
25870b57cec5SDimitry Andric     SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
25880b57cec5SDimitry Andric     SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op));
25890b57cec5SDimitry Andric     SDValue FPRound =
25900b57cec5SDimitry Andric         DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
25910b57cec5SDimitry Andric 
25920b57cec5SDimitry Andric     return FPRound;
25930b57cec5SDimitry Andric   }
25940b57cec5SDimitry Andric 
25950b57cec5SDimitry Andric   if (DestVT == MVT::f32)
25960b57cec5SDimitry Andric     return LowerINT_TO_FP32(Op, DAG, false);
25970b57cec5SDimitry Andric 
25980b57cec5SDimitry Andric   assert(DestVT == MVT::f64);
25990b57cec5SDimitry Andric   return LowerINT_TO_FP64(Op, DAG, false);
26000b57cec5SDimitry Andric }
26010b57cec5SDimitry Andric 
26020b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
26030b57cec5SDimitry Andric                                               SelectionDAG &DAG) const {
2604480093f4SDimitry Andric   EVT DestVT = Op.getValueType();
2605480093f4SDimitry Andric 
2606480093f4SDimitry Andric   SDValue Src = Op.getOperand(0);
2607480093f4SDimitry Andric   EVT SrcVT = Src.getValueType();
2608480093f4SDimitry Andric 
2609480093f4SDimitry Andric   if (SrcVT == MVT::i16) {
2610480093f4SDimitry Andric     if (DestVT == MVT::f16)
2611480093f4SDimitry Andric       return Op;
2612480093f4SDimitry Andric 
2613480093f4SDimitry Andric     SDLoc DL(Op);
2614480093f4SDimitry Andric     // Promote src to i32
2615480093f4SDimitry Andric     SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, Src);
2616480093f4SDimitry Andric     return DAG.getNode(ISD::SINT_TO_FP, DL, DestVT, Ext);
2617480093f4SDimitry Andric   }
2618480093f4SDimitry Andric 
2619480093f4SDimitry Andric   assert(SrcVT == MVT::i64 && "operation should be legal");
26200b57cec5SDimitry Andric 
26210b57cec5SDimitry Andric   // TODO: Factor out code common with LowerUINT_TO_FP.
26220b57cec5SDimitry Andric 
26230b57cec5SDimitry Andric   if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
26240b57cec5SDimitry Andric     SDLoc DL(Op);
26250b57cec5SDimitry Andric     SDValue Src = Op.getOperand(0);
26260b57cec5SDimitry Andric 
26270b57cec5SDimitry Andric     SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
26280b57cec5SDimitry Andric     SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op));
26290b57cec5SDimitry Andric     SDValue FPRound =
26300b57cec5SDimitry Andric         DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
26310b57cec5SDimitry Andric 
26320b57cec5SDimitry Andric     return FPRound;
26330b57cec5SDimitry Andric   }
26340b57cec5SDimitry Andric 
26350b57cec5SDimitry Andric   if (DestVT == MVT::f32)
26360b57cec5SDimitry Andric     return LowerINT_TO_FP32(Op, DAG, true);
26370b57cec5SDimitry Andric 
26380b57cec5SDimitry Andric   assert(DestVT == MVT::f64);
26390b57cec5SDimitry Andric   return LowerINT_TO_FP64(Op, DAG, true);
26400b57cec5SDimitry Andric }
26410b57cec5SDimitry Andric 
2642fe6060f1SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG,
26430b57cec5SDimitry Andric                                                bool Signed) const {
26440b57cec5SDimitry Andric   SDLoc SL(Op);
26450b57cec5SDimitry Andric 
26460b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
2647fe6060f1SDimitry Andric   EVT SrcVT = Src.getValueType();
26480b57cec5SDimitry Andric 
2649fe6060f1SDimitry Andric   assert(SrcVT == MVT::f32 || SrcVT == MVT::f64);
26500b57cec5SDimitry Andric 
2651fe6060f1SDimitry Andric   // The basic idea of converting a floating point number into a pair of 32-bit
2652fe6060f1SDimitry Andric   // integers is illustrated as follows:
2653fe6060f1SDimitry Andric   //
2654fe6060f1SDimitry Andric   //     tf := trunc(val);
2655fe6060f1SDimitry Andric   //    hif := floor(tf * 2^-32);
2656fe6060f1SDimitry Andric   //    lof := tf - hif * 2^32; // lof is always positive due to floor.
2657fe6060f1SDimitry Andric   //     hi := fptoi(hif);
2658fe6060f1SDimitry Andric   //     lo := fptoi(lof);
2659fe6060f1SDimitry Andric   //
2660fe6060f1SDimitry Andric   SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, SrcVT, Src);
2661fe6060f1SDimitry Andric   SDValue Sign;
2662fe6060f1SDimitry Andric   if (Signed && SrcVT == MVT::f32) {
2663fe6060f1SDimitry Andric     // However, a 32-bit floating point number has only 23 bits mantissa and
2664fe6060f1SDimitry Andric     // it's not enough to hold all the significant bits of `lof` if val is
2665fe6060f1SDimitry Andric     // negative. To avoid the loss of precision, We need to take the absolute
2666fe6060f1SDimitry Andric     // value after truncating and flip the result back based on the original
2667fe6060f1SDimitry Andric     // signedness.
2668fe6060f1SDimitry Andric     Sign = DAG.getNode(ISD::SRA, SL, MVT::i32,
2669fe6060f1SDimitry Andric                        DAG.getNode(ISD::BITCAST, SL, MVT::i32, Trunc),
2670fe6060f1SDimitry Andric                        DAG.getConstant(31, SL, MVT::i32));
2671fe6060f1SDimitry Andric     Trunc = DAG.getNode(ISD::FABS, SL, SrcVT, Trunc);
2672fe6060f1SDimitry Andric   }
2673fe6060f1SDimitry Andric 
2674fe6060f1SDimitry Andric   SDValue K0, K1;
2675fe6060f1SDimitry Andric   if (SrcVT == MVT::f64) {
2676fe6060f1SDimitry Andric     K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(/*2^-32*/ 0x3df0000000000000)),
2677fe6060f1SDimitry Andric                            SL, SrcVT);
2678fe6060f1SDimitry Andric     K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(/*-2^32*/ 0xc1f0000000000000)),
2679fe6060f1SDimitry Andric                            SL, SrcVT);
2680fe6060f1SDimitry Andric   } else {
2681fe6060f1SDimitry Andric     K0 = DAG.getConstantFP(BitsToFloat(UINT32_C(/*2^-32*/ 0x2f800000)), SL,
2682fe6060f1SDimitry Andric                            SrcVT);
2683fe6060f1SDimitry Andric     K1 = DAG.getConstantFP(BitsToFloat(UINT32_C(/*-2^32*/ 0xcf800000)), SL,
2684fe6060f1SDimitry Andric                            SrcVT);
2685fe6060f1SDimitry Andric   }
26860b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
2687fe6060f1SDimitry Andric   SDValue Mul = DAG.getNode(ISD::FMUL, SL, SrcVT, Trunc, K0);
26880b57cec5SDimitry Andric 
2689fe6060f1SDimitry Andric   SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, SrcVT, Mul);
26900b57cec5SDimitry Andric 
2691fe6060f1SDimitry Andric   SDValue Fma = DAG.getNode(ISD::FMA, SL, SrcVT, FloorMul, K1, Trunc);
26920b57cec5SDimitry Andric 
2693fe6060f1SDimitry Andric   SDValue Hi = DAG.getNode((Signed && SrcVT == MVT::f64) ? ISD::FP_TO_SINT
2694fe6060f1SDimitry Andric                                                          : ISD::FP_TO_UINT,
2695fe6060f1SDimitry Andric                            SL, MVT::i32, FloorMul);
26960b57cec5SDimitry Andric   SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
26970b57cec5SDimitry Andric 
2698fe6060f1SDimitry Andric   SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i64,
2699fe6060f1SDimitry Andric                                DAG.getBuildVector(MVT::v2i32, SL, {Lo, Hi}));
27000b57cec5SDimitry Andric 
2701fe6060f1SDimitry Andric   if (Signed && SrcVT == MVT::f32) {
2702fe6060f1SDimitry Andric     assert(Sign);
2703fe6060f1SDimitry Andric     // Flip the result based on the signedness, which is either all 0s or 1s.
2704fe6060f1SDimitry Andric     Sign = DAG.getNode(ISD::BITCAST, SL, MVT::i64,
2705fe6060f1SDimitry Andric                        DAG.getBuildVector(MVT::v2i32, SL, {Sign, Sign}));
2706fe6060f1SDimitry Andric     // r := xor(r, sign) - sign;
2707fe6060f1SDimitry Andric     Result =
2708fe6060f1SDimitry Andric         DAG.getNode(ISD::SUB, SL, MVT::i64,
2709fe6060f1SDimitry Andric                     DAG.getNode(ISD::XOR, SL, MVT::i64, Result, Sign), Sign);
2710fe6060f1SDimitry Andric   }
2711fe6060f1SDimitry Andric 
2712fe6060f1SDimitry Andric   return Result;
27130b57cec5SDimitry Andric }
27140b57cec5SDimitry Andric 
27150b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const {
27160b57cec5SDimitry Andric   SDLoc DL(Op);
27170b57cec5SDimitry Andric   SDValue N0 = Op.getOperand(0);
27180b57cec5SDimitry Andric 
27190b57cec5SDimitry Andric   // Convert to target node to get known bits
27200b57cec5SDimitry Andric   if (N0.getValueType() == MVT::f32)
27210b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0);
27220b57cec5SDimitry Andric 
27230b57cec5SDimitry Andric   if (getTargetMachine().Options.UnsafeFPMath) {
27240b57cec5SDimitry Andric     // There is a generic expand for FP_TO_FP16 with unsafe fast math.
27250b57cec5SDimitry Andric     return SDValue();
27260b57cec5SDimitry Andric   }
27270b57cec5SDimitry Andric 
27280b57cec5SDimitry Andric   assert(N0.getSimpleValueType() == MVT::f64);
27290b57cec5SDimitry Andric 
27300b57cec5SDimitry Andric   // f64 -> f16 conversion using round-to-nearest-even rounding mode.
27310b57cec5SDimitry Andric   const unsigned ExpMask = 0x7ff;
27320b57cec5SDimitry Andric   const unsigned ExpBiasf64 = 1023;
27330b57cec5SDimitry Andric   const unsigned ExpBiasf16 = 15;
27340b57cec5SDimitry Andric   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
27350b57cec5SDimitry Andric   SDValue One = DAG.getConstant(1, DL, MVT::i32);
27360b57cec5SDimitry Andric   SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0);
27370b57cec5SDimitry Andric   SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U,
27380b57cec5SDimitry Andric                            DAG.getConstant(32, DL, MVT::i64));
27390b57cec5SDimitry Andric   UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32);
27400b57cec5SDimitry Andric   U = DAG.getZExtOrTrunc(U, DL, MVT::i32);
27410b57cec5SDimitry Andric   SDValue E = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
27420b57cec5SDimitry Andric                           DAG.getConstant(20, DL, MVT::i64));
27430b57cec5SDimitry Andric   E = DAG.getNode(ISD::AND, DL, MVT::i32, E,
27440b57cec5SDimitry Andric                   DAG.getConstant(ExpMask, DL, MVT::i32));
27450b57cec5SDimitry Andric   // Subtract the fp64 exponent bias (1023) to get the real exponent and
27460b57cec5SDimitry Andric   // add the f16 bias (15) to get the biased exponent for the f16 format.
27470b57cec5SDimitry Andric   E = DAG.getNode(ISD::ADD, DL, MVT::i32, E,
27480b57cec5SDimitry Andric                   DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32));
27490b57cec5SDimitry Andric 
27500b57cec5SDimitry Andric   SDValue M = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
27510b57cec5SDimitry Andric                           DAG.getConstant(8, DL, MVT::i32));
27520b57cec5SDimitry Andric   M = DAG.getNode(ISD::AND, DL, MVT::i32, M,
27530b57cec5SDimitry Andric                   DAG.getConstant(0xffe, DL, MVT::i32));
27540b57cec5SDimitry Andric 
27550b57cec5SDimitry Andric   SDValue MaskedSig = DAG.getNode(ISD::AND, DL, MVT::i32, UH,
27560b57cec5SDimitry Andric                                   DAG.getConstant(0x1ff, DL, MVT::i32));
27570b57cec5SDimitry Andric   MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U);
27580b57cec5SDimitry Andric 
27590b57cec5SDimitry Andric   SDValue Lo40Set = DAG.getSelectCC(DL, MaskedSig, Zero, Zero, One, ISD::SETEQ);
27600b57cec5SDimitry Andric   M = DAG.getNode(ISD::OR, DL, MVT::i32, M, Lo40Set);
27610b57cec5SDimitry Andric 
27620b57cec5SDimitry Andric   // (M != 0 ? 0x0200 : 0) | 0x7c00;
27630b57cec5SDimitry Andric   SDValue I = DAG.getNode(ISD::OR, DL, MVT::i32,
27640b57cec5SDimitry Andric       DAG.getSelectCC(DL, M, Zero, DAG.getConstant(0x0200, DL, MVT::i32),
27650b57cec5SDimitry Andric                       Zero, ISD::SETNE), DAG.getConstant(0x7c00, DL, MVT::i32));
27660b57cec5SDimitry Andric 
27670b57cec5SDimitry Andric   // N = M | (E << 12);
27680b57cec5SDimitry Andric   SDValue N = DAG.getNode(ISD::OR, DL, MVT::i32, M,
27690b57cec5SDimitry Andric       DAG.getNode(ISD::SHL, DL, MVT::i32, E,
27700b57cec5SDimitry Andric                   DAG.getConstant(12, DL, MVT::i32)));
27710b57cec5SDimitry Andric 
27720b57cec5SDimitry Andric   // B = clamp(1-E, 0, 13);
27730b57cec5SDimitry Andric   SDValue OneSubExp = DAG.getNode(ISD::SUB, DL, MVT::i32,
27740b57cec5SDimitry Andric                                   One, E);
27750b57cec5SDimitry Andric   SDValue B = DAG.getNode(ISD::SMAX, DL, MVT::i32, OneSubExp, Zero);
27760b57cec5SDimitry Andric   B = DAG.getNode(ISD::SMIN, DL, MVT::i32, B,
27770b57cec5SDimitry Andric                   DAG.getConstant(13, DL, MVT::i32));
27780b57cec5SDimitry Andric 
27790b57cec5SDimitry Andric   SDValue SigSetHigh = DAG.getNode(ISD::OR, DL, MVT::i32, M,
27800b57cec5SDimitry Andric                                    DAG.getConstant(0x1000, DL, MVT::i32));
27810b57cec5SDimitry Andric 
27820b57cec5SDimitry Andric   SDValue D = DAG.getNode(ISD::SRL, DL, MVT::i32, SigSetHigh, B);
27830b57cec5SDimitry Andric   SDValue D0 = DAG.getNode(ISD::SHL, DL, MVT::i32, D, B);
27840b57cec5SDimitry Andric   SDValue D1 = DAG.getSelectCC(DL, D0, SigSetHigh, One, Zero, ISD::SETNE);
27850b57cec5SDimitry Andric   D = DAG.getNode(ISD::OR, DL, MVT::i32, D, D1);
27860b57cec5SDimitry Andric 
27870b57cec5SDimitry Andric   SDValue V = DAG.getSelectCC(DL, E, One, D, N, ISD::SETLT);
27880b57cec5SDimitry Andric   SDValue VLow3 = DAG.getNode(ISD::AND, DL, MVT::i32, V,
27890b57cec5SDimitry Andric                               DAG.getConstant(0x7, DL, MVT::i32));
27900b57cec5SDimitry Andric   V = DAG.getNode(ISD::SRL, DL, MVT::i32, V,
27910b57cec5SDimitry Andric                   DAG.getConstant(2, DL, MVT::i32));
27920b57cec5SDimitry Andric   SDValue V0 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(3, DL, MVT::i32),
27930b57cec5SDimitry Andric                                One, Zero, ISD::SETEQ);
27940b57cec5SDimitry Andric   SDValue V1 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(5, DL, MVT::i32),
27950b57cec5SDimitry Andric                                One, Zero, ISD::SETGT);
27960b57cec5SDimitry Andric   V1 = DAG.getNode(ISD::OR, DL, MVT::i32, V0, V1);
27970b57cec5SDimitry Andric   V = DAG.getNode(ISD::ADD, DL, MVT::i32, V, V1);
27980b57cec5SDimitry Andric 
27990b57cec5SDimitry Andric   V = DAG.getSelectCC(DL, E, DAG.getConstant(30, DL, MVT::i32),
28000b57cec5SDimitry Andric                       DAG.getConstant(0x7c00, DL, MVT::i32), V, ISD::SETGT);
28010b57cec5SDimitry Andric   V = DAG.getSelectCC(DL, E, DAG.getConstant(1039, DL, MVT::i32),
28020b57cec5SDimitry Andric                       I, V, ISD::SETEQ);
28030b57cec5SDimitry Andric 
28040b57cec5SDimitry Andric   // Extract the sign bit.
28050b57cec5SDimitry Andric   SDValue Sign = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
28060b57cec5SDimitry Andric                             DAG.getConstant(16, DL, MVT::i32));
28070b57cec5SDimitry Andric   Sign = DAG.getNode(ISD::AND, DL, MVT::i32, Sign,
28080b57cec5SDimitry Andric                      DAG.getConstant(0x8000, DL, MVT::i32));
28090b57cec5SDimitry Andric 
28100b57cec5SDimitry Andric   V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V);
28110b57cec5SDimitry Andric   return DAG.getZExtOrTrunc(V, DL, Op.getValueType());
28120b57cec5SDimitry Andric }
28130b57cec5SDimitry Andric 
2814fe6060f1SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_INT(SDValue Op,
28150b57cec5SDimitry Andric                                              SelectionDAG &DAG) const {
28160b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
2817fe6060f1SDimitry Andric   unsigned OpOpcode = Op.getOpcode();
28180b57cec5SDimitry Andric   EVT SrcVT = Src.getValueType();
2819fe6060f1SDimitry Andric   EVT DestVT = Op.getValueType();
2820fe6060f1SDimitry Andric 
2821fe6060f1SDimitry Andric   // Will be selected natively
2822fe6060f1SDimitry Andric   if (SrcVT == MVT::f16 && DestVT == MVT::i16)
2823fe6060f1SDimitry Andric     return Op;
2824fe6060f1SDimitry Andric 
2825fe6060f1SDimitry Andric   // Promote i16 to i32
2826fe6060f1SDimitry Andric   if (DestVT == MVT::i16 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
2827fe6060f1SDimitry Andric     SDLoc DL(Op);
2828fe6060f1SDimitry Andric 
2829fe6060f1SDimitry Andric     SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src);
2830fe6060f1SDimitry Andric     return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToInt32);
2831fe6060f1SDimitry Andric   }
2832fe6060f1SDimitry Andric 
2833e8d8bef9SDimitry Andric   if (SrcVT == MVT::f16 ||
2834e8d8bef9SDimitry Andric       (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) {
28350b57cec5SDimitry Andric     SDLoc DL(Op);
28360b57cec5SDimitry Andric 
2837fe6060f1SDimitry Andric     SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src);
2838fe6060f1SDimitry Andric     unsigned Ext =
2839fe6060f1SDimitry Andric         OpOpcode == ISD::FP_TO_SINT ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2840fe6060f1SDimitry Andric     return DAG.getNode(Ext, DL, MVT::i64, FpToInt32);
28410b57cec5SDimitry Andric   }
28420b57cec5SDimitry Andric 
2843fe6060f1SDimitry Andric   if (DestVT == MVT::i64 && (SrcVT == MVT::f32 || SrcVT == MVT::f64))
2844fe6060f1SDimitry Andric     return LowerFP_TO_INT64(Op, DAG, OpOpcode == ISD::FP_TO_SINT);
28450b57cec5SDimitry Andric 
28460b57cec5SDimitry Andric   return SDValue();
28470b57cec5SDimitry Andric }
28480b57cec5SDimitry Andric 
28490b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
28500b57cec5SDimitry Andric                                                      SelectionDAG &DAG) const {
28510b57cec5SDimitry Andric   EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
28520b57cec5SDimitry Andric   MVT VT = Op.getSimpleValueType();
28530b57cec5SDimitry Andric   MVT ScalarVT = VT.getScalarType();
28540b57cec5SDimitry Andric 
28550b57cec5SDimitry Andric   assert(VT.isVector());
28560b57cec5SDimitry Andric 
28570b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
28580b57cec5SDimitry Andric   SDLoc DL(Op);
28590b57cec5SDimitry Andric 
28600b57cec5SDimitry Andric   // TODO: Don't scalarize on Evergreen?
28610b57cec5SDimitry Andric   unsigned NElts = VT.getVectorNumElements();
28620b57cec5SDimitry Andric   SmallVector<SDValue, 8> Args;
28630b57cec5SDimitry Andric   DAG.ExtractVectorElements(Src, Args, 0, NElts);
28640b57cec5SDimitry Andric 
28650b57cec5SDimitry Andric   SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
28660b57cec5SDimitry Andric   for (unsigned I = 0; I < NElts; ++I)
28670b57cec5SDimitry Andric     Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
28680b57cec5SDimitry Andric 
28690b57cec5SDimitry Andric   return DAG.getBuildVector(VT, DL, Args);
28700b57cec5SDimitry Andric }
28710b57cec5SDimitry Andric 
28720b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
28730b57cec5SDimitry Andric // Custom DAG optimizations
28740b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
28750b57cec5SDimitry Andric 
28760b57cec5SDimitry Andric static bool isU24(SDValue Op, SelectionDAG &DAG) {
28770b57cec5SDimitry Andric   return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24;
28780b57cec5SDimitry Andric }
28790b57cec5SDimitry Andric 
28800b57cec5SDimitry Andric static bool isI24(SDValue Op, SelectionDAG &DAG) {
28810b57cec5SDimitry Andric   EVT VT = Op.getValueType();
28820b57cec5SDimitry Andric   return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
28830b57cec5SDimitry Andric                                      // as unsigned 24-bit values.
2884349cc55cSDimitry Andric          AMDGPUTargetLowering::numBitsSigned(Op, DAG) <= 24;
28850b57cec5SDimitry Andric }
28860b57cec5SDimitry Andric 
2887fe6060f1SDimitry Andric static SDValue simplifyMul24(SDNode *Node24,
28880b57cec5SDimitry Andric                              TargetLowering::DAGCombinerInfo &DCI) {
28890b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
28905ffd83dbSDimitry Andric   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28918bcb0991SDimitry Andric   bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
28928bcb0991SDimitry Andric 
28938bcb0991SDimitry Andric   SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0);
28948bcb0991SDimitry Andric   SDValue RHS = IsIntrin ? Node24->getOperand(2) : Node24->getOperand(1);
28958bcb0991SDimitry Andric   unsigned NewOpcode = Node24->getOpcode();
28968bcb0991SDimitry Andric   if (IsIntrin) {
28978bcb0991SDimitry Andric     unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue();
2898349cc55cSDimitry Andric     switch (IID) {
2899349cc55cSDimitry Andric     case Intrinsic::amdgcn_mul_i24:
2900349cc55cSDimitry Andric       NewOpcode = AMDGPUISD::MUL_I24;
2901349cc55cSDimitry Andric       break;
2902349cc55cSDimitry Andric     case Intrinsic::amdgcn_mul_u24:
2903349cc55cSDimitry Andric       NewOpcode = AMDGPUISD::MUL_U24;
2904349cc55cSDimitry Andric       break;
2905349cc55cSDimitry Andric     case Intrinsic::amdgcn_mulhi_i24:
2906349cc55cSDimitry Andric       NewOpcode = AMDGPUISD::MULHI_I24;
2907349cc55cSDimitry Andric       break;
2908349cc55cSDimitry Andric     case Intrinsic::amdgcn_mulhi_u24:
2909349cc55cSDimitry Andric       NewOpcode = AMDGPUISD::MULHI_U24;
2910349cc55cSDimitry Andric       break;
2911349cc55cSDimitry Andric     default:
2912349cc55cSDimitry Andric       llvm_unreachable("Expected 24-bit mul intrinsic");
2913349cc55cSDimitry Andric     }
29148bcb0991SDimitry Andric   }
29150b57cec5SDimitry Andric 
29160b57cec5SDimitry Andric   APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
29170b57cec5SDimitry Andric 
29185ffd83dbSDimitry Andric   // First try to simplify using SimplifyMultipleUseDemandedBits which allows
29195ffd83dbSDimitry Andric   // the operands to have other uses, but will only perform simplifications that
29205ffd83dbSDimitry Andric   // involve bypassing some nodes for this user.
29215ffd83dbSDimitry Andric   SDValue DemandedLHS = TLI.SimplifyMultipleUseDemandedBits(LHS, Demanded, DAG);
29225ffd83dbSDimitry Andric   SDValue DemandedRHS = TLI.SimplifyMultipleUseDemandedBits(RHS, Demanded, DAG);
29230b57cec5SDimitry Andric   if (DemandedLHS || DemandedRHS)
29248bcb0991SDimitry Andric     return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(),
29250b57cec5SDimitry Andric                        DemandedLHS ? DemandedLHS : LHS,
29260b57cec5SDimitry Andric                        DemandedRHS ? DemandedRHS : RHS);
29270b57cec5SDimitry Andric 
29280b57cec5SDimitry Andric   // Now try SimplifyDemandedBits which can simplify the nodes used by our
29290b57cec5SDimitry Andric   // operands if this node is the only user.
29300b57cec5SDimitry Andric   if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI))
29310b57cec5SDimitry Andric     return SDValue(Node24, 0);
29320b57cec5SDimitry Andric   if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI))
29330b57cec5SDimitry Andric     return SDValue(Node24, 0);
29340b57cec5SDimitry Andric 
29350b57cec5SDimitry Andric   return SDValue();
29360b57cec5SDimitry Andric }
29370b57cec5SDimitry Andric 
29380b57cec5SDimitry Andric template <typename IntTy>
29390b57cec5SDimitry Andric static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset,
29400b57cec5SDimitry Andric                                uint32_t Width, const SDLoc &DL) {
29410b57cec5SDimitry Andric   if (Width + Offset < 32) {
29420b57cec5SDimitry Andric     uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
29430b57cec5SDimitry Andric     IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
29440b57cec5SDimitry Andric     return DAG.getConstant(Result, DL, MVT::i32);
29450b57cec5SDimitry Andric   }
29460b57cec5SDimitry Andric 
29470b57cec5SDimitry Andric   return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
29480b57cec5SDimitry Andric }
29490b57cec5SDimitry Andric 
29500b57cec5SDimitry Andric static bool hasVolatileUser(SDNode *Val) {
29510b57cec5SDimitry Andric   for (SDNode *U : Val->uses()) {
29520b57cec5SDimitry Andric     if (MemSDNode *M = dyn_cast<MemSDNode>(U)) {
29530b57cec5SDimitry Andric       if (M->isVolatile())
29540b57cec5SDimitry Andric         return true;
29550b57cec5SDimitry Andric     }
29560b57cec5SDimitry Andric   }
29570b57cec5SDimitry Andric 
29580b57cec5SDimitry Andric   return false;
29590b57cec5SDimitry Andric }
29600b57cec5SDimitry Andric 
29610b57cec5SDimitry Andric bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const {
29620b57cec5SDimitry Andric   // i32 vectors are the canonical memory type.
29630b57cec5SDimitry Andric   if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT))
29640b57cec5SDimitry Andric     return false;
29650b57cec5SDimitry Andric 
29660b57cec5SDimitry Andric   if (!VT.isByteSized())
29670b57cec5SDimitry Andric     return false;
29680b57cec5SDimitry Andric 
29690b57cec5SDimitry Andric   unsigned Size = VT.getStoreSize();
29700b57cec5SDimitry Andric 
29710b57cec5SDimitry Andric   if ((Size == 1 || Size == 2 || Size == 4) && !VT.isVector())
29720b57cec5SDimitry Andric     return false;
29730b57cec5SDimitry Andric 
29740b57cec5SDimitry Andric   if (Size == 3 || (Size > 4 && (Size % 4 != 0)))
29750b57cec5SDimitry Andric     return false;
29760b57cec5SDimitry Andric 
29770b57cec5SDimitry Andric   return true;
29780b57cec5SDimitry Andric }
29790b57cec5SDimitry Andric 
29800b57cec5SDimitry Andric // Replace load of an illegal type with a store of a bitcast to a friendlier
29810b57cec5SDimitry Andric // type.
29820b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
29830b57cec5SDimitry Andric                                                  DAGCombinerInfo &DCI) const {
29840b57cec5SDimitry Andric   if (!DCI.isBeforeLegalize())
29850b57cec5SDimitry Andric     return SDValue();
29860b57cec5SDimitry Andric 
29870b57cec5SDimitry Andric   LoadSDNode *LN = cast<LoadSDNode>(N);
29885ffd83dbSDimitry Andric   if (!LN->isSimple() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN))
29890b57cec5SDimitry Andric     return SDValue();
29900b57cec5SDimitry Andric 
29910b57cec5SDimitry Andric   SDLoc SL(N);
29920b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
29930b57cec5SDimitry Andric   EVT VT = LN->getMemoryVT();
29940b57cec5SDimitry Andric 
29950b57cec5SDimitry Andric   unsigned Size = VT.getStoreSize();
29965ffd83dbSDimitry Andric   Align Alignment = LN->getAlign();
29975ffd83dbSDimitry Andric   if (Alignment < Size && isTypeLegal(VT)) {
29980b57cec5SDimitry Andric     bool IsFast;
29990b57cec5SDimitry Andric     unsigned AS = LN->getAddressSpace();
30000b57cec5SDimitry Andric 
30010b57cec5SDimitry Andric     // Expand unaligned loads earlier than legalization. Due to visitation order
30020b57cec5SDimitry Andric     // problems during legalization, the emitted instructions to pack and unpack
30030b57cec5SDimitry Andric     // the bytes again are not eliminated in the case of an unaligned copy.
3004fe6060f1SDimitry Andric     if (!allowsMisalignedMemoryAccesses(
3005fe6060f1SDimitry Andric             VT, AS, Alignment, LN->getMemOperand()->getFlags(), &IsFast)) {
30060b57cec5SDimitry Andric       SDValue Ops[2];
3007480093f4SDimitry Andric 
3008480093f4SDimitry Andric       if (VT.isVector())
3009480093f4SDimitry Andric         std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LN, DAG);
3010480093f4SDimitry Andric       else
30110b57cec5SDimitry Andric         std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
3012480093f4SDimitry Andric 
30130b57cec5SDimitry Andric       return DAG.getMergeValues(Ops, SDLoc(N));
30140b57cec5SDimitry Andric     }
30150b57cec5SDimitry Andric 
30160b57cec5SDimitry Andric     if (!IsFast)
30170b57cec5SDimitry Andric       return SDValue();
30180b57cec5SDimitry Andric   }
30190b57cec5SDimitry Andric 
30200b57cec5SDimitry Andric   if (!shouldCombineMemoryType(VT))
30210b57cec5SDimitry Andric     return SDValue();
30220b57cec5SDimitry Andric 
30230b57cec5SDimitry Andric   EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
30240b57cec5SDimitry Andric 
30250b57cec5SDimitry Andric   SDValue NewLoad
30260b57cec5SDimitry Andric     = DAG.getLoad(NewVT, SL, LN->getChain(),
30270b57cec5SDimitry Andric                   LN->getBasePtr(), LN->getMemOperand());
30280b57cec5SDimitry Andric 
30290b57cec5SDimitry Andric   SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad);
30300b57cec5SDimitry Andric   DCI.CombineTo(N, BC, NewLoad.getValue(1));
30310b57cec5SDimitry Andric   return SDValue(N, 0);
30320b57cec5SDimitry Andric }
30330b57cec5SDimitry Andric 
30340b57cec5SDimitry Andric // Replace store of an illegal type with a store of a bitcast to a friendlier
30350b57cec5SDimitry Andric // type.
30360b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
30370b57cec5SDimitry Andric                                                   DAGCombinerInfo &DCI) const {
30380b57cec5SDimitry Andric   if (!DCI.isBeforeLegalize())
30390b57cec5SDimitry Andric     return SDValue();
30400b57cec5SDimitry Andric 
30410b57cec5SDimitry Andric   StoreSDNode *SN = cast<StoreSDNode>(N);
30425ffd83dbSDimitry Andric   if (!SN->isSimple() || !ISD::isNormalStore(SN))
30430b57cec5SDimitry Andric     return SDValue();
30440b57cec5SDimitry Andric 
30450b57cec5SDimitry Andric   EVT VT = SN->getMemoryVT();
30460b57cec5SDimitry Andric   unsigned Size = VT.getStoreSize();
30470b57cec5SDimitry Andric 
30480b57cec5SDimitry Andric   SDLoc SL(N);
30490b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
30505ffd83dbSDimitry Andric   Align Alignment = SN->getAlign();
30515ffd83dbSDimitry Andric   if (Alignment < Size && isTypeLegal(VT)) {
30520b57cec5SDimitry Andric     bool IsFast;
30530b57cec5SDimitry Andric     unsigned AS = SN->getAddressSpace();
30540b57cec5SDimitry Andric 
30550b57cec5SDimitry Andric     // Expand unaligned stores earlier than legalization. Due to visitation
30560b57cec5SDimitry Andric     // order problems during legalization, the emitted instructions to pack and
30570b57cec5SDimitry Andric     // unpack the bytes again are not eliminated in the case of an unaligned
30580b57cec5SDimitry Andric     // copy.
3059fe6060f1SDimitry Andric     if (!allowsMisalignedMemoryAccesses(
3060fe6060f1SDimitry Andric             VT, AS, Alignment, SN->getMemOperand()->getFlags(), &IsFast)) {
30610b57cec5SDimitry Andric       if (VT.isVector())
30620b57cec5SDimitry Andric         return scalarizeVectorStore(SN, DAG);
30630b57cec5SDimitry Andric 
30640b57cec5SDimitry Andric       return expandUnalignedStore(SN, DAG);
30650b57cec5SDimitry Andric     }
30660b57cec5SDimitry Andric 
30670b57cec5SDimitry Andric     if (!IsFast)
30680b57cec5SDimitry Andric       return SDValue();
30690b57cec5SDimitry Andric   }
30700b57cec5SDimitry Andric 
30710b57cec5SDimitry Andric   if (!shouldCombineMemoryType(VT))
30720b57cec5SDimitry Andric     return SDValue();
30730b57cec5SDimitry Andric 
30740b57cec5SDimitry Andric   EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
30750b57cec5SDimitry Andric   SDValue Val = SN->getValue();
30760b57cec5SDimitry Andric 
30770b57cec5SDimitry Andric   //DCI.AddToWorklist(Val.getNode());
30780b57cec5SDimitry Andric 
30790b57cec5SDimitry Andric   bool OtherUses = !Val.hasOneUse();
30800b57cec5SDimitry Andric   SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val);
30810b57cec5SDimitry Andric   if (OtherUses) {
30820b57cec5SDimitry Andric     SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal);
30830b57cec5SDimitry Andric     DAG.ReplaceAllUsesOfValueWith(Val, CastBack);
30840b57cec5SDimitry Andric   }
30850b57cec5SDimitry Andric 
30860b57cec5SDimitry Andric   return DAG.getStore(SN->getChain(), SL, CastVal,
30870b57cec5SDimitry Andric                       SN->getBasePtr(), SN->getMemOperand());
30880b57cec5SDimitry Andric }
30890b57cec5SDimitry Andric 
30900b57cec5SDimitry Andric // FIXME: This should go in generic DAG combiner with an isTruncateFree check,
30910b57cec5SDimitry Andric // but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU
30920b57cec5SDimitry Andric // issues.
30930b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
30940b57cec5SDimitry Andric                                                         DAGCombinerInfo &DCI) const {
30950b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
30960b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
30970b57cec5SDimitry Andric 
30980b57cec5SDimitry Andric   // (vt2 (assertzext (truncate vt0:x), vt1)) ->
30990b57cec5SDimitry Andric   //     (vt2 (truncate (assertzext vt0:x, vt1)))
31000b57cec5SDimitry Andric   if (N0.getOpcode() == ISD::TRUNCATE) {
31010b57cec5SDimitry Andric     SDValue N1 = N->getOperand(1);
31020b57cec5SDimitry Andric     EVT ExtVT = cast<VTSDNode>(N1)->getVT();
31030b57cec5SDimitry Andric     SDLoc SL(N);
31040b57cec5SDimitry Andric 
31050b57cec5SDimitry Andric     SDValue Src = N0.getOperand(0);
31060b57cec5SDimitry Andric     EVT SrcVT = Src.getValueType();
31070b57cec5SDimitry Andric     if (SrcVT.bitsGE(ExtVT)) {
31080b57cec5SDimitry Andric       SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1);
31090b57cec5SDimitry Andric       return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg);
31100b57cec5SDimitry Andric     }
31110b57cec5SDimitry Andric   }
31120b57cec5SDimitry Andric 
31130b57cec5SDimitry Andric   return SDValue();
31140b57cec5SDimitry Andric }
31158bcb0991SDimitry Andric 
31168bcb0991SDimitry Andric SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
31178bcb0991SDimitry Andric   SDNode *N, DAGCombinerInfo &DCI) const {
31188bcb0991SDimitry Andric   unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
31198bcb0991SDimitry Andric   switch (IID) {
31208bcb0991SDimitry Andric   case Intrinsic::amdgcn_mul_i24:
31218bcb0991SDimitry Andric   case Intrinsic::amdgcn_mul_u24:
3122349cc55cSDimitry Andric   case Intrinsic::amdgcn_mulhi_i24:
3123349cc55cSDimitry Andric   case Intrinsic::amdgcn_mulhi_u24:
3124fe6060f1SDimitry Andric     return simplifyMul24(N, DCI);
31255ffd83dbSDimitry Andric   case Intrinsic::amdgcn_fract:
31265ffd83dbSDimitry Andric   case Intrinsic::amdgcn_rsq:
31275ffd83dbSDimitry Andric   case Intrinsic::amdgcn_rcp_legacy:
31285ffd83dbSDimitry Andric   case Intrinsic::amdgcn_rsq_legacy:
31295ffd83dbSDimitry Andric   case Intrinsic::amdgcn_rsq_clamp:
31305ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ldexp: {
31315ffd83dbSDimitry Andric     // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
31325ffd83dbSDimitry Andric     SDValue Src = N->getOperand(1);
31335ffd83dbSDimitry Andric     return Src.isUndef() ? Src : SDValue();
31345ffd83dbSDimitry Andric   }
31358bcb0991SDimitry Andric   default:
31368bcb0991SDimitry Andric     return SDValue();
31378bcb0991SDimitry Andric   }
31388bcb0991SDimitry Andric }
31398bcb0991SDimitry Andric 
31400b57cec5SDimitry Andric /// Split the 64-bit value \p LHS into two 32-bit components, and perform the
31410b57cec5SDimitry Andric /// binary operation \p Opc to it with the corresponding constant operands.
31420b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
31430b57cec5SDimitry Andric   DAGCombinerInfo &DCI, const SDLoc &SL,
31440b57cec5SDimitry Andric   unsigned Opc, SDValue LHS,
31450b57cec5SDimitry Andric   uint32_t ValLo, uint32_t ValHi) const {
31460b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
31470b57cec5SDimitry Andric   SDValue Lo, Hi;
31480b57cec5SDimitry Andric   std::tie(Lo, Hi) = split64BitValue(LHS, DAG);
31490b57cec5SDimitry Andric 
31500b57cec5SDimitry Andric   SDValue LoRHS = DAG.getConstant(ValLo, SL, MVT::i32);
31510b57cec5SDimitry Andric   SDValue HiRHS = DAG.getConstant(ValHi, SL, MVT::i32);
31520b57cec5SDimitry Andric 
31530b57cec5SDimitry Andric   SDValue LoAnd = DAG.getNode(Opc, SL, MVT::i32, Lo, LoRHS);
31540b57cec5SDimitry Andric   SDValue HiAnd = DAG.getNode(Opc, SL, MVT::i32, Hi, HiRHS);
31550b57cec5SDimitry Andric 
31560b57cec5SDimitry Andric   // Re-visit the ands. It's possible we eliminated one of them and it could
31570b57cec5SDimitry Andric   // simplify the vector.
31580b57cec5SDimitry Andric   DCI.AddToWorklist(Lo.getNode());
31590b57cec5SDimitry Andric   DCI.AddToWorklist(Hi.getNode());
31600b57cec5SDimitry Andric 
31610b57cec5SDimitry Andric   SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {LoAnd, HiAnd});
31620b57cec5SDimitry Andric   return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
31630b57cec5SDimitry Andric }
31640b57cec5SDimitry Andric 
31650b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
31660b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
31670b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
31680b57cec5SDimitry Andric 
31690b57cec5SDimitry Andric   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
31700b57cec5SDimitry Andric   if (!RHS)
31710b57cec5SDimitry Andric     return SDValue();
31720b57cec5SDimitry Andric 
31730b57cec5SDimitry Andric   SDValue LHS = N->getOperand(0);
31740b57cec5SDimitry Andric   unsigned RHSVal = RHS->getZExtValue();
31750b57cec5SDimitry Andric   if (!RHSVal)
31760b57cec5SDimitry Andric     return LHS;
31770b57cec5SDimitry Andric 
31780b57cec5SDimitry Andric   SDLoc SL(N);
31790b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
31800b57cec5SDimitry Andric 
31810b57cec5SDimitry Andric   switch (LHS->getOpcode()) {
31820b57cec5SDimitry Andric   default:
31830b57cec5SDimitry Andric     break;
31840b57cec5SDimitry Andric   case ISD::ZERO_EXTEND:
31850b57cec5SDimitry Andric   case ISD::SIGN_EXTEND:
31860b57cec5SDimitry Andric   case ISD::ANY_EXTEND: {
31870b57cec5SDimitry Andric     SDValue X = LHS->getOperand(0);
31880b57cec5SDimitry Andric 
31890b57cec5SDimitry Andric     if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 &&
31900b57cec5SDimitry Andric         isOperationLegal(ISD::BUILD_VECTOR, MVT::v2i16)) {
31910b57cec5SDimitry Andric       // Prefer build_vector as the canonical form if packed types are legal.
31920b57cec5SDimitry Andric       // (shl ([asz]ext i16:x), 16 -> build_vector 0, x
31930b57cec5SDimitry Andric       SDValue Vec = DAG.getBuildVector(MVT::v2i16, SL,
31940b57cec5SDimitry Andric        { DAG.getConstant(0, SL, MVT::i16), LHS->getOperand(0) });
31950b57cec5SDimitry Andric       return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
31960b57cec5SDimitry Andric     }
31970b57cec5SDimitry Andric 
31980b57cec5SDimitry Andric     // shl (ext x) => zext (shl x), if shift does not overflow int
31990b57cec5SDimitry Andric     if (VT != MVT::i64)
32000b57cec5SDimitry Andric       break;
32010b57cec5SDimitry Andric     KnownBits Known = DAG.computeKnownBits(X);
32020b57cec5SDimitry Andric     unsigned LZ = Known.countMinLeadingZeros();
32030b57cec5SDimitry Andric     if (LZ < RHSVal)
32040b57cec5SDimitry Andric       break;
32050b57cec5SDimitry Andric     EVT XVT = X.getValueType();
32060b57cec5SDimitry Andric     SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
32070b57cec5SDimitry Andric     return DAG.getZExtOrTrunc(Shl, SL, VT);
32080b57cec5SDimitry Andric   }
32090b57cec5SDimitry Andric   }
32100b57cec5SDimitry Andric 
32110b57cec5SDimitry Andric   if (VT != MVT::i64)
32120b57cec5SDimitry Andric     return SDValue();
32130b57cec5SDimitry Andric 
32140b57cec5SDimitry Andric   // i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
32150b57cec5SDimitry Andric 
32160b57cec5SDimitry Andric   // On some subtargets, 64-bit shift is a quarter rate instruction. In the
32170b57cec5SDimitry Andric   // common case, splitting this into a move and a 32-bit shift is faster and
32180b57cec5SDimitry Andric   // the same code size.
32190b57cec5SDimitry Andric   if (RHSVal < 32)
32200b57cec5SDimitry Andric     return SDValue();
32210b57cec5SDimitry Andric 
32220b57cec5SDimitry Andric   SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32);
32230b57cec5SDimitry Andric 
32240b57cec5SDimitry Andric   SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
32250b57cec5SDimitry Andric   SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt);
32260b57cec5SDimitry Andric 
32270b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
32280b57cec5SDimitry Andric 
32290b57cec5SDimitry Andric   SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {Zero, NewShift});
32300b57cec5SDimitry Andric   return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
32310b57cec5SDimitry Andric }
32320b57cec5SDimitry Andric 
32330b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
32340b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
32350b57cec5SDimitry Andric   if (N->getValueType(0) != MVT::i64)
32360b57cec5SDimitry Andric     return SDValue();
32370b57cec5SDimitry Andric 
32380b57cec5SDimitry Andric   const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
32390b57cec5SDimitry Andric   if (!RHS)
32400b57cec5SDimitry Andric     return SDValue();
32410b57cec5SDimitry Andric 
32420b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
32430b57cec5SDimitry Andric   SDLoc SL(N);
32440b57cec5SDimitry Andric   unsigned RHSVal = RHS->getZExtValue();
32450b57cec5SDimitry Andric 
32460b57cec5SDimitry Andric   // (sra i64:x, 32) -> build_pair x, (sra hi_32(x), 31)
32470b57cec5SDimitry Andric   if (RHSVal == 32) {
32480b57cec5SDimitry Andric     SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
32490b57cec5SDimitry Andric     SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
32500b57cec5SDimitry Andric                                    DAG.getConstant(31, SL, MVT::i32));
32510b57cec5SDimitry Andric 
32520b57cec5SDimitry Andric     SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift});
32530b57cec5SDimitry Andric     return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
32540b57cec5SDimitry Andric   }
32550b57cec5SDimitry Andric 
32560b57cec5SDimitry Andric   // (sra i64:x, 63) -> build_pair (sra hi_32(x), 31), (sra hi_32(x), 31)
32570b57cec5SDimitry Andric   if (RHSVal == 63) {
32580b57cec5SDimitry Andric     SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
32590b57cec5SDimitry Andric     SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
32600b57cec5SDimitry Andric                                    DAG.getConstant(31, SL, MVT::i32));
32610b57cec5SDimitry Andric     SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, NewShift});
32620b57cec5SDimitry Andric     return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
32630b57cec5SDimitry Andric   }
32640b57cec5SDimitry Andric 
32650b57cec5SDimitry Andric   return SDValue();
32660b57cec5SDimitry Andric }
32670b57cec5SDimitry Andric 
32680b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
32690b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
32700b57cec5SDimitry Andric   auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
32710b57cec5SDimitry Andric   if (!RHS)
32720b57cec5SDimitry Andric     return SDValue();
32730b57cec5SDimitry Andric 
32740b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
32750b57cec5SDimitry Andric   SDValue LHS = N->getOperand(0);
32760b57cec5SDimitry Andric   unsigned ShiftAmt = RHS->getZExtValue();
32770b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
32780b57cec5SDimitry Andric   SDLoc SL(N);
32790b57cec5SDimitry Andric 
32800b57cec5SDimitry Andric   // fold (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1)
32810b57cec5SDimitry Andric   // this improves the ability to match BFE patterns in isel.
32820b57cec5SDimitry Andric   if (LHS.getOpcode() == ISD::AND) {
32830b57cec5SDimitry Andric     if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
32840b57cec5SDimitry Andric       if (Mask->getAPIntValue().isShiftedMask() &&
32850b57cec5SDimitry Andric           Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) {
32860b57cec5SDimitry Andric         return DAG.getNode(
32870b57cec5SDimitry Andric             ISD::AND, SL, VT,
32880b57cec5SDimitry Andric             DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
32890b57cec5SDimitry Andric             DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1)));
32900b57cec5SDimitry Andric       }
32910b57cec5SDimitry Andric     }
32920b57cec5SDimitry Andric   }
32930b57cec5SDimitry Andric 
32940b57cec5SDimitry Andric   if (VT != MVT::i64)
32950b57cec5SDimitry Andric     return SDValue();
32960b57cec5SDimitry Andric 
32970b57cec5SDimitry Andric   if (ShiftAmt < 32)
32980b57cec5SDimitry Andric     return SDValue();
32990b57cec5SDimitry Andric 
33000b57cec5SDimitry Andric   // srl i64:x, C for C >= 32
33010b57cec5SDimitry Andric   // =>
33020b57cec5SDimitry Andric   //   build_pair (srl hi_32(x), C - 32), 0
33030b57cec5SDimitry Andric   SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
33040b57cec5SDimitry Andric 
3305349cc55cSDimitry Andric   SDValue Hi = getHiHalf64(LHS, DAG);
33060b57cec5SDimitry Andric 
33070b57cec5SDimitry Andric   SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
33080b57cec5SDimitry Andric   SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
33090b57cec5SDimitry Andric 
33100b57cec5SDimitry Andric   SDValue BuildPair = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, Zero});
33110b57cec5SDimitry Andric 
33120b57cec5SDimitry Andric   return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair);
33130b57cec5SDimitry Andric }
33140b57cec5SDimitry Andric 
33150b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performTruncateCombine(
33160b57cec5SDimitry Andric   SDNode *N, DAGCombinerInfo &DCI) const {
33170b57cec5SDimitry Andric   SDLoc SL(N);
33180b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
33190b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
33200b57cec5SDimitry Andric   SDValue Src = N->getOperand(0);
33210b57cec5SDimitry Andric 
33220b57cec5SDimitry Andric   // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x)
33230b57cec5SDimitry Andric   if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) {
33240b57cec5SDimitry Andric     SDValue Vec = Src.getOperand(0);
33250b57cec5SDimitry Andric     if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
33260b57cec5SDimitry Andric       SDValue Elt0 = Vec.getOperand(0);
33270b57cec5SDimitry Andric       EVT EltVT = Elt0.getValueType();
3328e8d8bef9SDimitry Andric       if (VT.getFixedSizeInBits() <= EltVT.getFixedSizeInBits()) {
33290b57cec5SDimitry Andric         if (EltVT.isFloatingPoint()) {
33300b57cec5SDimitry Andric           Elt0 = DAG.getNode(ISD::BITCAST, SL,
33310b57cec5SDimitry Andric                              EltVT.changeTypeToInteger(), Elt0);
33320b57cec5SDimitry Andric         }
33330b57cec5SDimitry Andric 
33340b57cec5SDimitry Andric         return DAG.getNode(ISD::TRUNCATE, SL, VT, Elt0);
33350b57cec5SDimitry Andric       }
33360b57cec5SDimitry Andric     }
33370b57cec5SDimitry Andric   }
33380b57cec5SDimitry Andric 
33390b57cec5SDimitry Andric   // Equivalent of above for accessing the high element of a vector as an
33400b57cec5SDimitry Andric   // integer operation.
33410b57cec5SDimitry Andric   // trunc (srl (bitcast (build_vector x, y))), 16 -> trunc (bitcast y)
33420b57cec5SDimitry Andric   if (Src.getOpcode() == ISD::SRL && !VT.isVector()) {
33430b57cec5SDimitry Andric     if (auto K = isConstOrConstSplat(Src.getOperand(1))) {
33440b57cec5SDimitry Andric       if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) {
33450b57cec5SDimitry Andric         SDValue BV = stripBitcast(Src.getOperand(0));
33460b57cec5SDimitry Andric         if (BV.getOpcode() == ISD::BUILD_VECTOR &&
33470b57cec5SDimitry Andric             BV.getValueType().getVectorNumElements() == 2) {
33480b57cec5SDimitry Andric           SDValue SrcElt = BV.getOperand(1);
33490b57cec5SDimitry Andric           EVT SrcEltVT = SrcElt.getValueType();
33500b57cec5SDimitry Andric           if (SrcEltVT.isFloatingPoint()) {
33510b57cec5SDimitry Andric             SrcElt = DAG.getNode(ISD::BITCAST, SL,
33520b57cec5SDimitry Andric                                  SrcEltVT.changeTypeToInteger(), SrcElt);
33530b57cec5SDimitry Andric           }
33540b57cec5SDimitry Andric 
33550b57cec5SDimitry Andric           return DAG.getNode(ISD::TRUNCATE, SL, VT, SrcElt);
33560b57cec5SDimitry Andric         }
33570b57cec5SDimitry Andric       }
33580b57cec5SDimitry Andric     }
33590b57cec5SDimitry Andric   }
33600b57cec5SDimitry Andric 
33610b57cec5SDimitry Andric   // Partially shrink 64-bit shifts to 32-bit if reduced to 16-bit.
33620b57cec5SDimitry Andric   //
33630b57cec5SDimitry Andric   // i16 (trunc (srl i64:x, K)), K <= 16 ->
33640b57cec5SDimitry Andric   //     i16 (trunc (srl (i32 (trunc x), K)))
33650b57cec5SDimitry Andric   if (VT.getScalarSizeInBits() < 32) {
33660b57cec5SDimitry Andric     EVT SrcVT = Src.getValueType();
33670b57cec5SDimitry Andric     if (SrcVT.getScalarSizeInBits() > 32 &&
33680b57cec5SDimitry Andric         (Src.getOpcode() == ISD::SRL ||
33690b57cec5SDimitry Andric          Src.getOpcode() == ISD::SRA ||
33700b57cec5SDimitry Andric          Src.getOpcode() == ISD::SHL)) {
33710b57cec5SDimitry Andric       SDValue Amt = Src.getOperand(1);
33720b57cec5SDimitry Andric       KnownBits Known = DAG.computeKnownBits(Amt);
33730b57cec5SDimitry Andric       unsigned Size = VT.getScalarSizeInBits();
33740b57cec5SDimitry Andric       if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
3375349cc55cSDimitry Andric           (Known.countMaxActiveBits() <= Log2_32(Size))) {
33760b57cec5SDimitry Andric         EVT MidVT = VT.isVector() ?
33770b57cec5SDimitry Andric           EVT::getVectorVT(*DAG.getContext(), MVT::i32,
33780b57cec5SDimitry Andric                            VT.getVectorNumElements()) : MVT::i32;
33790b57cec5SDimitry Andric 
33800b57cec5SDimitry Andric         EVT NewShiftVT = getShiftAmountTy(MidVT, DAG.getDataLayout());
33810b57cec5SDimitry Andric         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT,
33820b57cec5SDimitry Andric                                     Src.getOperand(0));
33830b57cec5SDimitry Andric         DCI.AddToWorklist(Trunc.getNode());
33840b57cec5SDimitry Andric 
33850b57cec5SDimitry Andric         if (Amt.getValueType() != NewShiftVT) {
33860b57cec5SDimitry Andric           Amt = DAG.getZExtOrTrunc(Amt, SL, NewShiftVT);
33870b57cec5SDimitry Andric           DCI.AddToWorklist(Amt.getNode());
33880b57cec5SDimitry Andric         }
33890b57cec5SDimitry Andric 
33900b57cec5SDimitry Andric         SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT,
33910b57cec5SDimitry Andric                                           Trunc, Amt);
33920b57cec5SDimitry Andric         return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift);
33930b57cec5SDimitry Andric       }
33940b57cec5SDimitry Andric     }
33950b57cec5SDimitry Andric   }
33960b57cec5SDimitry Andric 
33970b57cec5SDimitry Andric   return SDValue();
33980b57cec5SDimitry Andric }
33990b57cec5SDimitry Andric 
34000b57cec5SDimitry Andric // We need to specifically handle i64 mul here to avoid unnecessary conversion
34010b57cec5SDimitry Andric // instructions. If we only match on the legalized i64 mul expansion,
34020b57cec5SDimitry Andric // SimplifyDemandedBits will be unable to remove them because there will be
34030b57cec5SDimitry Andric // multiple uses due to the separate mul + mulh[su].
34040b57cec5SDimitry Andric static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL,
34050b57cec5SDimitry Andric                         SDValue N0, SDValue N1, unsigned Size, bool Signed) {
34060b57cec5SDimitry Andric   if (Size <= 32) {
34070b57cec5SDimitry Andric     unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
34080b57cec5SDimitry Andric     return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1);
34090b57cec5SDimitry Andric   }
34100b57cec5SDimitry Andric 
3411e8d8bef9SDimitry Andric   unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
3412e8d8bef9SDimitry Andric   unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24;
34130b57cec5SDimitry Andric 
3414e8d8bef9SDimitry Andric   SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1);
3415e8d8bef9SDimitry Andric   SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1);
34160b57cec5SDimitry Andric 
3417e8d8bef9SDimitry Andric   return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, MulLo, MulHi);
34180b57cec5SDimitry Andric }
34190b57cec5SDimitry Andric 
34200b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
34210b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
34220b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
34230b57cec5SDimitry Andric 
3424fe6060f1SDimitry Andric   // Don't generate 24-bit multiplies on values that are in SGPRs, since
3425fe6060f1SDimitry Andric   // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
3426fe6060f1SDimitry Andric   // unnecessarily). isDivergent() is used as an approximation of whether the
3427fe6060f1SDimitry Andric   // value is in an SGPR.
3428fe6060f1SDimitry Andric   if (!N->isDivergent())
3429fe6060f1SDimitry Andric     return SDValue();
3430fe6060f1SDimitry Andric 
34310b57cec5SDimitry Andric   unsigned Size = VT.getSizeInBits();
34320b57cec5SDimitry Andric   if (VT.isVector() || Size > 64)
34330b57cec5SDimitry Andric     return SDValue();
34340b57cec5SDimitry Andric 
34350b57cec5SDimitry Andric   // There are i16 integer mul/mad.
34360b57cec5SDimitry Andric   if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16))
34370b57cec5SDimitry Andric     return SDValue();
34380b57cec5SDimitry Andric 
34390b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
34400b57cec5SDimitry Andric   SDLoc DL(N);
34410b57cec5SDimitry Andric 
34420b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
34430b57cec5SDimitry Andric   SDValue N1 = N->getOperand(1);
34440b57cec5SDimitry Andric 
34450b57cec5SDimitry Andric   // SimplifyDemandedBits has the annoying habit of turning useful zero_extends
34460b57cec5SDimitry Andric   // in the source into any_extends if the result of the mul is truncated. Since
34470b57cec5SDimitry Andric   // we can assume the high bits are whatever we want, use the underlying value
34480b57cec5SDimitry Andric   // to avoid the unknown high bits from interfering.
34490b57cec5SDimitry Andric   if (N0.getOpcode() == ISD::ANY_EXTEND)
34500b57cec5SDimitry Andric     N0 = N0.getOperand(0);
34510b57cec5SDimitry Andric 
34520b57cec5SDimitry Andric   if (N1.getOpcode() == ISD::ANY_EXTEND)
34530b57cec5SDimitry Andric     N1 = N1.getOperand(0);
34540b57cec5SDimitry Andric 
34550b57cec5SDimitry Andric   SDValue Mul;
34560b57cec5SDimitry Andric 
34570b57cec5SDimitry Andric   if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
34580b57cec5SDimitry Andric     N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
34590b57cec5SDimitry Andric     N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
34600b57cec5SDimitry Andric     Mul = getMul24(DAG, DL, N0, N1, Size, false);
34610b57cec5SDimitry Andric   } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
34620b57cec5SDimitry Andric     N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
34630b57cec5SDimitry Andric     N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
34640b57cec5SDimitry Andric     Mul = getMul24(DAG, DL, N0, N1, Size, true);
34650b57cec5SDimitry Andric   } else {
34660b57cec5SDimitry Andric     return SDValue();
34670b57cec5SDimitry Andric   }
34680b57cec5SDimitry Andric 
34690b57cec5SDimitry Andric   // We need to use sext even for MUL_U24, because MUL_U24 is used
34700b57cec5SDimitry Andric   // for signed multiply of 8 and 16-bit types.
34710b57cec5SDimitry Andric   return DAG.getSExtOrTrunc(Mul, DL, VT);
34720b57cec5SDimitry Andric }
34730b57cec5SDimitry Andric 
34744824e7fdSDimitry Andric SDValue
34754824e7fdSDimitry Andric AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
34764824e7fdSDimitry Andric                                             DAGCombinerInfo &DCI) const {
34774824e7fdSDimitry Andric   if (N->getValueType(0) != MVT::i32)
34784824e7fdSDimitry Andric     return SDValue();
34794824e7fdSDimitry Andric 
34804824e7fdSDimitry Andric   SelectionDAG &DAG = DCI.DAG;
34814824e7fdSDimitry Andric   SDLoc DL(N);
34824824e7fdSDimitry Andric 
34834824e7fdSDimitry Andric   SDValue N0 = N->getOperand(0);
34844824e7fdSDimitry Andric   SDValue N1 = N->getOperand(1);
34854824e7fdSDimitry Andric 
34864824e7fdSDimitry Andric   // SimplifyDemandedBits has the annoying habit of turning useful zero_extends
34874824e7fdSDimitry Andric   // in the source into any_extends if the result of the mul is truncated. Since
34884824e7fdSDimitry Andric   // we can assume the high bits are whatever we want, use the underlying value
34894824e7fdSDimitry Andric   // to avoid the unknown high bits from interfering.
34904824e7fdSDimitry Andric   if (N0.getOpcode() == ISD::ANY_EXTEND)
34914824e7fdSDimitry Andric     N0 = N0.getOperand(0);
34924824e7fdSDimitry Andric   if (N1.getOpcode() == ISD::ANY_EXTEND)
34934824e7fdSDimitry Andric     N1 = N1.getOperand(0);
34944824e7fdSDimitry Andric 
34954824e7fdSDimitry Andric   // Try to use two fast 24-bit multiplies (one for each half of the result)
34964824e7fdSDimitry Andric   // instead of one slow extending multiply.
34974824e7fdSDimitry Andric   unsigned LoOpcode, HiOpcode;
34984824e7fdSDimitry Andric   if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
34994824e7fdSDimitry Andric     N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
35004824e7fdSDimitry Andric     N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
35014824e7fdSDimitry Andric     LoOpcode = AMDGPUISD::MUL_U24;
35024824e7fdSDimitry Andric     HiOpcode = AMDGPUISD::MULHI_U24;
35034824e7fdSDimitry Andric   } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
35044824e7fdSDimitry Andric     N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
35054824e7fdSDimitry Andric     N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
35064824e7fdSDimitry Andric     LoOpcode = AMDGPUISD::MUL_I24;
35074824e7fdSDimitry Andric     HiOpcode = AMDGPUISD::MULHI_I24;
35084824e7fdSDimitry Andric   } else {
35094824e7fdSDimitry Andric     return SDValue();
35104824e7fdSDimitry Andric   }
35114824e7fdSDimitry Andric 
35124824e7fdSDimitry Andric   SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
35134824e7fdSDimitry Andric   SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);
35144824e7fdSDimitry Andric   DCI.CombineTo(N, Lo, Hi);
35154824e7fdSDimitry Andric   return SDValue(N, 0);
35164824e7fdSDimitry Andric }
35174824e7fdSDimitry Andric 
35180b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N,
35190b57cec5SDimitry Andric                                                   DAGCombinerInfo &DCI) const {
35200b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
35210b57cec5SDimitry Andric 
35220b57cec5SDimitry Andric   if (!Subtarget->hasMulI24() || VT.isVector())
35230b57cec5SDimitry Andric     return SDValue();
35240b57cec5SDimitry Andric 
3525fe6060f1SDimitry Andric   // Don't generate 24-bit multiplies on values that are in SGPRs, since
3526fe6060f1SDimitry Andric   // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
3527fe6060f1SDimitry Andric   // unnecessarily). isDivergent() is used as an approximation of whether the
3528fe6060f1SDimitry Andric   // value is in an SGPR.
3529fe6060f1SDimitry Andric   // This doesn't apply if no s_mul_hi is available (since we'll end up with a
3530fe6060f1SDimitry Andric   // valu op anyway)
3531fe6060f1SDimitry Andric   if (Subtarget->hasSMulHi() && !N->isDivergent())
3532fe6060f1SDimitry Andric     return SDValue();
3533fe6060f1SDimitry Andric 
35340b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
35350b57cec5SDimitry Andric   SDLoc DL(N);
35360b57cec5SDimitry Andric 
35370b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
35380b57cec5SDimitry Andric   SDValue N1 = N->getOperand(1);
35390b57cec5SDimitry Andric 
35400b57cec5SDimitry Andric   if (!isI24(N0, DAG) || !isI24(N1, DAG))
35410b57cec5SDimitry Andric     return SDValue();
35420b57cec5SDimitry Andric 
35430b57cec5SDimitry Andric   N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
35440b57cec5SDimitry Andric   N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
35450b57cec5SDimitry Andric 
35460b57cec5SDimitry Andric   SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1);
35470b57cec5SDimitry Andric   DCI.AddToWorklist(Mulhi.getNode());
35480b57cec5SDimitry Andric   return DAG.getSExtOrTrunc(Mulhi, DL, VT);
35490b57cec5SDimitry Andric }
35500b57cec5SDimitry Andric 
35510b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
35520b57cec5SDimitry Andric                                                   DAGCombinerInfo &DCI) const {
35530b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
35540b57cec5SDimitry Andric 
35550b57cec5SDimitry Andric   if (!Subtarget->hasMulU24() || VT.isVector() || VT.getSizeInBits() > 32)
35560b57cec5SDimitry Andric     return SDValue();
35570b57cec5SDimitry Andric 
3558fe6060f1SDimitry Andric   // Don't generate 24-bit multiplies on values that are in SGPRs, since
3559fe6060f1SDimitry Andric   // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
3560fe6060f1SDimitry Andric   // unnecessarily). isDivergent() is used as an approximation of whether the
3561fe6060f1SDimitry Andric   // value is in an SGPR.
3562fe6060f1SDimitry Andric   // This doesn't apply if no s_mul_hi is available (since we'll end up with a
3563fe6060f1SDimitry Andric   // valu op anyway)
3564fe6060f1SDimitry Andric   if (Subtarget->hasSMulHi() && !N->isDivergent())
3565fe6060f1SDimitry Andric     return SDValue();
3566fe6060f1SDimitry Andric 
35670b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
35680b57cec5SDimitry Andric   SDLoc DL(N);
35690b57cec5SDimitry Andric 
35700b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
35710b57cec5SDimitry Andric   SDValue N1 = N->getOperand(1);
35720b57cec5SDimitry Andric 
35730b57cec5SDimitry Andric   if (!isU24(N0, DAG) || !isU24(N1, DAG))
35740b57cec5SDimitry Andric     return SDValue();
35750b57cec5SDimitry Andric 
35760b57cec5SDimitry Andric   N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
35770b57cec5SDimitry Andric   N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
35780b57cec5SDimitry Andric 
35790b57cec5SDimitry Andric   SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1);
35800b57cec5SDimitry Andric   DCI.AddToWorklist(Mulhi.getNode());
35810b57cec5SDimitry Andric   return DAG.getZExtOrTrunc(Mulhi, DL, VT);
35820b57cec5SDimitry Andric }
35830b57cec5SDimitry Andric 
35840b57cec5SDimitry Andric static bool isNegativeOne(SDValue Val) {
35850b57cec5SDimitry Andric   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val))
3586349cc55cSDimitry Andric     return C->isAllOnes();
35870b57cec5SDimitry Andric   return false;
35880b57cec5SDimitry Andric }
35890b57cec5SDimitry Andric 
35900b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG,
35910b57cec5SDimitry Andric                                           SDValue Op,
35920b57cec5SDimitry Andric                                           const SDLoc &DL,
35930b57cec5SDimitry Andric                                           unsigned Opc) const {
35940b57cec5SDimitry Andric   EVT VT = Op.getValueType();
35950b57cec5SDimitry Andric   EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT);
35960b57cec5SDimitry Andric   if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() &&
35970b57cec5SDimitry Andric                               LegalVT != MVT::i16))
35980b57cec5SDimitry Andric     return SDValue();
35990b57cec5SDimitry Andric 
36000b57cec5SDimitry Andric   if (VT != MVT::i32)
36010b57cec5SDimitry Andric     Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op);
36020b57cec5SDimitry Andric 
36030b57cec5SDimitry Andric   SDValue FFBX = DAG.getNode(Opc, DL, MVT::i32, Op);
36040b57cec5SDimitry Andric   if (VT != MVT::i32)
36050b57cec5SDimitry Andric     FFBX = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBX);
36060b57cec5SDimitry Andric 
36070b57cec5SDimitry Andric   return FFBX;
36080b57cec5SDimitry Andric }
36090b57cec5SDimitry Andric 
36100b57cec5SDimitry Andric // The native instructions return -1 on 0 input. Optimize out a select that
36110b57cec5SDimitry Andric // produces -1 on 0.
36120b57cec5SDimitry Andric //
36130b57cec5SDimitry Andric // TODO: If zero is not undef, we could also do this if the output is compared
36140b57cec5SDimitry Andric // against the bitwidth.
36150b57cec5SDimitry Andric //
36160b57cec5SDimitry Andric // TODO: Should probably combine against FFBH_U32 instead of ctlz directly.
36170b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond,
36180b57cec5SDimitry Andric                                                  SDValue LHS, SDValue RHS,
36190b57cec5SDimitry Andric                                                  DAGCombinerInfo &DCI) const {
36200b57cec5SDimitry Andric   ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3621349cc55cSDimitry Andric   if (!CmpRhs || !CmpRhs->isZero())
36220b57cec5SDimitry Andric     return SDValue();
36230b57cec5SDimitry Andric 
36240b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
36250b57cec5SDimitry Andric   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
36260b57cec5SDimitry Andric   SDValue CmpLHS = Cond.getOperand(0);
36270b57cec5SDimitry Andric 
36280b57cec5SDimitry Andric   // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
36290b57cec5SDimitry Andric   // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x
36300b57cec5SDimitry Andric   if (CCOpcode == ISD::SETEQ &&
36310b57cec5SDimitry Andric       (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
36325ffd83dbSDimitry Andric       RHS.getOperand(0) == CmpLHS && isNegativeOne(LHS)) {
36335ffd83dbSDimitry Andric     unsigned Opc =
36345ffd83dbSDimitry Andric         isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
36350b57cec5SDimitry Andric     return getFFBX_U32(DAG, CmpLHS, SL, Opc);
36360b57cec5SDimitry Andric   }
36370b57cec5SDimitry Andric 
36380b57cec5SDimitry Andric   // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
36390b57cec5SDimitry Andric   // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x
36400b57cec5SDimitry Andric   if (CCOpcode == ISD::SETNE &&
36415ffd83dbSDimitry Andric       (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) &&
36425ffd83dbSDimitry Andric       LHS.getOperand(0) == CmpLHS && isNegativeOne(RHS)) {
36435ffd83dbSDimitry Andric     unsigned Opc =
36445ffd83dbSDimitry Andric         isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
36455ffd83dbSDimitry Andric 
36460b57cec5SDimitry Andric     return getFFBX_U32(DAG, CmpLHS, SL, Opc);
36470b57cec5SDimitry Andric   }
36480b57cec5SDimitry Andric 
36490b57cec5SDimitry Andric   return SDValue();
36500b57cec5SDimitry Andric }
36510b57cec5SDimitry Andric 
36520b57cec5SDimitry Andric static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI,
36530b57cec5SDimitry Andric                                          unsigned Op,
36540b57cec5SDimitry Andric                                          const SDLoc &SL,
36550b57cec5SDimitry Andric                                          SDValue Cond,
36560b57cec5SDimitry Andric                                          SDValue N1,
36570b57cec5SDimitry Andric                                          SDValue N2) {
36580b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
36590b57cec5SDimitry Andric   EVT VT = N1.getValueType();
36600b57cec5SDimitry Andric 
36610b57cec5SDimitry Andric   SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond,
36620b57cec5SDimitry Andric                                   N1.getOperand(0), N2.getOperand(0));
36630b57cec5SDimitry Andric   DCI.AddToWorklist(NewSelect.getNode());
36640b57cec5SDimitry Andric   return DAG.getNode(Op, SL, VT, NewSelect);
36650b57cec5SDimitry Andric }
36660b57cec5SDimitry Andric 
36670b57cec5SDimitry Andric // Pull a free FP operation out of a select so it may fold into uses.
36680b57cec5SDimitry Andric //
36690b57cec5SDimitry Andric // select c, (fneg x), (fneg y) -> fneg (select c, x, y)
36700b57cec5SDimitry Andric // select c, (fneg x), k -> fneg (select c, x, (fneg k))
36710b57cec5SDimitry Andric //
36720b57cec5SDimitry Andric // select c, (fabs x), (fabs y) -> fabs (select c, x, y)
36730b57cec5SDimitry Andric // select c, (fabs x), +k -> fabs (select c, x, k)
36740b57cec5SDimitry Andric static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
36750b57cec5SDimitry Andric                                     SDValue N) {
36760b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
36770b57cec5SDimitry Andric   SDValue Cond = N.getOperand(0);
36780b57cec5SDimitry Andric   SDValue LHS = N.getOperand(1);
36790b57cec5SDimitry Andric   SDValue RHS = N.getOperand(2);
36800b57cec5SDimitry Andric 
36810b57cec5SDimitry Andric   EVT VT = N.getValueType();
36820b57cec5SDimitry Andric   if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
36830b57cec5SDimitry Andric       (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {
36840b57cec5SDimitry Andric     return distributeOpThroughSelect(DCI, LHS.getOpcode(),
36850b57cec5SDimitry Andric                                      SDLoc(N), Cond, LHS, RHS);
36860b57cec5SDimitry Andric   }
36870b57cec5SDimitry Andric 
36880b57cec5SDimitry Andric   bool Inv = false;
36890b57cec5SDimitry Andric   if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) {
36900b57cec5SDimitry Andric     std::swap(LHS, RHS);
36910b57cec5SDimitry Andric     Inv = true;
36920b57cec5SDimitry Andric   }
36930b57cec5SDimitry Andric 
36940b57cec5SDimitry Andric   // TODO: Support vector constants.
36950b57cec5SDimitry Andric   ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
36960b57cec5SDimitry Andric   if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) {
36970b57cec5SDimitry Andric     SDLoc SL(N);
36980b57cec5SDimitry Andric     // If one side is an fneg/fabs and the other is a constant, we can push the
36990b57cec5SDimitry Andric     // fneg/fabs down. If it's an fabs, the constant needs to be non-negative.
37000b57cec5SDimitry Andric     SDValue NewLHS = LHS.getOperand(0);
37010b57cec5SDimitry Andric     SDValue NewRHS = RHS;
37020b57cec5SDimitry Andric 
37030b57cec5SDimitry Andric     // Careful: if the neg can be folded up, don't try to pull it back down.
37040b57cec5SDimitry Andric     bool ShouldFoldNeg = true;
37050b57cec5SDimitry Andric 
37060b57cec5SDimitry Andric     if (NewLHS.hasOneUse()) {
37070b57cec5SDimitry Andric       unsigned Opc = NewLHS.getOpcode();
37080b57cec5SDimitry Andric       if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc))
37090b57cec5SDimitry Andric         ShouldFoldNeg = false;
37100b57cec5SDimitry Andric       if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
37110b57cec5SDimitry Andric         ShouldFoldNeg = false;
37120b57cec5SDimitry Andric     }
37130b57cec5SDimitry Andric 
37140b57cec5SDimitry Andric     if (ShouldFoldNeg) {
37150b57cec5SDimitry Andric       if (LHS.getOpcode() == ISD::FNEG)
37160b57cec5SDimitry Andric         NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
37170b57cec5SDimitry Andric       else if (CRHS->isNegative())
37180b57cec5SDimitry Andric         return SDValue();
37190b57cec5SDimitry Andric 
37200b57cec5SDimitry Andric       if (Inv)
37210b57cec5SDimitry Andric         std::swap(NewLHS, NewRHS);
37220b57cec5SDimitry Andric 
37230b57cec5SDimitry Andric       SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
37240b57cec5SDimitry Andric                                       Cond, NewLHS, NewRHS);
37250b57cec5SDimitry Andric       DCI.AddToWorklist(NewSelect.getNode());
37260b57cec5SDimitry Andric       return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
37270b57cec5SDimitry Andric     }
37280b57cec5SDimitry Andric   }
37290b57cec5SDimitry Andric 
37300b57cec5SDimitry Andric   return SDValue();
37310b57cec5SDimitry Andric }
37320b57cec5SDimitry Andric 
37330b57cec5SDimitry Andric 
37340b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
37350b57cec5SDimitry Andric                                                    DAGCombinerInfo &DCI) const {
37360b57cec5SDimitry Andric   if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
37370b57cec5SDimitry Andric     return Folded;
37380b57cec5SDimitry Andric 
37390b57cec5SDimitry Andric   SDValue Cond = N->getOperand(0);
37400b57cec5SDimitry Andric   if (Cond.getOpcode() != ISD::SETCC)
37410b57cec5SDimitry Andric     return SDValue();
37420b57cec5SDimitry Andric 
37430b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
37440b57cec5SDimitry Andric   SDValue LHS = Cond.getOperand(0);
37450b57cec5SDimitry Andric   SDValue RHS = Cond.getOperand(1);
37460b57cec5SDimitry Andric   SDValue CC = Cond.getOperand(2);
37470b57cec5SDimitry Andric 
37480b57cec5SDimitry Andric   SDValue True = N->getOperand(1);
37490b57cec5SDimitry Andric   SDValue False = N->getOperand(2);
37500b57cec5SDimitry Andric 
37510b57cec5SDimitry Andric   if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
37520b57cec5SDimitry Andric     SelectionDAG &DAG = DCI.DAG;
37530b57cec5SDimitry Andric     if (DAG.isConstantValueOfAnyType(True) &&
37540b57cec5SDimitry Andric         !DAG.isConstantValueOfAnyType(False)) {
37550b57cec5SDimitry Andric       // Swap cmp + select pair to move constant to false input.
37560b57cec5SDimitry Andric       // This will allow using VOPC cndmasks more often.
37570b57cec5SDimitry Andric       // select (setcc x, y), k, x -> select (setccinv x, y), x, k
37580b57cec5SDimitry Andric 
37590b57cec5SDimitry Andric       SDLoc SL(N);
3760480093f4SDimitry Andric       ISD::CondCode NewCC =
3761480093f4SDimitry Andric           getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType());
37620b57cec5SDimitry Andric 
37630b57cec5SDimitry Andric       SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
37640b57cec5SDimitry Andric       return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
37650b57cec5SDimitry Andric     }
37660b57cec5SDimitry Andric 
37670b57cec5SDimitry Andric     if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
37680b57cec5SDimitry Andric       SDValue MinMax
37690b57cec5SDimitry Andric         = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
37700b57cec5SDimitry Andric       // Revisit this node so we can catch min3/max3/med3 patterns.
37710b57cec5SDimitry Andric       //DCI.AddToWorklist(MinMax.getNode());
37720b57cec5SDimitry Andric       return MinMax;
37730b57cec5SDimitry Andric     }
37740b57cec5SDimitry Andric   }
37750b57cec5SDimitry Andric 
37760b57cec5SDimitry Andric   // There's no reason to not do this if the condition has other uses.
37770b57cec5SDimitry Andric   return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
37780b57cec5SDimitry Andric }
37790b57cec5SDimitry Andric 
37800b57cec5SDimitry Andric static bool isInv2Pi(const APFloat &APF) {
37810b57cec5SDimitry Andric   static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
37820b57cec5SDimitry Andric   static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
37830b57cec5SDimitry Andric   static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882));
37840b57cec5SDimitry Andric 
37850b57cec5SDimitry Andric   return APF.bitwiseIsEqual(KF16) ||
37860b57cec5SDimitry Andric          APF.bitwiseIsEqual(KF32) ||
37870b57cec5SDimitry Andric          APF.bitwiseIsEqual(KF64);
37880b57cec5SDimitry Andric }
37890b57cec5SDimitry Andric 
37900b57cec5SDimitry Andric // 0 and 1.0 / (0.5 * pi) do not have inline immmediates, so there is an
37910b57cec5SDimitry Andric // additional cost to negate them.
37920b57cec5SDimitry Andric bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const {
37930b57cec5SDimitry Andric   if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N)) {
37940b57cec5SDimitry Andric     if (C->isZero() && !C->isNegative())
37950b57cec5SDimitry Andric       return true;
37960b57cec5SDimitry Andric 
37970b57cec5SDimitry Andric     if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))
37980b57cec5SDimitry Andric       return true;
37990b57cec5SDimitry Andric   }
38000b57cec5SDimitry Andric 
38010b57cec5SDimitry Andric   return false;
38020b57cec5SDimitry Andric }
38030b57cec5SDimitry Andric 
38040b57cec5SDimitry Andric static unsigned inverseMinMax(unsigned Opc) {
38050b57cec5SDimitry Andric   switch (Opc) {
38060b57cec5SDimitry Andric   case ISD::FMAXNUM:
38070b57cec5SDimitry Andric     return ISD::FMINNUM;
38080b57cec5SDimitry Andric   case ISD::FMINNUM:
38090b57cec5SDimitry Andric     return ISD::FMAXNUM;
38100b57cec5SDimitry Andric   case ISD::FMAXNUM_IEEE:
38110b57cec5SDimitry Andric     return ISD::FMINNUM_IEEE;
38120b57cec5SDimitry Andric   case ISD::FMINNUM_IEEE:
38130b57cec5SDimitry Andric     return ISD::FMAXNUM_IEEE;
38140b57cec5SDimitry Andric   case AMDGPUISD::FMAX_LEGACY:
38150b57cec5SDimitry Andric     return AMDGPUISD::FMIN_LEGACY;
38160b57cec5SDimitry Andric   case AMDGPUISD::FMIN_LEGACY:
38170b57cec5SDimitry Andric     return  AMDGPUISD::FMAX_LEGACY;
38180b57cec5SDimitry Andric   default:
38190b57cec5SDimitry Andric     llvm_unreachable("invalid min/max opcode");
38200b57cec5SDimitry Andric   }
38210b57cec5SDimitry Andric }
38220b57cec5SDimitry Andric 
38230b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
38240b57cec5SDimitry Andric                                                  DAGCombinerInfo &DCI) const {
38250b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
38260b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
38270b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
38280b57cec5SDimitry Andric 
38290b57cec5SDimitry Andric   unsigned Opc = N0.getOpcode();
38300b57cec5SDimitry Andric 
38310b57cec5SDimitry Andric   // If the input has multiple uses and we can either fold the negate down, or
38320b57cec5SDimitry Andric   // the other uses cannot, give up. This both prevents unprofitable
38330b57cec5SDimitry Andric   // transformations and infinite loops: we won't repeatedly try to fold around
38340b57cec5SDimitry Andric   // a negate that has no 'good' form.
38350b57cec5SDimitry Andric   if (N0.hasOneUse()) {
38360b57cec5SDimitry Andric     // This may be able to fold into the source, but at a code size cost. Don't
38370b57cec5SDimitry Andric     // fold if the fold into the user is free.
38380b57cec5SDimitry Andric     if (allUsesHaveSourceMods(N, 0))
38390b57cec5SDimitry Andric       return SDValue();
38400b57cec5SDimitry Andric   } else {
38410b57cec5SDimitry Andric     if (fnegFoldsIntoOp(Opc) &&
38420b57cec5SDimitry Andric         (allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode())))
38430b57cec5SDimitry Andric       return SDValue();
38440b57cec5SDimitry Andric   }
38450b57cec5SDimitry Andric 
38460b57cec5SDimitry Andric   SDLoc SL(N);
38470b57cec5SDimitry Andric   switch (Opc) {
38480b57cec5SDimitry Andric   case ISD::FADD: {
38490b57cec5SDimitry Andric     if (!mayIgnoreSignedZero(N0))
38500b57cec5SDimitry Andric       return SDValue();
38510b57cec5SDimitry Andric 
38520b57cec5SDimitry Andric     // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
38530b57cec5SDimitry Andric     SDValue LHS = N0.getOperand(0);
38540b57cec5SDimitry Andric     SDValue RHS = N0.getOperand(1);
38550b57cec5SDimitry Andric 
38560b57cec5SDimitry Andric     if (LHS.getOpcode() != ISD::FNEG)
38570b57cec5SDimitry Andric       LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
38580b57cec5SDimitry Andric     else
38590b57cec5SDimitry Andric       LHS = LHS.getOperand(0);
38600b57cec5SDimitry Andric 
38610b57cec5SDimitry Andric     if (RHS.getOpcode() != ISD::FNEG)
38620b57cec5SDimitry Andric       RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
38630b57cec5SDimitry Andric     else
38640b57cec5SDimitry Andric       RHS = RHS.getOperand(0);
38650b57cec5SDimitry Andric 
38660b57cec5SDimitry Andric     SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags());
38670b57cec5SDimitry Andric     if (Res.getOpcode() != ISD::FADD)
38680b57cec5SDimitry Andric       return SDValue(); // Op got folded away.
38690b57cec5SDimitry Andric     if (!N0.hasOneUse())
38700b57cec5SDimitry Andric       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
38710b57cec5SDimitry Andric     return Res;
38720b57cec5SDimitry Andric   }
38730b57cec5SDimitry Andric   case ISD::FMUL:
38740b57cec5SDimitry Andric   case AMDGPUISD::FMUL_LEGACY: {
38750b57cec5SDimitry Andric     // (fneg (fmul x, y)) -> (fmul x, (fneg y))
38760b57cec5SDimitry Andric     // (fneg (fmul_legacy x, y)) -> (fmul_legacy x, (fneg y))
38770b57cec5SDimitry Andric     SDValue LHS = N0.getOperand(0);
38780b57cec5SDimitry Andric     SDValue RHS = N0.getOperand(1);
38790b57cec5SDimitry Andric 
38800b57cec5SDimitry Andric     if (LHS.getOpcode() == ISD::FNEG)
38810b57cec5SDimitry Andric       LHS = LHS.getOperand(0);
38820b57cec5SDimitry Andric     else if (RHS.getOpcode() == ISD::FNEG)
38830b57cec5SDimitry Andric       RHS = RHS.getOperand(0);
38840b57cec5SDimitry Andric     else
38850b57cec5SDimitry Andric       RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
38860b57cec5SDimitry Andric 
38870b57cec5SDimitry Andric     SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags());
38880b57cec5SDimitry Andric     if (Res.getOpcode() != Opc)
38890b57cec5SDimitry Andric       return SDValue(); // Op got folded away.
38900b57cec5SDimitry Andric     if (!N0.hasOneUse())
38910b57cec5SDimitry Andric       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
38920b57cec5SDimitry Andric     return Res;
38930b57cec5SDimitry Andric   }
38940b57cec5SDimitry Andric   case ISD::FMA:
38950b57cec5SDimitry Andric   case ISD::FMAD: {
3896e8d8bef9SDimitry Andric     // TODO: handle llvm.amdgcn.fma.legacy
38970b57cec5SDimitry Andric     if (!mayIgnoreSignedZero(N0))
38980b57cec5SDimitry Andric       return SDValue();
38990b57cec5SDimitry Andric 
39000b57cec5SDimitry Andric     // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
39010b57cec5SDimitry Andric     SDValue LHS = N0.getOperand(0);
39020b57cec5SDimitry Andric     SDValue MHS = N0.getOperand(1);
39030b57cec5SDimitry Andric     SDValue RHS = N0.getOperand(2);
39040b57cec5SDimitry Andric 
39050b57cec5SDimitry Andric     if (LHS.getOpcode() == ISD::FNEG)
39060b57cec5SDimitry Andric       LHS = LHS.getOperand(0);
39070b57cec5SDimitry Andric     else if (MHS.getOpcode() == ISD::FNEG)
39080b57cec5SDimitry Andric       MHS = MHS.getOperand(0);
39090b57cec5SDimitry Andric     else
39100b57cec5SDimitry Andric       MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS);
39110b57cec5SDimitry Andric 
39120b57cec5SDimitry Andric     if (RHS.getOpcode() != ISD::FNEG)
39130b57cec5SDimitry Andric       RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
39140b57cec5SDimitry Andric     else
39150b57cec5SDimitry Andric       RHS = RHS.getOperand(0);
39160b57cec5SDimitry Andric 
39170b57cec5SDimitry Andric     SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS);
39180b57cec5SDimitry Andric     if (Res.getOpcode() != Opc)
39190b57cec5SDimitry Andric       return SDValue(); // Op got folded away.
39200b57cec5SDimitry Andric     if (!N0.hasOneUse())
39210b57cec5SDimitry Andric       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
39220b57cec5SDimitry Andric     return Res;
39230b57cec5SDimitry Andric   }
39240b57cec5SDimitry Andric   case ISD::FMAXNUM:
39250b57cec5SDimitry Andric   case ISD::FMINNUM:
39260b57cec5SDimitry Andric   case ISD::FMAXNUM_IEEE:
39270b57cec5SDimitry Andric   case ISD::FMINNUM_IEEE:
39280b57cec5SDimitry Andric   case AMDGPUISD::FMAX_LEGACY:
39290b57cec5SDimitry Andric   case AMDGPUISD::FMIN_LEGACY: {
39300b57cec5SDimitry Andric     // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
39310b57cec5SDimitry Andric     // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y)
39320b57cec5SDimitry Andric     // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y)
39330b57cec5SDimitry Andric     // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y)
39340b57cec5SDimitry Andric 
39350b57cec5SDimitry Andric     SDValue LHS = N0.getOperand(0);
39360b57cec5SDimitry Andric     SDValue RHS = N0.getOperand(1);
39370b57cec5SDimitry Andric 
39380b57cec5SDimitry Andric     // 0 doesn't have a negated inline immediate.
39390b57cec5SDimitry Andric     // TODO: This constant check should be generalized to other operations.
39400b57cec5SDimitry Andric     if (isConstantCostlierToNegate(RHS))
39410b57cec5SDimitry Andric       return SDValue();
39420b57cec5SDimitry Andric 
39430b57cec5SDimitry Andric     SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
39440b57cec5SDimitry Andric     SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
39450b57cec5SDimitry Andric     unsigned Opposite = inverseMinMax(Opc);
39460b57cec5SDimitry Andric 
39470b57cec5SDimitry Andric     SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
39480b57cec5SDimitry Andric     if (Res.getOpcode() != Opposite)
39490b57cec5SDimitry Andric       return SDValue(); // Op got folded away.
39500b57cec5SDimitry Andric     if (!N0.hasOneUse())
39510b57cec5SDimitry Andric       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
39520b57cec5SDimitry Andric     return Res;
39530b57cec5SDimitry Andric   }
39540b57cec5SDimitry Andric   case AMDGPUISD::FMED3: {
39550b57cec5SDimitry Andric     SDValue Ops[3];
39560b57cec5SDimitry Andric     for (unsigned I = 0; I < 3; ++I)
39570b57cec5SDimitry Andric       Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags());
39580b57cec5SDimitry Andric 
39590b57cec5SDimitry Andric     SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
39600b57cec5SDimitry Andric     if (Res.getOpcode() != AMDGPUISD::FMED3)
39610b57cec5SDimitry Andric       return SDValue(); // Op got folded away.
3962e8d8bef9SDimitry Andric 
3963e8d8bef9SDimitry Andric     if (!N0.hasOneUse()) {
3964e8d8bef9SDimitry Andric       SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Res);
3965e8d8bef9SDimitry Andric       DAG.ReplaceAllUsesWith(N0, Neg);
3966e8d8bef9SDimitry Andric 
3967e8d8bef9SDimitry Andric       for (SDNode *U : Neg->uses())
3968e8d8bef9SDimitry Andric         DCI.AddToWorklist(U);
3969e8d8bef9SDimitry Andric     }
3970e8d8bef9SDimitry Andric 
39710b57cec5SDimitry Andric     return Res;
39720b57cec5SDimitry Andric   }
39730b57cec5SDimitry Andric   case ISD::FP_EXTEND:
39740b57cec5SDimitry Andric   case ISD::FTRUNC:
39750b57cec5SDimitry Andric   case ISD::FRINT:
39760b57cec5SDimitry Andric   case ISD::FNEARBYINT: // XXX - Should fround be handled?
39770b57cec5SDimitry Andric   case ISD::FSIN:
39780b57cec5SDimitry Andric   case ISD::FCANONICALIZE:
39790b57cec5SDimitry Andric   case AMDGPUISD::RCP:
39800b57cec5SDimitry Andric   case AMDGPUISD::RCP_LEGACY:
39810b57cec5SDimitry Andric   case AMDGPUISD::RCP_IFLAG:
39820b57cec5SDimitry Andric   case AMDGPUISD::SIN_HW: {
39830b57cec5SDimitry Andric     SDValue CvtSrc = N0.getOperand(0);
39840b57cec5SDimitry Andric     if (CvtSrc.getOpcode() == ISD::FNEG) {
39850b57cec5SDimitry Andric       // (fneg (fp_extend (fneg x))) -> (fp_extend x)
39860b57cec5SDimitry Andric       // (fneg (rcp (fneg x))) -> (rcp x)
39870b57cec5SDimitry Andric       return DAG.getNode(Opc, SL, VT, CvtSrc.getOperand(0));
39880b57cec5SDimitry Andric     }
39890b57cec5SDimitry Andric 
39900b57cec5SDimitry Andric     if (!N0.hasOneUse())
39910b57cec5SDimitry Andric       return SDValue();
39920b57cec5SDimitry Andric 
39930b57cec5SDimitry Andric     // (fneg (fp_extend x)) -> (fp_extend (fneg x))
39940b57cec5SDimitry Andric     // (fneg (rcp x)) -> (rcp (fneg x))
39950b57cec5SDimitry Andric     SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
39960b57cec5SDimitry Andric     return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags());
39970b57cec5SDimitry Andric   }
39980b57cec5SDimitry Andric   case ISD::FP_ROUND: {
39990b57cec5SDimitry Andric     SDValue CvtSrc = N0.getOperand(0);
40000b57cec5SDimitry Andric 
40010b57cec5SDimitry Andric     if (CvtSrc.getOpcode() == ISD::FNEG) {
40020b57cec5SDimitry Andric       // (fneg (fp_round (fneg x))) -> (fp_round x)
40030b57cec5SDimitry Andric       return DAG.getNode(ISD::FP_ROUND, SL, VT,
40040b57cec5SDimitry Andric                          CvtSrc.getOperand(0), N0.getOperand(1));
40050b57cec5SDimitry Andric     }
40060b57cec5SDimitry Andric 
40070b57cec5SDimitry Andric     if (!N0.hasOneUse())
40080b57cec5SDimitry Andric       return SDValue();
40090b57cec5SDimitry Andric 
40100b57cec5SDimitry Andric     // (fneg (fp_round x)) -> (fp_round (fneg x))
40110b57cec5SDimitry Andric     SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
40120b57cec5SDimitry Andric     return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1));
40130b57cec5SDimitry Andric   }
40140b57cec5SDimitry Andric   case ISD::FP16_TO_FP: {
40150b57cec5SDimitry Andric     // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal
40160b57cec5SDimitry Andric     // f16, but legalization of f16 fneg ends up pulling it out of the source.
40170b57cec5SDimitry Andric     // Put the fneg back as a legal source operation that can be matched later.
40180b57cec5SDimitry Andric     SDLoc SL(N);
40190b57cec5SDimitry Andric 
40200b57cec5SDimitry Andric     SDValue Src = N0.getOperand(0);
40210b57cec5SDimitry Andric     EVT SrcVT = Src.getValueType();
40220b57cec5SDimitry Andric 
40230b57cec5SDimitry Andric     // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000)
40240b57cec5SDimitry Andric     SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src,
40250b57cec5SDimitry Andric                                   DAG.getConstant(0x8000, SL, SrcVT));
40260b57cec5SDimitry Andric     return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);
40270b57cec5SDimitry Andric   }
40280b57cec5SDimitry Andric   default:
40290b57cec5SDimitry Andric     return SDValue();
40300b57cec5SDimitry Andric   }
40310b57cec5SDimitry Andric }
40320b57cec5SDimitry Andric 
40330b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N,
40340b57cec5SDimitry Andric                                                  DAGCombinerInfo &DCI) const {
40350b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
40360b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
40370b57cec5SDimitry Andric 
40380b57cec5SDimitry Andric   if (!N0.hasOneUse())
40390b57cec5SDimitry Andric     return SDValue();
40400b57cec5SDimitry Andric 
40410b57cec5SDimitry Andric   switch (N0.getOpcode()) {
40420b57cec5SDimitry Andric   case ISD::FP16_TO_FP: {
40430b57cec5SDimitry Andric     assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal");
40440b57cec5SDimitry Andric     SDLoc SL(N);
40450b57cec5SDimitry Andric     SDValue Src = N0.getOperand(0);
40460b57cec5SDimitry Andric     EVT SrcVT = Src.getValueType();
40470b57cec5SDimitry Andric 
40480b57cec5SDimitry Andric     // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff)
40490b57cec5SDimitry Andric     SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src,
40500b57cec5SDimitry Andric                                   DAG.getConstant(0x7fff, SL, SrcVT));
40510b57cec5SDimitry Andric     return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs);
40520b57cec5SDimitry Andric   }
40530b57cec5SDimitry Andric   default:
40540b57cec5SDimitry Andric     return SDValue();
40550b57cec5SDimitry Andric   }
40560b57cec5SDimitry Andric }
40570b57cec5SDimitry Andric 
40580b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
40590b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
40600b57cec5SDimitry Andric   const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
40610b57cec5SDimitry Andric   if (!CFP)
40620b57cec5SDimitry Andric     return SDValue();
40630b57cec5SDimitry Andric 
40640b57cec5SDimitry Andric   // XXX - Should this flush denormals?
40650b57cec5SDimitry Andric   const APFloat &Val = CFP->getValueAPF();
40660b57cec5SDimitry Andric   APFloat One(Val.getSemantics(), "1.0");
40670b57cec5SDimitry Andric   return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
40680b57cec5SDimitry Andric }
40690b57cec5SDimitry Andric 
/// Target-specific DAG combine entry point.
///
/// Dispatches on the opcode of \p N to the per-opcode combine helpers, and
/// handles ISD::BITCAST and AMDGPUISD::BFE_I32 / BFE_U32 inline.
///
/// \param N   The node to try to simplify.
/// \param DCI Combiner state (DAG, combine level, worklist access).
/// \returns A replacement value, or an empty SDValue when no combine applies.
40700b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
40710b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
40720b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
40730b57cec5SDimitry Andric   SDLoc DL(N);
40740b57cec5SDimitry Andric 
40750b57cec5SDimitry Andric   switch(N->getOpcode()) {
40760b57cec5SDimitry Andric   default:
40770b57cec5SDimitry Andric     break;
40780b57cec5SDimitry Andric   case ISD::BITCAST: {
40790b57cec5SDimitry Andric     EVT DestVT = N->getValueType(0);
40800b57cec5SDimitry Andric 
40810b57cec5SDimitry Andric     // Push casts through vector builds. This helps avoid emitting a large
40820b57cec5SDimitry Andric     // number of copies when materializing floating point vector constants.
40830b57cec5SDimitry Andric     //
40840b57cec5SDimitry Andric     // vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) =>
40850b57cec5SDimitry Andric     //   vnt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y))
40860b57cec5SDimitry Andric     if (DestVT.isVector()) {
40870b57cec5SDimitry Andric       SDValue Src = N->getOperand(0);
40880b57cec5SDimitry Andric       if (Src.getOpcode() == ISD::BUILD_VECTOR) {
40890b57cec5SDimitry Andric         EVT SrcVT = Src.getValueType();
40900b57cec5SDimitry Andric         unsigned NElts = DestVT.getVectorNumElements();
40910b57cec5SDimitry Andric 
        // Only applies when source and destination have the same element
        // count, so each element can be bitcast individually.
40920b57cec5SDimitry Andric         if (SrcVT.getVectorNumElements() == NElts) {
40930b57cec5SDimitry Andric           EVT DestEltVT = DestVT.getVectorElementType();
40940b57cec5SDimitry Andric 
40950b57cec5SDimitry Andric           SmallVector<SDValue, 8> CastedElts;
40960b57cec5SDimitry Andric           SDLoc SL(N);
40970b57cec5SDimitry Andric           for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) {
40980b57cec5SDimitry Andric             SDValue Elt = Src.getOperand(I);
40990b57cec5SDimitry Andric             CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt));
41000b57cec5SDimitry Andric           }
41010b57cec5SDimitry Andric 
41020b57cec5SDimitry Andric           return DAG.getBuildVector(DestVT, SL, CastedElts);
41030b57cec5SDimitry Andric         }
41040b57cec5SDimitry Andric       }
41050b57cec5SDimitry Andric     }
41060b57cec5SDimitry Andric 
    // The constant folds below are restricted to 64-bit vector destinations.
4107e8d8bef9SDimitry Andric     if (DestVT.getSizeInBits() != 64 || !DestVT.isVector())
41080b57cec5SDimitry Andric       break;
41090b57cec5SDimitry Andric 
41100b57cec5SDimitry Andric     // Fold bitcasts of constants.
41110b57cec5SDimitry Andric     //
41120b57cec5SDimitry Andric     // v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k)
41130b57cec5SDimitry Andric     // TODO: Generalize and move to DAGCombiner
41140b57cec5SDimitry Andric     SDValue Src = N->getOperand(0);
41150b57cec5SDimitry Andric     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) {
41160b57cec5SDimitry Andric       SDLoc SL(N);
41170b57cec5SDimitry Andric       uint64_t CVal = C->getZExtValue();
41180b57cec5SDimitry Andric       SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
41190b57cec5SDimitry Andric                                DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
41200b57cec5SDimitry Andric                                DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
41210b57cec5SDimitry Andric       return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
41220b57cec5SDimitry Andric     }
41230b57cec5SDimitry Andric 
    // Same fold for a floating-point constant, using its raw bit pattern.
41240b57cec5SDimitry Andric     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) {
41250b57cec5SDimitry Andric       const APInt &Val = C->getValueAPF().bitcastToAPInt();
41260b57cec5SDimitry Andric       SDLoc SL(N);
41270b57cec5SDimitry Andric       uint64_t CVal = Val.getZExtValue();
41280b57cec5SDimitry Andric       SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
41290b57cec5SDimitry Andric                                 DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
41300b57cec5SDimitry Andric                                 DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
41310b57cec5SDimitry Andric 
41320b57cec5SDimitry Andric       return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec);
41330b57cec5SDimitry Andric     }
41340b57cec5SDimitry Andric 
41350b57cec5SDimitry Andric     break;
41360b57cec5SDimitry Andric   }
  // The three shift combines only run once the combine level has reached
  // AfterLegalizeDAG.
41370b57cec5SDimitry Andric   case ISD::SHL: {
41380b57cec5SDimitry Andric     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
41390b57cec5SDimitry Andric       break;
41400b57cec5SDimitry Andric 
41410b57cec5SDimitry Andric     return performShlCombine(N, DCI);
41420b57cec5SDimitry Andric   }
41430b57cec5SDimitry Andric   case ISD::SRL: {
41440b57cec5SDimitry Andric     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
41450b57cec5SDimitry Andric       break;
41460b57cec5SDimitry Andric 
41470b57cec5SDimitry Andric     return performSrlCombine(N, DCI);
41480b57cec5SDimitry Andric   }
41490b57cec5SDimitry Andric   case ISD::SRA: {
41500b57cec5SDimitry Andric     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
41510b57cec5SDimitry Andric       break;
41520b57cec5SDimitry Andric 
41530b57cec5SDimitry Andric     return performSraCombine(N, DCI);
41540b57cec5SDimitry Andric   }
41550b57cec5SDimitry Andric   case ISD::TRUNCATE:
41560b57cec5SDimitry Andric     return performTruncateCombine(N, DCI);
41570b57cec5SDimitry Andric   case ISD::MUL:
41580b57cec5SDimitry Andric     return performMulCombine(N, DCI);
41594824e7fdSDimitry Andric   case ISD::SMUL_LOHI:
41604824e7fdSDimitry Andric   case ISD::UMUL_LOHI:
41614824e7fdSDimitry Andric     return performMulLoHiCombine(N, DCI);
41620b57cec5SDimitry Andric   case ISD::MULHS:
41630b57cec5SDimitry Andric     return performMulhsCombine(N, DCI);
41640b57cec5SDimitry Andric   case ISD::MULHU:
41650b57cec5SDimitry Andric     return performMulhuCombine(N, DCI);
41660b57cec5SDimitry Andric   case AMDGPUISD::MUL_I24:
41670b57cec5SDimitry Andric   case AMDGPUISD::MUL_U24:
41680b57cec5SDimitry Andric   case AMDGPUISD::MULHI_I24:
4169fe6060f1SDimitry Andric   case AMDGPUISD::MULHI_U24:
4170fe6060f1SDimitry Andric     return simplifyMul24(N, DCI);
41710b57cec5SDimitry Andric   case ISD::SELECT:
41720b57cec5SDimitry Andric     return performSelectCombine(N, DCI);
41730b57cec5SDimitry Andric   case ISD::FNEG:
41740b57cec5SDimitry Andric     return performFNegCombine(N, DCI);
41750b57cec5SDimitry Andric   case ISD::FABS:
41760b57cec5SDimitry Andric     return performFAbsCombine(N, DCI);
41770b57cec5SDimitry Andric   case AMDGPUISD::BFE_I32:
41780b57cec5SDimitry Andric   case AMDGPUISD::BFE_U32: {
    // Bitfield extract: operand 0 is the source, operand 1 the bit offset and
    // operand 2 the field width. All folds below need a constant width, and
    // all but the zero-width fold also need a constant offset.
41790b57cec5SDimitry Andric     assert(!N->getValueType(0).isVector() &&
41800b57cec5SDimitry Andric            "Vector handling of BFE not implemented");
41810b57cec5SDimitry Andric     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
41820b57cec5SDimitry Andric     if (!Width)
41830b57cec5SDimitry Andric       break;
41840b57cec5SDimitry Andric 
    // Only the low 5 bits of the width are used; a zero width folds to 0.
41850b57cec5SDimitry Andric     uint32_t WidthVal = Width->getZExtValue() & 0x1f;
41860b57cec5SDimitry Andric     if (WidthVal == 0)
41870b57cec5SDimitry Andric       return DAG.getConstant(0, DL, MVT::i32);
41880b57cec5SDimitry Andric 
41890b57cec5SDimitry Andric     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
41900b57cec5SDimitry Andric     if (!Offset)
41910b57cec5SDimitry Andric       break;
41920b57cec5SDimitry Andric 
41930b57cec5SDimitry Andric     SDValue BitsFrom = N->getOperand(0);
    // As with the width, only the low 5 bits of the offset are used.
41940b57cec5SDimitry Andric     uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
41950b57cec5SDimitry Andric 
41960b57cec5SDimitry Andric     bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
41970b57cec5SDimitry Andric 
41980b57cec5SDimitry Andric     if (OffsetVal == 0) {
41990b57cec5SDimitry Andric       // This is already sign / zero extended, so try to fold away extra BFEs.
      // NOTE(review): for BFE_U32 this compares against the sign-bit count,
      // which by itself does not show that the high bits are zero (they could
      // be all ones) -- confirm this fold is safe for the unsigned case.
42000b57cec5SDimitry Andric       unsigned SignBits =  Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
42010b57cec5SDimitry Andric 
42020b57cec5SDimitry Andric       unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
42030b57cec5SDimitry Andric       if (OpSignBits >= SignBits)
42040b57cec5SDimitry Andric         return BitsFrom;
42050b57cec5SDimitry Andric 
42060b57cec5SDimitry Andric       EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
42070b57cec5SDimitry Andric       if (Signed) {
42080b57cec5SDimitry Andric         // This is a sign_extend_inreg. Replace it to take advantage of existing
42090b57cec5SDimitry Andric         // DAG Combines. If not eliminated, we will match back to BFE during
42100b57cec5SDimitry Andric         // selection.
42110b57cec5SDimitry Andric 
42120b57cec5SDimitry Andric         // TODO: The sext_inreg of extended types ends, although we could
42130b57cec5SDimitry Andric         // handle them in a single BFE.
42140b57cec5SDimitry Andric         return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
42150b57cec5SDimitry Andric                            DAG.getValueType(SmallVT));
42160b57cec5SDimitry Andric       }
42170b57cec5SDimitry Andric 
      // Unsigned, offset 0: express the extract as a zero-extend-in-register.
42180b57cec5SDimitry Andric       return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
42190b57cec5SDimitry Andric     }
42200b57cec5SDimitry Andric 
    // Constant source with constant offset and width: fold the whole extract.
42210b57cec5SDimitry Andric     if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
42220b57cec5SDimitry Andric       if (Signed) {
42230b57cec5SDimitry Andric         return constantFoldBFE<int32_t>(DAG,
42240b57cec5SDimitry Andric                                         CVal->getSExtValue(),
42250b57cec5SDimitry Andric                                         OffsetVal,
42260b57cec5SDimitry Andric                                         WidthVal,
42270b57cec5SDimitry Andric                                         DL);
42280b57cec5SDimitry Andric       }
42290b57cec5SDimitry Andric 
42300b57cec5SDimitry Andric       return constantFoldBFE<uint32_t>(DAG,
42310b57cec5SDimitry Andric                                        CVal->getZExtValue(),
42320b57cec5SDimitry Andric                                        OffsetVal,
42330b57cec5SDimitry Andric                                        WidthVal,
42340b57cec5SDimitry Andric                                        DL);
42350b57cec5SDimitry Andric     }
42360b57cec5SDimitry Andric 
    // The field reaches bit 31, so a right shift by the offset (arithmetic for
    // the signed form, logical for the unsigned form) replaces the BFE. The
    // offset-16 / width-16 case is excluded when the subtarget has SDWA.
42370b57cec5SDimitry Andric     if ((OffsetVal + WidthVal) >= 32 &&
42380b57cec5SDimitry Andric         !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) {
42390b57cec5SDimitry Andric       SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
42400b57cec5SDimitry Andric       return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
42410b57cec5SDimitry Andric                          BitsFrom, ShiftVal);
42420b57cec5SDimitry Andric     }
42430b57cec5SDimitry Andric 
    // Only bits [OffsetVal, OffsetVal + WidthVal) of the source are demanded;
    // when this node is the source's sole user, try to simplify the source.
42440b57cec5SDimitry Andric     if (BitsFrom.hasOneUse()) {
42450b57cec5SDimitry Andric       APInt Demanded = APInt::getBitsSet(32,
42460b57cec5SDimitry Andric                                          OffsetVal,
42470b57cec5SDimitry Andric                                          OffsetVal + WidthVal);
42480b57cec5SDimitry Andric 
42490b57cec5SDimitry Andric       KnownBits Known;
42500b57cec5SDimitry Andric       TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
42510b57cec5SDimitry Andric                                             !DCI.isBeforeLegalizeOps());
42520b57cec5SDimitry Andric       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
42530b57cec5SDimitry Andric       if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) ||
42540b57cec5SDimitry Andric           TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) {
42550b57cec5SDimitry Andric         DCI.CommitTargetLoweringOpt(TLO);
42560b57cec5SDimitry Andric       }
42570b57cec5SDimitry Andric     }
42580b57cec5SDimitry Andric 
42590b57cec5SDimitry Andric     break;
42600b57cec5SDimitry Andric   }
42610b57cec5SDimitry Andric   case ISD::LOAD:
42620b57cec5SDimitry Andric     return performLoadCombine(N, DCI);
42630b57cec5SDimitry Andric   case ISD::STORE:
42640b57cec5SDimitry Andric     return performStoreCombine(N, DCI);
42650b57cec5SDimitry Andric   case AMDGPUISD::RCP:
42660b57cec5SDimitry Andric   case AMDGPUISD::RCP_IFLAG:
42670b57cec5SDimitry Andric     return performRcpCombine(N, DCI);
42680b57cec5SDimitry Andric   case ISD::AssertZext:
42690b57cec5SDimitry Andric   case ISD::AssertSext:
42700b57cec5SDimitry Andric     return performAssertSZExtCombine(N, DCI);
42718bcb0991SDimitry Andric   case ISD::INTRINSIC_WO_CHAIN:
42728bcb0991SDimitry Andric     return performIntrinsicWOChainCombine(N, DCI);
42730b57cec5SDimitry Andric   }
  // Reached via `break`: no combine was performed.
42740b57cec5SDimitry Andric   return SDValue();
42750b57cec5SDimitry Andric }
42760b57cec5SDimitry Andric 
42770b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
42780b57cec5SDimitry Andric // Helper functions
42790b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
42800b57cec5SDimitry Andric 
42810b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
42820b57cec5SDimitry Andric                                                    const TargetRegisterClass *RC,
42835ffd83dbSDimitry Andric                                                    Register Reg, EVT VT,
42840b57cec5SDimitry Andric                                                    const SDLoc &SL,
42850b57cec5SDimitry Andric                                                    bool RawReg) const {
42860b57cec5SDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
42870b57cec5SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
42885ffd83dbSDimitry Andric   Register VReg;
42890b57cec5SDimitry Andric 
42900b57cec5SDimitry Andric   if (!MRI.isLiveIn(Reg)) {
42910b57cec5SDimitry Andric     VReg = MRI.createVirtualRegister(RC);
42920b57cec5SDimitry Andric     MRI.addLiveIn(Reg, VReg);
42930b57cec5SDimitry Andric   } else {
42940b57cec5SDimitry Andric     VReg = MRI.getLiveInVirtReg(Reg);
42950b57cec5SDimitry Andric   }
42960b57cec5SDimitry Andric 
42970b57cec5SDimitry Andric   if (RawReg)
42980b57cec5SDimitry Andric     return DAG.getRegister(VReg, VT);
42990b57cec5SDimitry Andric 
43000b57cec5SDimitry Andric   return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT);
43010b57cec5SDimitry Andric }
43020b57cec5SDimitry Andric 
43038bcb0991SDimitry Andric // This may be called multiple times, and nothing prevents creating multiple
43048bcb0991SDimitry Andric // objects at the same offset. See if we already defined this object.
43058bcb0991SDimitry Andric static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size,
43068bcb0991SDimitry Andric                                        int64_t Offset) {
43078bcb0991SDimitry Andric   for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
43088bcb0991SDimitry Andric     if (MFI.getObjectOffset(I) == Offset) {
43098bcb0991SDimitry Andric       assert(MFI.getObjectSize(I) == Size);
43108bcb0991SDimitry Andric       return I;
43118bcb0991SDimitry Andric     }
43128bcb0991SDimitry Andric   }
43138bcb0991SDimitry Andric 
43148bcb0991SDimitry Andric   return MFI.CreateFixedObject(Size, Offset, true);
43158bcb0991SDimitry Andric }
43168bcb0991SDimitry Andric 
43170b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
43180b57cec5SDimitry Andric                                                   EVT VT,
43190b57cec5SDimitry Andric                                                   const SDLoc &SL,
43200b57cec5SDimitry Andric                                                   int64_t Offset) const {
43210b57cec5SDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
43220b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
43238bcb0991SDimitry Andric   int FI = getOrCreateFixedStackObject(MFI, VT.getStoreSize(), Offset);
43240b57cec5SDimitry Andric 
43250b57cec5SDimitry Andric   auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset);
43260b57cec5SDimitry Andric   SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32);
43270b57cec5SDimitry Andric 
4328e8d8bef9SDimitry Andric   return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, Align(4),
43290b57cec5SDimitry Andric                      MachineMemOperand::MODereferenceable |
43300b57cec5SDimitry Andric                          MachineMemOperand::MOInvariant);
43310b57cec5SDimitry Andric }
43320b57cec5SDimitry Andric 
43330b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
43340b57cec5SDimitry Andric                                                    const SDLoc &SL,
43350b57cec5SDimitry Andric                                                    SDValue Chain,
43360b57cec5SDimitry Andric                                                    SDValue ArgVal,
43370b57cec5SDimitry Andric                                                    int64_t Offset) const {
43380b57cec5SDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
43390b57cec5SDimitry Andric   MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
4340fe6060f1SDimitry Andric   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
43410b57cec5SDimitry Andric 
43420b57cec5SDimitry Andric   SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
4343fe6060f1SDimitry Andric   // Stores to the argument stack area are relative to the stack pointer.
4344fe6060f1SDimitry Andric   SDValue SP =
4345fe6060f1SDimitry Andric       DAG.getCopyFromReg(Chain, SL, Info->getStackPtrOffsetReg(), MVT::i32);
4346fe6060f1SDimitry Andric   Ptr = DAG.getNode(ISD::ADD, SL, MVT::i32, SP, Ptr);
4347e8d8bef9SDimitry Andric   SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, Align(4),
43480b57cec5SDimitry Andric                                MachineMemOperand::MODereferenceable);
43490b57cec5SDimitry Andric   return Store;
43500b57cec5SDimitry Andric }
43510b57cec5SDimitry Andric 
43520b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
43530b57cec5SDimitry Andric                                              const TargetRegisterClass *RC,
43540b57cec5SDimitry Andric                                              EVT VT, const SDLoc &SL,
43550b57cec5SDimitry Andric                                              const ArgDescriptor &Arg) const {
43560b57cec5SDimitry Andric   assert(Arg && "Attempting to load missing argument");
43570b57cec5SDimitry Andric 
43580b57cec5SDimitry Andric   SDValue V = Arg.isRegister() ?
43590b57cec5SDimitry Andric     CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL) :
43600b57cec5SDimitry Andric     loadStackInputValue(DAG, VT, SL, Arg.getStackOffset());
43610b57cec5SDimitry Andric 
43620b57cec5SDimitry Andric   if (!Arg.isMasked())
43630b57cec5SDimitry Andric     return V;
43640b57cec5SDimitry Andric 
43650b57cec5SDimitry Andric   unsigned Mask = Arg.getMask();
43660b57cec5SDimitry Andric   unsigned Shift = countTrailingZeros<unsigned>(Mask);
43670b57cec5SDimitry Andric   V = DAG.getNode(ISD::SRL, SL, VT, V,
43680b57cec5SDimitry Andric                   DAG.getShiftAmountConstant(Shift, VT, SL));
43690b57cec5SDimitry Andric   return DAG.getNode(ISD::AND, SL, VT, V,
43700b57cec5SDimitry Andric                      DAG.getConstant(Mask >> Shift, SL, VT));
43710b57cec5SDimitry Andric }
43720b57cec5SDimitry Andric 
43730b57cec5SDimitry Andric uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
43740b57cec5SDimitry Andric     const MachineFunction &MF, const ImplicitParameter Param) const {
43750b57cec5SDimitry Andric   const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
43760b57cec5SDimitry Andric   const AMDGPUSubtarget &ST =
43770b57cec5SDimitry Andric       AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction());
43780b57cec5SDimitry Andric   unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction());
43798bcb0991SDimitry Andric   const Align Alignment = ST.getAlignmentForImplicitArgPtr();
43800b57cec5SDimitry Andric   uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) +
43810b57cec5SDimitry Andric                        ExplicitArgOffset;
43820b57cec5SDimitry Andric   switch (Param) {
43830b57cec5SDimitry Andric   case GRID_DIM:
43840b57cec5SDimitry Andric     return ArgOffset;
43850b57cec5SDimitry Andric   case GRID_OFFSET:
43860b57cec5SDimitry Andric     return ArgOffset + 4;
43870b57cec5SDimitry Andric   }
43880b57cec5SDimitry Andric   llvm_unreachable("unexpected implicit parameter type");
43890b57cec5SDimitry Andric }
43900b57cec5SDimitry Andric 
// Expands to a switch case for AMDGPUISD::<node> that returns the enumerator's
// name as a string literal (via the preprocessor's # stringification).
43910b57cec5SDimitry Andric #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
43920b57cec5SDimitry Andric 
43930b57cec5SDimitry Andric const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
43940b57cec5SDimitry Andric   switch ((AMDGPUISD::NodeType)Opcode) {
43950b57cec5SDimitry Andric   case AMDGPUISD::FIRST_NUMBER: break;
43960b57cec5SDimitry Andric   // AMDIL DAG nodes
43970b57cec5SDimitry Andric   NODE_NAME_CASE(UMUL);
43980b57cec5SDimitry Andric   NODE_NAME_CASE(BRANCH_COND);
43990b57cec5SDimitry Andric 
44000b57cec5SDimitry Andric   // AMDGPU DAG nodes
44010b57cec5SDimitry Andric   NODE_NAME_CASE(IF)
44020b57cec5SDimitry Andric   NODE_NAME_CASE(ELSE)
44030b57cec5SDimitry Andric   NODE_NAME_CASE(LOOP)
44040b57cec5SDimitry Andric   NODE_NAME_CASE(CALL)
44050b57cec5SDimitry Andric   NODE_NAME_CASE(TC_RETURN)
44060b57cec5SDimitry Andric   NODE_NAME_CASE(TRAP)
44070b57cec5SDimitry Andric   NODE_NAME_CASE(RET_FLAG)
4408349cc55cSDimitry Andric   NODE_NAME_CASE(RET_GFX_FLAG)
44090b57cec5SDimitry Andric   NODE_NAME_CASE(RETURN_TO_EPILOG)
44100b57cec5SDimitry Andric   NODE_NAME_CASE(ENDPGM)
44110b57cec5SDimitry Andric   NODE_NAME_CASE(DWORDADDR)
44120b57cec5SDimitry Andric   NODE_NAME_CASE(FRACT)
44130b57cec5SDimitry Andric   NODE_NAME_CASE(SETCC)
44140b57cec5SDimitry Andric   NODE_NAME_CASE(SETREG)
44158bcb0991SDimitry Andric   NODE_NAME_CASE(DENORM_MODE)
44160b57cec5SDimitry Andric   NODE_NAME_CASE(FMA_W_CHAIN)
44170b57cec5SDimitry Andric   NODE_NAME_CASE(FMUL_W_CHAIN)
44180b57cec5SDimitry Andric   NODE_NAME_CASE(CLAMP)
44190b57cec5SDimitry Andric   NODE_NAME_CASE(COS_HW)
44200b57cec5SDimitry Andric   NODE_NAME_CASE(SIN_HW)
44210b57cec5SDimitry Andric   NODE_NAME_CASE(FMAX_LEGACY)
44220b57cec5SDimitry Andric   NODE_NAME_CASE(FMIN_LEGACY)
44230b57cec5SDimitry Andric   NODE_NAME_CASE(FMAX3)
44240b57cec5SDimitry Andric   NODE_NAME_CASE(SMAX3)
44250b57cec5SDimitry Andric   NODE_NAME_CASE(UMAX3)
44260b57cec5SDimitry Andric   NODE_NAME_CASE(FMIN3)
44270b57cec5SDimitry Andric   NODE_NAME_CASE(SMIN3)
44280b57cec5SDimitry Andric   NODE_NAME_CASE(UMIN3)
44290b57cec5SDimitry Andric   NODE_NAME_CASE(FMED3)
44300b57cec5SDimitry Andric   NODE_NAME_CASE(SMED3)
44310b57cec5SDimitry Andric   NODE_NAME_CASE(UMED3)
44320b57cec5SDimitry Andric   NODE_NAME_CASE(FDOT2)
44330b57cec5SDimitry Andric   NODE_NAME_CASE(URECIP)
44340b57cec5SDimitry Andric   NODE_NAME_CASE(DIV_SCALE)
44350b57cec5SDimitry Andric   NODE_NAME_CASE(DIV_FMAS)
44360b57cec5SDimitry Andric   NODE_NAME_CASE(DIV_FIXUP)
44370b57cec5SDimitry Andric   NODE_NAME_CASE(FMAD_FTZ)
44380b57cec5SDimitry Andric   NODE_NAME_CASE(RCP)
44390b57cec5SDimitry Andric   NODE_NAME_CASE(RSQ)
44400b57cec5SDimitry Andric   NODE_NAME_CASE(RCP_LEGACY)
44410b57cec5SDimitry Andric   NODE_NAME_CASE(RCP_IFLAG)
44420b57cec5SDimitry Andric   NODE_NAME_CASE(FMUL_LEGACY)
44430b57cec5SDimitry Andric   NODE_NAME_CASE(RSQ_CLAMP)
44440b57cec5SDimitry Andric   NODE_NAME_CASE(LDEXP)
44450b57cec5SDimitry Andric   NODE_NAME_CASE(FP_CLASS)
44460b57cec5SDimitry Andric   NODE_NAME_CASE(DOT4)
44470b57cec5SDimitry Andric   NODE_NAME_CASE(CARRY)
44480b57cec5SDimitry Andric   NODE_NAME_CASE(BORROW)
44490b57cec5SDimitry Andric   NODE_NAME_CASE(BFE_U32)
44500b57cec5SDimitry Andric   NODE_NAME_CASE(BFE_I32)
44510b57cec5SDimitry Andric   NODE_NAME_CASE(BFI)
44520b57cec5SDimitry Andric   NODE_NAME_CASE(BFM)
44530b57cec5SDimitry Andric   NODE_NAME_CASE(FFBH_U32)
44540b57cec5SDimitry Andric   NODE_NAME_CASE(FFBH_I32)
44550b57cec5SDimitry Andric   NODE_NAME_CASE(FFBL_B32)
44560b57cec5SDimitry Andric   NODE_NAME_CASE(MUL_U24)
44570b57cec5SDimitry Andric   NODE_NAME_CASE(MUL_I24)
44580b57cec5SDimitry Andric   NODE_NAME_CASE(MULHI_U24)
44590b57cec5SDimitry Andric   NODE_NAME_CASE(MULHI_I24)
44600b57cec5SDimitry Andric   NODE_NAME_CASE(MAD_U24)
44610b57cec5SDimitry Andric   NODE_NAME_CASE(MAD_I24)
44620b57cec5SDimitry Andric   NODE_NAME_CASE(MAD_I64_I32)
44630b57cec5SDimitry Andric   NODE_NAME_CASE(MAD_U64_U32)
44640b57cec5SDimitry Andric   NODE_NAME_CASE(PERM)
44650b57cec5SDimitry Andric   NODE_NAME_CASE(TEXTURE_FETCH)
44660b57cec5SDimitry Andric   NODE_NAME_CASE(R600_EXPORT)
44670b57cec5SDimitry Andric   NODE_NAME_CASE(CONST_ADDRESS)
44680b57cec5SDimitry Andric   NODE_NAME_CASE(REGISTER_LOAD)
44690b57cec5SDimitry Andric   NODE_NAME_CASE(REGISTER_STORE)
44700b57cec5SDimitry Andric   NODE_NAME_CASE(SAMPLE)
44710b57cec5SDimitry Andric   NODE_NAME_CASE(SAMPLEB)
44720b57cec5SDimitry Andric   NODE_NAME_CASE(SAMPLED)
44730b57cec5SDimitry Andric   NODE_NAME_CASE(SAMPLEL)
44740b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_F32_UBYTE0)
44750b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_F32_UBYTE1)
44760b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_F32_UBYTE2)
44770b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_F32_UBYTE3)
44780b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_PKRTZ_F16_F32)
44790b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_PKNORM_I16_F32)
44800b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_PKNORM_U16_F32)
44810b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_PK_I16_I32)
44820b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_PK_U16_U32)
44830b57cec5SDimitry Andric   NODE_NAME_CASE(FP_TO_FP16)
44840b57cec5SDimitry Andric   NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
44850b57cec5SDimitry Andric   NODE_NAME_CASE(CONST_DATA_PTR)
44860b57cec5SDimitry Andric   NODE_NAME_CASE(PC_ADD_REL_OFFSET)
44870b57cec5SDimitry Andric   NODE_NAME_CASE(LDS)
44880b57cec5SDimitry Andric   NODE_NAME_CASE(DUMMY_CHAIN)
44890b57cec5SDimitry Andric   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
44900b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_HI)
44910b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_LO)
44920b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_HI_I8)
44930b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_HI_U8)
44940b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_LO_I8)
44950b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_LO_U8)
44960b57cec5SDimitry Andric   NODE_NAME_CASE(STORE_MSKOR)
44970b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_CONSTANT)
44980b57cec5SDimitry Andric   NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
44990b57cec5SDimitry Andric   NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
45000b57cec5SDimitry Andric   NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
45010b57cec5SDimitry Andric   NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
45020b57cec5SDimitry Andric   NODE_NAME_CASE(DS_ORDERED_COUNT)
45030b57cec5SDimitry Andric   NODE_NAME_CASE(ATOMIC_CMP_SWAP)
45040b57cec5SDimitry Andric   NODE_NAME_CASE(ATOMIC_INC)
45050b57cec5SDimitry Andric   NODE_NAME_CASE(ATOMIC_DEC)
45060b57cec5SDimitry Andric   NODE_NAME_CASE(ATOMIC_LOAD_FMIN)
45070b57cec5SDimitry Andric   NODE_NAME_CASE(ATOMIC_LOAD_FMAX)
45080b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD)
45090b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_UBYTE)
45100b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_USHORT)
45110b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_BYTE)
45120b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_SHORT)
45130b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
45140b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)
45150b57cec5SDimitry Andric   NODE_NAME_CASE(SBUFFER_LOAD)
45160b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_STORE)
45170b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_STORE_BYTE)
45180b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_STORE_SHORT)
45190b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_STORE_FORMAT)
45200b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16)
45210b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_SWAP)
45220b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_ADD)
45230b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_SUB)
45240b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_SMIN)
45250b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_UMIN)
45260b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_SMAX)
45270b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_UMAX)
45280b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_AND)
45290b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_OR)
45300b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
45318bcb0991SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_INC)
45328bcb0991SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_DEC)
45330b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
45345ffd83dbSDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_CSUB)
45350b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
4536fe6060f1SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_FMIN)
4537fe6060f1SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_FMAX)
45380b57cec5SDimitry Andric 
45390b57cec5SDimitry Andric   case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
45400b57cec5SDimitry Andric   }
45410b57cec5SDimitry Andric   return nullptr;
45420b57cec5SDimitry Andric }
45430b57cec5SDimitry Andric 
45440b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand,
45450b57cec5SDimitry Andric                                               SelectionDAG &DAG, int Enabled,
45460b57cec5SDimitry Andric                                               int &RefinementSteps,
45470b57cec5SDimitry Andric                                               bool &UseOneConstNR,
45480b57cec5SDimitry Andric                                               bool Reciprocal) const {
45490b57cec5SDimitry Andric   EVT VT = Operand.getValueType();
45500b57cec5SDimitry Andric 
45510b57cec5SDimitry Andric   if (VT == MVT::f32) {
45520b57cec5SDimitry Andric     RefinementSteps = 0;
45530b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
45540b57cec5SDimitry Andric   }
45550b57cec5SDimitry Andric 
45560b57cec5SDimitry Andric   // TODO: There is also f64 rsq instruction, but the documentation is less
45570b57cec5SDimitry Andric   // clear on its precision.
45580b57cec5SDimitry Andric 
45590b57cec5SDimitry Andric   return SDValue();
45600b57cec5SDimitry Andric }
45610b57cec5SDimitry Andric 
45620b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
45630b57cec5SDimitry Andric                                                SelectionDAG &DAG, int Enabled,
45640b57cec5SDimitry Andric                                                int &RefinementSteps) const {
45650b57cec5SDimitry Andric   EVT VT = Operand.getValueType();
45660b57cec5SDimitry Andric 
45670b57cec5SDimitry Andric   if (VT == MVT::f32) {
45680b57cec5SDimitry Andric     // Reciprocal, < 1 ulp error.
45690b57cec5SDimitry Andric     //
45700b57cec5SDimitry Andric     // This reciprocal approximation converges to < 0.5 ulp error with one
45710b57cec5SDimitry Andric     // newton rhapson performed with two fused multiple adds (FMAs).
45720b57cec5SDimitry Andric 
45730b57cec5SDimitry Andric     RefinementSteps = 0;
45740b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
45750b57cec5SDimitry Andric   }
45760b57cec5SDimitry Andric 
45770b57cec5SDimitry Andric   // TODO: There is also f64 rcp instruction, but the documentation is less
45780b57cec5SDimitry Andric   // clear on its precision.
45790b57cec5SDimitry Andric 
45800b57cec5SDimitry Andric   return SDValue();
45810b57cec5SDimitry Andric }
45820b57cec5SDimitry Andric 
45830b57cec5SDimitry Andric void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
45840b57cec5SDimitry Andric     const SDValue Op, KnownBits &Known,
45850b57cec5SDimitry Andric     const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
45860b57cec5SDimitry Andric 
45870b57cec5SDimitry Andric   Known.resetAll(); // Don't know anything.
45880b57cec5SDimitry Andric 
45890b57cec5SDimitry Andric   unsigned Opc = Op.getOpcode();
45900b57cec5SDimitry Andric 
45910b57cec5SDimitry Andric   switch (Opc) {
45920b57cec5SDimitry Andric   default:
45930b57cec5SDimitry Andric     break;
45940b57cec5SDimitry Andric   case AMDGPUISD::CARRY:
45950b57cec5SDimitry Andric   case AMDGPUISD::BORROW: {
45960b57cec5SDimitry Andric     Known.Zero = APInt::getHighBitsSet(32, 31);
45970b57cec5SDimitry Andric     break;
45980b57cec5SDimitry Andric   }
45990b57cec5SDimitry Andric 
46000b57cec5SDimitry Andric   case AMDGPUISD::BFE_I32:
46010b57cec5SDimitry Andric   case AMDGPUISD::BFE_U32: {
46020b57cec5SDimitry Andric     ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
46030b57cec5SDimitry Andric     if (!CWidth)
46040b57cec5SDimitry Andric       return;
46050b57cec5SDimitry Andric 
46060b57cec5SDimitry Andric     uint32_t Width = CWidth->getZExtValue() & 0x1f;
46070b57cec5SDimitry Andric 
46080b57cec5SDimitry Andric     if (Opc == AMDGPUISD::BFE_U32)
46090b57cec5SDimitry Andric       Known.Zero = APInt::getHighBitsSet(32, 32 - Width);
46100b57cec5SDimitry Andric 
46110b57cec5SDimitry Andric     break;
46120b57cec5SDimitry Andric   }
4613fe6060f1SDimitry Andric   case AMDGPUISD::FP_TO_FP16: {
46140b57cec5SDimitry Andric     unsigned BitWidth = Known.getBitWidth();
46150b57cec5SDimitry Andric 
46160b57cec5SDimitry Andric     // High bits are zero.
46170b57cec5SDimitry Andric     Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
46180b57cec5SDimitry Andric     break;
46190b57cec5SDimitry Andric   }
46200b57cec5SDimitry Andric   case AMDGPUISD::MUL_U24:
46210b57cec5SDimitry Andric   case AMDGPUISD::MUL_I24: {
46220b57cec5SDimitry Andric     KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
46230b57cec5SDimitry Andric     KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
46240b57cec5SDimitry Andric     unsigned TrailZ = LHSKnown.countMinTrailingZeros() +
46250b57cec5SDimitry Andric                       RHSKnown.countMinTrailingZeros();
46260b57cec5SDimitry Andric     Known.Zero.setLowBits(std::min(TrailZ, 32u));
4627480093f4SDimitry Andric     // Skip extra check if all bits are known zeros.
4628480093f4SDimitry Andric     if (TrailZ >= 32)
4629480093f4SDimitry Andric       break;
46300b57cec5SDimitry Andric 
46310b57cec5SDimitry Andric     // Truncate to 24 bits.
46320b57cec5SDimitry Andric     LHSKnown = LHSKnown.trunc(24);
46330b57cec5SDimitry Andric     RHSKnown = RHSKnown.trunc(24);
46340b57cec5SDimitry Andric 
46350b57cec5SDimitry Andric     if (Opc == AMDGPUISD::MUL_I24) {
4636*04eeddc0SDimitry Andric       unsigned LHSValBits = LHSKnown.countMaxSignificantBits();
4637*04eeddc0SDimitry Andric       unsigned RHSValBits = RHSKnown.countMaxSignificantBits();
4638*04eeddc0SDimitry Andric       unsigned MaxValBits = LHSValBits + RHSValBits;
4639*04eeddc0SDimitry Andric       if (MaxValBits > 32)
46400b57cec5SDimitry Andric         break;
4641*04eeddc0SDimitry Andric       unsigned SignBits = 32 - MaxValBits + 1;
46420b57cec5SDimitry Andric       bool LHSNegative = LHSKnown.isNegative();
4643480093f4SDimitry Andric       bool LHSNonNegative = LHSKnown.isNonNegative();
4644480093f4SDimitry Andric       bool LHSPositive = LHSKnown.isStrictlyPositive();
46450b57cec5SDimitry Andric       bool RHSNegative = RHSKnown.isNegative();
4646480093f4SDimitry Andric       bool RHSNonNegative = RHSKnown.isNonNegative();
4647480093f4SDimitry Andric       bool RHSPositive = RHSKnown.isStrictlyPositive();
4648480093f4SDimitry Andric 
4649480093f4SDimitry Andric       if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative))
4650*04eeddc0SDimitry Andric         Known.Zero.setHighBits(SignBits);
4651480093f4SDimitry Andric       else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative))
4652*04eeddc0SDimitry Andric         Known.One.setHighBits(SignBits);
46530b57cec5SDimitry Andric     } else {
4654*04eeddc0SDimitry Andric       unsigned LHSValBits = LHSKnown.countMaxActiveBits();
4655*04eeddc0SDimitry Andric       unsigned RHSValBits = RHSKnown.countMaxActiveBits();
4656*04eeddc0SDimitry Andric       unsigned MaxValBits = LHSValBits + RHSValBits;
46570b57cec5SDimitry Andric       if (MaxValBits >= 32)
46580b57cec5SDimitry Andric         break;
4659*04eeddc0SDimitry Andric       Known.Zero.setBitsFrom(MaxValBits);
46600b57cec5SDimitry Andric     }
46610b57cec5SDimitry Andric     break;
46620b57cec5SDimitry Andric   }
46630b57cec5SDimitry Andric   case AMDGPUISD::PERM: {
46640b57cec5SDimitry Andric     ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Op.getOperand(2));
46650b57cec5SDimitry Andric     if (!CMask)
46660b57cec5SDimitry Andric       return;
46670b57cec5SDimitry Andric 
46680b57cec5SDimitry Andric     KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
46690b57cec5SDimitry Andric     KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
46700b57cec5SDimitry Andric     unsigned Sel = CMask->getZExtValue();
46710b57cec5SDimitry Andric 
46720b57cec5SDimitry Andric     for (unsigned I = 0; I < 32; I += 8) {
46730b57cec5SDimitry Andric       unsigned SelBits = Sel & 0xff;
46740b57cec5SDimitry Andric       if (SelBits < 4) {
46750b57cec5SDimitry Andric         SelBits *= 8;
46760b57cec5SDimitry Andric         Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
46770b57cec5SDimitry Andric         Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
46780b57cec5SDimitry Andric       } else if (SelBits < 7) {
46790b57cec5SDimitry Andric         SelBits = (SelBits & 3) * 8;
46800b57cec5SDimitry Andric         Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
46810b57cec5SDimitry Andric         Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
46820b57cec5SDimitry Andric       } else if (SelBits == 0x0c) {
46838bcb0991SDimitry Andric         Known.Zero |= 0xFFull << I;
46840b57cec5SDimitry Andric       } else if (SelBits > 0x0c) {
46858bcb0991SDimitry Andric         Known.One |= 0xFFull << I;
46860b57cec5SDimitry Andric       }
46870b57cec5SDimitry Andric       Sel >>= 8;
46880b57cec5SDimitry Andric     }
46890b57cec5SDimitry Andric     break;
46900b57cec5SDimitry Andric   }
46910b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_UBYTE:  {
46920b57cec5SDimitry Andric     Known.Zero.setHighBits(24);
46930b57cec5SDimitry Andric     break;
46940b57cec5SDimitry Andric   }
46950b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_USHORT: {
46960b57cec5SDimitry Andric     Known.Zero.setHighBits(16);
46970b57cec5SDimitry Andric     break;
46980b57cec5SDimitry Andric   }
46990b57cec5SDimitry Andric   case AMDGPUISD::LDS: {
47000b57cec5SDimitry Andric     auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode());
47015ffd83dbSDimitry Andric     Align Alignment = GA->getGlobal()->getPointerAlignment(DAG.getDataLayout());
47020b57cec5SDimitry Andric 
47030b57cec5SDimitry Andric     Known.Zero.setHighBits(16);
47045ffd83dbSDimitry Andric     Known.Zero.setLowBits(Log2(Alignment));
47050b57cec5SDimitry Andric     break;
47060b57cec5SDimitry Andric   }
47070b57cec5SDimitry Andric   case ISD::INTRINSIC_WO_CHAIN: {
47080b57cec5SDimitry Andric     unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
47090b57cec5SDimitry Andric     switch (IID) {
47100b57cec5SDimitry Andric     case Intrinsic::amdgcn_mbcnt_lo:
47110b57cec5SDimitry Andric     case Intrinsic::amdgcn_mbcnt_hi: {
47120b57cec5SDimitry Andric       const GCNSubtarget &ST =
47130b57cec5SDimitry Andric           DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
47140b57cec5SDimitry Andric       // These return at most the wavefront size - 1.
47150b57cec5SDimitry Andric       unsigned Size = Op.getValueType().getSizeInBits();
47160b57cec5SDimitry Andric       Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
47170b57cec5SDimitry Andric       break;
47180b57cec5SDimitry Andric     }
47190b57cec5SDimitry Andric     default:
47200b57cec5SDimitry Andric       break;
47210b57cec5SDimitry Andric     }
47220b57cec5SDimitry Andric   }
47230b57cec5SDimitry Andric   }
47240b57cec5SDimitry Andric }
47250b57cec5SDimitry Andric 
47260b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
47270b57cec5SDimitry Andric     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
47280b57cec5SDimitry Andric     unsigned Depth) const {
47290b57cec5SDimitry Andric   switch (Op.getOpcode()) {
47300b57cec5SDimitry Andric   case AMDGPUISD::BFE_I32: {
47310b57cec5SDimitry Andric     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
47320b57cec5SDimitry Andric     if (!Width)
47330b57cec5SDimitry Andric       return 1;
47340b57cec5SDimitry Andric 
47350b57cec5SDimitry Andric     unsigned SignBits = 32 - Width->getZExtValue() + 1;
47360b57cec5SDimitry Andric     if (!isNullConstant(Op.getOperand(1)))
47370b57cec5SDimitry Andric       return SignBits;
47380b57cec5SDimitry Andric 
47390b57cec5SDimitry Andric     // TODO: Could probably figure something out with non-0 offsets.
47400b57cec5SDimitry Andric     unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
47410b57cec5SDimitry Andric     return std::max(SignBits, Op0SignBits);
47420b57cec5SDimitry Andric   }
47430b57cec5SDimitry Andric 
47440b57cec5SDimitry Andric   case AMDGPUISD::BFE_U32: {
47450b57cec5SDimitry Andric     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
47460b57cec5SDimitry Andric     return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
47470b57cec5SDimitry Andric   }
47480b57cec5SDimitry Andric 
47490b57cec5SDimitry Andric   case AMDGPUISD::CARRY:
47500b57cec5SDimitry Andric   case AMDGPUISD::BORROW:
47510b57cec5SDimitry Andric     return 31;
47520b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_BYTE:
47530b57cec5SDimitry Andric     return 25;
47540b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_SHORT:
47550b57cec5SDimitry Andric     return 17;
47560b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_UBYTE:
47570b57cec5SDimitry Andric     return 24;
47580b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_USHORT:
47590b57cec5SDimitry Andric     return 16;
47600b57cec5SDimitry Andric   case AMDGPUISD::FP_TO_FP16:
47610b57cec5SDimitry Andric     return 16;
47620b57cec5SDimitry Andric   default:
47630b57cec5SDimitry Andric     return 1;
47640b57cec5SDimitry Andric   }
47650b57cec5SDimitry Andric }
47660b57cec5SDimitry Andric 
47675ffd83dbSDimitry Andric unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
47685ffd83dbSDimitry Andric   GISelKnownBits &Analysis, Register R,
47695ffd83dbSDimitry Andric   const APInt &DemandedElts, const MachineRegisterInfo &MRI,
47705ffd83dbSDimitry Andric   unsigned Depth) const {
47715ffd83dbSDimitry Andric   const MachineInstr *MI = MRI.getVRegDef(R);
47725ffd83dbSDimitry Andric   if (!MI)
47735ffd83dbSDimitry Andric     return 1;
47745ffd83dbSDimitry Andric 
47755ffd83dbSDimitry Andric   // TODO: Check range metadata on MMO.
47765ffd83dbSDimitry Andric   switch (MI->getOpcode()) {
47775ffd83dbSDimitry Andric   case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
47785ffd83dbSDimitry Andric     return 25;
47795ffd83dbSDimitry Andric   case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
47805ffd83dbSDimitry Andric     return 17;
47815ffd83dbSDimitry Andric   case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
47825ffd83dbSDimitry Andric     return 24;
47835ffd83dbSDimitry Andric   case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
47845ffd83dbSDimitry Andric     return 16;
47855ffd83dbSDimitry Andric   default:
47865ffd83dbSDimitry Andric     return 1;
47875ffd83dbSDimitry Andric   }
47885ffd83dbSDimitry Andric }
47895ffd83dbSDimitry Andric 
47900b57cec5SDimitry Andric bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
47910b57cec5SDimitry Andric                                                         const SelectionDAG &DAG,
47920b57cec5SDimitry Andric                                                         bool SNaN,
47930b57cec5SDimitry Andric                                                         unsigned Depth) const {
47940b57cec5SDimitry Andric   unsigned Opcode = Op.getOpcode();
47950b57cec5SDimitry Andric   switch (Opcode) {
47960b57cec5SDimitry Andric   case AMDGPUISD::FMIN_LEGACY:
47970b57cec5SDimitry Andric   case AMDGPUISD::FMAX_LEGACY: {
47980b57cec5SDimitry Andric     if (SNaN)
47990b57cec5SDimitry Andric       return true;
48000b57cec5SDimitry Andric 
48010b57cec5SDimitry Andric     // TODO: Can check no nans on one of the operands for each one, but which
48020b57cec5SDimitry Andric     // one?
48030b57cec5SDimitry Andric     return false;
48040b57cec5SDimitry Andric   }
48050b57cec5SDimitry Andric   case AMDGPUISD::FMUL_LEGACY:
48060b57cec5SDimitry Andric   case AMDGPUISD::CVT_PKRTZ_F16_F32: {
48070b57cec5SDimitry Andric     if (SNaN)
48080b57cec5SDimitry Andric       return true;
48090b57cec5SDimitry Andric     return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
48100b57cec5SDimitry Andric            DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
48110b57cec5SDimitry Andric   }
48120b57cec5SDimitry Andric   case AMDGPUISD::FMED3:
48130b57cec5SDimitry Andric   case AMDGPUISD::FMIN3:
48140b57cec5SDimitry Andric   case AMDGPUISD::FMAX3:
48150b57cec5SDimitry Andric   case AMDGPUISD::FMAD_FTZ: {
48160b57cec5SDimitry Andric     if (SNaN)
48170b57cec5SDimitry Andric       return true;
48180b57cec5SDimitry Andric     return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
48190b57cec5SDimitry Andric            DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
48200b57cec5SDimitry Andric            DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
48210b57cec5SDimitry Andric   }
48220b57cec5SDimitry Andric   case AMDGPUISD::CVT_F32_UBYTE0:
48230b57cec5SDimitry Andric   case AMDGPUISD::CVT_F32_UBYTE1:
48240b57cec5SDimitry Andric   case AMDGPUISD::CVT_F32_UBYTE2:
48250b57cec5SDimitry Andric   case AMDGPUISD::CVT_F32_UBYTE3:
48260b57cec5SDimitry Andric     return true;
48270b57cec5SDimitry Andric 
48280b57cec5SDimitry Andric   case AMDGPUISD::RCP:
48290b57cec5SDimitry Andric   case AMDGPUISD::RSQ:
48300b57cec5SDimitry Andric   case AMDGPUISD::RCP_LEGACY:
48310b57cec5SDimitry Andric   case AMDGPUISD::RSQ_CLAMP: {
48320b57cec5SDimitry Andric     if (SNaN)
48330b57cec5SDimitry Andric       return true;
48340b57cec5SDimitry Andric 
48350b57cec5SDimitry Andric     // TODO: Need is known positive check.
48360b57cec5SDimitry Andric     return false;
48370b57cec5SDimitry Andric   }
48380b57cec5SDimitry Andric   case AMDGPUISD::LDEXP:
48390b57cec5SDimitry Andric   case AMDGPUISD::FRACT: {
48400b57cec5SDimitry Andric     if (SNaN)
48410b57cec5SDimitry Andric       return true;
48420b57cec5SDimitry Andric     return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
48430b57cec5SDimitry Andric   }
48440b57cec5SDimitry Andric   case AMDGPUISD::DIV_SCALE:
48450b57cec5SDimitry Andric   case AMDGPUISD::DIV_FMAS:
48460b57cec5SDimitry Andric   case AMDGPUISD::DIV_FIXUP:
48470b57cec5SDimitry Andric     // TODO: Refine on operands.
48480b57cec5SDimitry Andric     return SNaN;
48490b57cec5SDimitry Andric   case AMDGPUISD::SIN_HW:
48500b57cec5SDimitry Andric   case AMDGPUISD::COS_HW: {
48510b57cec5SDimitry Andric     // TODO: Need check for infinity
48520b57cec5SDimitry Andric     return SNaN;
48530b57cec5SDimitry Andric   }
48540b57cec5SDimitry Andric   case ISD::INTRINSIC_WO_CHAIN: {
48550b57cec5SDimitry Andric     unsigned IntrinsicID
48560b57cec5SDimitry Andric       = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
48570b57cec5SDimitry Andric     // TODO: Handle more intrinsics
48580b57cec5SDimitry Andric     switch (IntrinsicID) {
48590b57cec5SDimitry Andric     case Intrinsic::amdgcn_cubeid:
48600b57cec5SDimitry Andric       return true;
48610b57cec5SDimitry Andric 
48620b57cec5SDimitry Andric     case Intrinsic::amdgcn_frexp_mant: {
48630b57cec5SDimitry Andric       if (SNaN)
48640b57cec5SDimitry Andric         return true;
48650b57cec5SDimitry Andric       return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
48660b57cec5SDimitry Andric     }
48670b57cec5SDimitry Andric     case Intrinsic::amdgcn_cvt_pkrtz: {
48680b57cec5SDimitry Andric       if (SNaN)
48690b57cec5SDimitry Andric         return true;
48700b57cec5SDimitry Andric       return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
48710b57cec5SDimitry Andric              DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
48720b57cec5SDimitry Andric     }
48735ffd83dbSDimitry Andric     case Intrinsic::amdgcn_rcp:
48745ffd83dbSDimitry Andric     case Intrinsic::amdgcn_rsq:
48755ffd83dbSDimitry Andric     case Intrinsic::amdgcn_rcp_legacy:
48765ffd83dbSDimitry Andric     case Intrinsic::amdgcn_rsq_legacy:
48775ffd83dbSDimitry Andric     case Intrinsic::amdgcn_rsq_clamp: {
48785ffd83dbSDimitry Andric       if (SNaN)
48795ffd83dbSDimitry Andric         return true;
48805ffd83dbSDimitry Andric 
48815ffd83dbSDimitry Andric       // TODO: Need is known positive check.
48825ffd83dbSDimitry Andric       return false;
48835ffd83dbSDimitry Andric     }
48845ffd83dbSDimitry Andric     case Intrinsic::amdgcn_trig_preop:
48850b57cec5SDimitry Andric     case Intrinsic::amdgcn_fdot2:
48860b57cec5SDimitry Andric       // TODO: Refine on operand
48870b57cec5SDimitry Andric       return SNaN;
4888e8d8bef9SDimitry Andric     case Intrinsic::amdgcn_fma_legacy:
4889e8d8bef9SDimitry Andric       if (SNaN)
4890e8d8bef9SDimitry Andric         return true;
4891e8d8bef9SDimitry Andric       return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
4892e8d8bef9SDimitry Andric              DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1) &&
4893e8d8bef9SDimitry Andric              DAG.isKnownNeverNaN(Op.getOperand(3), SNaN, Depth + 1);
48940b57cec5SDimitry Andric     default:
48950b57cec5SDimitry Andric       return false;
48960b57cec5SDimitry Andric     }
48970b57cec5SDimitry Andric   }
48980b57cec5SDimitry Andric   default:
48990b57cec5SDimitry Andric     return false;
49000b57cec5SDimitry Andric   }
49010b57cec5SDimitry Andric }
49020b57cec5SDimitry Andric 
49030b57cec5SDimitry Andric TargetLowering::AtomicExpansionKind
49040b57cec5SDimitry Andric AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
49050b57cec5SDimitry Andric   switch (RMW->getOperation()) {
49060b57cec5SDimitry Andric   case AtomicRMWInst::Nand:
49070b57cec5SDimitry Andric   case AtomicRMWInst::FAdd:
49080b57cec5SDimitry Andric   case AtomicRMWInst::FSub:
49090b57cec5SDimitry Andric     return AtomicExpansionKind::CmpXChg;
49100b57cec5SDimitry Andric   default:
49110b57cec5SDimitry Andric     return AtomicExpansionKind::None;
49120b57cec5SDimitry Andric   }
49130b57cec5SDimitry Andric }
4914fe6060f1SDimitry Andric 
4915*04eeddc0SDimitry Andric bool AMDGPUTargetLowering::isConstantUnsignedBitfieldExtractLegal(
4916fe6060f1SDimitry Andric     unsigned Opc, LLT Ty1, LLT Ty2) const {
4917*04eeddc0SDimitry Andric   return (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64)) &&
4918*04eeddc0SDimitry Andric          Ty2 == LLT::scalar(32);
4919fe6060f1SDimitry Andric }
4920