10b57cec5SDimitry Andric //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// This is the parent TargetLowering class for hardware code gen
110b57cec5SDimitry Andric /// targets.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric
150b57cec5SDimitry Andric #include "AMDGPUISelLowering.h"
160b57cec5SDimitry Andric #include "AMDGPU.h"
17e8d8bef9SDimitry Andric #include "AMDGPUInstrInfo.h"
18e8d8bef9SDimitry Andric #include "AMDGPUMachineFunction.h"
190b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/Analysis.h"
2106c3fb27SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
2281ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
230b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
24e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
2506c3fb27SDimitry Andric #include "llvm/IR/PatternMatch.h"
26e8d8bef9SDimitry Andric #include "llvm/Support/CommandLine.h"
270b57cec5SDimitry Andric #include "llvm/Support/KnownBits.h"
28e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
29e8d8bef9SDimitry Andric
300b57cec5SDimitry Andric using namespace llvm;
310b57cec5SDimitry Andric
320b57cec5SDimitry Andric #include "AMDGPUGenCallingConv.inc"
330b57cec5SDimitry Andric
345ffd83dbSDimitry Andric static cl::opt<bool> AMDGPUBypassSlowDiv(
355ffd83dbSDimitry Andric "amdgpu-bypass-slow-div",
365ffd83dbSDimitry Andric cl::desc("Skip 64-bit divide for dynamic 32-bit values"),
375ffd83dbSDimitry Andric cl::init(true));
385ffd83dbSDimitry Andric
390b57cec5SDimitry Andric // Find a larger type to do a load / store of a vector with.
getEquivalentMemType(LLVMContext & Ctx,EVT VT)400b57cec5SDimitry Andric EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
410b57cec5SDimitry Andric unsigned StoreSize = VT.getStoreSizeInBits();
420b57cec5SDimitry Andric if (StoreSize <= 32)
430b57cec5SDimitry Andric return EVT::getIntegerVT(Ctx, StoreSize);
440b57cec5SDimitry Andric
450fca6ea1SDimitry Andric if (StoreSize % 32 == 0)
460b57cec5SDimitry Andric return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
470fca6ea1SDimitry Andric
480fca6ea1SDimitry Andric return VT;
490b57cec5SDimitry Andric }
500b57cec5SDimitry Andric
numBitsUnsigned(SDValue Op,SelectionDAG & DAG)510b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
52349cc55cSDimitry Andric return DAG.computeKnownBits(Op).countMaxActiveBits();
530b57cec5SDimitry Andric }
540b57cec5SDimitry Andric
numBitsSigned(SDValue Op,SelectionDAG & DAG)550b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
560b57cec5SDimitry Andric // In order for this to be a signed 24-bit value, bit 23, must
570b57cec5SDimitry Andric // be a sign bit.
5804eeddc0SDimitry Andric return DAG.ComputeMaxSignificantBits(Op);
590b57cec5SDimitry Andric }
600b57cec5SDimitry Andric
AMDGPUTargetLowering(const TargetMachine & TM,const AMDGPUSubtarget & STI)610b57cec5SDimitry Andric AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
620b57cec5SDimitry Andric const AMDGPUSubtarget &STI)
630b57cec5SDimitry Andric : TargetLowering(TM), Subtarget(&STI) {
640fca6ea1SDimitry Andric // Always lower memset, memcpy, and memmove intrinsics to load/store
650fca6ea1SDimitry Andric // instructions, rather than generating calls to memset, memcpy or memmove.
660fca6ea1SDimitry Andric MaxStoresPerMemset = MaxStoresPerMemsetOptSize = ~0U;
670fca6ea1SDimitry Andric MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = ~0U;
680fca6ea1SDimitry Andric MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = ~0U;
690fca6ea1SDimitry Andric
700fca6ea1SDimitry Andric // Enable ganging up loads and stores in the memcpy DAG lowering.
710fca6ea1SDimitry Andric MaxGluedStoresPerMemcpy = 16;
720fca6ea1SDimitry Andric
730b57cec5SDimitry Andric // Lower floating point store/load to integer store/load to reduce the number
740b57cec5SDimitry Andric // of patterns in tablegen.
750b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::f32, Promote);
760b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
770b57cec5SDimitry Andric
780b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
790b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
800b57cec5SDimitry Andric
810b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v3f32, Promote);
820b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v3f32, MVT::v3i32);
830b57cec5SDimitry Andric
840b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
850b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
860b57cec5SDimitry Andric
870b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v5f32, Promote);
880b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32);
890b57cec5SDimitry Andric
90fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v6f32, Promote);
91fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v6f32, MVT::v6i32);
92fe6060f1SDimitry Andric
93fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v7f32, Promote);
94fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v7f32, MVT::v7i32);
95fe6060f1SDimitry Andric
960b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
970b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
980b57cec5SDimitry Andric
99bdd1243dSDimitry Andric setOperationAction(ISD::LOAD, MVT::v9f32, Promote);
100bdd1243dSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v9f32, MVT::v9i32);
101bdd1243dSDimitry Andric
102bdd1243dSDimitry Andric setOperationAction(ISD::LOAD, MVT::v10f32, Promote);
103bdd1243dSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v10f32, MVT::v10i32);
104bdd1243dSDimitry Andric
105bdd1243dSDimitry Andric setOperationAction(ISD::LOAD, MVT::v11f32, Promote);
106bdd1243dSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v11f32, MVT::v11i32);
107bdd1243dSDimitry Andric
108bdd1243dSDimitry Andric setOperationAction(ISD::LOAD, MVT::v12f32, Promote);
109bdd1243dSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v12f32, MVT::v12i32);
110bdd1243dSDimitry Andric
1110b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
1120b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
1130b57cec5SDimitry Andric
1140b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v32f32, Promote);
1150b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32);
1160b57cec5SDimitry Andric
1170b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::i64, Promote);
1180b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
1190b57cec5SDimitry Andric
1200b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1210b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32);
1220b57cec5SDimitry Andric
1230b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::f64, Promote);
1240b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32);
1250b57cec5SDimitry Andric
1260b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v2f64, Promote);
1270b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v4i32);
1280b57cec5SDimitry Andric
129fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v3i64, Promote);
130fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v3i64, MVT::v6i32);
131fe6060f1SDimitry Andric
1325ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v4i64, Promote);
1335ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v4i64, MVT::v8i32);
1345ffd83dbSDimitry Andric
135fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v3f64, Promote);
136fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v3f64, MVT::v6i32);
137fe6060f1SDimitry Andric
1385ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v4f64, Promote);
1395ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v4f64, MVT::v8i32);
1405ffd83dbSDimitry Andric
1415ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v8i64, Promote);
1425ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v8i64, MVT::v16i32);
1435ffd83dbSDimitry Andric
1445ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v8f64, Promote);
1455ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v8f64, MVT::v16i32);
1465ffd83dbSDimitry Andric
1475ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v16i64, Promote);
1485ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v16i64, MVT::v32i32);
1495ffd83dbSDimitry Andric
1505ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v16f64, Promote);
1515ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v16f64, MVT::v32i32);
1525ffd83dbSDimitry Andric
15306c3fb27SDimitry Andric setOperationAction(ISD::LOAD, MVT::i128, Promote);
15406c3fb27SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::i128, MVT::v4i32);
15506c3fb27SDimitry Andric
1560fca6ea1SDimitry Andric // TODO: Would be better to consume as directly legal
1570fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_LOAD, MVT::f32, Promote);
1580fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
1590fca6ea1SDimitry Andric
1600fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_LOAD, MVT::f64, Promote);
1610fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);
1620fca6ea1SDimitry Andric
1630fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_LOAD, MVT::f16, Promote);
1640fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
1650fca6ea1SDimitry Andric
1660fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_LOAD, MVT::bf16, Promote);
1670fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_LOAD, MVT::bf16, MVT::i16);
1680fca6ea1SDimitry Andric
1690fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_STORE, MVT::f32, Promote);
1700fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_STORE, MVT::f32, MVT::i32);
1710fca6ea1SDimitry Andric
1720fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_STORE, MVT::f64, Promote);
1730fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_STORE, MVT::f64, MVT::i64);
1740fca6ea1SDimitry Andric
1750fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_STORE, MVT::f16, Promote);
1760fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_STORE, MVT::f16, MVT::i16);
1770fca6ea1SDimitry Andric
1780fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_STORE, MVT::bf16, Promote);
1790fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_STORE, MVT::bf16, MVT::i16);
1800fca6ea1SDimitry Andric
1810b57cec5SDimitry Andric // There are no 64-bit extloads. These should be done as a 32-bit extload and
1820b57cec5SDimitry Andric // an extension to 64-bit.
18381ad6265SDimitry Andric for (MVT VT : MVT::integer_valuetypes())
18481ad6265SDimitry Andric setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, VT,
18581ad6265SDimitry Andric Expand);
1860b57cec5SDimitry Andric
1870b57cec5SDimitry Andric for (MVT VT : MVT::integer_valuetypes()) {
1880b57cec5SDimitry Andric if (VT == MVT::i64)
1890b57cec5SDimitry Andric continue;
1900b57cec5SDimitry Andric
19181ad6265SDimitry Andric for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) {
19281ad6265SDimitry Andric setLoadExtAction(Op, VT, MVT::i1, Promote);
19381ad6265SDimitry Andric setLoadExtAction(Op, VT, MVT::i8, Legal);
19481ad6265SDimitry Andric setLoadExtAction(Op, VT, MVT::i16, Legal);
19581ad6265SDimitry Andric setLoadExtAction(Op, VT, MVT::i32, Expand);
19681ad6265SDimitry Andric }
1970b57cec5SDimitry Andric }
1980b57cec5SDimitry Andric
19981ad6265SDimitry Andric for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
20081ad6265SDimitry Andric for (auto MemVT :
20181ad6265SDimitry Andric {MVT::v2i8, MVT::v4i8, MVT::v2i16, MVT::v3i16, MVT::v4i16})
20281ad6265SDimitry Andric setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}, VT, MemVT,
20381ad6265SDimitry Andric Expand);
2040b57cec5SDimitry Andric
2050b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
206bdd1243dSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
2070b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
208cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
2098bcb0991SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
210cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3bf16, Expand);
2110b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
212cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
2130b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
214cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
2158bcb0991SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand);
216cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16bf16, Expand);
2178bcb0991SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand);
218cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32bf16, Expand);
2190b57cec5SDimitry Andric
2200b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
2210b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
222fe6060f1SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f32, Expand);
2230b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
2240b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f32, Expand);
2255ffd83dbSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f32, Expand);
2260b57cec5SDimitry Andric
2270b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
228bdd1243dSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
2290b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
230cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
231fe6060f1SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f16, Expand);
232cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3bf16, Expand);
2330b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
234cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
2350b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
236cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
2375ffd83dbSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand);
238cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16bf16, Expand);
2390b57cec5SDimitry Andric
2400b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::f32, Promote);
2410b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
2420b57cec5SDimitry Andric
2430b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v2f32, Promote);
2440b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
2450b57cec5SDimitry Andric
2460b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v3f32, Promote);
2470b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v3f32, MVT::v3i32);
2480b57cec5SDimitry Andric
2490b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v4f32, Promote);
2500b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
2510b57cec5SDimitry Andric
2520b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v5f32, Promote);
2530b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32);
2540b57cec5SDimitry Andric
255fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v6f32, Promote);
256fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v6f32, MVT::v6i32);
257fe6060f1SDimitry Andric
258fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v7f32, Promote);
259fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v7f32, MVT::v7i32);
260fe6060f1SDimitry Andric
2610b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v8f32, Promote);
2620b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
2630b57cec5SDimitry Andric
264bdd1243dSDimitry Andric setOperationAction(ISD::STORE, MVT::v9f32, Promote);
265bdd1243dSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v9f32, MVT::v9i32);
266bdd1243dSDimitry Andric
267bdd1243dSDimitry Andric setOperationAction(ISD::STORE, MVT::v10f32, Promote);
268bdd1243dSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v10f32, MVT::v10i32);
269bdd1243dSDimitry Andric
270bdd1243dSDimitry Andric setOperationAction(ISD::STORE, MVT::v11f32, Promote);
271bdd1243dSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v11f32, MVT::v11i32);
272bdd1243dSDimitry Andric
273bdd1243dSDimitry Andric setOperationAction(ISD::STORE, MVT::v12f32, Promote);
274bdd1243dSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v12f32, MVT::v12i32);
275bdd1243dSDimitry Andric
2760b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v16f32, Promote);
2770b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
2780b57cec5SDimitry Andric
2790b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v32f32, Promote);
2800b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32);
2810b57cec5SDimitry Andric
2820b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::i64, Promote);
2830b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
2840b57cec5SDimitry Andric
2850b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v2i64, Promote);
2860b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32);
2870b57cec5SDimitry Andric
2880b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::f64, Promote);
2890b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32);
2900b57cec5SDimitry Andric
2910b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v2f64, Promote);
2920b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32);
2930b57cec5SDimitry Andric
294fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v3i64, Promote);
295fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v3i64, MVT::v6i32);
296fe6060f1SDimitry Andric
297fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v3f64, Promote);
298fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v3f64, MVT::v6i32);
299fe6060f1SDimitry Andric
3005ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v4i64, Promote);
3015ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v4i64, MVT::v8i32);
3025ffd83dbSDimitry Andric
3035ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v4f64, Promote);
3045ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v4f64, MVT::v8i32);
3055ffd83dbSDimitry Andric
3065ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v8i64, Promote);
3075ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v8i64, MVT::v16i32);
3085ffd83dbSDimitry Andric
3095ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v8f64, Promote);
3105ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v8f64, MVT::v16i32);
3115ffd83dbSDimitry Andric
3125ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v16i64, Promote);
3135ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v16i64, MVT::v32i32);
3145ffd83dbSDimitry Andric
3155ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v16f64, Promote);
3165ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v16f64, MVT::v32i32);
3175ffd83dbSDimitry Andric
31806c3fb27SDimitry Andric setOperationAction(ISD::STORE, MVT::i128, Promote);
31906c3fb27SDimitry Andric AddPromotedToType(ISD::STORE, MVT::i128, MVT::v4i32);
32006c3fb27SDimitry Andric
3210b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i1, Expand);
3220b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i8, Expand);
3230b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i16, Expand);
3240b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i32, Expand);
3250b57cec5SDimitry Andric
3260b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand);
3270b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i8, Expand);
3280b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i16, Expand);
3290b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
3300b57cec5SDimitry Andric
331bdd1243dSDimitry Andric setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
3320b57cec5SDimitry Andric setTruncStoreAction(MVT::f32, MVT::f16, Expand);
3330fca6ea1SDimitry Andric setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand);
3340b57cec5SDimitry Andric setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
3350fca6ea1SDimitry Andric setTruncStoreAction(MVT::v3f32, MVT::v3bf16, Expand);
3368bcb0991SDimitry Andric setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
3370fca6ea1SDimitry Andric setTruncStoreAction(MVT::v4f32, MVT::v4bf16, Expand);
3380b57cec5SDimitry Andric setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
3390fca6ea1SDimitry Andric setTruncStoreAction(MVT::v8f32, MVT::v8bf16, Expand);
3400b57cec5SDimitry Andric setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
3410fca6ea1SDimitry Andric setTruncStoreAction(MVT::v16f32, MVT::v16bf16, Expand);
3428bcb0991SDimitry Andric setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand);
3430fca6ea1SDimitry Andric setTruncStoreAction(MVT::v32f32, MVT::v32bf16, Expand);
3448bcb0991SDimitry Andric setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand);
3450b57cec5SDimitry Andric
346bdd1243dSDimitry Andric setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
3470b57cec5SDimitry Andric setTruncStoreAction(MVT::f64, MVT::f16, Expand);
3480b57cec5SDimitry Andric setTruncStoreAction(MVT::f64, MVT::f32, Expand);
3490b57cec5SDimitry Andric
3500b57cec5SDimitry Andric setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
3510fca6ea1SDimitry Andric setTruncStoreAction(MVT::v2f64, MVT::v2bf16, Expand);
3520b57cec5SDimitry Andric setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand);
3530b57cec5SDimitry Andric
3540fca6ea1SDimitry Andric setTruncStoreAction(MVT::v3i32, MVT::v3i8, Expand);
3550fca6ea1SDimitry Andric
356fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3i64, MVT::v3i32, Expand);
357fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3i64, MVT::v3i16, Expand);
3580fca6ea1SDimitry Andric setTruncStoreAction(MVT::v3i64, MVT::v3i8, Expand);
3590fca6ea1SDimitry Andric setTruncStoreAction(MVT::v3i64, MVT::v3i1, Expand);
360fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3f64, MVT::v3f32, Expand);
3610fca6ea1SDimitry Andric setTruncStoreAction(MVT::v3f64, MVT::v3bf16, Expand);
362fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3f64, MVT::v3f16, Expand);
363fe6060f1SDimitry Andric
3645ffd83dbSDimitry Andric setTruncStoreAction(MVT::v4i64, MVT::v4i32, Expand);
3655ffd83dbSDimitry Andric setTruncStoreAction(MVT::v4i64, MVT::v4i16, Expand);
3660b57cec5SDimitry Andric setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand);
3670fca6ea1SDimitry Andric setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Expand);
3680b57cec5SDimitry Andric setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand);
3690b57cec5SDimitry Andric
3700b57cec5SDimitry Andric setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand);
3710fca6ea1SDimitry Andric setTruncStoreAction(MVT::v8f64, MVT::v8bf16, Expand);
3720b57cec5SDimitry Andric setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand);
3730b57cec5SDimitry Andric
3745ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16f64, MVT::v16f32, Expand);
3750fca6ea1SDimitry Andric setTruncStoreAction(MVT::v16f64, MVT::v16bf16, Expand);
3765ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16f64, MVT::v16f16, Expand);
3775ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand);
3785ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand);
3795ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand);
3805ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand);
3815ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i1, Expand);
3820b57cec5SDimitry Andric
38381ad6265SDimitry Andric setOperationAction(ISD::Constant, {MVT::i32, MVT::i64}, Legal);
38481ad6265SDimitry Andric setOperationAction(ISD::ConstantFP, {MVT::f32, MVT::f64}, Legal);
3850b57cec5SDimitry Andric
38681ad6265SDimitry Andric setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand);
3870b57cec5SDimitry Andric
3885f757f3fSDimitry Andric // For R600, this is totally unsupported, just custom lower to produce an
3895f757f3fSDimitry Andric // error.
3900b57cec5SDimitry Andric setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
3910b57cec5SDimitry Andric
3920b57cec5SDimitry Andric // Library functions. These default to Expand, but we have instructions
3930b57cec5SDimitry Andric // for them.
3945f757f3fSDimitry Andric setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
3955f757f3fSDimitry Andric ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
39681ad6265SDimitry Andric MVT::f32, Legal);
3970b57cec5SDimitry Andric
39806c3fb27SDimitry Andric setOperationAction(ISD::FLOG2, MVT::f32, Custom);
39981ad6265SDimitry Andric setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
4000b57cec5SDimitry Andric
4015f757f3fSDimitry Andric setOperationAction(
4025f757f3fSDimitry Andric {ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10}, MVT::f32,
40306c3fb27SDimitry Andric Custom);
4040b57cec5SDimitry Andric
405bdd1243dSDimitry Andric setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
406bdd1243dSDimitry Andric
4075f757f3fSDimitry Andric setOperationAction(ISD::FRINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
4080b57cec5SDimitry Andric
40981ad6265SDimitry Andric setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
4100b57cec5SDimitry Andric
411bdd1243dSDimitry Andric if (Subtarget->has16BitInsts())
412bdd1243dSDimitry Andric setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
41306c3fb27SDimitry Andric else {
414bdd1243dSDimitry Andric setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
41506c3fb27SDimitry Andric setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
41606c3fb27SDimitry Andric }
41706c3fb27SDimitry Andric
4185f757f3fSDimitry Andric setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP, ISD::FEXP10}, MVT::f16,
4195f757f3fSDimitry Andric Custom);
420bdd1243dSDimitry Andric
421bdd1243dSDimitry Andric // FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches
422bdd1243dSDimitry Andric // scalarization code. Can be removed when IS_FPCLASS expand isn't called by
423bdd1243dSDimitry Andric // default unless marked custom/legal.
424bdd1243dSDimitry Andric setOperationAction(
425bdd1243dSDimitry Andric ISD::IS_FPCLASS,
426bdd1243dSDimitry Andric {MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16, MVT::v2f32, MVT::v3f32,
427bdd1243dSDimitry Andric MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
428bdd1243dSDimitry Andric MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64, MVT::v16f64},
429bdd1243dSDimitry Andric Custom);
430bdd1243dSDimitry Andric
4310b57cec5SDimitry Andric // Expand to fneg + fadd.
4320b57cec5SDimitry Andric setOperationAction(ISD::FSUB, MVT::f64, Expand);
4330b57cec5SDimitry Andric
43481ad6265SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS,
43581ad6265SDimitry Andric {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32,
43681ad6265SDimitry Andric MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
437bdd1243dSDimitry Andric MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,
438bdd1243dSDimitry Andric MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,
439bdd1243dSDimitry Andric MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},
44081ad6265SDimitry Andric Custom);
4411db9f3b2SDimitry Andric
4421db9f3b2SDimitry Andric // FIXME: Why is v8f16/v8bf16 missing?
44381ad6265SDimitry Andric setOperationAction(
44481ad6265SDimitry Andric ISD::EXTRACT_SUBVECTOR,
4451db9f3b2SDimitry Andric {MVT::v2f16, MVT::v2bf16, MVT::v2i16, MVT::v4f16, MVT::v4bf16,
4461db9f3b2SDimitry Andric MVT::v4i16, MVT::v2f32, MVT::v2i32, MVT::v3f32, MVT::v3i32,
4471db9f3b2SDimitry Andric MVT::v4f32, MVT::v4i32, MVT::v5f32, MVT::v5i32, MVT::v6f32,
4481db9f3b2SDimitry Andric MVT::v6i32, MVT::v7f32, MVT::v7i32, MVT::v8f32, MVT::v8i32,
4491db9f3b2SDimitry Andric MVT::v9f32, MVT::v9i32, MVT::v10i32, MVT::v10f32, MVT::v11i32,
4501db9f3b2SDimitry Andric MVT::v11f32, MVT::v12i32, MVT::v12f32, MVT::v16f16, MVT::v16bf16,
4511db9f3b2SDimitry Andric MVT::v16i16, MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32,
4521db9f3b2SDimitry Andric MVT::v2f64, MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64,
4531db9f3b2SDimitry Andric MVT::v4i64, MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64,
4541db9f3b2SDimitry Andric MVT::v32i16, MVT::v32f16, MVT::v32bf16},
45581ad6265SDimitry Andric Custom);
4560b57cec5SDimitry Andric
4570b57cec5SDimitry Andric setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
45881ad6265SDimitry Andric setOperationAction(ISD::FP_TO_FP16, {MVT::f64, MVT::f32}, Custom);
4590b57cec5SDimitry Andric
4600b57cec5SDimitry Andric const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
4610b57cec5SDimitry Andric for (MVT VT : ScalarIntVTs) {
4620b57cec5SDimitry Andric // These should use [SU]DIVREM, so set them to expand
46381ad6265SDimitry Andric setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, VT,
46481ad6265SDimitry Andric Expand);
4650b57cec5SDimitry Andric
4660b57cec5SDimitry Andric // GPU does not have divrem function for signed or unsigned.
46781ad6265SDimitry Andric setOperationAction({ISD::SDIVREM, ISD::UDIVREM}, VT, Custom);
4680b57cec5SDimitry Andric
4690b57cec5SDimitry Andric // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
47081ad6265SDimitry Andric setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
4710b57cec5SDimitry Andric
47281ad6265SDimitry Andric setOperationAction({ISD::BSWAP, ISD::CTTZ, ISD::CTLZ}, VT, Expand);
4730b57cec5SDimitry Andric
4740b57cec5SDimitry Andric // AMDGPU uses ADDC/SUBC/ADDE/SUBE
47581ad6265SDimitry Andric setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, Legal);
4760b57cec5SDimitry Andric }
4770b57cec5SDimitry Andric
4785ffd83dbSDimitry Andric // The hardware supports 32-bit FSHR, but not FSHL.
4795ffd83dbSDimitry Andric setOperationAction(ISD::FSHR, MVT::i32, Legal);
4805ffd83dbSDimitry Andric
4810b57cec5SDimitry Andric // The hardware supports 32-bit ROTR, but not ROTL.
48281ad6265SDimitry Andric setOperationAction(ISD::ROTL, {MVT::i32, MVT::i64}, Expand);
4830b57cec5SDimitry Andric setOperationAction(ISD::ROTR, MVT::i64, Expand);
4840b57cec5SDimitry Andric
48581ad6265SDimitry Andric setOperationAction({ISD::MULHU, ISD::MULHS}, MVT::i16, Expand);
486e8d8bef9SDimitry Andric
48781ad6265SDimitry Andric setOperationAction({ISD::MUL, ISD::MULHU, ISD::MULHS}, MVT::i64, Expand);
48881ad6265SDimitry Andric setOperationAction(
48981ad6265SDimitry Andric {ISD::UINT_TO_FP, ISD::SINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
49081ad6265SDimitry Andric MVT::i64, Custom);
4910b57cec5SDimitry Andric setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
4920b57cec5SDimitry Andric
49381ad6265SDimitry Andric setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32,
49481ad6265SDimitry Andric Legal);
4950b57cec5SDimitry Andric
49681ad6265SDimitry Andric setOperationAction(
49781ad6265SDimitry Andric {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
49881ad6265SDimitry Andric MVT::i64, Custom);
4990b57cec5SDimitry Andric
5007a6dacacSDimitry Andric for (auto VT : {MVT::i8, MVT::i16})
5017a6dacacSDimitry Andric setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, VT, Custom);
5027a6dacacSDimitry Andric
5030b57cec5SDimitry Andric static const MVT::SimpleValueType VectorIntTypes[] = {
504bdd1243dSDimitry Andric MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32,
505bdd1243dSDimitry Andric MVT::v9i32, MVT::v10i32, MVT::v11i32, MVT::v12i32};
5060b57cec5SDimitry Andric
5070b57cec5SDimitry Andric for (MVT VT : VectorIntTypes) {
5080b57cec5SDimitry Andric // Expand the following operations for the current type by default.
50981ad6265SDimitry Andric setOperationAction({ISD::ADD, ISD::AND, ISD::FP_TO_SINT,
51081ad6265SDimitry Andric ISD::FP_TO_UINT, ISD::MUL, ISD::MULHU,
51181ad6265SDimitry Andric ISD::MULHS, ISD::OR, ISD::SHL,
51281ad6265SDimitry Andric ISD::SRA, ISD::SRL, ISD::ROTL,
51381ad6265SDimitry Andric ISD::ROTR, ISD::SUB, ISD::SINT_TO_FP,
51481ad6265SDimitry Andric ISD::UINT_TO_FP, ISD::SDIV, ISD::UDIV,
51581ad6265SDimitry Andric ISD::SREM, ISD::UREM, ISD::SMUL_LOHI,
51681ad6265SDimitry Andric ISD::UMUL_LOHI, ISD::SDIVREM, ISD::UDIVREM,
51781ad6265SDimitry Andric ISD::SELECT, ISD::VSELECT, ISD::SELECT_CC,
51881ad6265SDimitry Andric ISD::XOR, ISD::BSWAP, ISD::CTPOP,
51981ad6265SDimitry Andric ISD::CTTZ, ISD::CTLZ, ISD::VECTOR_SHUFFLE,
52081ad6265SDimitry Andric ISD::SETCC},
52181ad6265SDimitry Andric VT, Expand);
5220b57cec5SDimitry Andric }
5230b57cec5SDimitry Andric
5240b57cec5SDimitry Andric static const MVT::SimpleValueType FloatVectorTypes[] = {
525bdd1243dSDimitry Andric MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32,
526bdd1243dSDimitry Andric MVT::v9f32, MVT::v10f32, MVT::v11f32, MVT::v12f32};
5270b57cec5SDimitry Andric
5280b57cec5SDimitry Andric for (MVT VT : FloatVectorTypes) {
52981ad6265SDimitry Andric setOperationAction(
5305f757f3fSDimitry Andric {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM,
5315f757f3fSDimitry Andric ISD::FADD, ISD::FCEIL, ISD::FCOS,
5325f757f3fSDimitry Andric ISD::FDIV, ISD::FEXP2, ISD::FEXP,
5335f757f3fSDimitry Andric ISD::FEXP10, ISD::FLOG2, ISD::FREM,
5345f757f3fSDimitry Andric ISD::FLOG, ISD::FLOG10, ISD::FPOW,
5355f757f3fSDimitry Andric ISD::FFLOOR, ISD::FTRUNC, ISD::FMUL,
5365f757f3fSDimitry Andric ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,
5375f757f3fSDimitry Andric ISD::FSQRT, ISD::FSIN, ISD::FSUB,
5385f757f3fSDimitry Andric ISD::FNEG, ISD::VSELECT, ISD::SELECT_CC,
5395f757f3fSDimitry Andric ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE, ISD::SETCC,
5405f757f3fSDimitry Andric ISD::FCANONICALIZE, ISD::FROUNDEVEN},
54181ad6265SDimitry Andric VT, Expand);
5420b57cec5SDimitry Andric }
5430b57cec5SDimitry Andric
5440b57cec5SDimitry Andric // This causes using an unrolled select operation rather than expansion with
5450b57cec5SDimitry Andric // bit operations. This is in general better, but the alternative using BFI
5460b57cec5SDimitry Andric // instructions may be better if the select sources are SGPRs.
5470b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v2f32, Promote);
5480b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v2f32, MVT::v2i32);
5490b57cec5SDimitry Andric
5500b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v3f32, Promote);
5510b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v3f32, MVT::v3i32);
5520b57cec5SDimitry Andric
5530b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
5540b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);
5550b57cec5SDimitry Andric
5560b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v5f32, Promote);
5570b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);
5580b57cec5SDimitry Andric
559fe6060f1SDimitry Andric setOperationAction(ISD::SELECT, MVT::v6f32, Promote);
560fe6060f1SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v6f32, MVT::v6i32);
561fe6060f1SDimitry Andric
562fe6060f1SDimitry Andric setOperationAction(ISD::SELECT, MVT::v7f32, Promote);
563fe6060f1SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v7f32, MVT::v7i32);
564fe6060f1SDimitry Andric
565bdd1243dSDimitry Andric setOperationAction(ISD::SELECT, MVT::v9f32, Promote);
566bdd1243dSDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v9f32, MVT::v9i32);
567bdd1243dSDimitry Andric
568bdd1243dSDimitry Andric setOperationAction(ISD::SELECT, MVT::v10f32, Promote);
569bdd1243dSDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v10f32, MVT::v10i32);
570bdd1243dSDimitry Andric
571bdd1243dSDimitry Andric setOperationAction(ISD::SELECT, MVT::v11f32, Promote);
572bdd1243dSDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v11f32, MVT::v11i32);
573bdd1243dSDimitry Andric
574bdd1243dSDimitry Andric setOperationAction(ISD::SELECT, MVT::v12f32, Promote);
575bdd1243dSDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32);
576bdd1243dSDimitry Andric
5770b57cec5SDimitry Andric setSchedulingPreference(Sched::RegPressure);
5780b57cec5SDimitry Andric setJumpIsExpensive(true);
5790b57cec5SDimitry Andric
5800b57cec5SDimitry Andric // FIXME: This is only partially true. If we have to do vector compares, any
5810b57cec5SDimitry Andric // SGPR pair can be a condition register. If we have a uniform condition, we
5820b57cec5SDimitry Andric // are better off doing SALU operations, where there is only one SCC. For now,
5830b57cec5SDimitry Andric // we don't have a way of knowing during instruction selection if a condition
5840b57cec5SDimitry Andric // will be uniform and we always use vector compares. Assume we are using
5850b57cec5SDimitry Andric // vector compares until that is fixed.
5860b57cec5SDimitry Andric setHasMultipleConditionRegisters(true);
5870b57cec5SDimitry Andric
5880b57cec5SDimitry Andric setMinCmpXchgSizeInBits(32);
5890b57cec5SDimitry Andric setSupportsUnalignedAtomics(false);
5900b57cec5SDimitry Andric
5910b57cec5SDimitry Andric PredictableSelectIsExpensive = false;
5920b57cec5SDimitry Andric
5930b57cec5SDimitry Andric // We want to find all load dependencies for long chains of stores to enable
5940b57cec5SDimitry Andric // merging into very wide vectors. The problem is with vectors with > 4
5950b57cec5SDimitry Andric // elements. MergeConsecutiveStores will attempt to merge these because x8/x16
5960b57cec5SDimitry Andric // vectors are a legal type, even though we have to split the loads
5970b57cec5SDimitry Andric // usually. When we can more precisely specify load legality per address
5980b57cec5SDimitry Andric // space, we should be able to make FindBetterChain/MergeConsecutiveStores
5990b57cec5SDimitry Andric // smarter so that they can figure out what to do in 2 iterations without all
6000b57cec5SDimitry Andric // N > 4 stores on the same chain.
6010b57cec5SDimitry Andric GatherAllAliasesMaxDepth = 16;
6020b57cec5SDimitry Andric
6030b57cec5SDimitry Andric // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry
6040b57cec5SDimitry Andric // about these during lowering.
6050b57cec5SDimitry Andric MaxStoresPerMemcpy = 0xffffffff;
6060b57cec5SDimitry Andric MaxStoresPerMemmove = 0xffffffff;
6070b57cec5SDimitry Andric MaxStoresPerMemset = 0xffffffff;
6080b57cec5SDimitry Andric
6095ffd83dbSDimitry Andric // The expansion for 64-bit division is enormous.
6105ffd83dbSDimitry Andric if (AMDGPUBypassSlowDiv)
6115ffd83dbSDimitry Andric addBypassSlowDiv(64, 32);
6125ffd83dbSDimitry Andric
61381ad6265SDimitry Andric setTargetDAGCombine({ISD::BITCAST, ISD::SHL,
61481ad6265SDimitry Andric ISD::SRA, ISD::SRL,
61581ad6265SDimitry Andric ISD::TRUNCATE, ISD::MUL,
61681ad6265SDimitry Andric ISD::SMUL_LOHI, ISD::UMUL_LOHI,
61781ad6265SDimitry Andric ISD::MULHU, ISD::MULHS,
61881ad6265SDimitry Andric ISD::SELECT, ISD::SELECT_CC,
61981ad6265SDimitry Andric ISD::STORE, ISD::FADD,
62081ad6265SDimitry Andric ISD::FSUB, ISD::FNEG,
62181ad6265SDimitry Andric ISD::FABS, ISD::AssertZext,
62281ad6265SDimitry Andric ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN});
623cb14a3feSDimitry Andric
624cb14a3feSDimitry Andric setMaxAtomicSizeInBitsSupported(64);
625b3edf446SDimitry Andric setMaxDivRemBitWidthSupported(64);
6260fca6ea1SDimitry Andric setMaxLargeFPConvertBitWidthSupported(64);
6270b57cec5SDimitry Andric }
6280b57cec5SDimitry Andric
mayIgnoreSignedZero(SDValue Op) const629e8d8bef9SDimitry Andric bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
630e8d8bef9SDimitry Andric if (getTargetMachine().Options.NoSignedZerosFPMath)
631e8d8bef9SDimitry Andric return true;
632e8d8bef9SDimitry Andric
633e8d8bef9SDimitry Andric const auto Flags = Op.getNode()->getFlags();
634e8d8bef9SDimitry Andric if (Flags.hasNoSignedZeros())
635e8d8bef9SDimitry Andric return true;
636e8d8bef9SDimitry Andric
637e8d8bef9SDimitry Andric return false;
638e8d8bef9SDimitry Andric }
639e8d8bef9SDimitry Andric
6400b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
6410b57cec5SDimitry Andric // Target Information
6420b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
6430b57cec5SDimitry Andric
6440b57cec5SDimitry Andric LLVM_READNONE
fnegFoldsIntoOpcode(unsigned Opc)64506c3fb27SDimitry Andric static bool fnegFoldsIntoOpcode(unsigned Opc) {
6460b57cec5SDimitry Andric switch (Opc) {
6470b57cec5SDimitry Andric case ISD::FADD:
6480b57cec5SDimitry Andric case ISD::FSUB:
6490b57cec5SDimitry Andric case ISD::FMUL:
6500b57cec5SDimitry Andric case ISD::FMA:
6510b57cec5SDimitry Andric case ISD::FMAD:
6520b57cec5SDimitry Andric case ISD::FMINNUM:
6530b57cec5SDimitry Andric case ISD::FMAXNUM:
6540b57cec5SDimitry Andric case ISD::FMINNUM_IEEE:
6550b57cec5SDimitry Andric case ISD::FMAXNUM_IEEE:
6565f757f3fSDimitry Andric case ISD::FMINIMUM:
6575f757f3fSDimitry Andric case ISD::FMAXIMUM:
65806c3fb27SDimitry Andric case ISD::SELECT:
6590b57cec5SDimitry Andric case ISD::FSIN:
6600b57cec5SDimitry Andric case ISD::FTRUNC:
6610b57cec5SDimitry Andric case ISD::FRINT:
6620b57cec5SDimitry Andric case ISD::FNEARBYINT:
6635f757f3fSDimitry Andric case ISD::FROUNDEVEN:
6640b57cec5SDimitry Andric case ISD::FCANONICALIZE:
6650b57cec5SDimitry Andric case AMDGPUISD::RCP:
6660b57cec5SDimitry Andric case AMDGPUISD::RCP_LEGACY:
6670b57cec5SDimitry Andric case AMDGPUISD::RCP_IFLAG:
6680b57cec5SDimitry Andric case AMDGPUISD::SIN_HW:
6690b57cec5SDimitry Andric case AMDGPUISD::FMUL_LEGACY:
6700b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY:
6710b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY:
6720b57cec5SDimitry Andric case AMDGPUISD::FMED3:
673e8d8bef9SDimitry Andric // TODO: handle llvm.amdgcn.fma.legacy
6740b57cec5SDimitry Andric return true;
67506c3fb27SDimitry Andric case ISD::BITCAST:
67606c3fb27SDimitry Andric llvm_unreachable("bitcast is special cased");
6770b57cec5SDimitry Andric default:
6780b57cec5SDimitry Andric return false;
6790b57cec5SDimitry Andric }
6800b57cec5SDimitry Andric }
6810b57cec5SDimitry Andric
fnegFoldsIntoOp(const SDNode * N)68206c3fb27SDimitry Andric static bool fnegFoldsIntoOp(const SDNode *N) {
68306c3fb27SDimitry Andric unsigned Opc = N->getOpcode();
68406c3fb27SDimitry Andric if (Opc == ISD::BITCAST) {
68506c3fb27SDimitry Andric // TODO: Is there a benefit to checking the conditions performFNegCombine
68606c3fb27SDimitry Andric // does? We don't for the other cases.
68706c3fb27SDimitry Andric SDValue BCSrc = N->getOperand(0);
68806c3fb27SDimitry Andric if (BCSrc.getOpcode() == ISD::BUILD_VECTOR) {
68906c3fb27SDimitry Andric return BCSrc.getNumOperands() == 2 &&
69006c3fb27SDimitry Andric BCSrc.getOperand(1).getValueSizeInBits() == 32;
69106c3fb27SDimitry Andric }
69206c3fb27SDimitry Andric
69306c3fb27SDimitry Andric return BCSrc.getOpcode() == ISD::SELECT && BCSrc.getValueType() == MVT::f32;
69406c3fb27SDimitry Andric }
69506c3fb27SDimitry Andric
69606c3fb27SDimitry Andric return fnegFoldsIntoOpcode(Opc);
69706c3fb27SDimitry Andric }
69806c3fb27SDimitry Andric
6990b57cec5SDimitry Andric /// \p returns true if the operation will definitely need to use a 64-bit
7000b57cec5SDimitry Andric /// encoding, and thus will use a VOP3 encoding regardless of the source
7010b57cec5SDimitry Andric /// modifiers.
7020b57cec5SDimitry Andric LLVM_READONLY
opMustUseVOP3Encoding(const SDNode * N,MVT VT)7030b57cec5SDimitry Andric static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) {
70406c3fb27SDimitry Andric return (N->getNumOperands() > 2 && N->getOpcode() != ISD::SELECT) ||
70506c3fb27SDimitry Andric VT == MVT::f64;
70606c3fb27SDimitry Andric }
70706c3fb27SDimitry Andric
70806c3fb27SDimitry Andric /// Return true if v_cndmask_b32 will support fabs/fneg source modifiers for the
70906c3fb27SDimitry Andric /// type for ISD::SELECT.
71006c3fb27SDimitry Andric LLVM_READONLY
selectSupportsSourceMods(const SDNode * N)71106c3fb27SDimitry Andric static bool selectSupportsSourceMods(const SDNode *N) {
71206c3fb27SDimitry Andric // TODO: Only applies if select will be vector
71306c3fb27SDimitry Andric return N->getValueType(0) == MVT::f32;
7140b57cec5SDimitry Andric }
7150b57cec5SDimitry Andric
7160b57cec5SDimitry Andric // Most FP instructions support source modifiers, but this could be refined
7170b57cec5SDimitry Andric // slightly.
7180b57cec5SDimitry Andric LLVM_READONLY
hasSourceMods(const SDNode * N)7190b57cec5SDimitry Andric static bool hasSourceMods(const SDNode *N) {
7200b57cec5SDimitry Andric if (isa<MemSDNode>(N))
7210b57cec5SDimitry Andric return false;
7220b57cec5SDimitry Andric
7230b57cec5SDimitry Andric switch (N->getOpcode()) {
7240b57cec5SDimitry Andric case ISD::CopyToReg:
7250b57cec5SDimitry Andric case ISD::FDIV:
7260b57cec5SDimitry Andric case ISD::FREM:
7270b57cec5SDimitry Andric case ISD::INLINEASM:
7280b57cec5SDimitry Andric case ISD::INLINEASM_BR:
7290b57cec5SDimitry Andric case AMDGPUISD::DIV_SCALE:
7308bcb0991SDimitry Andric case ISD::INTRINSIC_W_CHAIN:
7310b57cec5SDimitry Andric
7320b57cec5SDimitry Andric // TODO: Should really be looking at the users of the bitcast. These are
7330b57cec5SDimitry Andric // problematic because bitcasts are used to legalize all stores to integer
7340b57cec5SDimitry Andric // types.
7350b57cec5SDimitry Andric case ISD::BITCAST:
7360b57cec5SDimitry Andric return false;
7378bcb0991SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: {
738647cbc5dSDimitry Andric switch (N->getConstantOperandVal(0)) {
7398bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p1:
7408bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p2:
7418bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_mov:
7428bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p1_f16:
7438bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p2_f16:
7448bcb0991SDimitry Andric return false;
7458bcb0991SDimitry Andric default:
7468bcb0991SDimitry Andric return true;
7478bcb0991SDimitry Andric }
7488bcb0991SDimitry Andric }
74906c3fb27SDimitry Andric case ISD::SELECT:
75006c3fb27SDimitry Andric return selectSupportsSourceMods(N);
7510b57cec5SDimitry Andric default:
7520b57cec5SDimitry Andric return true;
7530b57cec5SDimitry Andric }
7540b57cec5SDimitry Andric }
7550b57cec5SDimitry Andric
allUsesHaveSourceMods(const SDNode * N,unsigned CostThreshold)7560b57cec5SDimitry Andric bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N,
7570b57cec5SDimitry Andric unsigned CostThreshold) {
7580b57cec5SDimitry Andric // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
7590b57cec5SDimitry Andric // it is truly free to use a source modifier in all cases. If there are
7600b57cec5SDimitry Andric // multiple users but for each one will necessitate using VOP3, there will be
7610b57cec5SDimitry Andric // a code size increase. Try to avoid increasing code size unless we know it
7620b57cec5SDimitry Andric // will save on the instruction count.
7630b57cec5SDimitry Andric unsigned NumMayIncreaseSize = 0;
7640b57cec5SDimitry Andric MVT VT = N->getValueType(0).getScalarType().getSimpleVT();
7650b57cec5SDimitry Andric
76606c3fb27SDimitry Andric assert(!N->use_empty());
76706c3fb27SDimitry Andric
7680b57cec5SDimitry Andric // XXX - Should this limit number of uses to check?
7690b57cec5SDimitry Andric for (const SDNode *U : N->uses()) {
7700b57cec5SDimitry Andric if (!hasSourceMods(U))
7710b57cec5SDimitry Andric return false;
7720b57cec5SDimitry Andric
7730b57cec5SDimitry Andric if (!opMustUseVOP3Encoding(U, VT)) {
7740b57cec5SDimitry Andric if (++NumMayIncreaseSize > CostThreshold)
7750b57cec5SDimitry Andric return false;
7760b57cec5SDimitry Andric }
7770b57cec5SDimitry Andric }
7780b57cec5SDimitry Andric
7790b57cec5SDimitry Andric return true;
7800b57cec5SDimitry Andric }
7810b57cec5SDimitry Andric
getTypeForExtReturn(LLVMContext & Context,EVT VT,ISD::NodeType ExtendKind) const7825ffd83dbSDimitry Andric EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
7835ffd83dbSDimitry Andric ISD::NodeType ExtendKind) const {
7845ffd83dbSDimitry Andric assert(!VT.isVector() && "only scalar expected");
7855ffd83dbSDimitry Andric
7865ffd83dbSDimitry Andric // Round to the next multiple of 32-bits.
7875ffd83dbSDimitry Andric unsigned Size = VT.getSizeInBits();
7885ffd83dbSDimitry Andric if (Size <= 32)
7895ffd83dbSDimitry Andric return MVT::i32;
7905ffd83dbSDimitry Andric return EVT::getIntegerVT(Context, 32 * ((Size + 31) / 32));
7915ffd83dbSDimitry Andric }
7925ffd83dbSDimitry Andric
getVectorIdxTy(const DataLayout &) const7930b57cec5SDimitry Andric MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
7940b57cec5SDimitry Andric return MVT::i32;
7950b57cec5SDimitry Andric }
7960b57cec5SDimitry Andric
isSelectSupported(SelectSupportKind SelType) const7970b57cec5SDimitry Andric bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
7980b57cec5SDimitry Andric return true;
7990b57cec5SDimitry Andric }
8000b57cec5SDimitry Andric
8010b57cec5SDimitry Andric // The backend supports 32 and 64 bit floating point immediates.
8020b57cec5SDimitry Andric // FIXME: Why are we reporting vectors of FP immediates as legal?
isFPImmLegal(const APFloat & Imm,EVT VT,bool ForCodeSize) const8030b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8040b57cec5SDimitry Andric bool ForCodeSize) const {
8050b57cec5SDimitry Andric EVT ScalarVT = VT.getScalarType();
8060b57cec5SDimitry Andric return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
8070b57cec5SDimitry Andric (ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));
8080b57cec5SDimitry Andric }
8090b57cec5SDimitry Andric
8100b57cec5SDimitry Andric // We don't want to shrink f64 / f32 constants.
ShouldShrinkFPConstant(EVT VT) const8110b57cec5SDimitry Andric bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
8120b57cec5SDimitry Andric EVT ScalarVT = VT.getScalarType();
8130b57cec5SDimitry Andric return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
8140b57cec5SDimitry Andric }
8150b57cec5SDimitry Andric
shouldReduceLoadWidth(SDNode * N,ISD::LoadExtType ExtTy,EVT NewVT) const8160b57cec5SDimitry Andric bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
8170b57cec5SDimitry Andric ISD::LoadExtType ExtTy,
8180b57cec5SDimitry Andric EVT NewVT) const {
8190b57cec5SDimitry Andric // TODO: This may be worth removing. Check regression tests for diffs.
8200b57cec5SDimitry Andric if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
8210b57cec5SDimitry Andric return false;
8220b57cec5SDimitry Andric
8230b57cec5SDimitry Andric unsigned NewSize = NewVT.getStoreSizeInBits();
8240b57cec5SDimitry Andric
8255ffd83dbSDimitry Andric // If we are reducing to a 32-bit load or a smaller multi-dword load,
8265ffd83dbSDimitry Andric // this is always better.
8275ffd83dbSDimitry Andric if (NewSize >= 32)
8280b57cec5SDimitry Andric return true;
8290b57cec5SDimitry Andric
8300b57cec5SDimitry Andric EVT OldVT = N->getValueType(0);
8310b57cec5SDimitry Andric unsigned OldSize = OldVT.getStoreSizeInBits();
8320b57cec5SDimitry Andric
8330b57cec5SDimitry Andric MemSDNode *MN = cast<MemSDNode>(N);
8340b57cec5SDimitry Andric unsigned AS = MN->getAddressSpace();
8350b57cec5SDimitry Andric // Do not shrink an aligned scalar load to sub-dword.
8360b57cec5SDimitry Andric // Scalar engine cannot do sub-dword loads.
8377a6dacacSDimitry Andric // TODO: Update this for GFX12 which does have scalar sub-dword loads.
83881ad6265SDimitry Andric if (OldSize >= 32 && NewSize < 32 && MN->getAlign() >= Align(4) &&
8390b57cec5SDimitry Andric (AS == AMDGPUAS::CONSTANT_ADDRESS ||
8400b57cec5SDimitry Andric AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
84181ad6265SDimitry Andric (isa<LoadSDNode>(N) && AS == AMDGPUAS::GLOBAL_ADDRESS &&
84281ad6265SDimitry Andric MN->isInvariant())) &&
8430b57cec5SDimitry Andric AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand()))
8440b57cec5SDimitry Andric return false;
8450b57cec5SDimitry Andric
8460b57cec5SDimitry Andric // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
8470b57cec5SDimitry Andric // extloads, so doing one requires using a buffer_load. In cases where we
8480b57cec5SDimitry Andric // still couldn't use a scalar load, using the wider load shouldn't really
8490b57cec5SDimitry Andric // hurt anything.
8500b57cec5SDimitry Andric
8510b57cec5SDimitry Andric // If the old size already had to be an extload, there's no harm in continuing
8520b57cec5SDimitry Andric // to reduce the width.
8530b57cec5SDimitry Andric return (OldSize < 32);
8540b57cec5SDimitry Andric }
8550b57cec5SDimitry Andric
isLoadBitCastBeneficial(EVT LoadTy,EVT CastTy,const SelectionDAG & DAG,const MachineMemOperand & MMO) const8560b57cec5SDimitry Andric bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
8570b57cec5SDimitry Andric const SelectionDAG &DAG,
8580b57cec5SDimitry Andric const MachineMemOperand &MMO) const {
8590b57cec5SDimitry Andric
8600b57cec5SDimitry Andric assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits());
8610b57cec5SDimitry Andric
8620b57cec5SDimitry Andric if (LoadTy.getScalarType() == MVT::i32)
8630b57cec5SDimitry Andric return false;
8640b57cec5SDimitry Andric
8650b57cec5SDimitry Andric unsigned LScalarSize = LoadTy.getScalarSizeInBits();
8660b57cec5SDimitry Andric unsigned CastScalarSize = CastTy.getScalarSizeInBits();
8670b57cec5SDimitry Andric
8680b57cec5SDimitry Andric if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
8690b57cec5SDimitry Andric return false;
8700b57cec5SDimitry Andric
871bdd1243dSDimitry Andric unsigned Fast = 0;
8728bcb0991SDimitry Andric return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
8738bcb0991SDimitry Andric CastTy, MMO, &Fast) &&
8748bcb0991SDimitry Andric Fast;
8750b57cec5SDimitry Andric }
8760b57cec5SDimitry Andric
8770b57cec5SDimitry Andric // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
8780b57cec5SDimitry Andric // profitable with the expansion for 64-bit since it's generally good to
8790b57cec5SDimitry Andric // speculate things.
isCheapToSpeculateCttz(Type * Ty) const880bdd1243dSDimitry Andric bool AMDGPUTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
8810b57cec5SDimitry Andric return true;
8820b57cec5SDimitry Andric }
8830b57cec5SDimitry Andric
isCheapToSpeculateCtlz(Type * Ty) const884bdd1243dSDimitry Andric bool AMDGPUTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
8850b57cec5SDimitry Andric return true;
8860b57cec5SDimitry Andric }
8870b57cec5SDimitry Andric
isSDNodeAlwaysUniform(const SDNode * N) const8880b57cec5SDimitry Andric bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
8890b57cec5SDimitry Andric switch (N->getOpcode()) {
8900b57cec5SDimitry Andric case ISD::EntryToken:
8910b57cec5SDimitry Andric case ISD::TokenFactor:
8920b57cec5SDimitry Andric return true;
893e8d8bef9SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: {
894647cbc5dSDimitry Andric unsigned IntrID = N->getConstantOperandVal(0);
8950fca6ea1SDimitry Andric return AMDGPU::isIntrinsicAlwaysUniform(IntrID);
8960b57cec5SDimitry Andric }
8970b57cec5SDimitry Andric case ISD::LOAD:
8988bcb0991SDimitry Andric if (cast<LoadSDNode>(N)->getMemOperand()->getAddrSpace() ==
8998bcb0991SDimitry Andric AMDGPUAS::CONSTANT_ADDRESS_32BIT)
9000b57cec5SDimitry Andric return true;
9010b57cec5SDimitry Andric return false;
90281ad6265SDimitry Andric case AMDGPUISD::SETCC: // ballot-style instruction
90381ad6265SDimitry Andric return true;
9040b57cec5SDimitry Andric }
905e8d8bef9SDimitry Andric return false;
9060b57cec5SDimitry Andric }
9070b57cec5SDimitry Andric
getNegatedExpression(SDValue Op,SelectionDAG & DAG,bool LegalOperations,bool ForCodeSize,NegatibleCost & Cost,unsigned Depth) const9085ffd83dbSDimitry Andric SDValue AMDGPUTargetLowering::getNegatedExpression(
9095ffd83dbSDimitry Andric SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize,
9105ffd83dbSDimitry Andric NegatibleCost &Cost, unsigned Depth) const {
9115ffd83dbSDimitry Andric
9125ffd83dbSDimitry Andric switch (Op.getOpcode()) {
9135ffd83dbSDimitry Andric case ISD::FMA:
9145ffd83dbSDimitry Andric case ISD::FMAD: {
9155ffd83dbSDimitry Andric // Negating a fma is not free if it has users without source mods.
9165ffd83dbSDimitry Andric if (!allUsesHaveSourceMods(Op.getNode()))
9175ffd83dbSDimitry Andric return SDValue();
9185ffd83dbSDimitry Andric break;
9195ffd83dbSDimitry Andric }
92006c3fb27SDimitry Andric case AMDGPUISD::RCP: {
92106c3fb27SDimitry Andric SDValue Src = Op.getOperand(0);
92206c3fb27SDimitry Andric EVT VT = Op.getValueType();
92306c3fb27SDimitry Andric SDLoc SL(Op);
92406c3fb27SDimitry Andric
92506c3fb27SDimitry Andric SDValue NegSrc = getNegatedExpression(Src, DAG, LegalOperations,
92606c3fb27SDimitry Andric ForCodeSize, Cost, Depth + 1);
92706c3fb27SDimitry Andric if (NegSrc)
92806c3fb27SDimitry Andric return DAG.getNode(AMDGPUISD::RCP, SL, VT, NegSrc, Op->getFlags());
92906c3fb27SDimitry Andric return SDValue();
93006c3fb27SDimitry Andric }
9315ffd83dbSDimitry Andric default:
9325ffd83dbSDimitry Andric break;
9335ffd83dbSDimitry Andric }
9345ffd83dbSDimitry Andric
9355ffd83dbSDimitry Andric return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations,
9365ffd83dbSDimitry Andric ForCodeSize, Cost, Depth);
9375ffd83dbSDimitry Andric }
9385ffd83dbSDimitry Andric
9390b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
9400b57cec5SDimitry Andric // Target Properties
9410b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
9420b57cec5SDimitry Andric
isFAbsFree(EVT VT) const9430b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
9440b57cec5SDimitry Andric assert(VT.isFloatingPoint());
9450b57cec5SDimitry Andric
9460b57cec5SDimitry Andric // Packed operations do not have a fabs modifier.
9470b57cec5SDimitry Andric return VT == MVT::f32 || VT == MVT::f64 ||
9480fca6ea1SDimitry Andric (Subtarget->has16BitInsts() && (VT == MVT::f16 || VT == MVT::bf16));
9490b57cec5SDimitry Andric }
9500b57cec5SDimitry Andric
isFNegFree(EVT VT) const9510b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
9520b57cec5SDimitry Andric assert(VT.isFloatingPoint());
953fe6060f1SDimitry Andric // Report this based on the end legalized type.
954fe6060f1SDimitry Andric VT = VT.getScalarType();
9550fca6ea1SDimitry Andric return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16 || VT == MVT::bf16;
9560b57cec5SDimitry Andric }
9570b57cec5SDimitry Andric
storeOfVectorConstantIsCheap(bool IsZero,EVT MemVT,unsigned NumElem,unsigned AS) const95806c3fb27SDimitry Andric bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
9590b57cec5SDimitry Andric unsigned NumElem,
9600b57cec5SDimitry Andric unsigned AS) const {
9610b57cec5SDimitry Andric return true;
9620b57cec5SDimitry Andric }
9630b57cec5SDimitry Andric
aggressivelyPreferBuildVectorSources(EVT VecVT) const9640b57cec5SDimitry Andric bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const {
9650b57cec5SDimitry Andric // There are few operations which truly have vector input operands. Any vector
9660b57cec5SDimitry Andric // operation is going to involve operations on each component, and a
9670b57cec5SDimitry Andric // build_vector will be a copy per element, so it always makes sense to use a
9680b57cec5SDimitry Andric // build_vector input in place of the extracted element to avoid a copy into a
9690b57cec5SDimitry Andric // super register.
9700b57cec5SDimitry Andric //
9710b57cec5SDimitry Andric // We should probably only do this if all users are extracts only, but this
9720b57cec5SDimitry Andric // should be the common case.
9730b57cec5SDimitry Andric return true;
9740b57cec5SDimitry Andric }
9750b57cec5SDimitry Andric
isTruncateFree(EVT Source,EVT Dest) const9760b57cec5SDimitry Andric bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
9770b57cec5SDimitry Andric // Truncate is just accessing a subregister.
9780b57cec5SDimitry Andric
9790b57cec5SDimitry Andric unsigned SrcSize = Source.getSizeInBits();
9800b57cec5SDimitry Andric unsigned DestSize = Dest.getSizeInBits();
9810b57cec5SDimitry Andric
9820b57cec5SDimitry Andric return DestSize < SrcSize && DestSize % 32 == 0 ;
9830b57cec5SDimitry Andric }
9840b57cec5SDimitry Andric
isTruncateFree(Type * Source,Type * Dest) const9850b57cec5SDimitry Andric bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
9860b57cec5SDimitry Andric // Truncate is just accessing a subregister.
9870b57cec5SDimitry Andric
9880b57cec5SDimitry Andric unsigned SrcSize = Source->getScalarSizeInBits();
9890b57cec5SDimitry Andric unsigned DestSize = Dest->getScalarSizeInBits();
9900b57cec5SDimitry Andric
9910b57cec5SDimitry Andric if (DestSize== 16 && Subtarget->has16BitInsts())
9920b57cec5SDimitry Andric return SrcSize >= 32;
9930b57cec5SDimitry Andric
9940b57cec5SDimitry Andric return DestSize < SrcSize && DestSize % 32 == 0;
9950b57cec5SDimitry Andric }
9960b57cec5SDimitry Andric
isZExtFree(Type * Src,Type * Dest) const9970b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
9980b57cec5SDimitry Andric unsigned SrcSize = Src->getScalarSizeInBits();
9990b57cec5SDimitry Andric unsigned DestSize = Dest->getScalarSizeInBits();
10000b57cec5SDimitry Andric
10010b57cec5SDimitry Andric if (SrcSize == 16 && Subtarget->has16BitInsts())
10020b57cec5SDimitry Andric return DestSize >= 32;
10030b57cec5SDimitry Andric
10040b57cec5SDimitry Andric return SrcSize == 32 && DestSize == 64;
10050b57cec5SDimitry Andric }
10060b57cec5SDimitry Andric
isZExtFree(EVT Src,EVT Dest) const10070b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
10080b57cec5SDimitry Andric // Any register load of a 64-bit value really requires 2 32-bit moves. For all
10090b57cec5SDimitry Andric // practical purposes, the extra mov 0 to load a 64-bit is free. As used,
10100b57cec5SDimitry Andric // this will enable reducing 64-bit operations the 32-bit, which is always
10110b57cec5SDimitry Andric // good.
10120b57cec5SDimitry Andric
10130b57cec5SDimitry Andric if (Src == MVT::i16)
10140b57cec5SDimitry Andric return Dest == MVT::i32 ||Dest == MVT::i64 ;
10150b57cec5SDimitry Andric
10160b57cec5SDimitry Andric return Src == MVT::i32 && Dest == MVT::i64;
10170b57cec5SDimitry Andric }
10180b57cec5SDimitry Andric
isNarrowingProfitable(EVT SrcVT,EVT DestVT) const10190b57cec5SDimitry Andric bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
10200b57cec5SDimitry Andric // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
10210b57cec5SDimitry Andric // limited number of native 64-bit operations. Shrinking an operation to fit
10220b57cec5SDimitry Andric // in a single 32-bit register should always be helpful. As currently used,
10230b57cec5SDimitry Andric // this is much less general than the name suggests, and is only used in
10240b57cec5SDimitry Andric // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is
10250b57cec5SDimitry Andric // not profitable, and may actually be harmful.
10260b57cec5SDimitry Andric return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
10270b57cec5SDimitry Andric }
10280b57cec5SDimitry Andric
isDesirableToCommuteWithShift(const SDNode * N,CombineLevel Level) const1029bdd1243dSDimitry Andric bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
1030bdd1243dSDimitry Andric const SDNode* N, CombineLevel Level) const {
1031bdd1243dSDimitry Andric assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
1032bdd1243dSDimitry Andric N->getOpcode() == ISD::SRL) &&
1033bdd1243dSDimitry Andric "Expected shift op");
1034bdd1243dSDimitry Andric // Always commute pre-type legalization and right shifts.
1035bdd1243dSDimitry Andric // We're looking for shl(or(x,y),z) patterns.
1036bdd1243dSDimitry Andric if (Level < CombineLevel::AfterLegalizeTypes ||
1037bdd1243dSDimitry Andric N->getOpcode() != ISD::SHL || N->getOperand(0).getOpcode() != ISD::OR)
1038bdd1243dSDimitry Andric return true;
1039bdd1243dSDimitry Andric
1040bdd1243dSDimitry Andric // If only user is a i32 right-shift, then don't destroy a BFE pattern.
1041bdd1243dSDimitry Andric if (N->getValueType(0) == MVT::i32 && N->use_size() == 1 &&
1042bdd1243dSDimitry Andric (N->use_begin()->getOpcode() == ISD::SRA ||
1043bdd1243dSDimitry Andric N->use_begin()->getOpcode() == ISD::SRL))
1044bdd1243dSDimitry Andric return false;
1045bdd1243dSDimitry Andric
1046bdd1243dSDimitry Andric // Don't destroy or(shl(load_zext(),c), load_zext()) patterns.
1047bdd1243dSDimitry Andric auto IsShiftAndLoad = [](SDValue LHS, SDValue RHS) {
1048bdd1243dSDimitry Andric if (LHS.getOpcode() != ISD::SHL)
1049bdd1243dSDimitry Andric return false;
1050bdd1243dSDimitry Andric auto *RHSLd = dyn_cast<LoadSDNode>(RHS);
1051bdd1243dSDimitry Andric auto *LHS0 = dyn_cast<LoadSDNode>(LHS.getOperand(0));
1052bdd1243dSDimitry Andric auto *LHS1 = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
1053bdd1243dSDimitry Andric return LHS0 && LHS1 && RHSLd && LHS0->getExtensionType() == ISD::ZEXTLOAD &&
1054bdd1243dSDimitry Andric LHS1->getAPIntValue() == LHS0->getMemoryVT().getScalarSizeInBits() &&
1055bdd1243dSDimitry Andric RHSLd->getExtensionType() == ISD::ZEXTLOAD;
1056bdd1243dSDimitry Andric };
1057bdd1243dSDimitry Andric SDValue LHS = N->getOperand(0).getOperand(0);
1058bdd1243dSDimitry Andric SDValue RHS = N->getOperand(0).getOperand(1);
1059bdd1243dSDimitry Andric return !(IsShiftAndLoad(LHS, RHS) || IsShiftAndLoad(RHS, LHS));
1060bdd1243dSDimitry Andric }
1061bdd1243dSDimitry Andric
10620b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
10630b57cec5SDimitry Andric // TargetLowering Callbacks
10640b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
10650b57cec5SDimitry Andric
CCAssignFnForCall(CallingConv::ID CC,bool IsVarArg)10660b57cec5SDimitry Andric CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
10670b57cec5SDimitry Andric bool IsVarArg) {
10680b57cec5SDimitry Andric switch (CC) {
10690b57cec5SDimitry Andric case CallingConv::AMDGPU_VS:
10700b57cec5SDimitry Andric case CallingConv::AMDGPU_GS:
10710b57cec5SDimitry Andric case CallingConv::AMDGPU_PS:
10720b57cec5SDimitry Andric case CallingConv::AMDGPU_CS:
10730b57cec5SDimitry Andric case CallingConv::AMDGPU_HS:
10740b57cec5SDimitry Andric case CallingConv::AMDGPU_ES:
10750b57cec5SDimitry Andric case CallingConv::AMDGPU_LS:
10760b57cec5SDimitry Andric return CC_AMDGPU;
10775f757f3fSDimitry Andric case CallingConv::AMDGPU_CS_Chain:
10785f757f3fSDimitry Andric case CallingConv::AMDGPU_CS_ChainPreserve:
10795f757f3fSDimitry Andric return CC_AMDGPU_CS_CHAIN;
10800b57cec5SDimitry Andric case CallingConv::C:
10810b57cec5SDimitry Andric case CallingConv::Fast:
10820b57cec5SDimitry Andric case CallingConv::Cold:
10830b57cec5SDimitry Andric return CC_AMDGPU_Func;
1084e8d8bef9SDimitry Andric case CallingConv::AMDGPU_Gfx:
1085e8d8bef9SDimitry Andric return CC_SI_Gfx;
10860b57cec5SDimitry Andric case CallingConv::AMDGPU_KERNEL:
10870b57cec5SDimitry Andric case CallingConv::SPIR_KERNEL:
10880b57cec5SDimitry Andric default:
10890b57cec5SDimitry Andric report_fatal_error("Unsupported calling convention for call");
10900b57cec5SDimitry Andric }
10910b57cec5SDimitry Andric }
10920b57cec5SDimitry Andric
CCAssignFnForReturn(CallingConv::ID CC,bool IsVarArg)10930b57cec5SDimitry Andric CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
10940b57cec5SDimitry Andric bool IsVarArg) {
10950b57cec5SDimitry Andric switch (CC) {
10960b57cec5SDimitry Andric case CallingConv::AMDGPU_KERNEL:
10970b57cec5SDimitry Andric case CallingConv::SPIR_KERNEL:
10980b57cec5SDimitry Andric llvm_unreachable("kernels should not be handled here");
10990b57cec5SDimitry Andric case CallingConv::AMDGPU_VS:
11000b57cec5SDimitry Andric case CallingConv::AMDGPU_GS:
11010b57cec5SDimitry Andric case CallingConv::AMDGPU_PS:
11020b57cec5SDimitry Andric case CallingConv::AMDGPU_CS:
11035f757f3fSDimitry Andric case CallingConv::AMDGPU_CS_Chain:
11045f757f3fSDimitry Andric case CallingConv::AMDGPU_CS_ChainPreserve:
11050b57cec5SDimitry Andric case CallingConv::AMDGPU_HS:
11060b57cec5SDimitry Andric case CallingConv::AMDGPU_ES:
11070b57cec5SDimitry Andric case CallingConv::AMDGPU_LS:
11080b57cec5SDimitry Andric return RetCC_SI_Shader;
1109e8d8bef9SDimitry Andric case CallingConv::AMDGPU_Gfx:
1110e8d8bef9SDimitry Andric return RetCC_SI_Gfx;
11110b57cec5SDimitry Andric case CallingConv::C:
11120b57cec5SDimitry Andric case CallingConv::Fast:
11130b57cec5SDimitry Andric case CallingConv::Cold:
11140b57cec5SDimitry Andric return RetCC_AMDGPU_Func;
11150b57cec5SDimitry Andric default:
11160b57cec5SDimitry Andric report_fatal_error("Unsupported calling convention.");
11170b57cec5SDimitry Andric }
11180b57cec5SDimitry Andric }
11190b57cec5SDimitry Andric
11200b57cec5SDimitry Andric /// The SelectionDAGBuilder will automatically promote function arguments
11210b57cec5SDimitry Andric /// with illegal types. However, this does not work for the AMDGPU targets
11220b57cec5SDimitry Andric /// since the function arguments are stored in memory as these illegal types.
11230b57cec5SDimitry Andric /// In order to handle this properly we need to get the original types sizes
11240b57cec5SDimitry Andric /// from the LLVM IR Function and fixup the ISD:InputArg values before
11250b57cec5SDimitry Andric /// passing them to AnalyzeFormalArguments()
11260b57cec5SDimitry Andric
11270b57cec5SDimitry Andric /// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
11280b57cec5SDimitry Andric /// input values across multiple registers. Each item in the Ins array
11290b57cec5SDimitry Andric /// represents a single value that will be stored in registers. Ins[x].VT is
11300b57cec5SDimitry Andric /// the value type of the value that will be stored in the register, so
11310b57cec5SDimitry Andric /// whatever SDNode we lower the argument to needs to be this type.
11320b57cec5SDimitry Andric ///
11330b57cec5SDimitry Andric /// In order to correctly lower the arguments we need to know the size of each
11340b57cec5SDimitry Andric /// argument. Since Ins[x].VT gives us the size of the register that will
11350b57cec5SDimitry Andric /// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
1136349cc55cSDimitry Andric /// for the original function argument so that we can deduce the correct memory
11370b57cec5SDimitry Andric /// type to use for Ins[x]. In most cases the correct memory type will be
11380b57cec5SDimitry Andric /// Ins[x].ArgVT. However, this will not always be the case. If, for example,
11390b57cec5SDimitry Andric /// we have a kernel argument of type v8i8, this argument will be split into
11400b57cec5SDimitry Andric /// 8 parts and each part will be represented by its own item in the Ins array.
11410b57cec5SDimitry Andric /// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of
11420b57cec5SDimitry Andric /// the argument before it was split. From this, we deduce that the memory type
11430b57cec5SDimitry Andric /// for each individual part is i8. We pass the memory type as LocVT to the
11440b57cec5SDimitry Andric /// calling convention analysis function and the register type (Ins[x].VT) as
11450b57cec5SDimitry Andric /// the ValVT.
analyzeFormalArgumentsCompute(CCState & State,const SmallVectorImpl<ISD::InputArg> & Ins) const11460b57cec5SDimitry Andric void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
11470b57cec5SDimitry Andric CCState &State,
11480b57cec5SDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins) const {
11490b57cec5SDimitry Andric const MachineFunction &MF = State.getMachineFunction();
11500b57cec5SDimitry Andric const Function &Fn = MF.getFunction();
11510b57cec5SDimitry Andric LLVMContext &Ctx = Fn.getParent()->getContext();
11520b57cec5SDimitry Andric const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
115306c3fb27SDimitry Andric const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset();
11540b57cec5SDimitry Andric CallingConv::ID CC = Fn.getCallingConv();
11550b57cec5SDimitry Andric
11565ffd83dbSDimitry Andric Align MaxAlign = Align(1);
11570b57cec5SDimitry Andric uint64_t ExplicitArgOffset = 0;
11580fca6ea1SDimitry Andric const DataLayout &DL = Fn.getDataLayout();
11590b57cec5SDimitry Andric
11600b57cec5SDimitry Andric unsigned InIndex = 0;
11610b57cec5SDimitry Andric
11620b57cec5SDimitry Andric for (const Argument &Arg : Fn.args()) {
1163e8d8bef9SDimitry Andric const bool IsByRef = Arg.hasByRefAttr();
11640b57cec5SDimitry Andric Type *BaseArgTy = Arg.getType();
1165e8d8bef9SDimitry Andric Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy;
116681ad6265SDimitry Andric Align Alignment = DL.getValueOrABITypeAlignment(
1167bdd1243dSDimitry Andric IsByRef ? Arg.getParamAlign() : std::nullopt, MemArgTy);
116881ad6265SDimitry Andric MaxAlign = std::max(Alignment, MaxAlign);
1169e8d8bef9SDimitry Andric uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy);
11700b57cec5SDimitry Andric
11715ffd83dbSDimitry Andric uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
11725ffd83dbSDimitry Andric ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize;
11730b57cec5SDimitry Andric
11740b57cec5SDimitry Andric // We're basically throwing away everything passed into us and starting over
11750b57cec5SDimitry Andric // to get accurate in-memory offsets. The "PartOffset" is completely useless
11760b57cec5SDimitry Andric // to us as computed in Ins.
11770b57cec5SDimitry Andric //
11780b57cec5SDimitry Andric // We also need to figure out what type legalization is trying to do to get
11790b57cec5SDimitry Andric // the correct memory offsets.
11800b57cec5SDimitry Andric
11810b57cec5SDimitry Andric SmallVector<EVT, 16> ValueVTs;
11820b57cec5SDimitry Andric SmallVector<uint64_t, 16> Offsets;
11830b57cec5SDimitry Andric ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset);
11840b57cec5SDimitry Andric
11850b57cec5SDimitry Andric for (unsigned Value = 0, NumValues = ValueVTs.size();
11860b57cec5SDimitry Andric Value != NumValues; ++Value) {
11870b57cec5SDimitry Andric uint64_t BasePartOffset = Offsets[Value];
11880b57cec5SDimitry Andric
11890b57cec5SDimitry Andric EVT ArgVT = ValueVTs[Value];
11900b57cec5SDimitry Andric EVT MemVT = ArgVT;
11910b57cec5SDimitry Andric MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT);
11920b57cec5SDimitry Andric unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT);
11930b57cec5SDimitry Andric
11940b57cec5SDimitry Andric if (NumRegs == 1) {
11950b57cec5SDimitry Andric // This argument is not split, so the IR type is the memory type.
11960b57cec5SDimitry Andric if (ArgVT.isExtended()) {
11970b57cec5SDimitry Andric // We have an extended type, like i24, so we should just use the
11980b57cec5SDimitry Andric // register type.
11990b57cec5SDimitry Andric MemVT = RegisterVT;
12000b57cec5SDimitry Andric } else {
12010b57cec5SDimitry Andric MemVT = ArgVT;
12020b57cec5SDimitry Andric }
12030b57cec5SDimitry Andric } else if (ArgVT.isVector() && RegisterVT.isVector() &&
12040b57cec5SDimitry Andric ArgVT.getScalarType() == RegisterVT.getScalarType()) {
12050b57cec5SDimitry Andric assert(ArgVT.getVectorNumElements() > RegisterVT.getVectorNumElements());
12060b57cec5SDimitry Andric // We have a vector value which has been split into a vector with
12070b57cec5SDimitry Andric // the same scalar type, but fewer elements. This should handle
12080b57cec5SDimitry Andric // all the floating-point vector types.
12090b57cec5SDimitry Andric MemVT = RegisterVT;
12100b57cec5SDimitry Andric } else if (ArgVT.isVector() &&
12110b57cec5SDimitry Andric ArgVT.getVectorNumElements() == NumRegs) {
12120b57cec5SDimitry Andric // This arg has been split so that each element is stored in a separate
12130b57cec5SDimitry Andric // register.
12140b57cec5SDimitry Andric MemVT = ArgVT.getScalarType();
12150b57cec5SDimitry Andric } else if (ArgVT.isExtended()) {
12160b57cec5SDimitry Andric // We have an extended type, like i65.
12170b57cec5SDimitry Andric MemVT = RegisterVT;
12180b57cec5SDimitry Andric } else {
12190b57cec5SDimitry Andric unsigned MemoryBits = ArgVT.getStoreSizeInBits() / NumRegs;
12200b57cec5SDimitry Andric assert(ArgVT.getStoreSizeInBits() % NumRegs == 0);
12210b57cec5SDimitry Andric if (RegisterVT.isInteger()) {
12220b57cec5SDimitry Andric MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
12230b57cec5SDimitry Andric } else if (RegisterVT.isVector()) {
12240b57cec5SDimitry Andric assert(!RegisterVT.getScalarType().isFloatingPoint());
12250b57cec5SDimitry Andric unsigned NumElements = RegisterVT.getVectorNumElements();
12260b57cec5SDimitry Andric assert(MemoryBits % NumElements == 0);
12270b57cec5SDimitry Andric // This vector type has been split into another vector type with
12280b57cec5SDimitry Andric // a different elements size.
12290b57cec5SDimitry Andric EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
12300b57cec5SDimitry Andric MemoryBits / NumElements);
12310b57cec5SDimitry Andric MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
12320b57cec5SDimitry Andric } else {
12330b57cec5SDimitry Andric llvm_unreachable("cannot deduce memory type.");
12340b57cec5SDimitry Andric }
12350b57cec5SDimitry Andric }
12360b57cec5SDimitry Andric
12370b57cec5SDimitry Andric // Convert one element vectors to scalar.
12380b57cec5SDimitry Andric if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
12390b57cec5SDimitry Andric MemVT = MemVT.getScalarType();
12400b57cec5SDimitry Andric
12410b57cec5SDimitry Andric // Round up vec3/vec5 argument.
12420b57cec5SDimitry Andric if (MemVT.isVector() && !MemVT.isPow2VectorType()) {
12430b57cec5SDimitry Andric assert(MemVT.getVectorNumElements() == 3 ||
1244bdd1243dSDimitry Andric MemVT.getVectorNumElements() == 5 ||
1245bdd1243dSDimitry Andric (MemVT.getVectorNumElements() >= 9 &&
1246bdd1243dSDimitry Andric MemVT.getVectorNumElements() <= 12));
12470b57cec5SDimitry Andric MemVT = MemVT.getPow2VectorType(State.getContext());
12485ffd83dbSDimitry Andric } else if (!MemVT.isSimple() && !MemVT.isVector()) {
12495ffd83dbSDimitry Andric MemVT = MemVT.getRoundIntegerType(State.getContext());
12500b57cec5SDimitry Andric }
12510b57cec5SDimitry Andric
12520b57cec5SDimitry Andric unsigned PartOffset = 0;
12530b57cec5SDimitry Andric for (unsigned i = 0; i != NumRegs; ++i) {
12540b57cec5SDimitry Andric State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT,
12550b57cec5SDimitry Andric BasePartOffset + PartOffset,
12560b57cec5SDimitry Andric MemVT.getSimpleVT(),
12570b57cec5SDimitry Andric CCValAssign::Full));
12580b57cec5SDimitry Andric PartOffset += MemVT.getStoreSize();
12590b57cec5SDimitry Andric }
12600b57cec5SDimitry Andric }
12610b57cec5SDimitry Andric }
12620b57cec5SDimitry Andric }
12630b57cec5SDimitry Andric
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & DL,SelectionDAG & DAG) const12640b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerReturn(
12650b57cec5SDimitry Andric SDValue Chain, CallingConv::ID CallConv,
12660b57cec5SDimitry Andric bool isVarArg,
12670b57cec5SDimitry Andric const SmallVectorImpl<ISD::OutputArg> &Outs,
12680b57cec5SDimitry Andric const SmallVectorImpl<SDValue> &OutVals,
12690b57cec5SDimitry Andric const SDLoc &DL, SelectionDAG &DAG) const {
12700b57cec5SDimitry Andric // FIXME: Fails for r600 tests
12710b57cec5SDimitry Andric //assert(!isVarArg && Outs.empty() && OutVals.empty() &&
12720b57cec5SDimitry Andric // "wave terminate should not have return values");
12730b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain);
12740b57cec5SDimitry Andric }
12750b57cec5SDimitry Andric
12760b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
12770b57cec5SDimitry Andric // Target specific lowering
12780b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
12790b57cec5SDimitry Andric
12800b57cec5SDimitry Andric /// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFnForCall(CallingConv::ID CC,bool IsVarArg)12810b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
12820b57cec5SDimitry Andric bool IsVarArg) {
12830b57cec5SDimitry Andric return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg);
12840b57cec5SDimitry Andric }
12850b57cec5SDimitry Andric
CCAssignFnForReturn(CallingConv::ID CC,bool IsVarArg)12860b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
12870b57cec5SDimitry Andric bool IsVarArg) {
12880b57cec5SDimitry Andric return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg);
12890b57cec5SDimitry Andric }
12900b57cec5SDimitry Andric
addTokenForArgument(SDValue Chain,SelectionDAG & DAG,MachineFrameInfo & MFI,int ClobberedFI) const12910b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain,
12920b57cec5SDimitry Andric SelectionDAG &DAG,
12930b57cec5SDimitry Andric MachineFrameInfo &MFI,
12940b57cec5SDimitry Andric int ClobberedFI) const {
12950b57cec5SDimitry Andric SmallVector<SDValue, 8> ArgChains;
12960b57cec5SDimitry Andric int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
12970b57cec5SDimitry Andric int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
12980b57cec5SDimitry Andric
12990b57cec5SDimitry Andric // Include the original chain at the beginning of the list. When this is
13000b57cec5SDimitry Andric // used by target LowerCall hooks, this helps legalize find the
13010b57cec5SDimitry Andric // CALLSEQ_BEGIN node.
13020b57cec5SDimitry Andric ArgChains.push_back(Chain);
13030b57cec5SDimitry Andric
13040b57cec5SDimitry Andric // Add a chain value for each stack argument corresponding
1305349cc55cSDimitry Andric for (SDNode *U : DAG.getEntryNode().getNode()->uses()) {
1306349cc55cSDimitry Andric if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) {
13070b57cec5SDimitry Andric if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) {
13080b57cec5SDimitry Andric if (FI->getIndex() < 0) {
13090b57cec5SDimitry Andric int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
13100b57cec5SDimitry Andric int64_t InLastByte = InFirstByte;
13110b57cec5SDimitry Andric InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
13120b57cec5SDimitry Andric
13130b57cec5SDimitry Andric if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
13140b57cec5SDimitry Andric (FirstByte <= InFirstByte && InFirstByte <= LastByte))
13150b57cec5SDimitry Andric ArgChains.push_back(SDValue(L, 1));
13160b57cec5SDimitry Andric }
13170b57cec5SDimitry Andric }
13180b57cec5SDimitry Andric }
13190b57cec5SDimitry Andric }
13200b57cec5SDimitry Andric
13210b57cec5SDimitry Andric // Build a tokenfactor for all the chains.
13220b57cec5SDimitry Andric return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
13230b57cec5SDimitry Andric }
13240b57cec5SDimitry Andric
lowerUnhandledCall(CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals,StringRef Reason) const13250b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI,
13260b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals,
13270b57cec5SDimitry Andric StringRef Reason) const {
13280b57cec5SDimitry Andric SDValue Callee = CLI.Callee;
13290b57cec5SDimitry Andric SelectionDAG &DAG = CLI.DAG;
13300b57cec5SDimitry Andric
13310b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction();
13320b57cec5SDimitry Andric
13330b57cec5SDimitry Andric StringRef FuncName("<unknown>");
13340b57cec5SDimitry Andric
13350b57cec5SDimitry Andric if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
13360b57cec5SDimitry Andric FuncName = G->getSymbol();
13370b57cec5SDimitry Andric else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
13380b57cec5SDimitry Andric FuncName = G->getGlobal()->getName();
13390b57cec5SDimitry Andric
13400b57cec5SDimitry Andric DiagnosticInfoUnsupported NoCalls(
13410b57cec5SDimitry Andric Fn, Reason + FuncName, CLI.DL.getDebugLoc());
13420b57cec5SDimitry Andric DAG.getContext()->diagnose(NoCalls);
13430b57cec5SDimitry Andric
13440b57cec5SDimitry Andric if (!CLI.IsTailCall) {
13450fca6ea1SDimitry Andric for (ISD::InputArg &Arg : CLI.Ins)
13460fca6ea1SDimitry Andric InVals.push_back(DAG.getUNDEF(Arg.VT));
13470b57cec5SDimitry Andric }
13480b57cec5SDimitry Andric
13490b57cec5SDimitry Andric return DAG.getEntryNode();
13500b57cec5SDimitry Andric }
13510b57cec5SDimitry Andric
LowerCall(CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const13520b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
13530b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals) const {
13540b57cec5SDimitry Andric return lowerUnhandledCall(CLI, InVals, "unsupported call to function ");
13550b57cec5SDimitry Andric }
13560b57cec5SDimitry Andric
LowerDYNAMIC_STACKALLOC(SDValue Op,SelectionDAG & DAG) const13570b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
13580b57cec5SDimitry Andric SelectionDAG &DAG) const {
13590b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction();
13600b57cec5SDimitry Andric
13610b57cec5SDimitry Andric DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca",
13620b57cec5SDimitry Andric SDLoc(Op).getDebugLoc());
13630b57cec5SDimitry Andric DAG.getContext()->diagnose(NoDynamicAlloca);
13640b57cec5SDimitry Andric auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
13650b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SDLoc());
13660b57cec5SDimitry Andric }
13670b57cec5SDimitry Andric
LowerOperation(SDValue Op,SelectionDAG & DAG) const13680b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
13690b57cec5SDimitry Andric SelectionDAG &DAG) const {
13700b57cec5SDimitry Andric switch (Op.getOpcode()) {
13710b57cec5SDimitry Andric default:
13720b57cec5SDimitry Andric Op->print(errs(), &DAG);
13730b57cec5SDimitry Andric llvm_unreachable("Custom lowering code for this "
13740b57cec5SDimitry Andric "instruction is not implemented yet!");
13750b57cec5SDimitry Andric break;
13760b57cec5SDimitry Andric case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
13770b57cec5SDimitry Andric case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
13780b57cec5SDimitry Andric case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
13790b57cec5SDimitry Andric case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
13800b57cec5SDimitry Andric case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
13810b57cec5SDimitry Andric case ISD::FREM: return LowerFREM(Op, DAG);
13820b57cec5SDimitry Andric case ISD::FCEIL: return LowerFCEIL(Op, DAG);
13830b57cec5SDimitry Andric case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
13840b57cec5SDimitry Andric case ISD::FRINT: return LowerFRINT(Op, DAG);
13850b57cec5SDimitry Andric case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
1386bdd1243dSDimitry Andric case ISD::FROUNDEVEN:
1387bdd1243dSDimitry Andric return LowerFROUNDEVEN(Op, DAG);
13880b57cec5SDimitry Andric case ISD::FROUND: return LowerFROUND(Op, DAG);
13890b57cec5SDimitry Andric case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
139006c3fb27SDimitry Andric case ISD::FLOG2:
139106c3fb27SDimitry Andric return LowerFLOG2(Op, DAG);
13920b57cec5SDimitry Andric case ISD::FLOG:
13930b57cec5SDimitry Andric case ISD::FLOG10:
139406c3fb27SDimitry Andric return LowerFLOGCommon(Op, DAG);
13950b57cec5SDimitry Andric case ISD::FEXP:
13965f757f3fSDimitry Andric case ISD::FEXP10:
13970b57cec5SDimitry Andric return lowerFEXP(Op, DAG);
139806c3fb27SDimitry Andric case ISD::FEXP2:
139906c3fb27SDimitry Andric return lowerFEXP2(Op, DAG);
14000b57cec5SDimitry Andric case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
14010b57cec5SDimitry Andric case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
14020b57cec5SDimitry Andric case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
1403fe6060f1SDimitry Andric case ISD::FP_TO_SINT:
1404fe6060f1SDimitry Andric case ISD::FP_TO_UINT:
1405fe6060f1SDimitry Andric return LowerFP_TO_INT(Op, DAG);
14060b57cec5SDimitry Andric case ISD::CTTZ:
14070b57cec5SDimitry Andric case ISD::CTTZ_ZERO_UNDEF:
14080b57cec5SDimitry Andric case ISD::CTLZ:
14090b57cec5SDimitry Andric case ISD::CTLZ_ZERO_UNDEF:
14100b57cec5SDimitry Andric return LowerCTLZ_CTTZ(Op, DAG);
14110b57cec5SDimitry Andric case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
14120b57cec5SDimitry Andric }
14130b57cec5SDimitry Andric return Op;
14140b57cec5SDimitry Andric }
14150b57cec5SDimitry Andric
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const14160b57cec5SDimitry Andric void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
14170b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results,
14180b57cec5SDimitry Andric SelectionDAG &DAG) const {
14190b57cec5SDimitry Andric switch (N->getOpcode()) {
14200b57cec5SDimitry Andric case ISD::SIGN_EXTEND_INREG:
14210b57cec5SDimitry Andric // Different parts of legalization seem to interpret which type of
14220b57cec5SDimitry Andric // sign_extend_inreg is the one to check for custom lowering. The extended
14230b57cec5SDimitry Andric // from type is what really matters, but some places check for custom
14240b57cec5SDimitry Andric // lowering of the result type. This results in trying to use
14250b57cec5SDimitry Andric // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
14260b57cec5SDimitry Andric // nothing here and let the illegal result integer be handled normally.
14270b57cec5SDimitry Andric return;
142806c3fb27SDimitry Andric case ISD::FLOG2:
142906c3fb27SDimitry Andric if (SDValue Lowered = LowerFLOG2(SDValue(N, 0), DAG))
143006c3fb27SDimitry Andric Results.push_back(Lowered);
143106c3fb27SDimitry Andric return;
143206c3fb27SDimitry Andric case ISD::FLOG:
143306c3fb27SDimitry Andric case ISD::FLOG10:
143406c3fb27SDimitry Andric if (SDValue Lowered = LowerFLOGCommon(SDValue(N, 0), DAG))
143506c3fb27SDimitry Andric Results.push_back(Lowered);
143606c3fb27SDimitry Andric return;
143706c3fb27SDimitry Andric case ISD::FEXP2:
143806c3fb27SDimitry Andric if (SDValue Lowered = lowerFEXP2(SDValue(N, 0), DAG))
143906c3fb27SDimitry Andric Results.push_back(Lowered);
144006c3fb27SDimitry Andric return;
144106c3fb27SDimitry Andric case ISD::FEXP:
14425f757f3fSDimitry Andric case ISD::FEXP10:
144306c3fb27SDimitry Andric if (SDValue Lowered = lowerFEXP(SDValue(N, 0), DAG))
144406c3fb27SDimitry Andric Results.push_back(Lowered);
144506c3fb27SDimitry Andric return;
14467a6dacacSDimitry Andric case ISD::CTLZ:
14477a6dacacSDimitry Andric case ISD::CTLZ_ZERO_UNDEF:
14487a6dacacSDimitry Andric if (auto Lowered = lowerCTLZResults(SDValue(N, 0u), DAG))
14497a6dacacSDimitry Andric Results.push_back(Lowered);
14507a6dacacSDimitry Andric return;
14510b57cec5SDimitry Andric default:
14520b57cec5SDimitry Andric return;
14530b57cec5SDimitry Andric }
14540b57cec5SDimitry Andric }
14550b57cec5SDimitry Andric
LowerGlobalAddress(AMDGPUMachineFunction * MFI,SDValue Op,SelectionDAG & DAG) const14560b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
14570b57cec5SDimitry Andric SDValue Op,
14580b57cec5SDimitry Andric SelectionDAG &DAG) const {
14590b57cec5SDimitry Andric
14600b57cec5SDimitry Andric const DataLayout &DL = DAG.getDataLayout();
14610b57cec5SDimitry Andric GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
14620b57cec5SDimitry Andric const GlobalValue *GV = G->getGlobal();
14630b57cec5SDimitry Andric
146406c3fb27SDimitry Andric if (!MFI->isModuleEntryFunction()) {
146506c3fb27SDimitry Andric if (std::optional<uint32_t> Address =
146606c3fb27SDimitry Andric AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
146706c3fb27SDimitry Andric return DAG.getConstant(*Address, SDLoc(Op), Op.getValueType());
146806c3fb27SDimitry Andric }
146906c3fb27SDimitry Andric }
147006c3fb27SDimitry Andric
14710b57cec5SDimitry Andric if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
14720b57cec5SDimitry Andric G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
1473fe6060f1SDimitry Andric if (!MFI->isModuleEntryFunction() &&
14740fca6ea1SDimitry Andric GV->getName() != "llvm.amdgcn.module.lds") {
14755ffd83dbSDimitry Andric SDLoc DL(Op);
14760b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction();
14770b57cec5SDimitry Andric DiagnosticInfoUnsupported BadLDSDecl(
14785ffd83dbSDimitry Andric Fn, "local memory global used by non-kernel function",
14795ffd83dbSDimitry Andric DL.getDebugLoc(), DS_Warning);
14800b57cec5SDimitry Andric DAG.getContext()->diagnose(BadLDSDecl);
14815ffd83dbSDimitry Andric
14825ffd83dbSDimitry Andric // We currently don't have a way to correctly allocate LDS objects that
14835ffd83dbSDimitry Andric // aren't directly associated with a kernel. We do force inlining of
14845ffd83dbSDimitry Andric // functions that use local objects. However, if these dead functions are
14855ffd83dbSDimitry Andric // not eliminated, we don't want a compile time error. Just emit a warning
14865ffd83dbSDimitry Andric // and a trap, since there should be no callable path here.
14875ffd83dbSDimitry Andric SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode());
14885ffd83dbSDimitry Andric SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
14895ffd83dbSDimitry Andric Trap, DAG.getRoot());
14905ffd83dbSDimitry Andric DAG.setRoot(OutputChain);
14915ffd83dbSDimitry Andric return DAG.getUNDEF(Op.getValueType());
14920b57cec5SDimitry Andric }
14930b57cec5SDimitry Andric
14940b57cec5SDimitry Andric // XXX: What does the value of G->getOffset() mean?
14950b57cec5SDimitry Andric assert(G->getOffset() == 0 &&
14960b57cec5SDimitry Andric "Do not know what to do with an non-zero offset");
14970b57cec5SDimitry Andric
14980b57cec5SDimitry Andric // TODO: We could emit code to handle the initialization somewhere.
1499349cc55cSDimitry Andric // We ignore the initializer for now and legalize it to allow selection.
1500349cc55cSDimitry Andric // The initializer will anyway get errored out during assembly emission.
15015ffd83dbSDimitry Andric unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
15020b57cec5SDimitry Andric return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
15030b57cec5SDimitry Andric }
15040b57cec5SDimitry Andric return SDValue();
15050b57cec5SDimitry Andric }
15060b57cec5SDimitry Andric
LowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const15070b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
15080b57cec5SDimitry Andric SelectionDAG &DAG) const {
15090b57cec5SDimitry Andric SmallVector<SDValue, 8> Args;
1510bdd1243dSDimitry Andric SDLoc SL(Op);
15110b57cec5SDimitry Andric
15120b57cec5SDimitry Andric EVT VT = Op.getValueType();
1513bdd1243dSDimitry Andric if (VT.getVectorElementType().getSizeInBits() < 32) {
1514bdd1243dSDimitry Andric unsigned OpBitSize = Op.getOperand(0).getValueType().getSizeInBits();
1515bdd1243dSDimitry Andric if (OpBitSize >= 32 && OpBitSize % 32 == 0) {
1516bdd1243dSDimitry Andric unsigned NewNumElt = OpBitSize / 32;
1517bdd1243dSDimitry Andric EVT NewEltVT = (NewNumElt == 1) ? MVT::i32
1518bdd1243dSDimitry Andric : EVT::getVectorVT(*DAG.getContext(),
1519bdd1243dSDimitry Andric MVT::i32, NewNumElt);
1520bdd1243dSDimitry Andric for (const SDUse &U : Op->ops()) {
1521bdd1243dSDimitry Andric SDValue In = U.get();
1522bdd1243dSDimitry Andric SDValue NewIn = DAG.getNode(ISD::BITCAST, SL, NewEltVT, In);
1523bdd1243dSDimitry Andric if (NewNumElt > 1)
1524bdd1243dSDimitry Andric DAG.ExtractVectorElements(NewIn, Args);
1525bdd1243dSDimitry Andric else
1526bdd1243dSDimitry Andric Args.push_back(NewIn);
1527bdd1243dSDimitry Andric }
15280b57cec5SDimitry Andric
1529bdd1243dSDimitry Andric EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
1530bdd1243dSDimitry Andric NewNumElt * Op.getNumOperands());
1531bdd1243dSDimitry Andric SDValue BV = DAG.getBuildVector(NewVT, SL, Args);
15320b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, VT, BV);
15330b57cec5SDimitry Andric }
1534bdd1243dSDimitry Andric }
15350b57cec5SDimitry Andric
15360b57cec5SDimitry Andric for (const SDUse &U : Op->ops())
15370b57cec5SDimitry Andric DAG.ExtractVectorElements(U.get(), Args);
15380b57cec5SDimitry Andric
1539bdd1243dSDimitry Andric return DAG.getBuildVector(Op.getValueType(), SL, Args);
15400b57cec5SDimitry Andric }
15410b57cec5SDimitry Andric
LowerEXTRACT_SUBVECTOR(SDValue Op,SelectionDAG & DAG) const15420b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
15430b57cec5SDimitry Andric SelectionDAG &DAG) const {
154406c3fb27SDimitry Andric SDLoc SL(Op);
15450b57cec5SDimitry Andric SmallVector<SDValue, 8> Args;
1546647cbc5dSDimitry Andric unsigned Start = Op.getConstantOperandVal(1);
15470b57cec5SDimitry Andric EVT VT = Op.getValueType();
1548fe6060f1SDimitry Andric EVT SrcVT = Op.getOperand(0).getValueType();
1549fe6060f1SDimitry Andric
155006c3fb27SDimitry Andric if (VT.getScalarSizeInBits() == 16 && Start % 2 == 0) {
155106c3fb27SDimitry Andric unsigned NumElt = VT.getVectorNumElements();
155206c3fb27SDimitry Andric unsigned NumSrcElt = SrcVT.getVectorNumElements();
155306c3fb27SDimitry Andric assert(NumElt % 2 == 0 && NumSrcElt % 2 == 0 && "expect legal types");
1554fe6060f1SDimitry Andric
155506c3fb27SDimitry Andric // Extract 32-bit registers at a time.
155606c3fb27SDimitry Andric EVT NewSrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumSrcElt / 2);
155706c3fb27SDimitry Andric EVT NewVT = NumElt == 2
155806c3fb27SDimitry Andric ? MVT::i32
155906c3fb27SDimitry Andric : EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElt / 2);
156006c3fb27SDimitry Andric SDValue Tmp = DAG.getNode(ISD::BITCAST, SL, NewSrcVT, Op.getOperand(0));
156104eeddc0SDimitry Andric
156206c3fb27SDimitry Andric DAG.ExtractVectorElements(Tmp, Args, Start / 2, NumElt / 2);
156306c3fb27SDimitry Andric if (NumElt == 2)
156406c3fb27SDimitry Andric Tmp = Args[0];
156506c3fb27SDimitry Andric else
156606c3fb27SDimitry Andric Tmp = DAG.getBuildVector(NewVT, SL, Args);
156706c3fb27SDimitry Andric
156806c3fb27SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, VT, Tmp);
156906c3fb27SDimitry Andric }
157081ad6265SDimitry Andric
15710b57cec5SDimitry Andric DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
15720b57cec5SDimitry Andric VT.getVectorNumElements());
15730b57cec5SDimitry Andric
157406c3fb27SDimitry Andric return DAG.getBuildVector(Op.getValueType(), SL, Args);
15750b57cec5SDimitry Andric }
15760b57cec5SDimitry Andric
157706c3fb27SDimitry Andric // TODO: Handle fabs too
peekFNeg(SDValue Val)157806c3fb27SDimitry Andric static SDValue peekFNeg(SDValue Val) {
157906c3fb27SDimitry Andric if (Val.getOpcode() == ISD::FNEG)
158006c3fb27SDimitry Andric return Val.getOperand(0);
15810b57cec5SDimitry Andric
158206c3fb27SDimitry Andric return Val;
158306c3fb27SDimitry Andric }
158406c3fb27SDimitry Andric
peekFPSignOps(SDValue Val)158506c3fb27SDimitry Andric static SDValue peekFPSignOps(SDValue Val) {
158606c3fb27SDimitry Andric if (Val.getOpcode() == ISD::FNEG)
158706c3fb27SDimitry Andric Val = Val.getOperand(0);
158806c3fb27SDimitry Andric if (Val.getOpcode() == ISD::FABS)
158906c3fb27SDimitry Andric Val = Val.getOperand(0);
159006c3fb27SDimitry Andric if (Val.getOpcode() == ISD::FCOPYSIGN)
159106c3fb27SDimitry Andric Val = Val.getOperand(0);
159206c3fb27SDimitry Andric return Val;
159306c3fb27SDimitry Andric }
159406c3fb27SDimitry Andric
combineFMinMaxLegacyImpl(const SDLoc & DL,EVT VT,SDValue LHS,SDValue RHS,SDValue True,SDValue False,SDValue CC,DAGCombinerInfo & DCI) const159506c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl(
159606c3fb27SDimitry Andric const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True,
159706c3fb27SDimitry Andric SDValue False, SDValue CC, DAGCombinerInfo &DCI) const {
15980b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
15990b57cec5SDimitry Andric ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
16000b57cec5SDimitry Andric switch (CCOpcode) {
16010b57cec5SDimitry Andric case ISD::SETOEQ:
16020b57cec5SDimitry Andric case ISD::SETONE:
16030b57cec5SDimitry Andric case ISD::SETUNE:
16040b57cec5SDimitry Andric case ISD::SETNE:
16050b57cec5SDimitry Andric case ISD::SETUEQ:
16060b57cec5SDimitry Andric case ISD::SETEQ:
16070b57cec5SDimitry Andric case ISD::SETFALSE:
16080b57cec5SDimitry Andric case ISD::SETFALSE2:
16090b57cec5SDimitry Andric case ISD::SETTRUE:
16100b57cec5SDimitry Andric case ISD::SETTRUE2:
16110b57cec5SDimitry Andric case ISD::SETUO:
16120b57cec5SDimitry Andric case ISD::SETO:
16130b57cec5SDimitry Andric break;
16140b57cec5SDimitry Andric case ISD::SETULE:
16150b57cec5SDimitry Andric case ISD::SETULT: {
16160b57cec5SDimitry Andric if (LHS == True)
16170b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
16180b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
16190b57cec5SDimitry Andric }
16200b57cec5SDimitry Andric case ISD::SETOLE:
16210b57cec5SDimitry Andric case ISD::SETOLT:
16220b57cec5SDimitry Andric case ISD::SETLE:
16230b57cec5SDimitry Andric case ISD::SETLT: {
16240b57cec5SDimitry Andric // Ordered. Assume ordered for undefined.
16250b57cec5SDimitry Andric
16260b57cec5SDimitry Andric // Only do this after legalization to avoid interfering with other combines
16270b57cec5SDimitry Andric // which might occur.
16280b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
16290b57cec5SDimitry Andric !DCI.isCalledByLegalizer())
16300b57cec5SDimitry Andric return SDValue();
16310b57cec5SDimitry Andric
16320b57cec5SDimitry Andric // We need to permute the operands to get the correct NaN behavior. The
16330b57cec5SDimitry Andric // selected operand is the second one based on the failing compare with NaN,
16340b57cec5SDimitry Andric // so permute it based on the compare type the hardware uses.
16350b57cec5SDimitry Andric if (LHS == True)
16360b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
16370b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
16380b57cec5SDimitry Andric }
16390b57cec5SDimitry Andric case ISD::SETUGE:
16400b57cec5SDimitry Andric case ISD::SETUGT: {
16410b57cec5SDimitry Andric if (LHS == True)
16420b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
16430b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
16440b57cec5SDimitry Andric }
16450b57cec5SDimitry Andric case ISD::SETGT:
16460b57cec5SDimitry Andric case ISD::SETGE:
16470b57cec5SDimitry Andric case ISD::SETOGE:
16480b57cec5SDimitry Andric case ISD::SETOGT: {
16490b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
16500b57cec5SDimitry Andric !DCI.isCalledByLegalizer())
16510b57cec5SDimitry Andric return SDValue();
16520b57cec5SDimitry Andric
16530b57cec5SDimitry Andric if (LHS == True)
16540b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
16550b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
16560b57cec5SDimitry Andric }
16570b57cec5SDimitry Andric case ISD::SETCC_INVALID:
16580b57cec5SDimitry Andric llvm_unreachable("Invalid setcc condcode!");
16590b57cec5SDimitry Andric }
16600b57cec5SDimitry Andric return SDValue();
16610b57cec5SDimitry Andric }
16620b57cec5SDimitry Andric
166306c3fb27SDimitry Andric /// Generate Min/Max node
combineFMinMaxLegacy(const SDLoc & DL,EVT VT,SDValue LHS,SDValue RHS,SDValue True,SDValue False,SDValue CC,DAGCombinerInfo & DCI) const166406c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
166506c3fb27SDimitry Andric SDValue LHS, SDValue RHS,
166606c3fb27SDimitry Andric SDValue True, SDValue False,
166706c3fb27SDimitry Andric SDValue CC,
166806c3fb27SDimitry Andric DAGCombinerInfo &DCI) const {
166906c3fb27SDimitry Andric if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
167006c3fb27SDimitry Andric return combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, True, False, CC, DCI);
167106c3fb27SDimitry Andric
167206c3fb27SDimitry Andric SelectionDAG &DAG = DCI.DAG;
167306c3fb27SDimitry Andric
167406c3fb27SDimitry Andric // If we can't directly match this, try to see if we can fold an fneg to
167506c3fb27SDimitry Andric // match.
167606c3fb27SDimitry Andric
167706c3fb27SDimitry Andric ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
167806c3fb27SDimitry Andric ConstantFPSDNode *CFalse = dyn_cast<ConstantFPSDNode>(False);
167906c3fb27SDimitry Andric SDValue NegTrue = peekFNeg(True);
168006c3fb27SDimitry Andric
168106c3fb27SDimitry Andric // Undo the combine foldFreeOpFromSelect does if it helps us match the
168206c3fb27SDimitry Andric // fmin/fmax.
168306c3fb27SDimitry Andric //
168406c3fb27SDimitry Andric // select (fcmp olt (lhs, K)), (fneg lhs), -K
168506c3fb27SDimitry Andric // -> fneg (fmin_legacy lhs, K)
168606c3fb27SDimitry Andric //
168706c3fb27SDimitry Andric // TODO: Use getNegatedExpression
168806c3fb27SDimitry Andric if (LHS == NegTrue && CFalse && CRHS) {
168906c3fb27SDimitry Andric APFloat NegRHS = neg(CRHS->getValueAPF());
169006c3fb27SDimitry Andric if (NegRHS == CFalse->getValueAPF()) {
169106c3fb27SDimitry Andric SDValue Combined =
169206c3fb27SDimitry Andric combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, NegTrue, False, CC, DCI);
169306c3fb27SDimitry Andric if (Combined)
169406c3fb27SDimitry Andric return DAG.getNode(ISD::FNEG, DL, VT, Combined);
169506c3fb27SDimitry Andric return SDValue();
169606c3fb27SDimitry Andric }
169706c3fb27SDimitry Andric }
169806c3fb27SDimitry Andric
169906c3fb27SDimitry Andric return SDValue();
170006c3fb27SDimitry Andric }
170106c3fb27SDimitry Andric
17020b57cec5SDimitry Andric std::pair<SDValue, SDValue>
split64BitValue(SDValue Op,SelectionDAG & DAG) const17030b57cec5SDimitry Andric AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const {
17040b57cec5SDimitry Andric SDLoc SL(Op);
17050b57cec5SDimitry Andric
17060b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
17070b57cec5SDimitry Andric
17080b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
17090b57cec5SDimitry Andric const SDValue One = DAG.getConstant(1, SL, MVT::i32);
17100b57cec5SDimitry Andric
17110b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
17120b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
17130b57cec5SDimitry Andric
1714bdd1243dSDimitry Andric return std::pair(Lo, Hi);
17150b57cec5SDimitry Andric }
17160b57cec5SDimitry Andric
getLoHalf64(SDValue Op,SelectionDAG & DAG) const17170b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getLoHalf64(SDValue Op, SelectionDAG &DAG) const {
17180b57cec5SDimitry Andric SDLoc SL(Op);
17190b57cec5SDimitry Andric
17200b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
17210b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
17220b57cec5SDimitry Andric return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
17230b57cec5SDimitry Andric }
17240b57cec5SDimitry Andric
getHiHalf64(SDValue Op,SelectionDAG & DAG) const17250b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const {
17260b57cec5SDimitry Andric SDLoc SL(Op);
17270b57cec5SDimitry Andric
17280b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
17290b57cec5SDimitry Andric const SDValue One = DAG.getConstant(1, SL, MVT::i32);
17300b57cec5SDimitry Andric return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
17310b57cec5SDimitry Andric }
17320b57cec5SDimitry Andric
17330b57cec5SDimitry Andric // Split a vector type into two parts. The first part is a power of two vector.
17340b57cec5SDimitry Andric // The second part is whatever is left over, and is a scalar if it would
17350b57cec5SDimitry Andric // otherwise be a 1-vector.
17360b57cec5SDimitry Andric std::pair<EVT, EVT>
getSplitDestVTs(const EVT & VT,SelectionDAG & DAG) const17370b57cec5SDimitry Andric AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const {
17380b57cec5SDimitry Andric EVT LoVT, HiVT;
17390b57cec5SDimitry Andric EVT EltVT = VT.getVectorElementType();
17400b57cec5SDimitry Andric unsigned NumElts = VT.getVectorNumElements();
17410b57cec5SDimitry Andric unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2);
17420b57cec5SDimitry Andric LoVT = EVT::getVectorVT(*DAG.getContext(), EltVT, LoNumElts);
17430b57cec5SDimitry Andric HiVT = NumElts - LoNumElts == 1
17440b57cec5SDimitry Andric ? EltVT
17450b57cec5SDimitry Andric : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts - LoNumElts);
1746bdd1243dSDimitry Andric return std::pair(LoVT, HiVT);
17470b57cec5SDimitry Andric }
17480b57cec5SDimitry Andric
17490b57cec5SDimitry Andric // Split a vector value into two parts of types LoVT and HiVT. HiVT could be
17500b57cec5SDimitry Andric // scalar.
17510b57cec5SDimitry Andric std::pair<SDValue, SDValue>
splitVector(const SDValue & N,const SDLoc & DL,const EVT & LoVT,const EVT & HiVT,SelectionDAG & DAG) const17520b57cec5SDimitry Andric AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL,
17530b57cec5SDimitry Andric const EVT &LoVT, const EVT &HiVT,
17540b57cec5SDimitry Andric SelectionDAG &DAG) const {
17550b57cec5SDimitry Andric assert(LoVT.getVectorNumElements() +
17560b57cec5SDimitry Andric (HiVT.isVector() ? HiVT.getVectorNumElements() : 1) <=
17570b57cec5SDimitry Andric N.getValueType().getVectorNumElements() &&
17580b57cec5SDimitry Andric "More vector elements requested than available!");
17590b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
17605ffd83dbSDimitry Andric DAG.getVectorIdxConstant(0, DL));
17610b57cec5SDimitry Andric SDValue Hi = DAG.getNode(
17620b57cec5SDimitry Andric HiVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT, DL,
17635ffd83dbSDimitry Andric HiVT, N, DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), DL));
1764bdd1243dSDimitry Andric return std::pair(Lo, Hi);
17650b57cec5SDimitry Andric }
17660b57cec5SDimitry Andric
/// Replace a vector load with two narrower loads (low and high part, as
/// chosen by getSplitDestVTs) and reassemble the loaded value.
///
/// Two-element vectors are scalarized instead. The result is a MERGE_VALUES
/// of the rebuilt vector and a TokenFactor joining both load chains.
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
                                              SelectionDAG &DAG) const {
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  const EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // If this is a 2 element vector, we really want to scalarize and not create
  // weird 1 element vectors.
  if (VT.getVectorNumElements() == 2) {
    std::pair<SDValue, SDValue> Scalarized = scalarizeVectorLoad(Load, DAG);
    return DAG.getMergeValues({Scalarized.first, Scalarized.second}, DL);
  }

  const SDValue BasePtr = Load->getBasePtr();
  const EVT MemVT = Load->getMemoryVT();
  const MachinePointerInfo &PtrInfo = Load->getMemOperand()->getPointerInfo();

  auto [LoVT, HiVT] = getSplitDestVTs(VT, DAG);
  auto [LoMemVT, HiMemVT] = getSplitDestVTs(MemVT, DAG);

  // NOTE(review): the extracted halves of Op are not read anywhere below; the
  // call is retained so the set of created nodes stays as it was. Confirm
  // whether it can simply be removed.
  (void)splitVector(Op, DL, LoVT, HiVT, DAG);

  // The high part starts right after the low part's bytes in memory, so its
  // alignment is whatever the base alignment guarantees at that offset.
  const unsigned LoBytes = LoMemVT.getStoreSize();
  const Align BaseAlign = Load->getAlign();
  const Align HiAlign = commonAlignment(BaseAlign, LoBytes);

  const ISD::LoadExtType ExtTy = Load->getExtensionType();
  const auto MMOFlags = Load->getMemOperand()->getFlags();

  SDValue LoLoad = DAG.getExtLoad(ExtTy, DL, LoVT, Load->getChain(), BasePtr,
                                  PtrInfo, LoMemVT, BaseAlign, MMOFlags);
  SDValue HiPtr =
      DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(LoBytes));
  SDValue HiLoad = DAG.getExtLoad(ExtTy, DL, HiVT, Load->getChain(), HiPtr,
                                  PtrInfo.getWithOffset(LoBytes), HiMemVT,
                                  HiAlign, MMOFlags);

  SDValue Joined;
  if (LoVT != HiVT) {
    // Uneven split: insert both parts into an undef vector of the full type.
    Joined = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
                         LoLoad, DAG.getVectorIdxConstant(0, DL));
    const unsigned InsertHiOpc =
        HiVT.isVector() ? ISD::INSERT_SUBVECTOR : ISD::INSERT_VECTOR_ELT;
    Joined =
        DAG.getNode(InsertHiOpc, DL, VT, Joined, HiLoad,
                    DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), DL));
  } else {
    // This is the case that the vector is power of two so was evenly split.
    Joined = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoLoad, HiLoad);
  }

  SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                 LoLoad.getValue(1), HiLoad.getValue(1));
  return DAG.getMergeValues({Joined, OutChain}, DL);
}
18260b57cec5SDimitry Andric
WidenOrSplitVectorLoad(SDValue Op,SelectionDAG & DAG) const1827e8d8bef9SDimitry Andric SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op,
18280b57cec5SDimitry Andric SelectionDAG &DAG) const {
18290b57cec5SDimitry Andric LoadSDNode *Load = cast<LoadSDNode>(Op);
18300b57cec5SDimitry Andric EVT VT = Op.getValueType();
18310b57cec5SDimitry Andric SDValue BasePtr = Load->getBasePtr();
18320b57cec5SDimitry Andric EVT MemVT = Load->getMemoryVT();
18330b57cec5SDimitry Andric SDLoc SL(Op);
18340b57cec5SDimitry Andric const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
183581ad6265SDimitry Andric Align BaseAlign = Load->getAlign();
1836e8d8bef9SDimitry Andric unsigned NumElements = MemVT.getVectorNumElements();
1837e8d8bef9SDimitry Andric
1838e8d8bef9SDimitry Andric // Widen from vec3 to vec4 when the load is at least 8-byte aligned
1839e8d8bef9SDimitry Andric // or 16-byte fully dereferenceable. Otherwise, split the vector load.
1840e8d8bef9SDimitry Andric if (NumElements != 3 ||
184181ad6265SDimitry Andric (BaseAlign < Align(8) &&
1842e8d8bef9SDimitry Andric !SrcValue.isDereferenceable(16, *DAG.getContext(), DAG.getDataLayout())))
1843e8d8bef9SDimitry Andric return SplitVectorLoad(Op, DAG);
1844e8d8bef9SDimitry Andric
1845e8d8bef9SDimitry Andric assert(NumElements == 3);
18460b57cec5SDimitry Andric
18470b57cec5SDimitry Andric EVT WideVT =
18480b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
18490b57cec5SDimitry Andric EVT WideMemVT =
18500b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(), 4);
18510b57cec5SDimitry Andric SDValue WideLoad = DAG.getExtLoad(
18520b57cec5SDimitry Andric Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue,
18530b57cec5SDimitry Andric WideMemVT, BaseAlign, Load->getMemOperand()->getFlags());
18540b57cec5SDimitry Andric return DAG.getMergeValues(
18550b57cec5SDimitry Andric {DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, VT, WideLoad,
18565ffd83dbSDimitry Andric DAG.getVectorIdxConstant(0, SL)),
18570b57cec5SDimitry Andric WideLoad.getValue(1)},
18580b57cec5SDimitry Andric SL);
18590b57cec5SDimitry Andric }
18600b57cec5SDimitry Andric
SplitVectorStore(SDValue Op,SelectionDAG & DAG) const18610b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
18620b57cec5SDimitry Andric SelectionDAG &DAG) const {
18630b57cec5SDimitry Andric StoreSDNode *Store = cast<StoreSDNode>(Op);
18640b57cec5SDimitry Andric SDValue Val = Store->getValue();
18650b57cec5SDimitry Andric EVT VT = Val.getValueType();
18660b57cec5SDimitry Andric
18670b57cec5SDimitry Andric // If this is a 2 element vector, we really want to scalarize and not create
18680b57cec5SDimitry Andric // weird 1 element vectors.
18690b57cec5SDimitry Andric if (VT.getVectorNumElements() == 2)
18700b57cec5SDimitry Andric return scalarizeVectorStore(Store, DAG);
18710b57cec5SDimitry Andric
18720b57cec5SDimitry Andric EVT MemVT = Store->getMemoryVT();
18730b57cec5SDimitry Andric SDValue Chain = Store->getChain();
18740b57cec5SDimitry Andric SDValue BasePtr = Store->getBasePtr();
18750b57cec5SDimitry Andric SDLoc SL(Op);
18760b57cec5SDimitry Andric
18770b57cec5SDimitry Andric EVT LoVT, HiVT;
18780b57cec5SDimitry Andric EVT LoMemVT, HiMemVT;
18790b57cec5SDimitry Andric SDValue Lo, Hi;
18800b57cec5SDimitry Andric
18810b57cec5SDimitry Andric std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG);
18820b57cec5SDimitry Andric std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
18830b57cec5SDimitry Andric std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG);
18840b57cec5SDimitry Andric
18850b57cec5SDimitry Andric SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize());
18860b57cec5SDimitry Andric
18870b57cec5SDimitry Andric const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo();
188881ad6265SDimitry Andric Align BaseAlign = Store->getAlign();
18890b57cec5SDimitry Andric unsigned Size = LoMemVT.getStoreSize();
189081ad6265SDimitry Andric Align HiAlign = commonAlignment(BaseAlign, Size);
18910b57cec5SDimitry Andric
18920b57cec5SDimitry Andric SDValue LoStore =
18930b57cec5SDimitry Andric DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
18940b57cec5SDimitry Andric Store->getMemOperand()->getFlags());
18950b57cec5SDimitry Andric SDValue HiStore =
18960b57cec5SDimitry Andric DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size),
18970b57cec5SDimitry Andric HiMemVT, HiAlign, Store->getMemOperand()->getFlags());
18980b57cec5SDimitry Andric
18990b57cec5SDimitry Andric return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
19000b57cec5SDimitry Andric }
19010b57cec5SDimitry Andric
19020b57cec5SDimitry Andric // This is a shortcut for integer division because we have fast i32<->f32
19030b57cec5SDimitry Andric // conversions, and fast f32 reciprocal instructions. The fractional part of a
19040b57cec5SDimitry Andric // float is enough to accurately represent up to a 24-bit signed integer.
LowerDIVREM24(SDValue Op,SelectionDAG & DAG,bool Sign) const19050b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
19060b57cec5SDimitry Andric bool Sign) const {
19070b57cec5SDimitry Andric SDLoc DL(Op);
19080b57cec5SDimitry Andric EVT VT = Op.getValueType();
19090b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0);
19100b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1);
19110b57cec5SDimitry Andric MVT IntVT = MVT::i32;
19120b57cec5SDimitry Andric MVT FltVT = MVT::f32;
19130b57cec5SDimitry Andric
19140b57cec5SDimitry Andric unsigned LHSSignBits = DAG.ComputeNumSignBits(LHS);
19150b57cec5SDimitry Andric if (LHSSignBits < 9)
19160b57cec5SDimitry Andric return SDValue();
19170b57cec5SDimitry Andric
19180b57cec5SDimitry Andric unsigned RHSSignBits = DAG.ComputeNumSignBits(RHS);
19190b57cec5SDimitry Andric if (RHSSignBits < 9)
19200b57cec5SDimitry Andric return SDValue();
19210b57cec5SDimitry Andric
19220b57cec5SDimitry Andric unsigned BitSize = VT.getSizeInBits();
19230b57cec5SDimitry Andric unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
19240b57cec5SDimitry Andric unsigned DivBits = BitSize - SignBits;
19250b57cec5SDimitry Andric if (Sign)
19260b57cec5SDimitry Andric ++DivBits;
19270b57cec5SDimitry Andric
19280b57cec5SDimitry Andric ISD::NodeType ToFp = Sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
19290b57cec5SDimitry Andric ISD::NodeType ToInt = Sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
19300b57cec5SDimitry Andric
19310b57cec5SDimitry Andric SDValue jq = DAG.getConstant(1, DL, IntVT);
19320b57cec5SDimitry Andric
19330b57cec5SDimitry Andric if (Sign) {
19340b57cec5SDimitry Andric // char|short jq = ia ^ ib;
19350b57cec5SDimitry Andric jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
19360b57cec5SDimitry Andric
19370b57cec5SDimitry Andric // jq = jq >> (bitsize - 2)
19380b57cec5SDimitry Andric jq = DAG.getNode(ISD::SRA, DL, VT, jq,
19390b57cec5SDimitry Andric DAG.getConstant(BitSize - 2, DL, VT));
19400b57cec5SDimitry Andric
19410b57cec5SDimitry Andric // jq = jq | 0x1
19420b57cec5SDimitry Andric jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
19430b57cec5SDimitry Andric }
19440b57cec5SDimitry Andric
19450b57cec5SDimitry Andric // int ia = (int)LHS;
19460b57cec5SDimitry Andric SDValue ia = LHS;
19470b57cec5SDimitry Andric
19480b57cec5SDimitry Andric // int ib, (int)RHS;
19490b57cec5SDimitry Andric SDValue ib = RHS;
19500b57cec5SDimitry Andric
19510b57cec5SDimitry Andric // float fa = (float)ia;
19520b57cec5SDimitry Andric SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
19530b57cec5SDimitry Andric
19540b57cec5SDimitry Andric // float fb = (float)ib;
19550b57cec5SDimitry Andric SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
19560b57cec5SDimitry Andric
19570b57cec5SDimitry Andric SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
19580b57cec5SDimitry Andric fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
19590b57cec5SDimitry Andric
19600b57cec5SDimitry Andric // fq = trunc(fq);
19610b57cec5SDimitry Andric fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
19620b57cec5SDimitry Andric
19630b57cec5SDimitry Andric // float fqneg = -fq;
19640b57cec5SDimitry Andric SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
19650b57cec5SDimitry Andric
1966480093f4SDimitry Andric MachineFunction &MF = DAG.getMachineFunction();
1967bdd1243dSDimitry Andric
1968bdd1243dSDimitry Andric bool UseFmadFtz = false;
1969bdd1243dSDimitry Andric if (Subtarget->isGCN()) {
1970bdd1243dSDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
197106c3fb27SDimitry Andric UseFmadFtz =
197206c3fb27SDimitry Andric MFI->getMode().FP32Denormals != DenormalMode::getPreserveSign();
1973bdd1243dSDimitry Andric }
1974480093f4SDimitry Andric
19750b57cec5SDimitry Andric // float fr = mad(fqneg, fb, fa);
1976bdd1243dSDimitry Andric unsigned OpCode = !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA
1977bdd1243dSDimitry Andric : UseFmadFtz ? (unsigned)AMDGPUISD::FMAD_FTZ
1978bdd1243dSDimitry Andric : (unsigned)ISD::FMAD;
19790b57cec5SDimitry Andric SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
19800b57cec5SDimitry Andric
19810b57cec5SDimitry Andric // int iq = (int)fq;
19820b57cec5SDimitry Andric SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
19830b57cec5SDimitry Andric
19840b57cec5SDimitry Andric // fr = fabs(fr);
19850b57cec5SDimitry Andric fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
19860b57cec5SDimitry Andric
19870b57cec5SDimitry Andric // fb = fabs(fb);
19880b57cec5SDimitry Andric fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
19890b57cec5SDimitry Andric
19900b57cec5SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
19910b57cec5SDimitry Andric
19920b57cec5SDimitry Andric // int cv = fr >= fb;
19930b57cec5SDimitry Andric SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
19940b57cec5SDimitry Andric
19950b57cec5SDimitry Andric // jq = (cv ? jq : 0);
19960b57cec5SDimitry Andric jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
19970b57cec5SDimitry Andric
19980b57cec5SDimitry Andric // dst = iq + jq;
19990b57cec5SDimitry Andric SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
20000b57cec5SDimitry Andric
20010b57cec5SDimitry Andric // Rem needs compensation, it's easier to recompute it
20020b57cec5SDimitry Andric SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
20030b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
20040b57cec5SDimitry Andric
20050b57cec5SDimitry Andric // Truncate to number of bits this divide really is.
20060b57cec5SDimitry Andric if (Sign) {
20070b57cec5SDimitry Andric SDValue InRegSize
20080b57cec5SDimitry Andric = DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), DivBits));
20090b57cec5SDimitry Andric Div = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Div, InRegSize);
20100b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Rem, InRegSize);
20110b57cec5SDimitry Andric } else {
20120b57cec5SDimitry Andric SDValue TruncMask = DAG.getConstant((UINT64_C(1) << DivBits) - 1, DL, VT);
20130b57cec5SDimitry Andric Div = DAG.getNode(ISD::AND, DL, VT, Div, TruncMask);
20140b57cec5SDimitry Andric Rem = DAG.getNode(ISD::AND, DL, VT, Rem, TruncMask);
20150b57cec5SDimitry Andric }
20160b57cec5SDimitry Andric
20170b57cec5SDimitry Andric return DAG.getMergeValues({ Div, Rem }, DL);
20180b57cec5SDimitry Andric }
20190b57cec5SDimitry Andric
LowerUDIVREM64(SDValue Op,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results) const20200b57cec5SDimitry Andric void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
20210b57cec5SDimitry Andric SelectionDAG &DAG,
20220b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results) const {
20230b57cec5SDimitry Andric SDLoc DL(Op);
20240b57cec5SDimitry Andric EVT VT = Op.getValueType();
20250b57cec5SDimitry Andric
20260b57cec5SDimitry Andric assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64");
20270b57cec5SDimitry Andric
20280b57cec5SDimitry Andric EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
20290b57cec5SDimitry Andric
20300b57cec5SDimitry Andric SDValue One = DAG.getConstant(1, DL, HalfVT);
20310b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, HalfVT);
20320b57cec5SDimitry Andric
20330b57cec5SDimitry Andric //HiLo split
203406c3fb27SDimitry Andric SDValue LHS_Lo, LHS_Hi;
20350b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0);
203606c3fb27SDimitry Andric std::tie(LHS_Lo, LHS_Hi) = DAG.SplitScalar(LHS, DL, HalfVT, HalfVT);
20370b57cec5SDimitry Andric
203806c3fb27SDimitry Andric SDValue RHS_Lo, RHS_Hi;
20390b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1);
204006c3fb27SDimitry Andric std::tie(RHS_Lo, RHS_Hi) = DAG.SplitScalar(RHS, DL, HalfVT, HalfVT);
20410b57cec5SDimitry Andric
20420b57cec5SDimitry Andric if (DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
20430b57cec5SDimitry Andric DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
20440b57cec5SDimitry Andric
20450b57cec5SDimitry Andric SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
20460b57cec5SDimitry Andric LHS_Lo, RHS_Lo);
20470b57cec5SDimitry Andric
20480b57cec5SDimitry Andric SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), Zero});
20490b57cec5SDimitry Andric SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), Zero});
20500b57cec5SDimitry Andric
20510b57cec5SDimitry Andric Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV));
20520b57cec5SDimitry Andric Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM));
20530b57cec5SDimitry Andric return;
20540b57cec5SDimitry Andric }
20550b57cec5SDimitry Andric
20560b57cec5SDimitry Andric if (isTypeLegal(MVT::i64)) {
2057349cc55cSDimitry Andric // The algorithm here is based on ideas from "Software Integer Division",
2058349cc55cSDimitry Andric // Tom Rodeheffer, August 2008.
2059349cc55cSDimitry Andric
2060480093f4SDimitry Andric MachineFunction &MF = DAG.getMachineFunction();
2061480093f4SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2062480093f4SDimitry Andric
20630b57cec5SDimitry Andric // Compute denominator reciprocal.
206406c3fb27SDimitry Andric unsigned FMAD =
206506c3fb27SDimitry Andric !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA
206606c3fb27SDimitry Andric : MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign()
206706c3fb27SDimitry Andric ? (unsigned)ISD::FMAD
206806c3fb27SDimitry Andric : (unsigned)AMDGPUISD::FMAD_FTZ;
20690b57cec5SDimitry Andric
20700b57cec5SDimitry Andric SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
20710b57cec5SDimitry Andric SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
20720b57cec5SDimitry Andric SDValue Mad1 = DAG.getNode(FMAD, DL, MVT::f32, Cvt_Hi,
20730b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x4f800000).bitsToFloat(), DL, MVT::f32),
20740b57cec5SDimitry Andric Cvt_Lo);
20750b57cec5SDimitry Andric SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1);
20760b57cec5SDimitry Andric SDValue Mul1 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Rcp,
20770b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x5f7ffffc).bitsToFloat(), DL, MVT::f32));
20780b57cec5SDimitry Andric SDValue Mul2 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Mul1,
20790b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x2f800000).bitsToFloat(), DL, MVT::f32));
20800b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2);
20810b57cec5SDimitry Andric SDValue Mad2 = DAG.getNode(FMAD, DL, MVT::f32, Trunc,
20820b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0xcf800000).bitsToFloat(), DL, MVT::f32),
20830b57cec5SDimitry Andric Mul1);
20840b57cec5SDimitry Andric SDValue Rcp_Lo = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Mad2);
20850b57cec5SDimitry Andric SDValue Rcp_Hi = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Trunc);
20860b57cec5SDimitry Andric SDValue Rcp64 = DAG.getBitcast(VT,
20870b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Rcp_Lo, Rcp_Hi}));
20880b57cec5SDimitry Andric
20890b57cec5SDimitry Andric SDValue Zero64 = DAG.getConstant(0, DL, VT);
20900b57cec5SDimitry Andric SDValue One64 = DAG.getConstant(1, DL, VT);
20910b57cec5SDimitry Andric SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1);
20920b57cec5SDimitry Andric SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1);
20930b57cec5SDimitry Andric
2094349cc55cSDimitry Andric // First round of UNR (Unsigned integer Newton-Raphson).
20950b57cec5SDimitry Andric SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS);
20960b57cec5SDimitry Andric SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64);
20970b57cec5SDimitry Andric SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1);
209806c3fb27SDimitry Andric SDValue Mulhi1_Lo, Mulhi1_Hi;
209906c3fb27SDimitry Andric std::tie(Mulhi1_Lo, Mulhi1_Hi) =
210006c3fb27SDimitry Andric DAG.SplitScalar(Mulhi1, DL, HalfVT, HalfVT);
210106c3fb27SDimitry Andric SDValue Add1_Lo = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Rcp_Lo,
21020b57cec5SDimitry Andric Mulhi1_Lo, Zero1);
210306c3fb27SDimitry Andric SDValue Add1_Hi = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Rcp_Hi,
21040b57cec5SDimitry Andric Mulhi1_Hi, Add1_Lo.getValue(1));
21050b57cec5SDimitry Andric SDValue Add1 = DAG.getBitcast(VT,
21060b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi}));
21070b57cec5SDimitry Andric
2108349cc55cSDimitry Andric // Second round of UNR.
21090b57cec5SDimitry Andric SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1);
21100b57cec5SDimitry Andric SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2);
211106c3fb27SDimitry Andric SDValue Mulhi2_Lo, Mulhi2_Hi;
211206c3fb27SDimitry Andric std::tie(Mulhi2_Lo, Mulhi2_Hi) =
211306c3fb27SDimitry Andric DAG.SplitScalar(Mulhi2, DL, HalfVT, HalfVT);
211406c3fb27SDimitry Andric SDValue Add2_Lo = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Add1_Lo,
21150b57cec5SDimitry Andric Mulhi2_Lo, Zero1);
211606c3fb27SDimitry Andric SDValue Add2_Hi = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Add1_Hi,
2117349cc55cSDimitry Andric Mulhi2_Hi, Add2_Lo.getValue(1));
21180b57cec5SDimitry Andric SDValue Add2 = DAG.getBitcast(VT,
21190b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi}));
2120349cc55cSDimitry Andric
21210b57cec5SDimitry Andric SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2);
21220b57cec5SDimitry Andric
21230b57cec5SDimitry Andric SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3);
21240b57cec5SDimitry Andric
212506c3fb27SDimitry Andric SDValue Mul3_Lo, Mul3_Hi;
212606c3fb27SDimitry Andric std::tie(Mul3_Lo, Mul3_Hi) = DAG.SplitScalar(Mul3, DL, HalfVT, HalfVT);
212706c3fb27SDimitry Andric SDValue Sub1_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, LHS_Lo,
21280b57cec5SDimitry Andric Mul3_Lo, Zero1);
212906c3fb27SDimitry Andric SDValue Sub1_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, LHS_Hi,
21300b57cec5SDimitry Andric Mul3_Hi, Sub1_Lo.getValue(1));
21310b57cec5SDimitry Andric SDValue Sub1_Mi = DAG.getNode(ISD::SUB, DL, HalfVT, LHS_Hi, Mul3_Hi);
21320b57cec5SDimitry Andric SDValue Sub1 = DAG.getBitcast(VT,
21330b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub1_Lo, Sub1_Hi}));
21340b57cec5SDimitry Andric
21350b57cec5SDimitry Andric SDValue MinusOne = DAG.getConstant(0xffffffffu, DL, HalfVT);
21360b57cec5SDimitry Andric SDValue C1 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, MinusOne, Zero,
21370b57cec5SDimitry Andric ISD::SETUGE);
21380b57cec5SDimitry Andric SDValue C2 = DAG.getSelectCC(DL, Sub1_Lo, RHS_Lo, MinusOne, Zero,
21390b57cec5SDimitry Andric ISD::SETUGE);
21400b57cec5SDimitry Andric SDValue C3 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, C2, C1, ISD::SETEQ);
21410b57cec5SDimitry Andric
21420b57cec5SDimitry Andric // TODO: Here and below portions of the code can be enclosed into if/endif.
21430b57cec5SDimitry Andric // Currently control flow is unconditional and we have 4 selects after
21440b57cec5SDimitry Andric // potential endif to substitute PHIs.
21450b57cec5SDimitry Andric
21460b57cec5SDimitry Andric // if C3 != 0 ...
214706c3fb27SDimitry Andric SDValue Sub2_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub1_Lo,
21480b57cec5SDimitry Andric RHS_Lo, Zero1);
214906c3fb27SDimitry Andric SDValue Sub2_Mi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub1_Mi,
21500b57cec5SDimitry Andric RHS_Hi, Sub1_Lo.getValue(1));
215106c3fb27SDimitry Andric SDValue Sub2_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Mi,
21520b57cec5SDimitry Andric Zero, Sub2_Lo.getValue(1));
21530b57cec5SDimitry Andric SDValue Sub2 = DAG.getBitcast(VT,
21540b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi}));
21550b57cec5SDimitry Andric
21560b57cec5SDimitry Andric SDValue Add3 = DAG.getNode(ISD::ADD, DL, VT, Mulhi3, One64);
21570b57cec5SDimitry Andric
21580b57cec5SDimitry Andric SDValue C4 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, MinusOne, Zero,
21590b57cec5SDimitry Andric ISD::SETUGE);
21600b57cec5SDimitry Andric SDValue C5 = DAG.getSelectCC(DL, Sub2_Lo, RHS_Lo, MinusOne, Zero,
21610b57cec5SDimitry Andric ISD::SETUGE);
21620b57cec5SDimitry Andric SDValue C6 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, C5, C4, ISD::SETEQ);
21630b57cec5SDimitry Andric
21640b57cec5SDimitry Andric // if (C6 != 0)
21650b57cec5SDimitry Andric SDValue Add4 = DAG.getNode(ISD::ADD, DL, VT, Add3, One64);
21660b57cec5SDimitry Andric
216706c3fb27SDimitry Andric SDValue Sub3_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Lo,
21680b57cec5SDimitry Andric RHS_Lo, Zero1);
216906c3fb27SDimitry Andric SDValue Sub3_Mi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Mi,
21700b57cec5SDimitry Andric RHS_Hi, Sub2_Lo.getValue(1));
217106c3fb27SDimitry Andric SDValue Sub3_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub3_Mi,
21720b57cec5SDimitry Andric Zero, Sub3_Lo.getValue(1));
21730b57cec5SDimitry Andric SDValue Sub3 = DAG.getBitcast(VT,
21740b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi}));
21750b57cec5SDimitry Andric
21760b57cec5SDimitry Andric // endif C6
21770b57cec5SDimitry Andric // endif C3
21780b57cec5SDimitry Andric
21790b57cec5SDimitry Andric SDValue Sel1 = DAG.getSelectCC(DL, C6, Zero, Add4, Add3, ISD::SETNE);
21800b57cec5SDimitry Andric SDValue Div = DAG.getSelectCC(DL, C3, Zero, Sel1, Mulhi3, ISD::SETNE);
21810b57cec5SDimitry Andric
21820b57cec5SDimitry Andric SDValue Sel2 = DAG.getSelectCC(DL, C6, Zero, Sub3, Sub2, ISD::SETNE);
21830b57cec5SDimitry Andric SDValue Rem = DAG.getSelectCC(DL, C3, Zero, Sel2, Sub1, ISD::SETNE);
21840b57cec5SDimitry Andric
21850b57cec5SDimitry Andric Results.push_back(Div);
21860b57cec5SDimitry Andric Results.push_back(Rem);
21870b57cec5SDimitry Andric
21880b57cec5SDimitry Andric return;
21890b57cec5SDimitry Andric }
21900b57cec5SDimitry Andric
21910b57cec5SDimitry Andric // r600 expandion.
21920b57cec5SDimitry Andric // Get Speculative values
21930b57cec5SDimitry Andric SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
21940b57cec5SDimitry Andric SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
21950b57cec5SDimitry Andric
21960b57cec5SDimitry Andric SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, Zero, REM_Part, LHS_Hi, ISD::SETEQ);
21970b57cec5SDimitry Andric SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, Zero});
21980b57cec5SDimitry Andric REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM);
21990b57cec5SDimitry Andric
22000b57cec5SDimitry Andric SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, Zero, DIV_Part, Zero, ISD::SETEQ);
22010b57cec5SDimitry Andric SDValue DIV_Lo = Zero;
22020b57cec5SDimitry Andric
22030b57cec5SDimitry Andric const unsigned halfBitWidth = HalfVT.getSizeInBits();
22040b57cec5SDimitry Andric
22050b57cec5SDimitry Andric for (unsigned i = 0; i < halfBitWidth; ++i) {
22060b57cec5SDimitry Andric const unsigned bitPos = halfBitWidth - i - 1;
22070b57cec5SDimitry Andric SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
22080b57cec5SDimitry Andric // Get value of high bit
22090b57cec5SDimitry Andric SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
22100b57cec5SDimitry Andric HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, One);
22110b57cec5SDimitry Andric HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
22120b57cec5SDimitry Andric
22130b57cec5SDimitry Andric // Shift
22140b57cec5SDimitry Andric REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT));
22150b57cec5SDimitry Andric // Add LHS high bit
22160b57cec5SDimitry Andric REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit);
22170b57cec5SDimitry Andric
22180b57cec5SDimitry Andric SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT);
22190b57cec5SDimitry Andric SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, Zero, ISD::SETUGE);
22200b57cec5SDimitry Andric
22210b57cec5SDimitry Andric DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
22220b57cec5SDimitry Andric
22230b57cec5SDimitry Andric // Update REM
22240b57cec5SDimitry Andric SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
22250b57cec5SDimitry Andric REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
22260b57cec5SDimitry Andric }
22270b57cec5SDimitry Andric
22280b57cec5SDimitry Andric SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {DIV_Lo, DIV_Hi});
22290b57cec5SDimitry Andric DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV);
22300b57cec5SDimitry Andric Results.push_back(DIV);
22310b57cec5SDimitry Andric Results.push_back(REM);
22320b57cec5SDimitry Andric }
22330b57cec5SDimitry Andric
LowerUDIVREM(SDValue Op,SelectionDAG & DAG) const22340b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
22350b57cec5SDimitry Andric SelectionDAG &DAG) const {
22360b57cec5SDimitry Andric SDLoc DL(Op);
22370b57cec5SDimitry Andric EVT VT = Op.getValueType();
22380b57cec5SDimitry Andric
22390b57cec5SDimitry Andric if (VT == MVT::i64) {
22400b57cec5SDimitry Andric SmallVector<SDValue, 2> Results;
22410b57cec5SDimitry Andric LowerUDIVREM64(Op, DAG, Results);
22420b57cec5SDimitry Andric return DAG.getMergeValues(Results, DL);
22430b57cec5SDimitry Andric }
22440b57cec5SDimitry Andric
22450b57cec5SDimitry Andric if (VT == MVT::i32) {
22460b57cec5SDimitry Andric if (SDValue Res = LowerDIVREM24(Op, DAG, false))
22470b57cec5SDimitry Andric return Res;
22480b57cec5SDimitry Andric }
22490b57cec5SDimitry Andric
22505ffd83dbSDimitry Andric SDValue X = Op.getOperand(0);
22515ffd83dbSDimitry Andric SDValue Y = Op.getOperand(1);
22520b57cec5SDimitry Andric
22535ffd83dbSDimitry Andric // See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the
22545ffd83dbSDimitry Andric // algorithm used here.
22550b57cec5SDimitry Andric
22565ffd83dbSDimitry Andric // Initial estimate of inv(y).
22575ffd83dbSDimitry Andric SDValue Z = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Y);
22580b57cec5SDimitry Andric
22595ffd83dbSDimitry Andric // One round of UNR.
22605ffd83dbSDimitry Andric SDValue NegY = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Y);
22615ffd83dbSDimitry Andric SDValue NegYZ = DAG.getNode(ISD::MUL, DL, VT, NegY, Z);
22625ffd83dbSDimitry Andric Z = DAG.getNode(ISD::ADD, DL, VT, Z,
22635ffd83dbSDimitry Andric DAG.getNode(ISD::MULHU, DL, VT, Z, NegYZ));
22640b57cec5SDimitry Andric
22655ffd83dbSDimitry Andric // Quotient/remainder estimate.
22665ffd83dbSDimitry Andric SDValue Q = DAG.getNode(ISD::MULHU, DL, VT, X, Z);
22675ffd83dbSDimitry Andric SDValue R =
22685ffd83dbSDimitry Andric DAG.getNode(ISD::SUB, DL, VT, X, DAG.getNode(ISD::MUL, DL, VT, Q, Y));
22690b57cec5SDimitry Andric
22705ffd83dbSDimitry Andric // First quotient/remainder refinement.
22715ffd83dbSDimitry Andric EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
22725ffd83dbSDimitry Andric SDValue One = DAG.getConstant(1, DL, VT);
22735ffd83dbSDimitry Andric SDValue Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE);
22745ffd83dbSDimitry Andric Q = DAG.getNode(ISD::SELECT, DL, VT, Cond,
22755ffd83dbSDimitry Andric DAG.getNode(ISD::ADD, DL, VT, Q, One), Q);
22765ffd83dbSDimitry Andric R = DAG.getNode(ISD::SELECT, DL, VT, Cond,
22775ffd83dbSDimitry Andric DAG.getNode(ISD::SUB, DL, VT, R, Y), R);
22780b57cec5SDimitry Andric
22795ffd83dbSDimitry Andric // Second quotient/remainder refinement.
22805ffd83dbSDimitry Andric Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE);
22815ffd83dbSDimitry Andric Q = DAG.getNode(ISD::SELECT, DL, VT, Cond,
22825ffd83dbSDimitry Andric DAG.getNode(ISD::ADD, DL, VT, Q, One), Q);
22835ffd83dbSDimitry Andric R = DAG.getNode(ISD::SELECT, DL, VT, Cond,
22845ffd83dbSDimitry Andric DAG.getNode(ISD::SUB, DL, VT, R, Y), R);
22850b57cec5SDimitry Andric
22865ffd83dbSDimitry Andric return DAG.getMergeValues({Q, R}, DL);
22870b57cec5SDimitry Andric }
22880b57cec5SDimitry Andric
LowerSDIVREM(SDValue Op,SelectionDAG & DAG) const22890b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
22900b57cec5SDimitry Andric SelectionDAG &DAG) const {
22910b57cec5SDimitry Andric SDLoc DL(Op);
22920b57cec5SDimitry Andric EVT VT = Op.getValueType();
22930b57cec5SDimitry Andric
22940b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0);
22950b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1);
22960b57cec5SDimitry Andric
22970b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, VT);
22980b57cec5SDimitry Andric SDValue NegOne = DAG.getConstant(-1, DL, VT);
22990b57cec5SDimitry Andric
23000b57cec5SDimitry Andric if (VT == MVT::i32) {
23010b57cec5SDimitry Andric if (SDValue Res = LowerDIVREM24(Op, DAG, true))
23020b57cec5SDimitry Andric return Res;
23030b57cec5SDimitry Andric }
23040b57cec5SDimitry Andric
23050b57cec5SDimitry Andric if (VT == MVT::i64 &&
23060b57cec5SDimitry Andric DAG.ComputeNumSignBits(LHS) > 32 &&
23070b57cec5SDimitry Andric DAG.ComputeNumSignBits(RHS) > 32) {
23080b57cec5SDimitry Andric EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
23090b57cec5SDimitry Andric
23100b57cec5SDimitry Andric //HiLo split
23110b57cec5SDimitry Andric SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
23120b57cec5SDimitry Andric SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
23130b57cec5SDimitry Andric SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
23140b57cec5SDimitry Andric LHS_Lo, RHS_Lo);
23150b57cec5SDimitry Andric SDValue Res[2] = {
23160b57cec5SDimitry Andric DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
23170b57cec5SDimitry Andric DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
23180b57cec5SDimitry Andric };
23190b57cec5SDimitry Andric return DAG.getMergeValues(Res, DL);
23200b57cec5SDimitry Andric }
23210b57cec5SDimitry Andric
23220b57cec5SDimitry Andric SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
23230b57cec5SDimitry Andric SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
23240b57cec5SDimitry Andric SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
23250b57cec5SDimitry Andric SDValue RSign = LHSign; // Remainder sign is the same as LHS
23260b57cec5SDimitry Andric
23270b57cec5SDimitry Andric LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
23280b57cec5SDimitry Andric RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
23290b57cec5SDimitry Andric
23300b57cec5SDimitry Andric LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
23310b57cec5SDimitry Andric RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
23320b57cec5SDimitry Andric
23330b57cec5SDimitry Andric SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
23340b57cec5SDimitry Andric SDValue Rem = Div.getValue(1);
23350b57cec5SDimitry Andric
23360b57cec5SDimitry Andric Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
23370b57cec5SDimitry Andric Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
23380b57cec5SDimitry Andric
23390b57cec5SDimitry Andric Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
23400b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
23410b57cec5SDimitry Andric
23420b57cec5SDimitry Andric SDValue Res[2] = {
23430b57cec5SDimitry Andric Div,
23440b57cec5SDimitry Andric Rem
23450b57cec5SDimitry Andric };
23460b57cec5SDimitry Andric return DAG.getMergeValues(Res, DL);
23470b57cec5SDimitry Andric }
23480b57cec5SDimitry Andric
2349e8d8bef9SDimitry Andric // (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x)
LowerFREM(SDValue Op,SelectionDAG & DAG) const23500b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
23510b57cec5SDimitry Andric SDLoc SL(Op);
23520b57cec5SDimitry Andric EVT VT = Op.getValueType();
2353e8d8bef9SDimitry Andric auto Flags = Op->getFlags();
23540b57cec5SDimitry Andric SDValue X = Op.getOperand(0);
23550b57cec5SDimitry Andric SDValue Y = Op.getOperand(1);
23560b57cec5SDimitry Andric
2357e8d8bef9SDimitry Andric SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags);
2358e8d8bef9SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags);
2359e8d8bef9SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags);
2360e8d8bef9SDimitry Andric // TODO: For f32 use FMAD instead if !hasFastFMA32?
2361e8d8bef9SDimitry Andric return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags);
23620b57cec5SDimitry Andric }
23630b57cec5SDimitry Andric
LowerFCEIL(SDValue Op,SelectionDAG & DAG) const23640b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
23650b57cec5SDimitry Andric SDLoc SL(Op);
23660b57cec5SDimitry Andric SDValue Src = Op.getOperand(0);
23670b57cec5SDimitry Andric
23680b57cec5SDimitry Andric // result = trunc(src)
23690b57cec5SDimitry Andric // if (src > 0.0 && src != result)
23700b57cec5SDimitry Andric // result += 1.0
23710b57cec5SDimitry Andric
23720b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
23730b57cec5SDimitry Andric
23740b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
23750b57cec5SDimitry Andric const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
23760b57cec5SDimitry Andric
23770b57cec5SDimitry Andric EVT SetCCVT =
23780b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
23790b57cec5SDimitry Andric
23800b57cec5SDimitry Andric SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
23810b57cec5SDimitry Andric SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
23820b57cec5SDimitry Andric SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
23830b57cec5SDimitry Andric
23840b57cec5SDimitry Andric SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
23850b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags?
23860b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
23870b57cec5SDimitry Andric }
23880b57cec5SDimitry Andric
extractF64Exponent(SDValue Hi,const SDLoc & SL,SelectionDAG & DAG)23890b57cec5SDimitry Andric static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL,
23900b57cec5SDimitry Andric SelectionDAG &DAG) {
23910b57cec5SDimitry Andric const unsigned FractBits = 52;
23920b57cec5SDimitry Andric const unsigned ExpBits = 11;
23930b57cec5SDimitry Andric
23940b57cec5SDimitry Andric SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
23950b57cec5SDimitry Andric Hi,
23960b57cec5SDimitry Andric DAG.getConstant(FractBits - 32, SL, MVT::i32),
23970b57cec5SDimitry Andric DAG.getConstant(ExpBits, SL, MVT::i32));
23980b57cec5SDimitry Andric SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
23990b57cec5SDimitry Andric DAG.getConstant(1023, SL, MVT::i32));
24000b57cec5SDimitry Andric
24010b57cec5SDimitry Andric return Exp;
24020b57cec5SDimitry Andric }
24030b57cec5SDimitry Andric
LowerFTRUNC(SDValue Op,SelectionDAG & DAG) const24040b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
24050b57cec5SDimitry Andric SDLoc SL(Op);
24060b57cec5SDimitry Andric SDValue Src = Op.getOperand(0);
24070b57cec5SDimitry Andric
24080b57cec5SDimitry Andric assert(Op.getValueType() == MVT::f64);
24090b57cec5SDimitry Andric
24100b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
24110b57cec5SDimitry Andric
24120b57cec5SDimitry Andric // Extract the upper half, since this is where we will find the sign and
24130b57cec5SDimitry Andric // exponent.
2414349cc55cSDimitry Andric SDValue Hi = getHiHalf64(Src, DAG);
24150b57cec5SDimitry Andric
24160b57cec5SDimitry Andric SDValue Exp = extractF64Exponent(Hi, SL, DAG);
24170b57cec5SDimitry Andric
24180b57cec5SDimitry Andric const unsigned FractBits = 52;
24190b57cec5SDimitry Andric
24200b57cec5SDimitry Andric // Extract the sign bit.
24210b57cec5SDimitry Andric const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);
24220b57cec5SDimitry Andric SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
24230b57cec5SDimitry Andric
24240b57cec5SDimitry Andric // Extend back to 64-bits.
24250b57cec5SDimitry Andric SDValue SignBit64 = DAG.getBuildVector(MVT::v2i32, SL, {Zero, SignBit});
24260b57cec5SDimitry Andric SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
24270b57cec5SDimitry Andric
24280b57cec5SDimitry Andric SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
24290b57cec5SDimitry Andric const SDValue FractMask
24300b57cec5SDimitry Andric = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
24310b57cec5SDimitry Andric
24320b57cec5SDimitry Andric SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
24330b57cec5SDimitry Andric SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
24340b57cec5SDimitry Andric SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
24350b57cec5SDimitry Andric
24360b57cec5SDimitry Andric EVT SetCCVT =
24370b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
24380b57cec5SDimitry Andric
24390b57cec5SDimitry Andric const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
24400b57cec5SDimitry Andric
24410b57cec5SDimitry Andric SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
24420b57cec5SDimitry Andric SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
24430b57cec5SDimitry Andric
24440b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
24450b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
24460b57cec5SDimitry Andric
24470b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
24480b57cec5SDimitry Andric }
24490b57cec5SDimitry Andric
LowerFROUNDEVEN(SDValue Op,SelectionDAG & DAG) const24505f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op,
24515f757f3fSDimitry Andric SelectionDAG &DAG) const {
24520b57cec5SDimitry Andric SDLoc SL(Op);
24530b57cec5SDimitry Andric SDValue Src = Op.getOperand(0);
24540b57cec5SDimitry Andric
24550b57cec5SDimitry Andric assert(Op.getValueType() == MVT::f64);
24560b57cec5SDimitry Andric
24570b57cec5SDimitry Andric APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
24580b57cec5SDimitry Andric SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
24590b57cec5SDimitry Andric SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
24600b57cec5SDimitry Andric
24610b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags?
24620b57cec5SDimitry Andric
24630b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
24640b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
24650b57cec5SDimitry Andric
24660b57cec5SDimitry Andric SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
24670b57cec5SDimitry Andric
24680b57cec5SDimitry Andric APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
24690b57cec5SDimitry Andric SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
24700b57cec5SDimitry Andric
24710b57cec5SDimitry Andric EVT SetCCVT =
24720b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
24730b57cec5SDimitry Andric SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
24740b57cec5SDimitry Andric
24750b57cec5SDimitry Andric return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
24760b57cec5SDimitry Andric }
24770b57cec5SDimitry Andric
LowerFNEARBYINT(SDValue Op,SelectionDAG & DAG) const24785f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op,
24795f757f3fSDimitry Andric SelectionDAG &DAG) const {
24800b57cec5SDimitry Andric // FNEARBYINT and FRINT are the same, except in their handling of FP
24810b57cec5SDimitry Andric // exceptions. Those aren't really meaningful for us, and OpenCL only has
24820b57cec5SDimitry Andric // rint, so just treat them as equivalent.
24835f757f3fSDimitry Andric return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), Op.getValueType(),
24845f757f3fSDimitry Andric Op.getOperand(0));
24850b57cec5SDimitry Andric }
24860b57cec5SDimitry Andric
LowerFRINT(SDValue Op,SelectionDAG & DAG) const24875f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
2488bdd1243dSDimitry Andric auto VT = Op.getValueType();
2489bdd1243dSDimitry Andric auto Arg = Op.getOperand(0u);
24905f757f3fSDimitry Andric return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg);
2491bdd1243dSDimitry Andric }
2492bdd1243dSDimitry Andric
24930b57cec5SDimitry Andric // XXX - May require not supporting f32 denormals?
24940b57cec5SDimitry Andric
24950b57cec5SDimitry Andric // Don't handle v2f16. The extra instructions to scalarize and repack around the
24960b57cec5SDimitry Andric // compare and vselect end up producing worse code than scalarizing the whole
24970b57cec5SDimitry Andric // operation.
LowerFROUND(SDValue Op,SelectionDAG & DAG) const24985ffd83dbSDimitry Andric SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
24990b57cec5SDimitry Andric SDLoc SL(Op);
25000b57cec5SDimitry Andric SDValue X = Op.getOperand(0);
25010b57cec5SDimitry Andric EVT VT = Op.getValueType();
25020b57cec5SDimitry Andric
25030b57cec5SDimitry Andric SDValue T = DAG.getNode(ISD::FTRUNC, SL, VT, X);
25040b57cec5SDimitry Andric
25050b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags?
25060b57cec5SDimitry Andric
25070b57cec5SDimitry Andric SDValue Diff = DAG.getNode(ISD::FSUB, SL, VT, X, T);
25080b57cec5SDimitry Andric
25090b57cec5SDimitry Andric SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff);
25100b57cec5SDimitry Andric
25110b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
25120b57cec5SDimitry Andric const SDValue One = DAG.getConstantFP(1.0, SL, VT);
25130b57cec5SDimitry Andric
25140b57cec5SDimitry Andric EVT SetCCVT =
25150b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
25160b57cec5SDimitry Andric
25175f757f3fSDimitry Andric const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
25180b57cec5SDimitry Andric SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
25195f757f3fSDimitry Andric SDValue OneOrZeroFP = DAG.getNode(ISD::SELECT, SL, VT, Cmp, One, Zero);
25200b57cec5SDimitry Andric
25215f757f3fSDimitry Andric SDValue SignedOffset = DAG.getNode(ISD::FCOPYSIGN, SL, VT, OneOrZeroFP, X);
25225f757f3fSDimitry Andric return DAG.getNode(ISD::FADD, SL, VT, T, SignedOffset);
25230b57cec5SDimitry Andric }
25240b57cec5SDimitry Andric
LowerFFLOOR(SDValue Op,SelectionDAG & DAG) const25250b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
25260b57cec5SDimitry Andric SDLoc SL(Op);
25270b57cec5SDimitry Andric SDValue Src = Op.getOperand(0);
25280b57cec5SDimitry Andric
25290b57cec5SDimitry Andric // result = trunc(src);
25300b57cec5SDimitry Andric // if (src < 0.0 && src != result)
25310b57cec5SDimitry Andric // result += -1.0.
25320b57cec5SDimitry Andric
25330b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
25340b57cec5SDimitry Andric
25350b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
25360b57cec5SDimitry Andric const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
25370b57cec5SDimitry Andric
25380b57cec5SDimitry Andric EVT SetCCVT =
25390b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
25400b57cec5SDimitry Andric
25410b57cec5SDimitry Andric SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
25420b57cec5SDimitry Andric SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
25430b57cec5SDimitry Andric SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
25440b57cec5SDimitry Andric
25450b57cec5SDimitry Andric SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
25460b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags?
25470b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
25480b57cec5SDimitry Andric }
25490b57cec5SDimitry Andric
255006c3fb27SDimitry Andric /// Return true if it's known that \p Src can never be an f32 denormal value.
valueIsKnownNeverF32Denorm(SDValue Src)255106c3fb27SDimitry Andric static bool valueIsKnownNeverF32Denorm(SDValue Src) {
255206c3fb27SDimitry Andric switch (Src.getOpcode()) {
255306c3fb27SDimitry Andric case ISD::FP_EXTEND:
255406c3fb27SDimitry Andric return Src.getOperand(0).getValueType() == MVT::f16;
255506c3fb27SDimitry Andric case ISD::FP16_TO_FP:
25565f757f3fSDimitry Andric case ISD::FFREXP:
255706c3fb27SDimitry Andric return true;
25585f757f3fSDimitry Andric case ISD::INTRINSIC_WO_CHAIN: {
2559647cbc5dSDimitry Andric unsigned IntrinsicID = Src.getConstantOperandVal(0);
25605f757f3fSDimitry Andric switch (IntrinsicID) {
25615f757f3fSDimitry Andric case Intrinsic::amdgcn_frexp_mant:
25625f757f3fSDimitry Andric return true;
25635f757f3fSDimitry Andric default:
25645f757f3fSDimitry Andric return false;
25655f757f3fSDimitry Andric }
25665f757f3fSDimitry Andric }
256706c3fb27SDimitry Andric default:
256806c3fb27SDimitry Andric return false;
25690b57cec5SDimitry Andric }
25700b57cec5SDimitry Andric
257106c3fb27SDimitry Andric llvm_unreachable("covered opcode switch");
257206c3fb27SDimitry Andric }
257306c3fb27SDimitry Andric
allowApproxFunc(const SelectionDAG & DAG,SDNodeFlags Flags)25745f757f3fSDimitry Andric bool AMDGPUTargetLowering::allowApproxFunc(const SelectionDAG &DAG,
25755f757f3fSDimitry Andric SDNodeFlags Flags) {
257606c3fb27SDimitry Andric if (Flags.hasApproximateFuncs())
257706c3fb27SDimitry Andric return true;
257806c3fb27SDimitry Andric auto &Options = DAG.getTarget().Options;
257906c3fb27SDimitry Andric return Options.UnsafeFPMath || Options.ApproxFuncFPMath;
258006c3fb27SDimitry Andric }
258106c3fb27SDimitry Andric
needsDenormHandlingF32(const SelectionDAG & DAG,SDValue Src,SDNodeFlags Flags)25825f757f3fSDimitry Andric bool AMDGPUTargetLowering::needsDenormHandlingF32(const SelectionDAG &DAG,
25835f757f3fSDimitry Andric SDValue Src,
258406c3fb27SDimitry Andric SDNodeFlags Flags) {
258506c3fb27SDimitry Andric return !valueIsKnownNeverF32Denorm(Src) &&
258606c3fb27SDimitry Andric DAG.getMachineFunction()
258706c3fb27SDimitry Andric .getDenormalMode(APFloat::IEEEsingle())
258806c3fb27SDimitry Andric .Input != DenormalMode::PreserveSign;
258906c3fb27SDimitry Andric }
259006c3fb27SDimitry Andric
getIsLtSmallestNormal(SelectionDAG & DAG,SDValue Src,SDNodeFlags Flags) const259106c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::getIsLtSmallestNormal(SelectionDAG &DAG,
259206c3fb27SDimitry Andric SDValue Src,
259306c3fb27SDimitry Andric SDNodeFlags Flags) const {
259406c3fb27SDimitry Andric SDLoc SL(Src);
259506c3fb27SDimitry Andric EVT VT = Src.getValueType();
259606c3fb27SDimitry Andric const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT);
259706c3fb27SDimitry Andric SDValue SmallestNormal =
259806c3fb27SDimitry Andric DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT);
259906c3fb27SDimitry Andric
260006c3fb27SDimitry Andric // Want to scale denormals up, but negatives and 0 work just as well on the
260106c3fb27SDimitry Andric // scaled path.
260206c3fb27SDimitry Andric SDValue IsLtSmallestNormal = DAG.getSetCC(
260306c3fb27SDimitry Andric SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src,
260406c3fb27SDimitry Andric SmallestNormal, ISD::SETOLT);
260506c3fb27SDimitry Andric
260606c3fb27SDimitry Andric return IsLtSmallestNormal;
260706c3fb27SDimitry Andric }
260806c3fb27SDimitry Andric
getIsFinite(SelectionDAG & DAG,SDValue Src,SDNodeFlags Flags) const260906c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src,
261006c3fb27SDimitry Andric SDNodeFlags Flags) const {
261106c3fb27SDimitry Andric SDLoc SL(Src);
261206c3fb27SDimitry Andric EVT VT = Src.getValueType();
261306c3fb27SDimitry Andric const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT);
261406c3fb27SDimitry Andric SDValue Inf = DAG.getConstantFP(APFloat::getInf(Semantics), SL, VT);
261506c3fb27SDimitry Andric
261606c3fb27SDimitry Andric SDValue Fabs = DAG.getNode(ISD::FABS, SL, VT, Src, Flags);
261706c3fb27SDimitry Andric SDValue IsFinite = DAG.getSetCC(
261806c3fb27SDimitry Andric SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Fabs,
261906c3fb27SDimitry Andric Inf, ISD::SETOLT);
262006c3fb27SDimitry Andric return IsFinite;
262106c3fb27SDimitry Andric }
262206c3fb27SDimitry Andric
262306c3fb27SDimitry Andric /// If denormal handling is required return the scaled input to FLOG2, and the
262406c3fb27SDimitry Andric /// check for denormal range. Otherwise, return null values.
262506c3fb27SDimitry Andric std::pair<SDValue, SDValue>
getScaledLogInput(SelectionDAG & DAG,const SDLoc SL,SDValue Src,SDNodeFlags Flags) const262606c3fb27SDimitry Andric AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL,
262706c3fb27SDimitry Andric SDValue Src, SDNodeFlags Flags) const {
26288a4dda33SDimitry Andric if (!needsDenormHandlingF32(DAG, Src, Flags))
262906c3fb27SDimitry Andric return {};
263006c3fb27SDimitry Andric
263106c3fb27SDimitry Andric MVT VT = MVT::f32;
263206c3fb27SDimitry Andric const fltSemantics &Semantics = APFloat::IEEEsingle();
263306c3fb27SDimitry Andric SDValue SmallestNormal =
263406c3fb27SDimitry Andric DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT);
263506c3fb27SDimitry Andric
263606c3fb27SDimitry Andric SDValue IsLtSmallestNormal = DAG.getSetCC(
263706c3fb27SDimitry Andric SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src,
263806c3fb27SDimitry Andric SmallestNormal, ISD::SETOLT);
263906c3fb27SDimitry Andric
264006c3fb27SDimitry Andric SDValue Scale32 = DAG.getConstantFP(0x1.0p+32, SL, VT);
264106c3fb27SDimitry Andric SDValue One = DAG.getConstantFP(1.0, SL, VT);
264206c3fb27SDimitry Andric SDValue ScaleFactor =
264306c3fb27SDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, Scale32, One, Flags);
264406c3fb27SDimitry Andric
264506c3fb27SDimitry Andric SDValue ScaledInput = DAG.getNode(ISD::FMUL, SL, VT, Src, ScaleFactor, Flags);
264606c3fb27SDimitry Andric return {ScaledInput, IsLtSmallestNormal};
264706c3fb27SDimitry Andric }
264806c3fb27SDimitry Andric
LowerFLOG2(SDValue Op,SelectionDAG & DAG) const264906c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOG2(SDValue Op, SelectionDAG &DAG) const {
265006c3fb27SDimitry Andric // v_log_f32 is good enough for OpenCL, except it doesn't handle denormals.
265106c3fb27SDimitry Andric // If we have to handle denormals, scale up the input and adjust the result.
265206c3fb27SDimitry Andric
265306c3fb27SDimitry Andric // scaled = x * (is_denormal ? 0x1.0p+32 : 1.0)
265406c3fb27SDimitry Andric // log2 = amdgpu_log2 - (is_denormal ? 32.0 : 0.0)
265506c3fb27SDimitry Andric
265606c3fb27SDimitry Andric SDLoc SL(Op);
265706c3fb27SDimitry Andric EVT VT = Op.getValueType();
265806c3fb27SDimitry Andric SDValue Src = Op.getOperand(0);
265906c3fb27SDimitry Andric SDNodeFlags Flags = Op->getFlags();
266006c3fb27SDimitry Andric
266106c3fb27SDimitry Andric if (VT == MVT::f16) {
266206c3fb27SDimitry Andric // Nothing in half is a denormal when promoted to f32.
266306c3fb27SDimitry Andric assert(!Subtarget->has16BitInsts());
266406c3fb27SDimitry Andric SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);
266506c3fb27SDimitry Andric SDValue Log = DAG.getNode(AMDGPUISD::LOG, SL, MVT::f32, Ext, Flags);
266606c3fb27SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, Log,
266706c3fb27SDimitry Andric DAG.getTargetConstant(0, SL, MVT::i32), Flags);
266806c3fb27SDimitry Andric }
266906c3fb27SDimitry Andric
267006c3fb27SDimitry Andric auto [ScaledInput, IsLtSmallestNormal] =
267106c3fb27SDimitry Andric getScaledLogInput(DAG, SL, Src, Flags);
267206c3fb27SDimitry Andric if (!ScaledInput)
267306c3fb27SDimitry Andric return DAG.getNode(AMDGPUISD::LOG, SL, VT, Src, Flags);
267406c3fb27SDimitry Andric
267506c3fb27SDimitry Andric SDValue Log2 = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);
267606c3fb27SDimitry Andric
267706c3fb27SDimitry Andric SDValue ThirtyTwo = DAG.getConstantFP(32.0, SL, VT);
267806c3fb27SDimitry Andric SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
267906c3fb27SDimitry Andric SDValue ResultOffset =
268006c3fb27SDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, ThirtyTwo, Zero);
268106c3fb27SDimitry Andric return DAG.getNode(ISD::FSUB, SL, VT, Log2, ResultOffset, Flags);
268206c3fb27SDimitry Andric }
268306c3fb27SDimitry Andric
getMad(SelectionDAG & DAG,const SDLoc & SL,EVT VT,SDValue X,SDValue Y,SDValue C,SDNodeFlags Flags=SDNodeFlags ())268406c3fb27SDimitry Andric static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X,
268506c3fb27SDimitry Andric SDValue Y, SDValue C, SDNodeFlags Flags = SDNodeFlags()) {
268606c3fb27SDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Y, Flags);
268706c3fb27SDimitry Andric return DAG.getNode(ISD::FADD, SL, VT, Mul, C, Flags);
268806c3fb27SDimitry Andric }
268906c3fb27SDimitry Andric
LowerFLOGCommon(SDValue Op,SelectionDAG & DAG) const269006c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
269106c3fb27SDimitry Andric SelectionDAG &DAG) const {
269206c3fb27SDimitry Andric SDValue X = Op.getOperand(0);
269306c3fb27SDimitry Andric EVT VT = Op.getValueType();
269406c3fb27SDimitry Andric SDNodeFlags Flags = Op->getFlags();
269506c3fb27SDimitry Andric SDLoc DL(Op);
269606c3fb27SDimitry Andric
269706c3fb27SDimitry Andric const bool IsLog10 = Op.getOpcode() == ISD::FLOG10;
269806c3fb27SDimitry Andric assert(IsLog10 || Op.getOpcode() == ISD::FLOG);
269906c3fb27SDimitry Andric
270006c3fb27SDimitry Andric const auto &Options = getTargetMachine().Options;
270106c3fb27SDimitry Andric if (VT == MVT::f16 || Flags.hasApproximateFuncs() ||
270206c3fb27SDimitry Andric Options.ApproxFuncFPMath || Options.UnsafeFPMath) {
270306c3fb27SDimitry Andric
270406c3fb27SDimitry Andric if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
270506c3fb27SDimitry Andric // Log and multiply in f32 is good enough for f16.
270606c3fb27SDimitry Andric X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags);
270706c3fb27SDimitry Andric }
270806c3fb27SDimitry Andric
27098a4dda33SDimitry Andric SDValue Lowered = LowerFLOGUnsafe(X, DL, DAG, IsLog10, Flags);
271006c3fb27SDimitry Andric if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
271106c3fb27SDimitry Andric return DAG.getNode(ISD::FP_ROUND, DL, VT, Lowered,
271206c3fb27SDimitry Andric DAG.getTargetConstant(0, DL, MVT::i32), Flags);
271306c3fb27SDimitry Andric }
271406c3fb27SDimitry Andric
271506c3fb27SDimitry Andric return Lowered;
271606c3fb27SDimitry Andric }
271706c3fb27SDimitry Andric
271806c3fb27SDimitry Andric auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, DL, X, Flags);
271906c3fb27SDimitry Andric if (ScaledInput)
272006c3fb27SDimitry Andric X = ScaledInput;
272106c3fb27SDimitry Andric
272206c3fb27SDimitry Andric SDValue Y = DAG.getNode(AMDGPUISD::LOG, DL, VT, X, Flags);
272306c3fb27SDimitry Andric
272406c3fb27SDimitry Andric SDValue R;
272506c3fb27SDimitry Andric if (Subtarget->hasFastFMAF32()) {
272606c3fb27SDimitry Andric // c+cc are ln(2)/ln(10) to more than 49 bits
272706c3fb27SDimitry Andric const float c_log10 = 0x1.344134p-2f;
272806c3fb27SDimitry Andric const float cc_log10 = 0x1.09f79ep-26f;
272906c3fb27SDimitry Andric
273006c3fb27SDimitry Andric // c + cc is ln(2) to more than 49 bits
273106c3fb27SDimitry Andric const float c_log = 0x1.62e42ep-1f;
273206c3fb27SDimitry Andric const float cc_log = 0x1.efa39ep-25f;
273306c3fb27SDimitry Andric
273406c3fb27SDimitry Andric SDValue C = DAG.getConstantFP(IsLog10 ? c_log10 : c_log, DL, VT);
273506c3fb27SDimitry Andric SDValue CC = DAG.getConstantFP(IsLog10 ? cc_log10 : cc_log, DL, VT);
273606c3fb27SDimitry Andric
273706c3fb27SDimitry Andric R = DAG.getNode(ISD::FMUL, DL, VT, Y, C, Flags);
273806c3fb27SDimitry Andric SDValue NegR = DAG.getNode(ISD::FNEG, DL, VT, R, Flags);
273906c3fb27SDimitry Andric SDValue FMA0 = DAG.getNode(ISD::FMA, DL, VT, Y, C, NegR, Flags);
274006c3fb27SDimitry Andric SDValue FMA1 = DAG.getNode(ISD::FMA, DL, VT, Y, CC, FMA0, Flags);
274106c3fb27SDimitry Andric R = DAG.getNode(ISD::FADD, DL, VT, R, FMA1, Flags);
274206c3fb27SDimitry Andric } else {
274306c3fb27SDimitry Andric // ch+ct is ln(2)/ln(10) to more than 36 bits
274406c3fb27SDimitry Andric const float ch_log10 = 0x1.344000p-2f;
274506c3fb27SDimitry Andric const float ct_log10 = 0x1.3509f6p-18f;
274606c3fb27SDimitry Andric
274706c3fb27SDimitry Andric // ch + ct is ln(2) to more than 36 bits
274806c3fb27SDimitry Andric const float ch_log = 0x1.62e000p-1f;
274906c3fb27SDimitry Andric const float ct_log = 0x1.0bfbe8p-15f;
275006c3fb27SDimitry Andric
275106c3fb27SDimitry Andric SDValue CH = DAG.getConstantFP(IsLog10 ? ch_log10 : ch_log, DL, VT);
275206c3fb27SDimitry Andric SDValue CT = DAG.getConstantFP(IsLog10 ? ct_log10 : ct_log, DL, VT);
275306c3fb27SDimitry Andric
275406c3fb27SDimitry Andric SDValue YAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Y);
275506c3fb27SDimitry Andric SDValue MaskConst = DAG.getConstant(0xfffff000, DL, MVT::i32);
275606c3fb27SDimitry Andric SDValue YHInt = DAG.getNode(ISD::AND, DL, MVT::i32, YAsInt, MaskConst);
275706c3fb27SDimitry Andric SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt);
275806c3fb27SDimitry Andric SDValue YT = DAG.getNode(ISD::FSUB, DL, VT, Y, YH, Flags);
275906c3fb27SDimitry Andric
276006c3fb27SDimitry Andric SDValue YTCT = DAG.getNode(ISD::FMUL, DL, VT, YT, CT, Flags);
276106c3fb27SDimitry Andric SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags);
276206c3fb27SDimitry Andric SDValue Mad1 = getMad(DAG, DL, VT, YT, CH, Mad0, Flags);
276306c3fb27SDimitry Andric R = getMad(DAG, DL, VT, YH, CH, Mad1);
276406c3fb27SDimitry Andric }
276506c3fb27SDimitry Andric
276606c3fb27SDimitry Andric const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) &&
276706c3fb27SDimitry Andric (Flags.hasNoInfs() || Options.NoInfsFPMath);
276806c3fb27SDimitry Andric
276906c3fb27SDimitry Andric // TODO: Check if known finite from source value.
277006c3fb27SDimitry Andric if (!IsFiniteOnly) {
277106c3fb27SDimitry Andric SDValue IsFinite = getIsFinite(DAG, Y, Flags);
277206c3fb27SDimitry Andric R = DAG.getNode(ISD::SELECT, DL, VT, IsFinite, R, Y, Flags);
277306c3fb27SDimitry Andric }
277406c3fb27SDimitry Andric
277506c3fb27SDimitry Andric if (IsScaled) {
277606c3fb27SDimitry Andric SDValue Zero = DAG.getConstantFP(0.0f, DL, VT);
277706c3fb27SDimitry Andric SDValue ShiftK =
277806c3fb27SDimitry Andric DAG.getConstantFP(IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f, DL, VT);
277906c3fb27SDimitry Andric SDValue Shift =
278006c3fb27SDimitry Andric DAG.getNode(ISD::SELECT, DL, VT, IsScaled, ShiftK, Zero, Flags);
278106c3fb27SDimitry Andric R = DAG.getNode(ISD::FSUB, DL, VT, R, Shift, Flags);
278206c3fb27SDimitry Andric }
278306c3fb27SDimitry Andric
278406c3fb27SDimitry Andric return R;
278506c3fb27SDimitry Andric }
278606c3fb27SDimitry Andric
LowerFLOG10(SDValue Op,SelectionDAG & DAG) const278706c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const {
278806c3fb27SDimitry Andric return LowerFLOGCommon(Op, DAG);
278906c3fb27SDimitry Andric }
279006c3fb27SDimitry Andric
279106c3fb27SDimitry Andric // Do f32 fast math expansion for flog2 or flog10. This is accurate enough for a
279206c3fb27SDimitry Andric // promote f16 operation.
LowerFLOGUnsafe(SDValue Src,const SDLoc & SL,SelectionDAG & DAG,bool IsLog10,SDNodeFlags Flags) const279306c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL,
27948a4dda33SDimitry Andric SelectionDAG &DAG, bool IsLog10,
279506c3fb27SDimitry Andric SDNodeFlags Flags) const {
279606c3fb27SDimitry Andric EVT VT = Src.getValueType();
27975f757f3fSDimitry Andric unsigned LogOp =
27985f757f3fSDimitry Andric VT == MVT::f32 ? (unsigned)AMDGPUISD::LOG : (unsigned)ISD::FLOG2;
27998a4dda33SDimitry Andric
28008a4dda33SDimitry Andric double Log2BaseInverted =
28018a4dda33SDimitry Andric IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2;
28028a4dda33SDimitry Andric
28038a4dda33SDimitry Andric if (VT == MVT::f32) {
28048a4dda33SDimitry Andric auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, SL, Src, Flags);
28058a4dda33SDimitry Andric if (ScaledInput) {
28068a4dda33SDimitry Andric SDValue LogSrc = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);
28078a4dda33SDimitry Andric SDValue ScaledResultOffset =
28088a4dda33SDimitry Andric DAG.getConstantFP(-32.0 * Log2BaseInverted, SL, VT);
28098a4dda33SDimitry Andric
28108a4dda33SDimitry Andric SDValue Zero = DAG.getConstantFP(0.0f, SL, VT);
28118a4dda33SDimitry Andric
28128a4dda33SDimitry Andric SDValue ResultOffset = DAG.getNode(ISD::SELECT, SL, VT, IsScaled,
28138a4dda33SDimitry Andric ScaledResultOffset, Zero, Flags);
28148a4dda33SDimitry Andric
28158a4dda33SDimitry Andric SDValue Log2Inv = DAG.getConstantFP(Log2BaseInverted, SL, VT);
28168a4dda33SDimitry Andric
28178a4dda33SDimitry Andric if (Subtarget->hasFastFMAF32())
28188a4dda33SDimitry Andric return DAG.getNode(ISD::FMA, SL, VT, LogSrc, Log2Inv, ResultOffset,
28198a4dda33SDimitry Andric Flags);
28208a4dda33SDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, LogSrc, Log2Inv, Flags);
28218a4dda33SDimitry Andric return DAG.getNode(ISD::FADD, SL, VT, Mul, ResultOffset);
28228a4dda33SDimitry Andric }
28238a4dda33SDimitry Andric }
28248a4dda33SDimitry Andric
282506c3fb27SDimitry Andric SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags);
282606c3fb27SDimitry Andric SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT);
282706c3fb27SDimitry Andric
282806c3fb27SDimitry Andric return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand,
282906c3fb27SDimitry Andric Flags);
283006c3fb27SDimitry Andric }
283106c3fb27SDimitry Andric
lowerFEXP2(SDValue Op,SelectionDAG & DAG) const283206c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const {
283306c3fb27SDimitry Andric // v_exp_f32 is good enough for OpenCL, except it doesn't handle denormals.
283406c3fb27SDimitry Andric // If we have to handle denormals, scale up the input and adjust the result.
283506c3fb27SDimitry Andric
283606c3fb27SDimitry Andric SDLoc SL(Op);
283706c3fb27SDimitry Andric EVT VT = Op.getValueType();
283806c3fb27SDimitry Andric SDValue Src = Op.getOperand(0);
283906c3fb27SDimitry Andric SDNodeFlags Flags = Op->getFlags();
284006c3fb27SDimitry Andric
284106c3fb27SDimitry Andric if (VT == MVT::f16) {
284206c3fb27SDimitry Andric // Nothing in half is a denormal when promoted to f32.
284306c3fb27SDimitry Andric assert(!Subtarget->has16BitInsts());
284406c3fb27SDimitry Andric SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);
284506c3fb27SDimitry Andric SDValue Log = DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Ext, Flags);
284606c3fb27SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, Log,
284706c3fb27SDimitry Andric DAG.getTargetConstant(0, SL, MVT::i32), Flags);
284806c3fb27SDimitry Andric }
284906c3fb27SDimitry Andric
285006c3fb27SDimitry Andric assert(VT == MVT::f32);
285106c3fb27SDimitry Andric
28528a4dda33SDimitry Andric if (!needsDenormHandlingF32(DAG, Src, Flags))
285306c3fb27SDimitry Andric return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags);
285406c3fb27SDimitry Andric
285506c3fb27SDimitry Andric // bool needs_scaling = x < -0x1.f80000p+6f;
285606c3fb27SDimitry Andric // v_exp_f32(x + (s ? 0x1.0p+6f : 0.0f)) * (s ? 0x1.0p-64f : 1.0f);
285706c3fb27SDimitry Andric
285806c3fb27SDimitry Andric // -nextafter(128.0, -1)
285906c3fb27SDimitry Andric SDValue RangeCheckConst = DAG.getConstantFP(-0x1.f80000p+6f, SL, VT);
286006c3fb27SDimitry Andric
286106c3fb27SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
286206c3fb27SDimitry Andric
286306c3fb27SDimitry Andric SDValue NeedsScaling =
286406c3fb27SDimitry Andric DAG.getSetCC(SL, SetCCVT, Src, RangeCheckConst, ISD::SETOLT);
286506c3fb27SDimitry Andric
286606c3fb27SDimitry Andric SDValue SixtyFour = DAG.getConstantFP(0x1.0p+6f, SL, VT);
286706c3fb27SDimitry Andric SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
286806c3fb27SDimitry Andric
286906c3fb27SDimitry Andric SDValue AddOffset =
287006c3fb27SDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, SixtyFour, Zero);
287106c3fb27SDimitry Andric
287206c3fb27SDimitry Andric SDValue AddInput = DAG.getNode(ISD::FADD, SL, VT, Src, AddOffset, Flags);
287306c3fb27SDimitry Andric SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, AddInput, Flags);
287406c3fb27SDimitry Andric
287506c3fb27SDimitry Andric SDValue TwoExpNeg64 = DAG.getConstantFP(0x1.0p-64f, SL, VT);
287606c3fb27SDimitry Andric SDValue One = DAG.getConstantFP(1.0, SL, VT);
287706c3fb27SDimitry Andric SDValue ResultScale =
287806c3fb27SDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, TwoExpNeg64, One);
287906c3fb27SDimitry Andric
288006c3fb27SDimitry Andric return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags);
288106c3fb27SDimitry Andric }
288206c3fb27SDimitry Andric
lowerFEXPUnsafe(SDValue X,const SDLoc & SL,SelectionDAG & DAG,SDNodeFlags Flags) const28835f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL,
288406c3fb27SDimitry Andric SelectionDAG &DAG,
288506c3fb27SDimitry Andric SDNodeFlags Flags) const {
28865f757f3fSDimitry Andric EVT VT = X.getValueType();
28875f757f3fSDimitry Andric const SDValue Log2E = DAG.getConstantFP(numbers::log2e, SL, VT);
28885f757f3fSDimitry Andric
28895f757f3fSDimitry Andric if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) {
28900b57cec5SDimitry Andric // exp2(M_LOG2E_F * f);
28915f757f3fSDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Log2E, Flags);
28925f757f3fSDimitry Andric return DAG.getNode(VT == MVT::f32 ? (unsigned)AMDGPUISD::EXP
28935f757f3fSDimitry Andric : (unsigned)ISD::FEXP2,
28945f757f3fSDimitry Andric SL, VT, Mul, Flags);
28955f757f3fSDimitry Andric }
28965f757f3fSDimitry Andric
28975f757f3fSDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
28985f757f3fSDimitry Andric
28995f757f3fSDimitry Andric SDValue Threshold = DAG.getConstantFP(-0x1.5d58a0p+6f, SL, VT);
29005f757f3fSDimitry Andric SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT);
29015f757f3fSDimitry Andric
29025f757f3fSDimitry Andric SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+6f, SL, VT);
29035f757f3fSDimitry Andric
29045f757f3fSDimitry Andric SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags);
29055f757f3fSDimitry Andric
29065f757f3fSDimitry Andric SDValue AdjustedX =
29075f757f3fSDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
29085f757f3fSDimitry Andric
29095f757f3fSDimitry Andric SDValue ExpInput = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, Log2E, Flags);
29105f757f3fSDimitry Andric
29115f757f3fSDimitry Andric SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, ExpInput, Flags);
29125f757f3fSDimitry Andric
29135f757f3fSDimitry Andric SDValue ResultScaleFactor = DAG.getConstantFP(0x1.969d48p-93f, SL, VT);
29145f757f3fSDimitry Andric SDValue AdjustedResult =
29155f757f3fSDimitry Andric DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScaleFactor, Flags);
29165f757f3fSDimitry Andric
29175f757f3fSDimitry Andric return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, Exp2,
29185f757f3fSDimitry Andric Flags);
29195f757f3fSDimitry Andric }
29205f757f3fSDimitry Andric
29215f757f3fSDimitry Andric /// Emit approx-funcs appropriate lowering for exp10. inf/nan should still be
29225f757f3fSDimitry Andric /// handled correctly.
lowerFEXP10Unsafe(SDValue X,const SDLoc & SL,SelectionDAG & DAG,SDNodeFlags Flags) const29235f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP10Unsafe(SDValue X, const SDLoc &SL,
29245f757f3fSDimitry Andric SelectionDAG &DAG,
29255f757f3fSDimitry Andric SDNodeFlags Flags) const {
29265f757f3fSDimitry Andric const EVT VT = X.getValueType();
29275f757f3fSDimitry Andric const unsigned Exp2Op = VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2;
29285f757f3fSDimitry Andric
29295f757f3fSDimitry Andric if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) {
29305f757f3fSDimitry Andric // exp2(x * 0x1.a92000p+1f) * exp2(x * 0x1.4f0978p-11f);
29315f757f3fSDimitry Andric SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT);
29325f757f3fSDimitry Andric SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT);
29335f757f3fSDimitry Andric
29345f757f3fSDimitry Andric SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, X, K0, Flags);
29355f757f3fSDimitry Andric SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);
29365f757f3fSDimitry Andric SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, X, K1, Flags);
29375f757f3fSDimitry Andric SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);
29385f757f3fSDimitry Andric return DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1);
29395f757f3fSDimitry Andric }
29405f757f3fSDimitry Andric
29415f757f3fSDimitry Andric // bool s = x < -0x1.2f7030p+5f;
29425f757f3fSDimitry Andric // x += s ? 0x1.0p+5f : 0.0f;
29435f757f3fSDimitry Andric // exp10 = exp2(x * 0x1.a92000p+1f) *
29445f757f3fSDimitry Andric // exp2(x * 0x1.4f0978p-11f) *
29455f757f3fSDimitry Andric // (s ? 0x1.9f623ep-107f : 1.0f);
29465f757f3fSDimitry Andric
29475f757f3fSDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
29485f757f3fSDimitry Andric
29495f757f3fSDimitry Andric SDValue Threshold = DAG.getConstantFP(-0x1.2f7030p+5f, SL, VT);
29505f757f3fSDimitry Andric SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT);
29515f757f3fSDimitry Andric
29525f757f3fSDimitry Andric SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+5f, SL, VT);
29535f757f3fSDimitry Andric SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags);
29545f757f3fSDimitry Andric SDValue AdjustedX =
29555f757f3fSDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
29565f757f3fSDimitry Andric
29575f757f3fSDimitry Andric SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT);
29585f757f3fSDimitry Andric SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT);
29595f757f3fSDimitry Andric
29605f757f3fSDimitry Andric SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K0, Flags);
29615f757f3fSDimitry Andric SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);
29625f757f3fSDimitry Andric SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K1, Flags);
29635f757f3fSDimitry Andric SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);
29645f757f3fSDimitry Andric
29655f757f3fSDimitry Andric SDValue MulExps = DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1, Flags);
29665f757f3fSDimitry Andric
29675f757f3fSDimitry Andric SDValue ResultScaleFactor = DAG.getConstantFP(0x1.9f623ep-107f, SL, VT);
29685f757f3fSDimitry Andric SDValue AdjustedResult =
29695f757f3fSDimitry Andric DAG.getNode(ISD::FMUL, SL, VT, MulExps, ResultScaleFactor, Flags);
29705f757f3fSDimitry Andric
29715f757f3fSDimitry Andric return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, MulExps,
297206c3fb27SDimitry Andric Flags);
297306c3fb27SDimitry Andric }
297406c3fb27SDimitry Andric
lowerFEXP(SDValue Op,SelectionDAG & DAG) const29750b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
29760b57cec5SDimitry Andric EVT VT = Op.getValueType();
29770b57cec5SDimitry Andric SDLoc SL(Op);
297806c3fb27SDimitry Andric SDValue X = Op.getOperand(0);
297906c3fb27SDimitry Andric SDNodeFlags Flags = Op->getFlags();
29805f757f3fSDimitry Andric const bool IsExp10 = Op.getOpcode() == ISD::FEXP10;
29810b57cec5SDimitry Andric
298206c3fb27SDimitry Andric if (VT.getScalarType() == MVT::f16) {
298306c3fb27SDimitry Andric // v_exp_f16 (fmul x, log2e)
298406c3fb27SDimitry Andric if (allowApproxFunc(DAG, Flags)) // TODO: Does this really require fast?
298506c3fb27SDimitry Andric return lowerFEXPUnsafe(X, SL, DAG, Flags);
298606c3fb27SDimitry Andric
298706c3fb27SDimitry Andric if (VT.isVector())
298806c3fb27SDimitry Andric return SDValue();
298906c3fb27SDimitry Andric
299006c3fb27SDimitry Andric // exp(f16 x) ->
299106c3fb27SDimitry Andric // fptrunc (v_exp_f32 (fmul (fpext x), log2e))
299206c3fb27SDimitry Andric
299306c3fb27SDimitry Andric // Nothing in half is a denormal when promoted to f32.
299406c3fb27SDimitry Andric SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags);
299506c3fb27SDimitry Andric SDValue Lowered = lowerFEXPUnsafe(Ext, SL, DAG, Flags);
299606c3fb27SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, Lowered,
299706c3fb27SDimitry Andric DAG.getTargetConstant(0, SL, MVT::i32), Flags);
299806c3fb27SDimitry Andric }
299906c3fb27SDimitry Andric
300006c3fb27SDimitry Andric assert(VT == MVT::f32);
300106c3fb27SDimitry Andric
300206c3fb27SDimitry Andric // TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
300306c3fb27SDimitry Andric // library behavior. Also, is known-not-daz source sufficient?
30045f757f3fSDimitry Andric if (allowApproxFunc(DAG, Flags)) {
30055f757f3fSDimitry Andric return IsExp10 ? lowerFEXP10Unsafe(X, SL, DAG, Flags)
30065f757f3fSDimitry Andric : lowerFEXPUnsafe(X, SL, DAG, Flags);
300706c3fb27SDimitry Andric }
300806c3fb27SDimitry Andric
300906c3fb27SDimitry Andric // Algorithm:
301006c3fb27SDimitry Andric //
301106c3fb27SDimitry Andric // e^x = 2^(x/ln(2)) = 2^(x*(64/ln(2))/64)
301206c3fb27SDimitry Andric //
301306c3fb27SDimitry Andric // x*(64/ln(2)) = n + f, |f| <= 0.5, n is integer
301406c3fb27SDimitry Andric // n = 64*m + j, 0 <= j < 64
301506c3fb27SDimitry Andric //
301606c3fb27SDimitry Andric // e^x = 2^((64*m + j + f)/64)
301706c3fb27SDimitry Andric // = (2^m) * (2^(j/64)) * 2^(f/64)
301806c3fb27SDimitry Andric // = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64))
301906c3fb27SDimitry Andric //
302006c3fb27SDimitry Andric // f = x*(64/ln(2)) - n
302106c3fb27SDimitry Andric // r = f*(ln(2)/64) = x - n*(ln(2)/64)
302206c3fb27SDimitry Andric //
302306c3fb27SDimitry Andric // e^x = (2^m) * (2^(j/64)) * e^r
302406c3fb27SDimitry Andric //
302506c3fb27SDimitry Andric // (2^(j/64)) is precomputed
302606c3fb27SDimitry Andric //
302706c3fb27SDimitry Andric // e^r = 1 + r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5!
302806c3fb27SDimitry Andric // e^r = 1 + q
302906c3fb27SDimitry Andric //
303006c3fb27SDimitry Andric // q = r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5!
303106c3fb27SDimitry Andric //
303206c3fb27SDimitry Andric // e^x = (2^m) * ( (2^(j/64)) + q*(2^(j/64)) )
303306c3fb27SDimitry Andric SDNodeFlags FlagsNoContract = Flags;
303406c3fb27SDimitry Andric FlagsNoContract.setAllowContract(false);
303506c3fb27SDimitry Andric
303606c3fb27SDimitry Andric SDValue PH, PL;
303706c3fb27SDimitry Andric if (Subtarget->hasFastFMAF32()) {
303806c3fb27SDimitry Andric const float c_exp = numbers::log2ef;
303906c3fb27SDimitry Andric const float cc_exp = 0x1.4ae0bep-26f; // c+cc are 49 bits
304006c3fb27SDimitry Andric const float c_exp10 = 0x1.a934f0p+1f;
304106c3fb27SDimitry Andric const float cc_exp10 = 0x1.2f346ep-24f;
304206c3fb27SDimitry Andric
304306c3fb27SDimitry Andric SDValue C = DAG.getConstantFP(IsExp10 ? c_exp10 : c_exp, SL, VT);
304406c3fb27SDimitry Andric SDValue CC = DAG.getConstantFP(IsExp10 ? cc_exp10 : cc_exp, SL, VT);
304506c3fb27SDimitry Andric
304606c3fb27SDimitry Andric PH = DAG.getNode(ISD::FMUL, SL, VT, X, C, Flags);
304706c3fb27SDimitry Andric SDValue NegPH = DAG.getNode(ISD::FNEG, SL, VT, PH, Flags);
304806c3fb27SDimitry Andric SDValue FMA0 = DAG.getNode(ISD::FMA, SL, VT, X, C, NegPH, Flags);
304906c3fb27SDimitry Andric PL = DAG.getNode(ISD::FMA, SL, VT, X, CC, FMA0, Flags);
305006c3fb27SDimitry Andric } else {
305106c3fb27SDimitry Andric const float ch_exp = 0x1.714000p+0f;
305206c3fb27SDimitry Andric const float cl_exp = 0x1.47652ap-12f; // ch + cl are 36 bits
305306c3fb27SDimitry Andric
305406c3fb27SDimitry Andric const float ch_exp10 = 0x1.a92000p+1f;
305506c3fb27SDimitry Andric const float cl_exp10 = 0x1.4f0978p-11f;
305606c3fb27SDimitry Andric
305706c3fb27SDimitry Andric SDValue CH = DAG.getConstantFP(IsExp10 ? ch_exp10 : ch_exp, SL, VT);
305806c3fb27SDimitry Andric SDValue CL = DAG.getConstantFP(IsExp10 ? cl_exp10 : cl_exp, SL, VT);
305906c3fb27SDimitry Andric
306006c3fb27SDimitry Andric SDValue XAsInt = DAG.getNode(ISD::BITCAST, SL, MVT::i32, X);
306106c3fb27SDimitry Andric SDValue MaskConst = DAG.getConstant(0xfffff000, SL, MVT::i32);
306206c3fb27SDimitry Andric SDValue XHAsInt = DAG.getNode(ISD::AND, SL, MVT::i32, XAsInt, MaskConst);
306306c3fb27SDimitry Andric SDValue XH = DAG.getNode(ISD::BITCAST, SL, VT, XHAsInt);
306406c3fb27SDimitry Andric SDValue XL = DAG.getNode(ISD::FSUB, SL, VT, X, XH, Flags);
306506c3fb27SDimitry Andric
306606c3fb27SDimitry Andric PH = DAG.getNode(ISD::FMUL, SL, VT, XH, CH, Flags);
306706c3fb27SDimitry Andric
306806c3fb27SDimitry Andric SDValue XLCL = DAG.getNode(ISD::FMUL, SL, VT, XL, CL, Flags);
306906c3fb27SDimitry Andric SDValue Mad0 = getMad(DAG, SL, VT, XL, CH, XLCL, Flags);
307006c3fb27SDimitry Andric PL = getMad(DAG, SL, VT, XH, CL, Mad0, Flags);
307106c3fb27SDimitry Andric }
307206c3fb27SDimitry Andric
30735f757f3fSDimitry Andric SDValue E = DAG.getNode(ISD::FROUNDEVEN, SL, VT, PH, Flags);
307406c3fb27SDimitry Andric
307506c3fb27SDimitry Andric // It is unsafe to contract this fsub into the PH multiply.
307606c3fb27SDimitry Andric SDValue PHSubE = DAG.getNode(ISD::FSUB, SL, VT, PH, E, FlagsNoContract);
307706c3fb27SDimitry Andric
307806c3fb27SDimitry Andric SDValue A = DAG.getNode(ISD::FADD, SL, VT, PHSubE, PL, Flags);
307906c3fb27SDimitry Andric SDValue IntE = DAG.getNode(ISD::FP_TO_SINT, SL, MVT::i32, E);
308006c3fb27SDimitry Andric SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, A, Flags);
308106c3fb27SDimitry Andric
308206c3fb27SDimitry Andric SDValue R = DAG.getNode(ISD::FLDEXP, SL, VT, Exp2, IntE, Flags);
308306c3fb27SDimitry Andric
308406c3fb27SDimitry Andric SDValue UnderflowCheckConst =
308506c3fb27SDimitry Andric DAG.getConstantFP(IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f, SL, VT);
308606c3fb27SDimitry Andric
308706c3fb27SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
308806c3fb27SDimitry Andric SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
308906c3fb27SDimitry Andric SDValue Underflow =
309006c3fb27SDimitry Andric DAG.getSetCC(SL, SetCCVT, X, UnderflowCheckConst, ISD::SETOLT);
309106c3fb27SDimitry Andric
309206c3fb27SDimitry Andric R = DAG.getNode(ISD::SELECT, SL, VT, Underflow, Zero, R);
309306c3fb27SDimitry Andric const auto &Options = getTargetMachine().Options;
309406c3fb27SDimitry Andric
309506c3fb27SDimitry Andric if (!Flags.hasNoInfs() && !Options.NoInfsFPMath) {
309606c3fb27SDimitry Andric SDValue OverflowCheckConst =
309706c3fb27SDimitry Andric DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT);
309806c3fb27SDimitry Andric SDValue Overflow =
309906c3fb27SDimitry Andric DAG.getSetCC(SL, SetCCVT, X, OverflowCheckConst, ISD::SETOGT);
310006c3fb27SDimitry Andric SDValue Inf =
310106c3fb27SDimitry Andric DAG.getConstantFP(APFloat::getInf(APFloat::IEEEsingle()), SL, VT);
310206c3fb27SDimitry Andric R = DAG.getNode(ISD::SELECT, SL, VT, Overflow, Inf, R);
310306c3fb27SDimitry Andric }
310406c3fb27SDimitry Andric
310506c3fb27SDimitry Andric return R;
31060b57cec5SDimitry Andric }
31070b57cec5SDimitry Andric
isCtlzOpc(unsigned Opc)31080b57cec5SDimitry Andric static bool isCtlzOpc(unsigned Opc) {
31090b57cec5SDimitry Andric return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
31100b57cec5SDimitry Andric }
31110b57cec5SDimitry Andric
isCttzOpc(unsigned Opc)31120b57cec5SDimitry Andric static bool isCttzOpc(unsigned Opc) {
31130b57cec5SDimitry Andric return Opc == ISD::CTTZ || Opc == ISD::CTTZ_ZERO_UNDEF;
31140b57cec5SDimitry Andric }
31150b57cec5SDimitry Andric
lowerCTLZResults(SDValue Op,SelectionDAG & DAG) const31167a6dacacSDimitry Andric SDValue AMDGPUTargetLowering::lowerCTLZResults(SDValue Op,
31177a6dacacSDimitry Andric SelectionDAG &DAG) const {
31187a6dacacSDimitry Andric auto SL = SDLoc(Op);
31190fca6ea1SDimitry Andric auto Opc = Op.getOpcode();
31207a6dacacSDimitry Andric auto Arg = Op.getOperand(0u);
31217a6dacacSDimitry Andric auto ResultVT = Op.getValueType();
31227a6dacacSDimitry Andric
31237a6dacacSDimitry Andric if (ResultVT != MVT::i8 && ResultVT != MVT::i16)
31247a6dacacSDimitry Andric return {};
31257a6dacacSDimitry Andric
31260fca6ea1SDimitry Andric assert(isCtlzOpc(Opc));
31277a6dacacSDimitry Andric assert(ResultVT == Arg.getValueType());
31287a6dacacSDimitry Andric
31290fca6ea1SDimitry Andric const uint64_t NumBits = ResultVT.getFixedSizeInBits();
31300fca6ea1SDimitry Andric SDValue NumExtBits = DAG.getConstant(32u - NumBits, SL, MVT::i32);
31310fca6ea1SDimitry Andric SDValue NewOp;
31320fca6ea1SDimitry Andric
31330fca6ea1SDimitry Andric if (Opc == ISD::CTLZ_ZERO_UNDEF) {
31340fca6ea1SDimitry Andric NewOp = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Arg);
31350fca6ea1SDimitry Andric NewOp = DAG.getNode(ISD::SHL, SL, MVT::i32, NewOp, NumExtBits);
31360fca6ea1SDimitry Andric NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);
31370fca6ea1SDimitry Andric } else {
31380fca6ea1SDimitry Andric NewOp = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Arg);
31390fca6ea1SDimitry Andric NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);
31400fca6ea1SDimitry Andric NewOp = DAG.getNode(ISD::SUB, SL, MVT::i32, NewOp, NumExtBits);
31410fca6ea1SDimitry Andric }
31420fca6ea1SDimitry Andric
31437a6dacacSDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, ResultVT, NewOp);
31447a6dacacSDimitry Andric }
31457a6dacacSDimitry Andric
LowerCTLZ_CTTZ(SDValue Op,SelectionDAG & DAG) const31460b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const {
31470b57cec5SDimitry Andric SDLoc SL(Op);
31480b57cec5SDimitry Andric SDValue Src = Op.getOperand(0);
31490b57cec5SDimitry Andric
3150349cc55cSDimitry Andric assert(isCtlzOpc(Op.getOpcode()) || isCttzOpc(Op.getOpcode()));
3151349cc55cSDimitry Andric bool Ctlz = isCtlzOpc(Op.getOpcode());
3152349cc55cSDimitry Andric unsigned NewOpc = Ctlz ? AMDGPUISD::FFBH_U32 : AMDGPUISD::FFBL_B32;
31530b57cec5SDimitry Andric
3154349cc55cSDimitry Andric bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ||
3155349cc55cSDimitry Andric Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF;
3156cb14a3feSDimitry Andric bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64;
31570b57cec5SDimitry Andric
3158cb14a3feSDimitry Andric if (Src.getValueType() == MVT::i32 || Is64BitScalar) {
3159349cc55cSDimitry Andric // (ctlz hi:lo) -> (umin (ffbh src), 32)
3160349cc55cSDimitry Andric // (cttz hi:lo) -> (umin (ffbl src), 32)
3161349cc55cSDimitry Andric // (ctlz_zero_undef src) -> (ffbh src)
3162349cc55cSDimitry Andric // (cttz_zero_undef src) -> (ffbl src)
3163cb14a3feSDimitry Andric
3164cb14a3feSDimitry Andric // 64-bit scalar version produce 32-bit result
3165cb14a3feSDimitry Andric // (ctlz hi:lo) -> (umin (S_FLBIT_I32_B64 src), 64)
3166cb14a3feSDimitry Andric // (cttz hi:lo) -> (umin (S_FF1_I32_B64 src), 64)
3167cb14a3feSDimitry Andric // (ctlz_zero_undef src) -> (S_FLBIT_I32_B64 src)
3168cb14a3feSDimitry Andric // (cttz_zero_undef src) -> (S_FF1_I32_B64 src)
3169349cc55cSDimitry Andric SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
3170349cc55cSDimitry Andric if (!ZeroUndef) {
3171cb14a3feSDimitry Andric const SDValue ConstVal = DAG.getConstant(
3172cb14a3feSDimitry Andric Op.getValueType().getScalarSizeInBits(), SL, MVT::i32);
3173cb14a3feSDimitry Andric NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, ConstVal);
3174349cc55cSDimitry Andric }
3175cb14a3feSDimitry Andric return DAG.getNode(ISD::ZERO_EXTEND, SL, Src.getValueType(), NewOpr);
31760b57cec5SDimitry Andric }
31770b57cec5SDimitry Andric
3178349cc55cSDimitry Andric SDValue Lo, Hi;
3179349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Src, DAG);
3180349cc55cSDimitry Andric
3181349cc55cSDimitry Andric SDValue OprLo = DAG.getNode(NewOpc, SL, MVT::i32, Lo);
3182349cc55cSDimitry Andric SDValue OprHi = DAG.getNode(NewOpc, SL, MVT::i32, Hi);
3183349cc55cSDimitry Andric
3184349cc55cSDimitry Andric // (ctlz hi:lo) -> (umin3 (ffbh hi), (uaddsat (ffbh lo), 32), 64)
3185349cc55cSDimitry Andric // (cttz hi:lo) -> (umin3 (uaddsat (ffbl hi), 32), (ffbl lo), 64)
3186349cc55cSDimitry Andric // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32))
3187349cc55cSDimitry Andric // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
3188349cc55cSDimitry Andric
3189349cc55cSDimitry Andric unsigned AddOpc = ZeroUndef ? ISD::ADD : ISD::UADDSAT;
3190349cc55cSDimitry Andric const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
3191349cc55cSDimitry Andric if (Ctlz)
3192349cc55cSDimitry Andric OprLo = DAG.getNode(AddOpc, SL, MVT::i32, OprLo, Const32);
3193349cc55cSDimitry Andric else
3194349cc55cSDimitry Andric OprHi = DAG.getNode(AddOpc, SL, MVT::i32, OprHi, Const32);
3195349cc55cSDimitry Andric
3196349cc55cSDimitry Andric SDValue NewOpr;
3197349cc55cSDimitry Andric NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, OprLo, OprHi);
31980b57cec5SDimitry Andric if (!ZeroUndef) {
3199349cc55cSDimitry Andric const SDValue Const64 = DAG.getConstant(64, SL, MVT::i32);
3200349cc55cSDimitry Andric NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const64);
32010b57cec5SDimitry Andric }
32020b57cec5SDimitry Andric
32030b57cec5SDimitry Andric return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
32040b57cec5SDimitry Andric }
32050b57cec5SDimitry Andric
LowerINT_TO_FP32(SDValue Op,SelectionDAG & DAG,bool Signed) const32060b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
32070b57cec5SDimitry Andric bool Signed) const {
3208349cc55cSDimitry Andric // The regular method converting a 64-bit integer to float roughly consists of
3209349cc55cSDimitry Andric // 2 steps: normalization and rounding. In fact, after normalization, the
3210349cc55cSDimitry Andric // conversion from a 64-bit integer to a float is essentially the same as the
3211349cc55cSDimitry Andric // one from a 32-bit integer. The only difference is that it has more
3212349cc55cSDimitry Andric // trailing bits to be rounded. To leverage the native 32-bit conversion, a
3213349cc55cSDimitry Andric // 64-bit integer could be preprocessed and fit into a 32-bit integer then
3214349cc55cSDimitry Andric // converted into the correct float number. The basic steps for the unsigned
3215349cc55cSDimitry Andric // conversion are illustrated in the following pseudo code:
3216349cc55cSDimitry Andric //
3217349cc55cSDimitry Andric // f32 uitofp(i64 u) {
3218349cc55cSDimitry Andric // i32 hi, lo = split(u);
3219349cc55cSDimitry Andric // // Only count the leading zeros in hi as we have native support of the
3220349cc55cSDimitry Andric // // conversion from i32 to f32. If hi is all 0s, the conversion is
3221349cc55cSDimitry Andric // // reduced to a 32-bit one automatically.
3222349cc55cSDimitry Andric // i32 shamt = clz(hi); // Return 32 if hi is all 0s.
3223349cc55cSDimitry Andric // u <<= shamt;
3224349cc55cSDimitry Andric // hi, lo = split(u);
3225349cc55cSDimitry Andric // hi |= (lo != 0) ? 1 : 0; // Adjust rounding bit in hi based on lo.
3226349cc55cSDimitry Andric // // convert it as a 32-bit integer and scale the result back.
3227349cc55cSDimitry Andric // return uitofp(hi) * 2^(32 - shamt);
32280b57cec5SDimitry Andric // }
3229349cc55cSDimitry Andric //
3230349cc55cSDimitry Andric // The signed one follows the same principle but uses 'ffbh_i32' to count its
3231349cc55cSDimitry Andric // sign bits instead. If 'ffbh_i32' is not available, its absolute value is
3232349cc55cSDimitry Andric // converted instead followed by negation based its sign bit.
32330b57cec5SDimitry Andric
32340b57cec5SDimitry Andric SDLoc SL(Op);
32350b57cec5SDimitry Andric SDValue Src = Op.getOperand(0);
32360b57cec5SDimitry Andric
3237349cc55cSDimitry Andric SDValue Lo, Hi;
3238349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Src, DAG);
3239349cc55cSDimitry Andric SDValue Sign;
3240349cc55cSDimitry Andric SDValue ShAmt;
3241349cc55cSDimitry Andric if (Signed && Subtarget->isGCN()) {
3242349cc55cSDimitry Andric // We also need to consider the sign bit in Lo if Hi has just sign bits,
3243349cc55cSDimitry Andric // i.e. Hi is 0 or -1. However, that only needs to take the MSB into
3244349cc55cSDimitry Andric // account. That is, the maximal shift is
3245349cc55cSDimitry Andric // - 32 if Lo and Hi have opposite signs;
3246349cc55cSDimitry Andric // - 33 if Lo and Hi have the same sign.
3247349cc55cSDimitry Andric //
3248349cc55cSDimitry Andric // Or, MaxShAmt = 33 + OppositeSign, where
3249349cc55cSDimitry Andric //
3250349cc55cSDimitry Andric // OppositeSign is defined as ((Lo ^ Hi) >> 31), which is
3251349cc55cSDimitry Andric // - -1 if Lo and Hi have opposite signs; and
3252349cc55cSDimitry Andric // - 0 otherwise.
3253349cc55cSDimitry Andric //
3254349cc55cSDimitry Andric // All in all, ShAmt is calculated as
3255349cc55cSDimitry Andric //
3256349cc55cSDimitry Andric // umin(sffbh(Hi), 33 + (Lo^Hi)>>31) - 1.
3257349cc55cSDimitry Andric //
3258349cc55cSDimitry Andric // or
3259349cc55cSDimitry Andric //
3260349cc55cSDimitry Andric // umin(sffbh(Hi) - 1, 32 + (Lo^Hi)>>31).
3261349cc55cSDimitry Andric //
3262349cc55cSDimitry Andric // to reduce the critical path.
3263349cc55cSDimitry Andric SDValue OppositeSign = DAG.getNode(
3264349cc55cSDimitry Andric ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi),
3265349cc55cSDimitry Andric DAG.getConstant(31, SL, MVT::i32));
3266349cc55cSDimitry Andric SDValue MaxShAmt =
3267349cc55cSDimitry Andric DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
3268349cc55cSDimitry Andric OppositeSign);
3269349cc55cSDimitry Andric // Count the leading sign bits.
3270349cc55cSDimitry Andric ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi);
3271349cc55cSDimitry Andric // Different from unsigned conversion, the shift should be one bit less to
3272349cc55cSDimitry Andric // preserve the sign bit.
3273349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt,
3274349cc55cSDimitry Andric DAG.getConstant(1, SL, MVT::i32));
3275349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt);
3276349cc55cSDimitry Andric } else {
32770b57cec5SDimitry Andric if (Signed) {
3278349cc55cSDimitry Andric // Without 'ffbh_i32', only leading zeros could be counted. Take the
3279349cc55cSDimitry Andric // absolute value first.
3280349cc55cSDimitry Andric Sign = DAG.getNode(ISD::SRA, SL, MVT::i64, Src,
3281349cc55cSDimitry Andric DAG.getConstant(63, SL, MVT::i64));
3282349cc55cSDimitry Andric SDValue Abs =
3283349cc55cSDimitry Andric DAG.getNode(ISD::XOR, SL, MVT::i64,
3284349cc55cSDimitry Andric DAG.getNode(ISD::ADD, SL, MVT::i64, Src, Sign), Sign);
3285349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Abs, DAG);
32860b57cec5SDimitry Andric }
3287349cc55cSDimitry Andric // Count the leading zeros.
3288349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::CTLZ, SL, MVT::i32, Hi);
3289349cc55cSDimitry Andric // The shift amount for signed integers is [0, 32].
3290349cc55cSDimitry Andric }
3291349cc55cSDimitry Andric // Normalize the given 64-bit integer.
3292349cc55cSDimitry Andric SDValue Norm = DAG.getNode(ISD::SHL, SL, MVT::i64, Src, ShAmt);
3293349cc55cSDimitry Andric // Split it again.
3294349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Norm, DAG);
3295349cc55cSDimitry Andric // Calculate the adjust bit for rounding.
3296349cc55cSDimitry Andric // (lo != 0) ? 1 : 0 => (lo >= 1) ? 1 : 0 => umin(1, lo)
3297349cc55cSDimitry Andric SDValue Adjust = DAG.getNode(ISD::UMIN, SL, MVT::i32,
3298349cc55cSDimitry Andric DAG.getConstant(1, SL, MVT::i32), Lo);
3299349cc55cSDimitry Andric // Get the 32-bit normalized integer.
3300349cc55cSDimitry Andric Norm = DAG.getNode(ISD::OR, SL, MVT::i32, Hi, Adjust);
3301349cc55cSDimitry Andric // Convert the normalized 32-bit integer into f32.
3302349cc55cSDimitry Andric unsigned Opc =
3303349cc55cSDimitry Andric (Signed && Subtarget->isGCN()) ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
3304349cc55cSDimitry Andric SDValue FVal = DAG.getNode(Opc, SL, MVT::f32, Norm);
33050b57cec5SDimitry Andric
3306349cc55cSDimitry Andric // Finally, need to scale back the converted floating number as the original
3307349cc55cSDimitry Andric // 64-bit integer is converted as a 32-bit one.
3308349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
3309349cc55cSDimitry Andric ShAmt);
3310349cc55cSDimitry Andric // On GCN, use LDEXP directly.
3311349cc55cSDimitry Andric if (Subtarget->isGCN())
331206c3fb27SDimitry Andric return DAG.getNode(ISD::FLDEXP, SL, MVT::f32, FVal, ShAmt);
33130b57cec5SDimitry Andric
3314349cc55cSDimitry Andric // Otherwise, align 'ShAmt' to the exponent part and add it into the exponent
3315349cc55cSDimitry Andric // part directly to emulate the multiplication of 2^ShAmt. That 8-bit
3316349cc55cSDimitry Andric // exponent is enough to avoid overflowing into the sign bit.
3317349cc55cSDimitry Andric SDValue Exp = DAG.getNode(ISD::SHL, SL, MVT::i32, ShAmt,
3318349cc55cSDimitry Andric DAG.getConstant(23, SL, MVT::i32));
3319349cc55cSDimitry Andric SDValue IVal =
3320349cc55cSDimitry Andric DAG.getNode(ISD::ADD, SL, MVT::i32,
3321349cc55cSDimitry Andric DAG.getNode(ISD::BITCAST, SL, MVT::i32, FVal), Exp);
3322349cc55cSDimitry Andric if (Signed) {
3323349cc55cSDimitry Andric // Set the sign bit.
3324349cc55cSDimitry Andric Sign = DAG.getNode(ISD::SHL, SL, MVT::i32,
3325349cc55cSDimitry Andric DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Sign),
3326349cc55cSDimitry Andric DAG.getConstant(31, SL, MVT::i32));
3327349cc55cSDimitry Andric IVal = DAG.getNode(ISD::OR, SL, MVT::i32, IVal, Sign);
3328349cc55cSDimitry Andric }
3329349cc55cSDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::f32, IVal);
33300b57cec5SDimitry Andric }
33310b57cec5SDimitry Andric
LowerINT_TO_FP64(SDValue Op,SelectionDAG & DAG,bool Signed) const33320b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
33330b57cec5SDimitry Andric bool Signed) const {
33340b57cec5SDimitry Andric SDLoc SL(Op);
33350b57cec5SDimitry Andric SDValue Src = Op.getOperand(0);
33360b57cec5SDimitry Andric
3337349cc55cSDimitry Andric SDValue Lo, Hi;
3338349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Src, DAG);
33390b57cec5SDimitry Andric
33400b57cec5SDimitry Andric SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
33410b57cec5SDimitry Andric SL, MVT::f64, Hi);
33420b57cec5SDimitry Andric
33430b57cec5SDimitry Andric SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
33440b57cec5SDimitry Andric
334506c3fb27SDimitry Andric SDValue LdExp = DAG.getNode(ISD::FLDEXP, SL, MVT::f64, CvtHi,
33460b57cec5SDimitry Andric DAG.getConstant(32, SL, MVT::i32));
33470b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags?
33480b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
33490b57cec5SDimitry Andric }
33500b57cec5SDimitry Andric
LowerUINT_TO_FP(SDValue Op,SelectionDAG & DAG) const33510b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
33520b57cec5SDimitry Andric SelectionDAG &DAG) const {
33530b57cec5SDimitry Andric // TODO: Factor out code common with LowerSINT_TO_FP.
33540b57cec5SDimitry Andric EVT DestVT = Op.getValueType();
3355480093f4SDimitry Andric SDValue Src = Op.getOperand(0);
3356480093f4SDimitry Andric EVT SrcVT = Src.getValueType();
3357480093f4SDimitry Andric
3358480093f4SDimitry Andric if (SrcVT == MVT::i16) {
3359480093f4SDimitry Andric if (DestVT == MVT::f16)
3360480093f4SDimitry Andric return Op;
3361480093f4SDimitry Andric SDLoc DL(Op);
3362480093f4SDimitry Andric
3363480093f4SDimitry Andric // Promote src to i32
3364480093f4SDimitry Andric SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src);
3365480093f4SDimitry Andric return DAG.getNode(ISD::UINT_TO_FP, DL, DestVT, Ext);
3366480093f4SDimitry Andric }
3367480093f4SDimitry Andric
33681db9f3b2SDimitry Andric if (DestVT == MVT::bf16) {
33691db9f3b2SDimitry Andric SDLoc SL(Op);
33701db9f3b2SDimitry Andric SDValue ToF32 = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f32, Src);
33711db9f3b2SDimitry Andric SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SL, /*isTarget=*/true);
33721db9f3b2SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, MVT::bf16, ToF32, FPRoundFlag);
33731db9f3b2SDimitry Andric }
33741db9f3b2SDimitry Andric
33751db9f3b2SDimitry Andric if (SrcVT != MVT::i64)
33761db9f3b2SDimitry Andric return Op;
3377480093f4SDimitry Andric
33780b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
33790b57cec5SDimitry Andric SDLoc DL(Op);
33800b57cec5SDimitry Andric
33810b57cec5SDimitry Andric SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
3382bdd1243dSDimitry Andric SDValue FPRoundFlag =
3383bdd1243dSDimitry Andric DAG.getIntPtrConstant(0, SDLoc(Op), /*isTarget=*/true);
33840b57cec5SDimitry Andric SDValue FPRound =
33850b57cec5SDimitry Andric DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
33860b57cec5SDimitry Andric
33870b57cec5SDimitry Andric return FPRound;
33880b57cec5SDimitry Andric }
33890b57cec5SDimitry Andric
33900b57cec5SDimitry Andric if (DestVT == MVT::f32)
33910b57cec5SDimitry Andric return LowerINT_TO_FP32(Op, DAG, false);
33920b57cec5SDimitry Andric
33930b57cec5SDimitry Andric assert(DestVT == MVT::f64);
33940b57cec5SDimitry Andric return LowerINT_TO_FP64(Op, DAG, false);
33950b57cec5SDimitry Andric }
33960b57cec5SDimitry Andric
LowerSINT_TO_FP(SDValue Op,SelectionDAG & DAG) const33970b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
33980b57cec5SDimitry Andric SelectionDAG &DAG) const {
3399480093f4SDimitry Andric EVT DestVT = Op.getValueType();
3400480093f4SDimitry Andric
3401480093f4SDimitry Andric SDValue Src = Op.getOperand(0);
3402480093f4SDimitry Andric EVT SrcVT = Src.getValueType();
3403480093f4SDimitry Andric
3404480093f4SDimitry Andric if (SrcVT == MVT::i16) {
3405480093f4SDimitry Andric if (DestVT == MVT::f16)
3406480093f4SDimitry Andric return Op;
3407480093f4SDimitry Andric
3408480093f4SDimitry Andric SDLoc DL(Op);
3409480093f4SDimitry Andric // Promote src to i32
3410480093f4SDimitry Andric SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, Src);
3411480093f4SDimitry Andric return DAG.getNode(ISD::SINT_TO_FP, DL, DestVT, Ext);
3412480093f4SDimitry Andric }
3413480093f4SDimitry Andric
34141db9f3b2SDimitry Andric if (DestVT == MVT::bf16) {
34151db9f3b2SDimitry Andric SDLoc SL(Op);
34161db9f3b2SDimitry Andric SDValue ToF32 = DAG.getNode(ISD::SINT_TO_FP, SL, MVT::f32, Src);
34171db9f3b2SDimitry Andric SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SL, /*isTarget=*/true);
34181db9f3b2SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, MVT::bf16, ToF32, FPRoundFlag);
34191db9f3b2SDimitry Andric }
34201db9f3b2SDimitry Andric
34211db9f3b2SDimitry Andric if (SrcVT != MVT::i64)
34221db9f3b2SDimitry Andric return Op;
34230b57cec5SDimitry Andric
34240b57cec5SDimitry Andric // TODO: Factor out code common with LowerUINT_TO_FP.
34250b57cec5SDimitry Andric
34260b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
34270b57cec5SDimitry Andric SDLoc DL(Op);
34280b57cec5SDimitry Andric SDValue Src = Op.getOperand(0);
34290b57cec5SDimitry Andric
34300b57cec5SDimitry Andric SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
3431bdd1243dSDimitry Andric SDValue FPRoundFlag =
3432bdd1243dSDimitry Andric DAG.getIntPtrConstant(0, SDLoc(Op), /*isTarget=*/true);
34330b57cec5SDimitry Andric SDValue FPRound =
34340b57cec5SDimitry Andric DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
34350b57cec5SDimitry Andric
34360b57cec5SDimitry Andric return FPRound;
34370b57cec5SDimitry Andric }
34380b57cec5SDimitry Andric
34390b57cec5SDimitry Andric if (DestVT == MVT::f32)
34400b57cec5SDimitry Andric return LowerINT_TO_FP32(Op, DAG, true);
34410b57cec5SDimitry Andric
34420b57cec5SDimitry Andric assert(DestVT == MVT::f64);
34430b57cec5SDimitry Andric return LowerINT_TO_FP64(Op, DAG, true);
34440b57cec5SDimitry Andric }
34450b57cec5SDimitry Andric
LowerFP_TO_INT64(SDValue Op,SelectionDAG & DAG,bool Signed) const3446fe6060f1SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG,
34470b57cec5SDimitry Andric bool Signed) const {
34480b57cec5SDimitry Andric SDLoc SL(Op);
34490b57cec5SDimitry Andric
34500b57cec5SDimitry Andric SDValue Src = Op.getOperand(0);
3451fe6060f1SDimitry Andric EVT SrcVT = Src.getValueType();
34520b57cec5SDimitry Andric
3453fe6060f1SDimitry Andric assert(SrcVT == MVT::f32 || SrcVT == MVT::f64);
34540b57cec5SDimitry Andric
3455fe6060f1SDimitry Andric // The basic idea of converting a floating point number into a pair of 32-bit
3456fe6060f1SDimitry Andric // integers is illustrated as follows:
3457fe6060f1SDimitry Andric //
3458fe6060f1SDimitry Andric // tf := trunc(val);
3459fe6060f1SDimitry Andric // hif := floor(tf * 2^-32);
3460fe6060f1SDimitry Andric // lof := tf - hif * 2^32; // lof is always positive due to floor.
3461fe6060f1SDimitry Andric // hi := fptoi(hif);
3462fe6060f1SDimitry Andric // lo := fptoi(lof);
3463fe6060f1SDimitry Andric //
3464fe6060f1SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, SrcVT, Src);
3465fe6060f1SDimitry Andric SDValue Sign;
3466fe6060f1SDimitry Andric if (Signed && SrcVT == MVT::f32) {
3467fe6060f1SDimitry Andric // However, a 32-bit floating point number has only 23 bits mantissa and
3468fe6060f1SDimitry Andric // it's not enough to hold all the significant bits of `lof` if val is
3469fe6060f1SDimitry Andric // negative. To avoid the loss of precision, We need to take the absolute
3470fe6060f1SDimitry Andric // value after truncating and flip the result back based on the original
3471fe6060f1SDimitry Andric // signedness.
3472fe6060f1SDimitry Andric Sign = DAG.getNode(ISD::SRA, SL, MVT::i32,
3473fe6060f1SDimitry Andric DAG.getNode(ISD::BITCAST, SL, MVT::i32, Trunc),
3474fe6060f1SDimitry Andric DAG.getConstant(31, SL, MVT::i32));
3475fe6060f1SDimitry Andric Trunc = DAG.getNode(ISD::FABS, SL, SrcVT, Trunc);
3476fe6060f1SDimitry Andric }
3477fe6060f1SDimitry Andric
3478fe6060f1SDimitry Andric SDValue K0, K1;
3479fe6060f1SDimitry Andric if (SrcVT == MVT::f64) {
348006c3fb27SDimitry Andric K0 = DAG.getConstantFP(
348106c3fb27SDimitry Andric llvm::bit_cast<double>(UINT64_C(/*2^-32*/ 0x3df0000000000000)), SL,
348206c3fb27SDimitry Andric SrcVT);
348306c3fb27SDimitry Andric K1 = DAG.getConstantFP(
348406c3fb27SDimitry Andric llvm::bit_cast<double>(UINT64_C(/*-2^32*/ 0xc1f0000000000000)), SL,
348506c3fb27SDimitry Andric SrcVT);
3486fe6060f1SDimitry Andric } else {
348706c3fb27SDimitry Andric K0 = DAG.getConstantFP(
348806c3fb27SDimitry Andric llvm::bit_cast<float>(UINT32_C(/*2^-32*/ 0x2f800000)), SL, SrcVT);
348906c3fb27SDimitry Andric K1 = DAG.getConstantFP(
349006c3fb27SDimitry Andric llvm::bit_cast<float>(UINT32_C(/*-2^32*/ 0xcf800000)), SL, SrcVT);
3491fe6060f1SDimitry Andric }
34920b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags?
3493fe6060f1SDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, SrcVT, Trunc, K0);
34940b57cec5SDimitry Andric
3495fe6060f1SDimitry Andric SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, SrcVT, Mul);
34960b57cec5SDimitry Andric
3497fe6060f1SDimitry Andric SDValue Fma = DAG.getNode(ISD::FMA, SL, SrcVT, FloorMul, K1, Trunc);
34980b57cec5SDimitry Andric
3499fe6060f1SDimitry Andric SDValue Hi = DAG.getNode((Signed && SrcVT == MVT::f64) ? ISD::FP_TO_SINT
3500fe6060f1SDimitry Andric : ISD::FP_TO_UINT,
3501fe6060f1SDimitry Andric SL, MVT::i32, FloorMul);
35020b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
35030b57cec5SDimitry Andric
3504fe6060f1SDimitry Andric SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i64,
3505fe6060f1SDimitry Andric DAG.getBuildVector(MVT::v2i32, SL, {Lo, Hi}));
35060b57cec5SDimitry Andric
3507fe6060f1SDimitry Andric if (Signed && SrcVT == MVT::f32) {
3508fe6060f1SDimitry Andric assert(Sign);
3509fe6060f1SDimitry Andric // Flip the result based on the signedness, which is either all 0s or 1s.
3510fe6060f1SDimitry Andric Sign = DAG.getNode(ISD::BITCAST, SL, MVT::i64,
3511fe6060f1SDimitry Andric DAG.getBuildVector(MVT::v2i32, SL, {Sign, Sign}));
3512fe6060f1SDimitry Andric // r := xor(r, sign) - sign;
3513fe6060f1SDimitry Andric Result =
3514fe6060f1SDimitry Andric DAG.getNode(ISD::SUB, SL, MVT::i64,
3515fe6060f1SDimitry Andric DAG.getNode(ISD::XOR, SL, MVT::i64, Result, Sign), Sign);
3516fe6060f1SDimitry Andric }
3517fe6060f1SDimitry Andric
3518fe6060f1SDimitry Andric return Result;
35190b57cec5SDimitry Andric }
35200b57cec5SDimitry Andric
LowerFP_TO_FP16(SDValue Op,SelectionDAG & DAG) const35210b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const {
35220b57cec5SDimitry Andric SDLoc DL(Op);
35230b57cec5SDimitry Andric SDValue N0 = Op.getOperand(0);
35240b57cec5SDimitry Andric
35250b57cec5SDimitry Andric // Convert to target node to get known bits
35260b57cec5SDimitry Andric if (N0.getValueType() == MVT::f32)
35270b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0);
35280b57cec5SDimitry Andric
35290b57cec5SDimitry Andric if (getTargetMachine().Options.UnsafeFPMath) {
35300b57cec5SDimitry Andric // There is a generic expand for FP_TO_FP16 with unsafe fast math.
35310b57cec5SDimitry Andric return SDValue();
35320b57cec5SDimitry Andric }
35330b57cec5SDimitry Andric
35340b57cec5SDimitry Andric assert(N0.getSimpleValueType() == MVT::f64);
35350b57cec5SDimitry Andric
35360b57cec5SDimitry Andric // f64 -> f16 conversion using round-to-nearest-even rounding mode.
35370b57cec5SDimitry Andric const unsigned ExpMask = 0x7ff;
35380b57cec5SDimitry Andric const unsigned ExpBiasf64 = 1023;
35390b57cec5SDimitry Andric const unsigned ExpBiasf16 = 15;
35400b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
35410b57cec5SDimitry Andric SDValue One = DAG.getConstant(1, DL, MVT::i32);
35420b57cec5SDimitry Andric SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0);
35430b57cec5SDimitry Andric SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U,
35440b57cec5SDimitry Andric DAG.getConstant(32, DL, MVT::i64));
35450b57cec5SDimitry Andric UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32);
35460b57cec5SDimitry Andric U = DAG.getZExtOrTrunc(U, DL, MVT::i32);
35470b57cec5SDimitry Andric SDValue E = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
35480b57cec5SDimitry Andric DAG.getConstant(20, DL, MVT::i64));
35490b57cec5SDimitry Andric E = DAG.getNode(ISD::AND, DL, MVT::i32, E,
35500b57cec5SDimitry Andric DAG.getConstant(ExpMask, DL, MVT::i32));
35510b57cec5SDimitry Andric // Subtract the fp64 exponent bias (1023) to get the real exponent and
35520b57cec5SDimitry Andric // add the f16 bias (15) to get the biased exponent for the f16 format.
35530b57cec5SDimitry Andric E = DAG.getNode(ISD::ADD, DL, MVT::i32, E,
35540b57cec5SDimitry Andric DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32));
35550b57cec5SDimitry Andric
35560b57cec5SDimitry Andric SDValue M = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
35570b57cec5SDimitry Andric DAG.getConstant(8, DL, MVT::i32));
35580b57cec5SDimitry Andric M = DAG.getNode(ISD::AND, DL, MVT::i32, M,
35590b57cec5SDimitry Andric DAG.getConstant(0xffe, DL, MVT::i32));
35600b57cec5SDimitry Andric
35610b57cec5SDimitry Andric SDValue MaskedSig = DAG.getNode(ISD::AND, DL, MVT::i32, UH,
35620b57cec5SDimitry Andric DAG.getConstant(0x1ff, DL, MVT::i32));
35630b57cec5SDimitry Andric MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U);
35640b57cec5SDimitry Andric
35650b57cec5SDimitry Andric SDValue Lo40Set = DAG.getSelectCC(DL, MaskedSig, Zero, Zero, One, ISD::SETEQ);
35660b57cec5SDimitry Andric M = DAG.getNode(ISD::OR, DL, MVT::i32, M, Lo40Set);
35670b57cec5SDimitry Andric
35680b57cec5SDimitry Andric // (M != 0 ? 0x0200 : 0) | 0x7c00;
35690b57cec5SDimitry Andric SDValue I = DAG.getNode(ISD::OR, DL, MVT::i32,
35700b57cec5SDimitry Andric DAG.getSelectCC(DL, M, Zero, DAG.getConstant(0x0200, DL, MVT::i32),
35710b57cec5SDimitry Andric Zero, ISD::SETNE), DAG.getConstant(0x7c00, DL, MVT::i32));
35720b57cec5SDimitry Andric
35730b57cec5SDimitry Andric // N = M | (E << 12);
35740b57cec5SDimitry Andric SDValue N = DAG.getNode(ISD::OR, DL, MVT::i32, M,
35750b57cec5SDimitry Andric DAG.getNode(ISD::SHL, DL, MVT::i32, E,
35760b57cec5SDimitry Andric DAG.getConstant(12, DL, MVT::i32)));
35770b57cec5SDimitry Andric
35780b57cec5SDimitry Andric // B = clamp(1-E, 0, 13);
35790b57cec5SDimitry Andric SDValue OneSubExp = DAG.getNode(ISD::SUB, DL, MVT::i32,
35800b57cec5SDimitry Andric One, E);
35810b57cec5SDimitry Andric SDValue B = DAG.getNode(ISD::SMAX, DL, MVT::i32, OneSubExp, Zero);
35820b57cec5SDimitry Andric B = DAG.getNode(ISD::SMIN, DL, MVT::i32, B,
35830b57cec5SDimitry Andric DAG.getConstant(13, DL, MVT::i32));
35840b57cec5SDimitry Andric
35850b57cec5SDimitry Andric SDValue SigSetHigh = DAG.getNode(ISD::OR, DL, MVT::i32, M,
35860b57cec5SDimitry Andric DAG.getConstant(0x1000, DL, MVT::i32));
35870b57cec5SDimitry Andric
35880b57cec5SDimitry Andric SDValue D = DAG.getNode(ISD::SRL, DL, MVT::i32, SigSetHigh, B);
35890b57cec5SDimitry Andric SDValue D0 = DAG.getNode(ISD::SHL, DL, MVT::i32, D, B);
35900b57cec5SDimitry Andric SDValue D1 = DAG.getSelectCC(DL, D0, SigSetHigh, One, Zero, ISD::SETNE);
35910b57cec5SDimitry Andric D = DAG.getNode(ISD::OR, DL, MVT::i32, D, D1);
35920b57cec5SDimitry Andric
35930b57cec5SDimitry Andric SDValue V = DAG.getSelectCC(DL, E, One, D, N, ISD::SETLT);
35940b57cec5SDimitry Andric SDValue VLow3 = DAG.getNode(ISD::AND, DL, MVT::i32, V,
35950b57cec5SDimitry Andric DAG.getConstant(0x7, DL, MVT::i32));
35960b57cec5SDimitry Andric V = DAG.getNode(ISD::SRL, DL, MVT::i32, V,
35970b57cec5SDimitry Andric DAG.getConstant(2, DL, MVT::i32));
35980b57cec5SDimitry Andric SDValue V0 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(3, DL, MVT::i32),
35990b57cec5SDimitry Andric One, Zero, ISD::SETEQ);
36000b57cec5SDimitry Andric SDValue V1 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(5, DL, MVT::i32),
36010b57cec5SDimitry Andric One, Zero, ISD::SETGT);
36020b57cec5SDimitry Andric V1 = DAG.getNode(ISD::OR, DL, MVT::i32, V0, V1);
36030b57cec5SDimitry Andric V = DAG.getNode(ISD::ADD, DL, MVT::i32, V, V1);
36040b57cec5SDimitry Andric
36050b57cec5SDimitry Andric V = DAG.getSelectCC(DL, E, DAG.getConstant(30, DL, MVT::i32),
36060b57cec5SDimitry Andric DAG.getConstant(0x7c00, DL, MVT::i32), V, ISD::SETGT);
36070b57cec5SDimitry Andric V = DAG.getSelectCC(DL, E, DAG.getConstant(1039, DL, MVT::i32),
36080b57cec5SDimitry Andric I, V, ISD::SETEQ);
36090b57cec5SDimitry Andric
36100b57cec5SDimitry Andric // Extract the sign bit.
36110b57cec5SDimitry Andric SDValue Sign = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
36120b57cec5SDimitry Andric DAG.getConstant(16, DL, MVT::i32));
36130b57cec5SDimitry Andric Sign = DAG.getNode(ISD::AND, DL, MVT::i32, Sign,
36140b57cec5SDimitry Andric DAG.getConstant(0x8000, DL, MVT::i32));
36150b57cec5SDimitry Andric
36160b57cec5SDimitry Andric V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V);
36170b57cec5SDimitry Andric return DAG.getZExtOrTrunc(V, DL, Op.getValueType());
36180b57cec5SDimitry Andric }
36190b57cec5SDimitry Andric
/// Custom lowering for FP_TO_SINT / FP_TO_UINT.
///
/// Returns \p Op unchanged when nothing needs to be done here, a replacement
/// node when the conversion is rewritten, or an empty SDValue when no case
/// below applies to an i64 destination.
SDValue AMDGPUTargetLowering::LowerFP_TO_INT(const SDValue Op,
                                             SelectionDAG &DAG) const {
  const unsigned Opc = Op.getOpcode();
  const bool IsSigned = Opc == ISD::FP_TO_SINT;
  SDValue Src = Op.getOperand(0);
  const EVT SrcVT = Src.getValueType();
  const EVT DestVT = Op.getValueType();
  SDLoc DL(Op);

  // Will be selected natively
  if (SrcVT == MVT::f16 && DestVT == MVT::i16)
    return Op;

  // bf16 sources are first extended to f32, then converted with the same
  // opcode.
  if (SrcVT == MVT::bf16) {
    SDValue Extended = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
    return DAG.getNode(Opc, DL, DestVT, Extended);
  }

  const bool SrcIsF32OrF64 = SrcVT == MVT::f32 || SrcVT == MVT::f64;

  // Promote i16 to i32
  if (DestVT == MVT::i16 && SrcIsF32OrF64) {
    SDValue Wide = DAG.getNode(Opc, DL, MVT::i32, Src);
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Wide);
  }

  // Everything below only concerns 64-bit results.
  if (DestVT != MVT::i64)
    return Op;

  // A half-precision source (or an f32 produced by FP16_TO_FP) is converted
  // to i32 and then extended to i64 according to the signedness.
  const bool FromHalf =
      SrcVT == MVT::f16 ||
      (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP);
  if (FromHalf) {
    SDValue Narrow = DAG.getNode(Opc, DL, MVT::i32, Src);
    const unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, MVT::i64, Narrow);
  }

  if (SrcIsF32OrF64)
    return LowerFP_TO_INT64(Op, DAG, IsSigned);

  return SDValue();
}
36630b57cec5SDimitry Andric
LowerSIGN_EXTEND_INREG(SDValue Op,SelectionDAG & DAG) const36640b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
36650b57cec5SDimitry Andric SelectionDAG &DAG) const {
36660b57cec5SDimitry Andric EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
36670b57cec5SDimitry Andric MVT VT = Op.getSimpleValueType();
36680b57cec5SDimitry Andric MVT ScalarVT = VT.getScalarType();
36690b57cec5SDimitry Andric
36700b57cec5SDimitry Andric assert(VT.isVector());
36710b57cec5SDimitry Andric
36720b57cec5SDimitry Andric SDValue Src = Op.getOperand(0);
36730b57cec5SDimitry Andric SDLoc DL(Op);
36740b57cec5SDimitry Andric
36750b57cec5SDimitry Andric // TODO: Don't scalarize on Evergreen?
36760b57cec5SDimitry Andric unsigned NElts = VT.getVectorNumElements();
36770b57cec5SDimitry Andric SmallVector<SDValue, 8> Args;
36780b57cec5SDimitry Andric DAG.ExtractVectorElements(Src, Args, 0, NElts);
36790b57cec5SDimitry Andric
36800b57cec5SDimitry Andric SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
36810b57cec5SDimitry Andric for (unsigned I = 0; I < NElts; ++I)
36820b57cec5SDimitry Andric Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
36830b57cec5SDimitry Andric
36840b57cec5SDimitry Andric return DAG.getBuildVector(VT, DL, Args);
36850b57cec5SDimitry Andric }
36860b57cec5SDimitry Andric
36870b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
36880b57cec5SDimitry Andric // Custom DAG optimizations
36890b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
36900b57cec5SDimitry Andric
isU24(SDValue Op,SelectionDAG & DAG)36910b57cec5SDimitry Andric static bool isU24(SDValue Op, SelectionDAG &DAG) {
36920b57cec5SDimitry Andric return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24;
36930b57cec5SDimitry Andric }
36940b57cec5SDimitry Andric
isI24(SDValue Op,SelectionDAG & DAG)36950b57cec5SDimitry Andric static bool isI24(SDValue Op, SelectionDAG &DAG) {
36960b57cec5SDimitry Andric EVT VT = Op.getValueType();
36970b57cec5SDimitry Andric return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
36980b57cec5SDimitry Andric // as unsigned 24-bit values.
3699349cc55cSDimitry Andric AMDGPUTargetLowering::numBitsSigned(Op, DAG) <= 24;
37000b57cec5SDimitry Andric }
37010b57cec5SDimitry Andric
simplifyMul24(SDNode * Node24,TargetLowering::DAGCombinerInfo & DCI)3702fe6060f1SDimitry Andric static SDValue simplifyMul24(SDNode *Node24,
37030b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) {
37040b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
37055ffd83dbSDimitry Andric const TargetLowering &TLI = DAG.getTargetLoweringInfo();
37068bcb0991SDimitry Andric bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
37078bcb0991SDimitry Andric
37088bcb0991SDimitry Andric SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0);
37098bcb0991SDimitry Andric SDValue RHS = IsIntrin ? Node24->getOperand(2) : Node24->getOperand(1);
37108bcb0991SDimitry Andric unsigned NewOpcode = Node24->getOpcode();
37118bcb0991SDimitry Andric if (IsIntrin) {
3712647cbc5dSDimitry Andric unsigned IID = Node24->getConstantOperandVal(0);
3713349cc55cSDimitry Andric switch (IID) {
3714349cc55cSDimitry Andric case Intrinsic::amdgcn_mul_i24:
3715349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MUL_I24;
3716349cc55cSDimitry Andric break;
3717349cc55cSDimitry Andric case Intrinsic::amdgcn_mul_u24:
3718349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MUL_U24;
3719349cc55cSDimitry Andric break;
3720349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_i24:
3721349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MULHI_I24;
3722349cc55cSDimitry Andric break;
3723349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_u24:
3724349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MULHI_U24;
3725349cc55cSDimitry Andric break;
3726349cc55cSDimitry Andric default:
3727349cc55cSDimitry Andric llvm_unreachable("Expected 24-bit mul intrinsic");
3728349cc55cSDimitry Andric }
37298bcb0991SDimitry Andric }
37300b57cec5SDimitry Andric
37310b57cec5SDimitry Andric APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
37320b57cec5SDimitry Andric
37335ffd83dbSDimitry Andric // First try to simplify using SimplifyMultipleUseDemandedBits which allows
37345ffd83dbSDimitry Andric // the operands to have other uses, but will only perform simplifications that
37355ffd83dbSDimitry Andric // involve bypassing some nodes for this user.
37365ffd83dbSDimitry Andric SDValue DemandedLHS = TLI.SimplifyMultipleUseDemandedBits(LHS, Demanded, DAG);
37375ffd83dbSDimitry Andric SDValue DemandedRHS = TLI.SimplifyMultipleUseDemandedBits(RHS, Demanded, DAG);
37380b57cec5SDimitry Andric if (DemandedLHS || DemandedRHS)
37398bcb0991SDimitry Andric return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(),
37400b57cec5SDimitry Andric DemandedLHS ? DemandedLHS : LHS,
37410b57cec5SDimitry Andric DemandedRHS ? DemandedRHS : RHS);
37420b57cec5SDimitry Andric
37430b57cec5SDimitry Andric // Now try SimplifyDemandedBits which can simplify the nodes used by our
37440b57cec5SDimitry Andric // operands if this node is the only user.
37450b57cec5SDimitry Andric if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI))
37460b57cec5SDimitry Andric return SDValue(Node24, 0);
37470b57cec5SDimitry Andric if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI))
37480b57cec5SDimitry Andric return SDValue(Node24, 0);
37490b57cec5SDimitry Andric
37500b57cec5SDimitry Andric return SDValue();
37510b57cec5SDimitry Andric }
37520b57cec5SDimitry Andric
37530b57cec5SDimitry Andric template <typename IntTy>
constantFoldBFE(SelectionDAG & DAG,IntTy Src0,uint32_t Offset,uint32_t Width,const SDLoc & DL)37540b57cec5SDimitry Andric static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset,
37550b57cec5SDimitry Andric uint32_t Width, const SDLoc &DL) {
37560b57cec5SDimitry Andric if (Width + Offset < 32) {
37570b57cec5SDimitry Andric uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
37580b57cec5SDimitry Andric IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
37590b57cec5SDimitry Andric return DAG.getConstant(Result, DL, MVT::i32);
37600b57cec5SDimitry Andric }
37610b57cec5SDimitry Andric
37620b57cec5SDimitry Andric return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
37630b57cec5SDimitry Andric }
37640b57cec5SDimitry Andric
hasVolatileUser(SDNode * Val)37650b57cec5SDimitry Andric static bool hasVolatileUser(SDNode *Val) {
37660b57cec5SDimitry Andric for (SDNode *U : Val->uses()) {
37670b57cec5SDimitry Andric if (MemSDNode *M = dyn_cast<MemSDNode>(U)) {
37680b57cec5SDimitry Andric if (M->isVolatile())
37690b57cec5SDimitry Andric return true;
37700b57cec5SDimitry Andric }
37710b57cec5SDimitry Andric }
37720b57cec5SDimitry Andric
37730b57cec5SDimitry Andric return false;
37740b57cec5SDimitry Andric }
37750b57cec5SDimitry Andric
shouldCombineMemoryType(EVT VT) const37760b57cec5SDimitry Andric bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const {
37770b57cec5SDimitry Andric // i32 vectors are the canonical memory type.
37780b57cec5SDimitry Andric if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT))
37790b57cec5SDimitry Andric return false;
37800b57cec5SDimitry Andric
37810b57cec5SDimitry Andric if (!VT.isByteSized())
37820b57cec5SDimitry Andric return false;
37830b57cec5SDimitry Andric
37840b57cec5SDimitry Andric unsigned Size = VT.getStoreSize();
37850b57cec5SDimitry Andric
37860b57cec5SDimitry Andric if ((Size == 1 || Size == 2 || Size == 4) && !VT.isVector())
37870b57cec5SDimitry Andric return false;
37880b57cec5SDimitry Andric
37890b57cec5SDimitry Andric if (Size == 3 || (Size > 4 && (Size % 4 != 0)))
37900b57cec5SDimitry Andric return false;
37910b57cec5SDimitry Andric
37920b57cec5SDimitry Andric return true;
37930b57cec5SDimitry Andric }
37940b57cec5SDimitry Andric
37950b57cec5SDimitry Andric // Replace load of an illegal type with a store of a bitcast to a friendlier
37960b57cec5SDimitry Andric // type.
performLoadCombine(SDNode * N,DAGCombinerInfo & DCI) const37970b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
37980b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
37990b57cec5SDimitry Andric if (!DCI.isBeforeLegalize())
38000b57cec5SDimitry Andric return SDValue();
38010b57cec5SDimitry Andric
38020b57cec5SDimitry Andric LoadSDNode *LN = cast<LoadSDNode>(N);
38035ffd83dbSDimitry Andric if (!LN->isSimple() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN))
38040b57cec5SDimitry Andric return SDValue();
38050b57cec5SDimitry Andric
38060b57cec5SDimitry Andric SDLoc SL(N);
38070b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
38080b57cec5SDimitry Andric EVT VT = LN->getMemoryVT();
38090b57cec5SDimitry Andric
38100b57cec5SDimitry Andric unsigned Size = VT.getStoreSize();
38115ffd83dbSDimitry Andric Align Alignment = LN->getAlign();
38125ffd83dbSDimitry Andric if (Alignment < Size && isTypeLegal(VT)) {
3813bdd1243dSDimitry Andric unsigned IsFast;
38140b57cec5SDimitry Andric unsigned AS = LN->getAddressSpace();
38150b57cec5SDimitry Andric
38160b57cec5SDimitry Andric // Expand unaligned loads earlier than legalization. Due to visitation order
38170b57cec5SDimitry Andric // problems during legalization, the emitted instructions to pack and unpack
38180b57cec5SDimitry Andric // the bytes again are not eliminated in the case of an unaligned copy.
3819fe6060f1SDimitry Andric if (!allowsMisalignedMemoryAccesses(
3820fe6060f1SDimitry Andric VT, AS, Alignment, LN->getMemOperand()->getFlags(), &IsFast)) {
3821480093f4SDimitry Andric if (VT.isVector())
382281ad6265SDimitry Andric return SplitVectorLoad(SDValue(LN, 0), DAG);
382381ad6265SDimitry Andric
382481ad6265SDimitry Andric SDValue Ops[2];
38250b57cec5SDimitry Andric std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
3826480093f4SDimitry Andric
38270b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SDLoc(N));
38280b57cec5SDimitry Andric }
38290b57cec5SDimitry Andric
38300b57cec5SDimitry Andric if (!IsFast)
38310b57cec5SDimitry Andric return SDValue();
38320b57cec5SDimitry Andric }
38330b57cec5SDimitry Andric
38340b57cec5SDimitry Andric if (!shouldCombineMemoryType(VT))
38350b57cec5SDimitry Andric return SDValue();
38360b57cec5SDimitry Andric
38370b57cec5SDimitry Andric EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
38380b57cec5SDimitry Andric
38390b57cec5SDimitry Andric SDValue NewLoad
38400b57cec5SDimitry Andric = DAG.getLoad(NewVT, SL, LN->getChain(),
38410b57cec5SDimitry Andric LN->getBasePtr(), LN->getMemOperand());
38420b57cec5SDimitry Andric
38430b57cec5SDimitry Andric SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad);
38440b57cec5SDimitry Andric DCI.CombineTo(N, BC, NewLoad.getValue(1));
38450b57cec5SDimitry Andric return SDValue(N, 0);
38460b57cec5SDimitry Andric }
38470b57cec5SDimitry Andric
38480b57cec5SDimitry Andric // Replace store of an illegal type with a store of a bitcast to a friendlier
38490b57cec5SDimitry Andric // type.
performStoreCombine(SDNode * N,DAGCombinerInfo & DCI) const38500b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
38510b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
38520b57cec5SDimitry Andric if (!DCI.isBeforeLegalize())
38530b57cec5SDimitry Andric return SDValue();
38540b57cec5SDimitry Andric
38550b57cec5SDimitry Andric StoreSDNode *SN = cast<StoreSDNode>(N);
38565ffd83dbSDimitry Andric if (!SN->isSimple() || !ISD::isNormalStore(SN))
38570b57cec5SDimitry Andric return SDValue();
38580b57cec5SDimitry Andric
38590b57cec5SDimitry Andric EVT VT = SN->getMemoryVT();
38600b57cec5SDimitry Andric unsigned Size = VT.getStoreSize();
38610b57cec5SDimitry Andric
38620b57cec5SDimitry Andric SDLoc SL(N);
38630b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
38645ffd83dbSDimitry Andric Align Alignment = SN->getAlign();
38655ffd83dbSDimitry Andric if (Alignment < Size && isTypeLegal(VT)) {
3866bdd1243dSDimitry Andric unsigned IsFast;
38670b57cec5SDimitry Andric unsigned AS = SN->getAddressSpace();
38680b57cec5SDimitry Andric
38690b57cec5SDimitry Andric // Expand unaligned stores earlier than legalization. Due to visitation
38700b57cec5SDimitry Andric // order problems during legalization, the emitted instructions to pack and
38710b57cec5SDimitry Andric // unpack the bytes again are not eliminated in the case of an unaligned
38720b57cec5SDimitry Andric // copy.
3873fe6060f1SDimitry Andric if (!allowsMisalignedMemoryAccesses(
3874fe6060f1SDimitry Andric VT, AS, Alignment, SN->getMemOperand()->getFlags(), &IsFast)) {
38750b57cec5SDimitry Andric if (VT.isVector())
387681ad6265SDimitry Andric return SplitVectorStore(SDValue(SN, 0), DAG);
38770b57cec5SDimitry Andric
38780b57cec5SDimitry Andric return expandUnalignedStore(SN, DAG);
38790b57cec5SDimitry Andric }
38800b57cec5SDimitry Andric
38810b57cec5SDimitry Andric if (!IsFast)
38820b57cec5SDimitry Andric return SDValue();
38830b57cec5SDimitry Andric }
38840b57cec5SDimitry Andric
38850b57cec5SDimitry Andric if (!shouldCombineMemoryType(VT))
38860b57cec5SDimitry Andric return SDValue();
38870b57cec5SDimitry Andric
38880b57cec5SDimitry Andric EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
38890b57cec5SDimitry Andric SDValue Val = SN->getValue();
38900b57cec5SDimitry Andric
38910b57cec5SDimitry Andric //DCI.AddToWorklist(Val.getNode());
38920b57cec5SDimitry Andric
38930b57cec5SDimitry Andric bool OtherUses = !Val.hasOneUse();
38940b57cec5SDimitry Andric SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val);
38950b57cec5SDimitry Andric if (OtherUses) {
38960b57cec5SDimitry Andric SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal);
38970b57cec5SDimitry Andric DAG.ReplaceAllUsesOfValueWith(Val, CastBack);
38980b57cec5SDimitry Andric }
38990b57cec5SDimitry Andric
39000b57cec5SDimitry Andric return DAG.getStore(SN->getChain(), SL, CastVal,
39010b57cec5SDimitry Andric SN->getBasePtr(), SN->getMemOperand());
39020b57cec5SDimitry Andric }
39030b57cec5SDimitry Andric
39040b57cec5SDimitry Andric // FIXME: This should go in generic DAG combiner with an isTruncateFree check,
39050b57cec5SDimitry Andric // but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU
39060b57cec5SDimitry Andric // issues.
performAssertSZExtCombine(SDNode * N,DAGCombinerInfo & DCI) const39070b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
39080b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
39090b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
39100b57cec5SDimitry Andric SDValue N0 = N->getOperand(0);
39110b57cec5SDimitry Andric
39120b57cec5SDimitry Andric // (vt2 (assertzext (truncate vt0:x), vt1)) ->
39130b57cec5SDimitry Andric // (vt2 (truncate (assertzext vt0:x, vt1)))
39140b57cec5SDimitry Andric if (N0.getOpcode() == ISD::TRUNCATE) {
39150b57cec5SDimitry Andric SDValue N1 = N->getOperand(1);
39160b57cec5SDimitry Andric EVT ExtVT = cast<VTSDNode>(N1)->getVT();
39170b57cec5SDimitry Andric SDLoc SL(N);
39180b57cec5SDimitry Andric
39190b57cec5SDimitry Andric SDValue Src = N0.getOperand(0);
39200b57cec5SDimitry Andric EVT SrcVT = Src.getValueType();
39210b57cec5SDimitry Andric if (SrcVT.bitsGE(ExtVT)) {
39220b57cec5SDimitry Andric SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1);
39230b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg);
39240b57cec5SDimitry Andric }
39250b57cec5SDimitry Andric }
39260b57cec5SDimitry Andric
39270b57cec5SDimitry Andric return SDValue();
39280b57cec5SDimitry Andric }
39298bcb0991SDimitry Andric
performIntrinsicWOChainCombine(SDNode * N,DAGCombinerInfo & DCI) const39308bcb0991SDimitry Andric SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
39318bcb0991SDimitry Andric SDNode *N, DAGCombinerInfo &DCI) const {
3932647cbc5dSDimitry Andric unsigned IID = N->getConstantOperandVal(0);
39338bcb0991SDimitry Andric switch (IID) {
39348bcb0991SDimitry Andric case Intrinsic::amdgcn_mul_i24:
39358bcb0991SDimitry Andric case Intrinsic::amdgcn_mul_u24:
3936349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_i24:
3937349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_u24:
3938fe6060f1SDimitry Andric return simplifyMul24(N, DCI);
39395ffd83dbSDimitry Andric case Intrinsic::amdgcn_fract:
39405ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq:
39415ffd83dbSDimitry Andric case Intrinsic::amdgcn_rcp_legacy:
39425ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq_legacy:
39435f757f3fSDimitry Andric case Intrinsic::amdgcn_rsq_clamp: {
39445ffd83dbSDimitry Andric // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
39455ffd83dbSDimitry Andric SDValue Src = N->getOperand(1);
39465ffd83dbSDimitry Andric return Src.isUndef() ? Src : SDValue();
39475ffd83dbSDimitry Andric }
394806c3fb27SDimitry Andric case Intrinsic::amdgcn_frexp_exp: {
394906c3fb27SDimitry Andric // frexp_exp (fneg x) -> frexp_exp x
395006c3fb27SDimitry Andric // frexp_exp (fabs x) -> frexp_exp x
395106c3fb27SDimitry Andric // frexp_exp (fneg (fabs x)) -> frexp_exp x
395206c3fb27SDimitry Andric SDValue Src = N->getOperand(1);
395306c3fb27SDimitry Andric SDValue PeekSign = peekFPSignOps(Src);
395406c3fb27SDimitry Andric if (PeekSign == Src)
395506c3fb27SDimitry Andric return SDValue();
395606c3fb27SDimitry Andric return SDValue(DCI.DAG.UpdateNodeOperands(N, N->getOperand(0), PeekSign),
395706c3fb27SDimitry Andric 0);
395806c3fb27SDimitry Andric }
39598bcb0991SDimitry Andric default:
39608bcb0991SDimitry Andric return SDValue();
39618bcb0991SDimitry Andric }
39628bcb0991SDimitry Andric }
39638bcb0991SDimitry Andric
39640b57cec5SDimitry Andric /// Split the 64-bit value \p LHS into two 32-bit components, and perform the
39650b57cec5SDimitry Andric /// binary operation \p Opc to it with the corresponding constant operands.
splitBinaryBitConstantOpImpl(DAGCombinerInfo & DCI,const SDLoc & SL,unsigned Opc,SDValue LHS,uint32_t ValLo,uint32_t ValHi) const39660b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
39670b57cec5SDimitry Andric DAGCombinerInfo &DCI, const SDLoc &SL,
39680b57cec5SDimitry Andric unsigned Opc, SDValue LHS,
39690b57cec5SDimitry Andric uint32_t ValLo, uint32_t ValHi) const {
39700b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
39710b57cec5SDimitry Andric SDValue Lo, Hi;
39720b57cec5SDimitry Andric std::tie(Lo, Hi) = split64BitValue(LHS, DAG);
39730b57cec5SDimitry Andric
39740b57cec5SDimitry Andric SDValue LoRHS = DAG.getConstant(ValLo, SL, MVT::i32);
39750b57cec5SDimitry Andric SDValue HiRHS = DAG.getConstant(ValHi, SL, MVT::i32);
39760b57cec5SDimitry Andric
39770b57cec5SDimitry Andric SDValue LoAnd = DAG.getNode(Opc, SL, MVT::i32, Lo, LoRHS);
39780b57cec5SDimitry Andric SDValue HiAnd = DAG.getNode(Opc, SL, MVT::i32, Hi, HiRHS);
39790b57cec5SDimitry Andric
39800b57cec5SDimitry Andric // Re-visit the ands. It's possible we eliminated one of them and it could
39810b57cec5SDimitry Andric // simplify the vector.
39820b57cec5SDimitry Andric DCI.AddToWorklist(Lo.getNode());
39830b57cec5SDimitry Andric DCI.AddToWorklist(Hi.getNode());
39840b57cec5SDimitry Andric
39850b57cec5SDimitry Andric SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {LoAnd, HiAnd});
39860b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
39870b57cec5SDimitry Andric }
39880b57cec5SDimitry Andric
performShlCombine(SDNode * N,DAGCombinerInfo & DCI) const39890b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
39900b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
39910b57cec5SDimitry Andric EVT VT = N->getValueType(0);
39920b57cec5SDimitry Andric
39930b57cec5SDimitry Andric ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
39940b57cec5SDimitry Andric if (!RHS)
39950b57cec5SDimitry Andric return SDValue();
39960b57cec5SDimitry Andric
39970b57cec5SDimitry Andric SDValue LHS = N->getOperand(0);
39980b57cec5SDimitry Andric unsigned RHSVal = RHS->getZExtValue();
39990b57cec5SDimitry Andric if (!RHSVal)
40000b57cec5SDimitry Andric return LHS;
40010b57cec5SDimitry Andric
40020b57cec5SDimitry Andric SDLoc SL(N);
40030b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
40040b57cec5SDimitry Andric
40050b57cec5SDimitry Andric switch (LHS->getOpcode()) {
40060b57cec5SDimitry Andric default:
40070b57cec5SDimitry Andric break;
40080b57cec5SDimitry Andric case ISD::ZERO_EXTEND:
40090b57cec5SDimitry Andric case ISD::SIGN_EXTEND:
40100b57cec5SDimitry Andric case ISD::ANY_EXTEND: {
40110b57cec5SDimitry Andric SDValue X = LHS->getOperand(0);
40120b57cec5SDimitry Andric
40130b57cec5SDimitry Andric if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 &&
40140b57cec5SDimitry Andric isOperationLegal(ISD::BUILD_VECTOR, MVT::v2i16)) {
40150b57cec5SDimitry Andric // Prefer build_vector as the canonical form if packed types are legal.
40160b57cec5SDimitry Andric // (shl ([asz]ext i16:x), 16 -> build_vector 0, x
40170b57cec5SDimitry Andric SDValue Vec = DAG.getBuildVector(MVT::v2i16, SL,
40180b57cec5SDimitry Andric { DAG.getConstant(0, SL, MVT::i16), LHS->getOperand(0) });
40190b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
40200b57cec5SDimitry Andric }
40210b57cec5SDimitry Andric
40220b57cec5SDimitry Andric // shl (ext x) => zext (shl x), if shift does not overflow int
40230b57cec5SDimitry Andric if (VT != MVT::i64)
40240b57cec5SDimitry Andric break;
40250b57cec5SDimitry Andric KnownBits Known = DAG.computeKnownBits(X);
40260b57cec5SDimitry Andric unsigned LZ = Known.countMinLeadingZeros();
40270b57cec5SDimitry Andric if (LZ < RHSVal)
40280b57cec5SDimitry Andric break;
40290b57cec5SDimitry Andric EVT XVT = X.getValueType();
40300b57cec5SDimitry Andric SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
40310b57cec5SDimitry Andric return DAG.getZExtOrTrunc(Shl, SL, VT);
40320b57cec5SDimitry Andric }
40330b57cec5SDimitry Andric }
40340b57cec5SDimitry Andric
40350b57cec5SDimitry Andric if (VT != MVT::i64)
40360b57cec5SDimitry Andric return SDValue();
40370b57cec5SDimitry Andric
40380b57cec5SDimitry Andric // i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
40390b57cec5SDimitry Andric
40400b57cec5SDimitry Andric // On some subtargets, 64-bit shift is a quarter rate instruction. In the
40410b57cec5SDimitry Andric // common case, splitting this into a move and a 32-bit shift is faster and
40420b57cec5SDimitry Andric // the same code size.
40430b57cec5SDimitry Andric if (RHSVal < 32)
40440b57cec5SDimitry Andric return SDValue();
40450b57cec5SDimitry Andric
40460b57cec5SDimitry Andric SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32);
40470b57cec5SDimitry Andric
40480b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
40490b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt);
40500b57cec5SDimitry Andric
40510b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
40520b57cec5SDimitry Andric
40530b57cec5SDimitry Andric SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {Zero, NewShift});
40540b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
40550b57cec5SDimitry Andric }
40560b57cec5SDimitry Andric
performSraCombine(SDNode * N,DAGCombinerInfo & DCI) const40570b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
40580b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
40590b57cec5SDimitry Andric if (N->getValueType(0) != MVT::i64)
40600b57cec5SDimitry Andric return SDValue();
40610b57cec5SDimitry Andric
40620b57cec5SDimitry Andric const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
40630b57cec5SDimitry Andric if (!RHS)
40640b57cec5SDimitry Andric return SDValue();
40650b57cec5SDimitry Andric
40660b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
40670b57cec5SDimitry Andric SDLoc SL(N);
40680b57cec5SDimitry Andric unsigned RHSVal = RHS->getZExtValue();
40690b57cec5SDimitry Andric
40700b57cec5SDimitry Andric // (sra i64:x, 32) -> build_pair x, (sra hi_32(x), 31)
40710b57cec5SDimitry Andric if (RHSVal == 32) {
40720b57cec5SDimitry Andric SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
40730b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
40740b57cec5SDimitry Andric DAG.getConstant(31, SL, MVT::i32));
40750b57cec5SDimitry Andric
40760b57cec5SDimitry Andric SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift});
40770b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
40780b57cec5SDimitry Andric }
40790b57cec5SDimitry Andric
40800b57cec5SDimitry Andric // (sra i64:x, 63) -> build_pair (sra hi_32(x), 31), (sra hi_32(x), 31)
40810b57cec5SDimitry Andric if (RHSVal == 63) {
40820b57cec5SDimitry Andric SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
40830b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
40840b57cec5SDimitry Andric DAG.getConstant(31, SL, MVT::i32));
40850b57cec5SDimitry Andric SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, NewShift});
40860b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
40870b57cec5SDimitry Andric }
40880b57cec5SDimitry Andric
40890b57cec5SDimitry Andric return SDValue();
40900b57cec5SDimitry Andric }
40910b57cec5SDimitry Andric
performSrlCombine(SDNode * N,DAGCombinerInfo & DCI) const40920b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
40930b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
40940b57cec5SDimitry Andric auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
40950b57cec5SDimitry Andric if (!RHS)
40960b57cec5SDimitry Andric return SDValue();
40970b57cec5SDimitry Andric
40980b57cec5SDimitry Andric EVT VT = N->getValueType(0);
40990b57cec5SDimitry Andric SDValue LHS = N->getOperand(0);
41000b57cec5SDimitry Andric unsigned ShiftAmt = RHS->getZExtValue();
41010b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
41020b57cec5SDimitry Andric SDLoc SL(N);
41030b57cec5SDimitry Andric
41040b57cec5SDimitry Andric // fold (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1)
41050b57cec5SDimitry Andric // this improves the ability to match BFE patterns in isel.
41060b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::AND) {
41070b57cec5SDimitry Andric if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
410881ad6265SDimitry Andric unsigned MaskIdx, MaskLen;
410981ad6265SDimitry Andric if (Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) &&
411081ad6265SDimitry Andric MaskIdx == ShiftAmt) {
41110b57cec5SDimitry Andric return DAG.getNode(
41120b57cec5SDimitry Andric ISD::AND, SL, VT,
41130b57cec5SDimitry Andric DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
41140b57cec5SDimitry Andric DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1)));
41150b57cec5SDimitry Andric }
41160b57cec5SDimitry Andric }
41170b57cec5SDimitry Andric }
41180b57cec5SDimitry Andric
41190b57cec5SDimitry Andric if (VT != MVT::i64)
41200b57cec5SDimitry Andric return SDValue();
41210b57cec5SDimitry Andric
41220b57cec5SDimitry Andric if (ShiftAmt < 32)
41230b57cec5SDimitry Andric return SDValue();
41240b57cec5SDimitry Andric
41250b57cec5SDimitry Andric // srl i64:x, C for C >= 32
41260b57cec5SDimitry Andric // =>
41270b57cec5SDimitry Andric // build_pair (srl hi_32(x), C - 32), 0
41280b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
41290b57cec5SDimitry Andric
4130349cc55cSDimitry Andric SDValue Hi = getHiHalf64(LHS, DAG);
41310b57cec5SDimitry Andric
41320b57cec5SDimitry Andric SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
41330b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
41340b57cec5SDimitry Andric
41350b57cec5SDimitry Andric SDValue BuildPair = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, Zero});
41360b57cec5SDimitry Andric
41370b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair);
41380b57cec5SDimitry Andric }
41390b57cec5SDimitry Andric
performTruncateCombine(SDNode * N,DAGCombinerInfo & DCI) const41400b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performTruncateCombine(
41410b57cec5SDimitry Andric SDNode *N, DAGCombinerInfo &DCI) const {
41420b57cec5SDimitry Andric SDLoc SL(N);
41430b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
41440b57cec5SDimitry Andric EVT VT = N->getValueType(0);
41450b57cec5SDimitry Andric SDValue Src = N->getOperand(0);
41460b57cec5SDimitry Andric
41470b57cec5SDimitry Andric // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x)
41480b57cec5SDimitry Andric if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) {
41490b57cec5SDimitry Andric SDValue Vec = Src.getOperand(0);
41500b57cec5SDimitry Andric if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
41510b57cec5SDimitry Andric SDValue Elt0 = Vec.getOperand(0);
41520b57cec5SDimitry Andric EVT EltVT = Elt0.getValueType();
4153e8d8bef9SDimitry Andric if (VT.getFixedSizeInBits() <= EltVT.getFixedSizeInBits()) {
41540b57cec5SDimitry Andric if (EltVT.isFloatingPoint()) {
41550b57cec5SDimitry Andric Elt0 = DAG.getNode(ISD::BITCAST, SL,
41560b57cec5SDimitry Andric EltVT.changeTypeToInteger(), Elt0);
41570b57cec5SDimitry Andric }
41580b57cec5SDimitry Andric
41590b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, VT, Elt0);
41600b57cec5SDimitry Andric }
41610b57cec5SDimitry Andric }
41620b57cec5SDimitry Andric }
41630b57cec5SDimitry Andric
41640b57cec5SDimitry Andric // Equivalent of above for accessing the high element of a vector as an
41650b57cec5SDimitry Andric // integer operation.
41660b57cec5SDimitry Andric // trunc (srl (bitcast (build_vector x, y))), 16 -> trunc (bitcast y)
41670b57cec5SDimitry Andric if (Src.getOpcode() == ISD::SRL && !VT.isVector()) {
41680b57cec5SDimitry Andric if (auto K = isConstOrConstSplat(Src.getOperand(1))) {
41690b57cec5SDimitry Andric if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) {
41700b57cec5SDimitry Andric SDValue BV = stripBitcast(Src.getOperand(0));
41710b57cec5SDimitry Andric if (BV.getOpcode() == ISD::BUILD_VECTOR &&
41720b57cec5SDimitry Andric BV.getValueType().getVectorNumElements() == 2) {
41730b57cec5SDimitry Andric SDValue SrcElt = BV.getOperand(1);
41740b57cec5SDimitry Andric EVT SrcEltVT = SrcElt.getValueType();
41750b57cec5SDimitry Andric if (SrcEltVT.isFloatingPoint()) {
41760b57cec5SDimitry Andric SrcElt = DAG.getNode(ISD::BITCAST, SL,
41770b57cec5SDimitry Andric SrcEltVT.changeTypeToInteger(), SrcElt);
41780b57cec5SDimitry Andric }
41790b57cec5SDimitry Andric
41800b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, VT, SrcElt);
41810b57cec5SDimitry Andric }
41820b57cec5SDimitry Andric }
41830b57cec5SDimitry Andric }
41840b57cec5SDimitry Andric }
41850b57cec5SDimitry Andric
41860b57cec5SDimitry Andric // Partially shrink 64-bit shifts to 32-bit if reduced to 16-bit.
41870b57cec5SDimitry Andric //
41880b57cec5SDimitry Andric // i16 (trunc (srl i64:x, K)), K <= 16 ->
41890b57cec5SDimitry Andric // i16 (trunc (srl (i32 (trunc x), K)))
41900b57cec5SDimitry Andric if (VT.getScalarSizeInBits() < 32) {
41910b57cec5SDimitry Andric EVT SrcVT = Src.getValueType();
41920b57cec5SDimitry Andric if (SrcVT.getScalarSizeInBits() > 32 &&
41930b57cec5SDimitry Andric (Src.getOpcode() == ISD::SRL ||
41940b57cec5SDimitry Andric Src.getOpcode() == ISD::SRA ||
41950b57cec5SDimitry Andric Src.getOpcode() == ISD::SHL)) {
41960b57cec5SDimitry Andric SDValue Amt = Src.getOperand(1);
41970b57cec5SDimitry Andric KnownBits Known = DAG.computeKnownBits(Amt);
4198bdd1243dSDimitry Andric
4199bdd1243dSDimitry Andric // - For left shifts, do the transform as long as the shift
4200bdd1243dSDimitry Andric // amount is still legal for i32, so when ShiftAmt < 32 (<= 31)
4201bdd1243dSDimitry Andric // - For right shift, do it if ShiftAmt <= (32 - Size) to avoid
4202bdd1243dSDimitry Andric // losing information stored in the high bits when truncating.
4203bdd1243dSDimitry Andric const unsigned MaxCstSize =
4204bdd1243dSDimitry Andric (Src.getOpcode() == ISD::SHL) ? 31 : (32 - VT.getScalarSizeInBits());
4205bdd1243dSDimitry Andric if (Known.getMaxValue().ule(MaxCstSize)) {
42060b57cec5SDimitry Andric EVT MidVT = VT.isVector() ?
42070b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), MVT::i32,
42080b57cec5SDimitry Andric VT.getVectorNumElements()) : MVT::i32;
42090b57cec5SDimitry Andric
42100b57cec5SDimitry Andric EVT NewShiftVT = getShiftAmountTy(MidVT, DAG.getDataLayout());
42110b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT,
42120b57cec5SDimitry Andric Src.getOperand(0));
42130b57cec5SDimitry Andric DCI.AddToWorklist(Trunc.getNode());
42140b57cec5SDimitry Andric
42150b57cec5SDimitry Andric if (Amt.getValueType() != NewShiftVT) {
42160b57cec5SDimitry Andric Amt = DAG.getZExtOrTrunc(Amt, SL, NewShiftVT);
42170b57cec5SDimitry Andric DCI.AddToWorklist(Amt.getNode());
42180b57cec5SDimitry Andric }
42190b57cec5SDimitry Andric
42200b57cec5SDimitry Andric SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT,
42210b57cec5SDimitry Andric Trunc, Amt);
42220b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift);
42230b57cec5SDimitry Andric }
42240b57cec5SDimitry Andric }
42250b57cec5SDimitry Andric }
42260b57cec5SDimitry Andric
42270b57cec5SDimitry Andric return SDValue();
42280b57cec5SDimitry Andric }
42290b57cec5SDimitry Andric
42300b57cec5SDimitry Andric // We need to specifically handle i64 mul here to avoid unnecessary conversion
42310b57cec5SDimitry Andric // instructions. If we only match on the legalized i64 mul expansion,
42320b57cec5SDimitry Andric // SimplifyDemandedBits will be unable to remove them because there will be
42330b57cec5SDimitry Andric // multiple uses due to the separate mul + mulh[su].
getMul24(SelectionDAG & DAG,const SDLoc & SL,SDValue N0,SDValue N1,unsigned Size,bool Signed)42340b57cec5SDimitry Andric static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL,
42350b57cec5SDimitry Andric SDValue N0, SDValue N1, unsigned Size, bool Signed) {
42360b57cec5SDimitry Andric if (Size <= 32) {
42370b57cec5SDimitry Andric unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
42380b57cec5SDimitry Andric return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1);
42390b57cec5SDimitry Andric }
42400b57cec5SDimitry Andric
4241e8d8bef9SDimitry Andric unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
4242e8d8bef9SDimitry Andric unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24;
42430b57cec5SDimitry Andric
4244e8d8bef9SDimitry Andric SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1);
4245e8d8bef9SDimitry Andric SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1);
42460b57cec5SDimitry Andric
4247e8d8bef9SDimitry Andric return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, MulLo, MulHi);
42480b57cec5SDimitry Andric }
42490b57cec5SDimitry Andric
425006c3fb27SDimitry Andric /// If \p V is an add of a constant 1, returns the other operand. Otherwise
425106c3fb27SDimitry Andric /// return SDValue().
getAddOneOp(const SDNode * V)425206c3fb27SDimitry Andric static SDValue getAddOneOp(const SDNode *V) {
425306c3fb27SDimitry Andric if (V->getOpcode() != ISD::ADD)
425406c3fb27SDimitry Andric return SDValue();
425506c3fb27SDimitry Andric
42565f757f3fSDimitry Andric return isOneConstant(V->getOperand(1)) ? V->getOperand(0) : SDValue();
425706c3fb27SDimitry Andric }
425806c3fb27SDimitry Andric
performMulCombine(SDNode * N,DAGCombinerInfo & DCI) const42590b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
42600b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
42610fca6ea1SDimitry Andric assert(N->getOpcode() == ISD::MUL);
42620b57cec5SDimitry Andric EVT VT = N->getValueType(0);
42630b57cec5SDimitry Andric
4264fe6060f1SDimitry Andric // Don't generate 24-bit multiplies on values that are in SGPRs, since
4265fe6060f1SDimitry Andric // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
4266fe6060f1SDimitry Andric // unnecessarily). isDivergent() is used as an approximation of whether the
4267fe6060f1SDimitry Andric // value is in an SGPR.
4268fe6060f1SDimitry Andric if (!N->isDivergent())
4269fe6060f1SDimitry Andric return SDValue();
4270fe6060f1SDimitry Andric
42710b57cec5SDimitry Andric unsigned Size = VT.getSizeInBits();
42720b57cec5SDimitry Andric if (VT.isVector() || Size > 64)
42730b57cec5SDimitry Andric return SDValue();
42740b57cec5SDimitry Andric
42750b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
42760b57cec5SDimitry Andric SDLoc DL(N);
42770b57cec5SDimitry Andric
42780b57cec5SDimitry Andric SDValue N0 = N->getOperand(0);
42790b57cec5SDimitry Andric SDValue N1 = N->getOperand(1);
42800b57cec5SDimitry Andric
428106c3fb27SDimitry Andric // Undo InstCombine canonicalize X * (Y + 1) -> X * Y + X to enable mad
428206c3fb27SDimitry Andric // matching.
428306c3fb27SDimitry Andric
428406c3fb27SDimitry Andric // mul x, (add y, 1) -> add (mul x, y), x
428506c3fb27SDimitry Andric auto IsFoldableAdd = [](SDValue V) -> SDValue {
428606c3fb27SDimitry Andric SDValue AddOp = getAddOneOp(V.getNode());
428706c3fb27SDimitry Andric if (!AddOp)
428806c3fb27SDimitry Andric return SDValue();
428906c3fb27SDimitry Andric
429006c3fb27SDimitry Andric if (V.hasOneUse() || all_of(V->uses(), [](const SDNode *U) -> bool {
429106c3fb27SDimitry Andric return U->getOpcode() == ISD::MUL;
429206c3fb27SDimitry Andric }))
429306c3fb27SDimitry Andric return AddOp;
429406c3fb27SDimitry Andric
429506c3fb27SDimitry Andric return SDValue();
429606c3fb27SDimitry Andric };
429706c3fb27SDimitry Andric
429806c3fb27SDimitry Andric // FIXME: The selection pattern is not properly checking for commuted
429906c3fb27SDimitry Andric // operands, so we have to place the mul in the LHS
430006c3fb27SDimitry Andric if (SDValue MulOper = IsFoldableAdd(N0)) {
430106c3fb27SDimitry Andric SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N1, MulOper);
430206c3fb27SDimitry Andric return DAG.getNode(ISD::ADD, DL, VT, MulVal, N1);
430306c3fb27SDimitry Andric }
430406c3fb27SDimitry Andric
430506c3fb27SDimitry Andric if (SDValue MulOper = IsFoldableAdd(N1)) {
430606c3fb27SDimitry Andric SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N0, MulOper);
430706c3fb27SDimitry Andric return DAG.getNode(ISD::ADD, DL, VT, MulVal, N0);
430806c3fb27SDimitry Andric }
430906c3fb27SDimitry Andric
431006c3fb27SDimitry Andric // There are i16 integer mul/mad.
431106c3fb27SDimitry Andric if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16))
431206c3fb27SDimitry Andric return SDValue();
431306c3fb27SDimitry Andric
43140b57cec5SDimitry Andric // SimplifyDemandedBits has the annoying habit of turning useful zero_extends
43150b57cec5SDimitry Andric // in the source into any_extends if the result of the mul is truncated. Since
43160b57cec5SDimitry Andric // we can assume the high bits are whatever we want, use the underlying value
43170b57cec5SDimitry Andric // to avoid the unknown high bits from interfering.
43180b57cec5SDimitry Andric if (N0.getOpcode() == ISD::ANY_EXTEND)
43190b57cec5SDimitry Andric N0 = N0.getOperand(0);
43200b57cec5SDimitry Andric
43210b57cec5SDimitry Andric if (N1.getOpcode() == ISD::ANY_EXTEND)
43220b57cec5SDimitry Andric N1 = N1.getOperand(0);
43230b57cec5SDimitry Andric
43240b57cec5SDimitry Andric SDValue Mul;
43250b57cec5SDimitry Andric
43260b57cec5SDimitry Andric if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
43270b57cec5SDimitry Andric N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
43280b57cec5SDimitry Andric N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
43290b57cec5SDimitry Andric Mul = getMul24(DAG, DL, N0, N1, Size, false);
43300b57cec5SDimitry Andric } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
43310b57cec5SDimitry Andric N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
43320b57cec5SDimitry Andric N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
43330b57cec5SDimitry Andric Mul = getMul24(DAG, DL, N0, N1, Size, true);
43340b57cec5SDimitry Andric } else {
43350b57cec5SDimitry Andric return SDValue();
43360b57cec5SDimitry Andric }
43370b57cec5SDimitry Andric
43380b57cec5SDimitry Andric // We need to use sext even for MUL_U24, because MUL_U24 is used
43390b57cec5SDimitry Andric // for signed multiply of 8 and 16-bit types.
43400b57cec5SDimitry Andric return DAG.getSExtOrTrunc(Mul, DL, VT);
43410b57cec5SDimitry Andric }
43420b57cec5SDimitry Andric
43434824e7fdSDimitry Andric SDValue
performMulLoHiCombine(SDNode * N,DAGCombinerInfo & DCI) const43444824e7fdSDimitry Andric AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
43454824e7fdSDimitry Andric DAGCombinerInfo &DCI) const {
43464824e7fdSDimitry Andric if (N->getValueType(0) != MVT::i32)
43474824e7fdSDimitry Andric return SDValue();
43484824e7fdSDimitry Andric
43494824e7fdSDimitry Andric SelectionDAG &DAG = DCI.DAG;
43504824e7fdSDimitry Andric SDLoc DL(N);
43514824e7fdSDimitry Andric
4352*c80e69b0SDimitry Andric bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
43534824e7fdSDimitry Andric SDValue N0 = N->getOperand(0);
43544824e7fdSDimitry Andric SDValue N1 = N->getOperand(1);
43554824e7fdSDimitry Andric
43564824e7fdSDimitry Andric // SimplifyDemandedBits has the annoying habit of turning useful zero_extends
43574824e7fdSDimitry Andric // in the source into any_extends if the result of the mul is truncated. Since
43584824e7fdSDimitry Andric // we can assume the high bits are whatever we want, use the underlying value
43594824e7fdSDimitry Andric // to avoid the unknown high bits from interfering.
43604824e7fdSDimitry Andric if (N0.getOpcode() == ISD::ANY_EXTEND)
43614824e7fdSDimitry Andric N0 = N0.getOperand(0);
43624824e7fdSDimitry Andric if (N1.getOpcode() == ISD::ANY_EXTEND)
43634824e7fdSDimitry Andric N1 = N1.getOperand(0);
43644824e7fdSDimitry Andric
43654824e7fdSDimitry Andric // Try to use two fast 24-bit multiplies (one for each half of the result)
43664824e7fdSDimitry Andric // instead of one slow extending multiply.
4367*c80e69b0SDimitry Andric unsigned LoOpcode = 0;
4368*c80e69b0SDimitry Andric unsigned HiOpcode = 0;
4369*c80e69b0SDimitry Andric if (Signed) {
4370*c80e69b0SDimitry Andric if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
4371*c80e69b0SDimitry Andric N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
4372*c80e69b0SDimitry Andric N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
4373*c80e69b0SDimitry Andric LoOpcode = AMDGPUISD::MUL_I24;
4374*c80e69b0SDimitry Andric HiOpcode = AMDGPUISD::MULHI_I24;
4375*c80e69b0SDimitry Andric }
4376*c80e69b0SDimitry Andric } else {
43774824e7fdSDimitry Andric if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
43784824e7fdSDimitry Andric N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
43794824e7fdSDimitry Andric N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
43804824e7fdSDimitry Andric LoOpcode = AMDGPUISD::MUL_U24;
43814824e7fdSDimitry Andric HiOpcode = AMDGPUISD::MULHI_U24;
43824824e7fdSDimitry Andric }
4383*c80e69b0SDimitry Andric }
4384*c80e69b0SDimitry Andric if (!LoOpcode)
4385*c80e69b0SDimitry Andric return SDValue();
43864824e7fdSDimitry Andric
43874824e7fdSDimitry Andric SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
43884824e7fdSDimitry Andric SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);
43894824e7fdSDimitry Andric DCI.CombineTo(N, Lo, Hi);
43904824e7fdSDimitry Andric return SDValue(N, 0);
43914824e7fdSDimitry Andric }
43924824e7fdSDimitry Andric
performMulhsCombine(SDNode * N,DAGCombinerInfo & DCI) const43930b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N,
43940b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
43950b57cec5SDimitry Andric EVT VT = N->getValueType(0);
43960b57cec5SDimitry Andric
43970b57cec5SDimitry Andric if (!Subtarget->hasMulI24() || VT.isVector())
43980b57cec5SDimitry Andric return SDValue();
43990b57cec5SDimitry Andric
4400fe6060f1SDimitry Andric // Don't generate 24-bit multiplies on values that are in SGPRs, since
4401fe6060f1SDimitry Andric // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
4402fe6060f1SDimitry Andric // unnecessarily). isDivergent() is used as an approximation of whether the
4403fe6060f1SDimitry Andric // value is in an SGPR.
4404fe6060f1SDimitry Andric // This doesn't apply if no s_mul_hi is available (since we'll end up with a
4405fe6060f1SDimitry Andric // valu op anyway)
4406fe6060f1SDimitry Andric if (Subtarget->hasSMulHi() && !N->isDivergent())
4407fe6060f1SDimitry Andric return SDValue();
4408fe6060f1SDimitry Andric
44090b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
44100b57cec5SDimitry Andric SDLoc DL(N);
44110b57cec5SDimitry Andric
44120b57cec5SDimitry Andric SDValue N0 = N->getOperand(0);
44130b57cec5SDimitry Andric SDValue N1 = N->getOperand(1);
44140b57cec5SDimitry Andric
44150b57cec5SDimitry Andric if (!isI24(N0, DAG) || !isI24(N1, DAG))
44160b57cec5SDimitry Andric return SDValue();
44170b57cec5SDimitry Andric
44180b57cec5SDimitry Andric N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
44190b57cec5SDimitry Andric N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
44200b57cec5SDimitry Andric
44210b57cec5SDimitry Andric SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1);
44220b57cec5SDimitry Andric DCI.AddToWorklist(Mulhi.getNode());
44230b57cec5SDimitry Andric return DAG.getSExtOrTrunc(Mulhi, DL, VT);
44240b57cec5SDimitry Andric }
44250b57cec5SDimitry Andric
performMulhuCombine(SDNode * N,DAGCombinerInfo & DCI) const44260b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
44270b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
44280b57cec5SDimitry Andric EVT VT = N->getValueType(0);
44290b57cec5SDimitry Andric
44300b57cec5SDimitry Andric if (!Subtarget->hasMulU24() || VT.isVector() || VT.getSizeInBits() > 32)
44310b57cec5SDimitry Andric return SDValue();
44320b57cec5SDimitry Andric
4433fe6060f1SDimitry Andric // Don't generate 24-bit multiplies on values that are in SGPRs, since
4434fe6060f1SDimitry Andric // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
4435fe6060f1SDimitry Andric // unnecessarily). isDivergent() is used as an approximation of whether the
4436fe6060f1SDimitry Andric // value is in an SGPR.
4437fe6060f1SDimitry Andric // This doesn't apply if no s_mul_hi is available (since we'll end up with a
4438fe6060f1SDimitry Andric // valu op anyway)
4439fe6060f1SDimitry Andric if (Subtarget->hasSMulHi() && !N->isDivergent())
4440fe6060f1SDimitry Andric return SDValue();
4441fe6060f1SDimitry Andric
44420b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
44430b57cec5SDimitry Andric SDLoc DL(N);
44440b57cec5SDimitry Andric
44450b57cec5SDimitry Andric SDValue N0 = N->getOperand(0);
44460b57cec5SDimitry Andric SDValue N1 = N->getOperand(1);
44470b57cec5SDimitry Andric
44480b57cec5SDimitry Andric if (!isU24(N0, DAG) || !isU24(N1, DAG))
44490b57cec5SDimitry Andric return SDValue();
44500b57cec5SDimitry Andric
44510b57cec5SDimitry Andric N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
44520b57cec5SDimitry Andric N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
44530b57cec5SDimitry Andric
44540b57cec5SDimitry Andric SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1);
44550b57cec5SDimitry Andric DCI.AddToWorklist(Mulhi.getNode());
44560b57cec5SDimitry Andric return DAG.getZExtOrTrunc(Mulhi, DL, VT);
44570b57cec5SDimitry Andric }
44580b57cec5SDimitry Andric
getFFBX_U32(SelectionDAG & DAG,SDValue Op,const SDLoc & DL,unsigned Opc) const44590b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG,
44600b57cec5SDimitry Andric SDValue Op,
44610b57cec5SDimitry Andric const SDLoc &DL,
44620b57cec5SDimitry Andric unsigned Opc) const {
44630b57cec5SDimitry Andric EVT VT = Op.getValueType();
44640b57cec5SDimitry Andric EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT);
44650b57cec5SDimitry Andric if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() &&
44660b57cec5SDimitry Andric LegalVT != MVT::i16))
44670b57cec5SDimitry Andric return SDValue();
44680b57cec5SDimitry Andric
44690b57cec5SDimitry Andric if (VT != MVT::i32)
44700b57cec5SDimitry Andric Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op);
44710b57cec5SDimitry Andric
44720b57cec5SDimitry Andric SDValue FFBX = DAG.getNode(Opc, DL, MVT::i32, Op);
44730b57cec5SDimitry Andric if (VT != MVT::i32)
44740b57cec5SDimitry Andric FFBX = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBX);
44750b57cec5SDimitry Andric
44760b57cec5SDimitry Andric return FFBX;
44770b57cec5SDimitry Andric }
44780b57cec5SDimitry Andric
44790b57cec5SDimitry Andric // The native instructions return -1 on 0 input. Optimize out a select that
44800b57cec5SDimitry Andric // produces -1 on 0.
44810b57cec5SDimitry Andric //
44820b57cec5SDimitry Andric // TODO: If zero is not undef, we could also do this if the output is compared
44830b57cec5SDimitry Andric // against the bitwidth.
44840b57cec5SDimitry Andric //
44850b57cec5SDimitry Andric // TODO: Should probably combine against FFBH_U32 instead of ctlz directly.
performCtlz_CttzCombine(const SDLoc & SL,SDValue Cond,SDValue LHS,SDValue RHS,DAGCombinerInfo & DCI) const44860b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond,
44870b57cec5SDimitry Andric SDValue LHS, SDValue RHS,
44880b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
44895f757f3fSDimitry Andric if (!isNullConstant(Cond.getOperand(1)))
44900b57cec5SDimitry Andric return SDValue();
44910b57cec5SDimitry Andric
44920b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
44930b57cec5SDimitry Andric ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
44940b57cec5SDimitry Andric SDValue CmpLHS = Cond.getOperand(0);
44950b57cec5SDimitry Andric
44960b57cec5SDimitry Andric // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
44970b57cec5SDimitry Andric // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x
44980b57cec5SDimitry Andric if (CCOpcode == ISD::SETEQ &&
44990b57cec5SDimitry Andric (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
450006c3fb27SDimitry Andric RHS.getOperand(0) == CmpLHS && isAllOnesConstant(LHS)) {
45015ffd83dbSDimitry Andric unsigned Opc =
45025ffd83dbSDimitry Andric isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
45030b57cec5SDimitry Andric return getFFBX_U32(DAG, CmpLHS, SL, Opc);
45040b57cec5SDimitry Andric }
45050b57cec5SDimitry Andric
45060b57cec5SDimitry Andric // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
45070b57cec5SDimitry Andric // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x
45080b57cec5SDimitry Andric if (CCOpcode == ISD::SETNE &&
45095ffd83dbSDimitry Andric (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) &&
451006c3fb27SDimitry Andric LHS.getOperand(0) == CmpLHS && isAllOnesConstant(RHS)) {
45115ffd83dbSDimitry Andric unsigned Opc =
45125ffd83dbSDimitry Andric isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
45135ffd83dbSDimitry Andric
45140b57cec5SDimitry Andric return getFFBX_U32(DAG, CmpLHS, SL, Opc);
45150b57cec5SDimitry Andric }
45160b57cec5SDimitry Andric
45170b57cec5SDimitry Andric return SDValue();
45180b57cec5SDimitry Andric }
45190b57cec5SDimitry Andric
distributeOpThroughSelect(TargetLowering::DAGCombinerInfo & DCI,unsigned Op,const SDLoc & SL,SDValue Cond,SDValue N1,SDValue N2)45200b57cec5SDimitry Andric static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI,
45210b57cec5SDimitry Andric unsigned Op,
45220b57cec5SDimitry Andric const SDLoc &SL,
45230b57cec5SDimitry Andric SDValue Cond,
45240b57cec5SDimitry Andric SDValue N1,
45250b57cec5SDimitry Andric SDValue N2) {
45260b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
45270b57cec5SDimitry Andric EVT VT = N1.getValueType();
45280b57cec5SDimitry Andric
45290b57cec5SDimitry Andric SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond,
45300b57cec5SDimitry Andric N1.getOperand(0), N2.getOperand(0));
45310b57cec5SDimitry Andric DCI.AddToWorklist(NewSelect.getNode());
45320b57cec5SDimitry Andric return DAG.getNode(Op, SL, VT, NewSelect);
45330b57cec5SDimitry Andric }
45340b57cec5SDimitry Andric
45350b57cec5SDimitry Andric // Pull a free FP operation out of a select so it may fold into uses.
45360b57cec5SDimitry Andric //
45370b57cec5SDimitry Andric // select c, (fneg x), (fneg y) -> fneg (select c, x, y)
45380b57cec5SDimitry Andric // select c, (fneg x), k -> fneg (select c, x, (fneg k))
45390b57cec5SDimitry Andric //
45400b57cec5SDimitry Andric // select c, (fabs x), (fabs y) -> fabs (select c, x, y)
45410b57cec5SDimitry Andric // select c, (fabs x), +k -> fabs (select c, x, k)
454206c3fb27SDimitry Andric SDValue
foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo & DCI,SDValue N) const454306c3fb27SDimitry Andric AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
454406c3fb27SDimitry Andric SDValue N) const {
45450b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
45460b57cec5SDimitry Andric SDValue Cond = N.getOperand(0);
45470b57cec5SDimitry Andric SDValue LHS = N.getOperand(1);
45480b57cec5SDimitry Andric SDValue RHS = N.getOperand(2);
45490b57cec5SDimitry Andric
45500b57cec5SDimitry Andric EVT VT = N.getValueType();
45510b57cec5SDimitry Andric if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
45520b57cec5SDimitry Andric (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {
455306c3fb27SDimitry Andric if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
455406c3fb27SDimitry Andric return SDValue();
455506c3fb27SDimitry Andric
45560b57cec5SDimitry Andric return distributeOpThroughSelect(DCI, LHS.getOpcode(),
45570b57cec5SDimitry Andric SDLoc(N), Cond, LHS, RHS);
45580b57cec5SDimitry Andric }
45590b57cec5SDimitry Andric
45600b57cec5SDimitry Andric bool Inv = false;
45610b57cec5SDimitry Andric if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) {
45620b57cec5SDimitry Andric std::swap(LHS, RHS);
45630b57cec5SDimitry Andric Inv = true;
45640b57cec5SDimitry Andric }
45650b57cec5SDimitry Andric
45660b57cec5SDimitry Andric // TODO: Support vector constants.
45670b57cec5SDimitry Andric ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
456806c3fb27SDimitry Andric if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS &&
456906c3fb27SDimitry Andric !selectSupportsSourceMods(N.getNode())) {
45700b57cec5SDimitry Andric SDLoc SL(N);
45710b57cec5SDimitry Andric // If one side is an fneg/fabs and the other is a constant, we can push the
45720b57cec5SDimitry Andric // fneg/fabs down. If it's an fabs, the constant needs to be non-negative.
45730b57cec5SDimitry Andric SDValue NewLHS = LHS.getOperand(0);
45740b57cec5SDimitry Andric SDValue NewRHS = RHS;
45750b57cec5SDimitry Andric
45760b57cec5SDimitry Andric // Careful: if the neg can be folded up, don't try to pull it back down.
45770b57cec5SDimitry Andric bool ShouldFoldNeg = true;
45780b57cec5SDimitry Andric
45790b57cec5SDimitry Andric if (NewLHS.hasOneUse()) {
45800b57cec5SDimitry Andric unsigned Opc = NewLHS.getOpcode();
458106c3fb27SDimitry Andric if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(NewLHS.getNode()))
45820b57cec5SDimitry Andric ShouldFoldNeg = false;
45830b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
45840b57cec5SDimitry Andric ShouldFoldNeg = false;
45850b57cec5SDimitry Andric }
45860b57cec5SDimitry Andric
45870b57cec5SDimitry Andric if (ShouldFoldNeg) {
458806c3fb27SDimitry Andric if (LHS.getOpcode() == ISD::FABS && CRHS->isNegative())
458906c3fb27SDimitry Andric return SDValue();
459006c3fb27SDimitry Andric
459106c3fb27SDimitry Andric // We're going to be forced to use a source modifier anyway, there's no
459206c3fb27SDimitry Andric // point to pulling the negate out unless we can get a size reduction by
459306c3fb27SDimitry Andric // negating the constant.
459406c3fb27SDimitry Andric //
459506c3fb27SDimitry Andric // TODO: Generalize to use getCheaperNegatedExpression which doesn't know
459606c3fb27SDimitry Andric // about cheaper constants.
459706c3fb27SDimitry Andric if (NewLHS.getOpcode() == ISD::FABS &&
459806c3fb27SDimitry Andric getConstantNegateCost(CRHS) != NegatibleCost::Cheaper)
459906c3fb27SDimitry Andric return SDValue();
460006c3fb27SDimitry Andric
460106c3fb27SDimitry Andric if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
460206c3fb27SDimitry Andric return SDValue();
460306c3fb27SDimitry Andric
46040b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG)
46050b57cec5SDimitry Andric NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
46060b57cec5SDimitry Andric
46070b57cec5SDimitry Andric if (Inv)
46080b57cec5SDimitry Andric std::swap(NewLHS, NewRHS);
46090b57cec5SDimitry Andric
46100b57cec5SDimitry Andric SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
46110b57cec5SDimitry Andric Cond, NewLHS, NewRHS);
46120b57cec5SDimitry Andric DCI.AddToWorklist(NewSelect.getNode());
46130b57cec5SDimitry Andric return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
46140b57cec5SDimitry Andric }
46150b57cec5SDimitry Andric }
46160b57cec5SDimitry Andric
46170b57cec5SDimitry Andric return SDValue();
46180b57cec5SDimitry Andric }
46190b57cec5SDimitry Andric
performSelectCombine(SDNode * N,DAGCombinerInfo & DCI) const46200b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
46210b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
46220b57cec5SDimitry Andric if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
46230b57cec5SDimitry Andric return Folded;
46240b57cec5SDimitry Andric
46250b57cec5SDimitry Andric SDValue Cond = N->getOperand(0);
46260b57cec5SDimitry Andric if (Cond.getOpcode() != ISD::SETCC)
46270b57cec5SDimitry Andric return SDValue();
46280b57cec5SDimitry Andric
46290b57cec5SDimitry Andric EVT VT = N->getValueType(0);
46300b57cec5SDimitry Andric SDValue LHS = Cond.getOperand(0);
46310b57cec5SDimitry Andric SDValue RHS = Cond.getOperand(1);
46320b57cec5SDimitry Andric SDValue CC = Cond.getOperand(2);
46330b57cec5SDimitry Andric
46340b57cec5SDimitry Andric SDValue True = N->getOperand(1);
46350b57cec5SDimitry Andric SDValue False = N->getOperand(2);
46360b57cec5SDimitry Andric
46370b57cec5SDimitry Andric if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
46380b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
46390b57cec5SDimitry Andric if (DAG.isConstantValueOfAnyType(True) &&
46400b57cec5SDimitry Andric !DAG.isConstantValueOfAnyType(False)) {
46410b57cec5SDimitry Andric // Swap cmp + select pair to move constant to false input.
46420b57cec5SDimitry Andric // This will allow using VOPC cndmasks more often.
46430b57cec5SDimitry Andric // select (setcc x, y), k, x -> select (setccinv x, y), x, k
46440b57cec5SDimitry Andric
46450b57cec5SDimitry Andric SDLoc SL(N);
4646480093f4SDimitry Andric ISD::CondCode NewCC =
4647480093f4SDimitry Andric getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType());
46480b57cec5SDimitry Andric
46490b57cec5SDimitry Andric SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
46500b57cec5SDimitry Andric return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
46510b57cec5SDimitry Andric }
46520b57cec5SDimitry Andric
46530b57cec5SDimitry Andric if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
46540b57cec5SDimitry Andric SDValue MinMax
46550b57cec5SDimitry Andric = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
46560b57cec5SDimitry Andric // Revisit this node so we can catch min3/max3/med3 patterns.
46570b57cec5SDimitry Andric //DCI.AddToWorklist(MinMax.getNode());
46580b57cec5SDimitry Andric return MinMax;
46590b57cec5SDimitry Andric }
46600b57cec5SDimitry Andric }
46610b57cec5SDimitry Andric
46620b57cec5SDimitry Andric // There's no reason to not do this if the condition has other uses.
46630b57cec5SDimitry Andric return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
46640b57cec5SDimitry Andric }
46650b57cec5SDimitry Andric
isInv2Pi(const APFloat & APF)46660b57cec5SDimitry Andric static bool isInv2Pi(const APFloat &APF) {
46670b57cec5SDimitry Andric static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
46680b57cec5SDimitry Andric static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
46690b57cec5SDimitry Andric static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882));
46700b57cec5SDimitry Andric
46710b57cec5SDimitry Andric return APF.bitwiseIsEqual(KF16) ||
46720b57cec5SDimitry Andric APF.bitwiseIsEqual(KF32) ||
46730b57cec5SDimitry Andric APF.bitwiseIsEqual(KF64);
46740b57cec5SDimitry Andric }
46750b57cec5SDimitry Andric
46760b57cec5SDimitry Andric // 0 and 1.0 / (0.5 * pi) do not have inline immmediates, so there is an
46770b57cec5SDimitry Andric // additional cost to negate them.
467806c3fb27SDimitry Andric TargetLowering::NegatibleCost
getConstantNegateCost(const ConstantFPSDNode * C) const467906c3fb27SDimitry Andric AMDGPUTargetLowering::getConstantNegateCost(const ConstantFPSDNode *C) const {
468006c3fb27SDimitry Andric if (C->isZero())
468106c3fb27SDimitry Andric return C->isNegative() ? NegatibleCost::Cheaper : NegatibleCost::Expensive;
46820b57cec5SDimitry Andric
46830b57cec5SDimitry Andric if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))
468406c3fb27SDimitry Andric return C->isNegative() ? NegatibleCost::Cheaper : NegatibleCost::Expensive;
468506c3fb27SDimitry Andric
468606c3fb27SDimitry Andric return NegatibleCost::Neutral;
46870b57cec5SDimitry Andric }
46880b57cec5SDimitry Andric
isConstantCostlierToNegate(SDValue N) const468906c3fb27SDimitry Andric bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const {
469006c3fb27SDimitry Andric if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
469106c3fb27SDimitry Andric return getConstantNegateCost(C) == NegatibleCost::Expensive;
469206c3fb27SDimitry Andric return false;
469306c3fb27SDimitry Andric }
469406c3fb27SDimitry Andric
isConstantCheaperToNegate(SDValue N) const469506c3fb27SDimitry Andric bool AMDGPUTargetLowering::isConstantCheaperToNegate(SDValue N) const {
469606c3fb27SDimitry Andric if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
469706c3fb27SDimitry Andric return getConstantNegateCost(C) == NegatibleCost::Cheaper;
46980b57cec5SDimitry Andric return false;
46990b57cec5SDimitry Andric }
47000b57cec5SDimitry Andric
inverseMinMax(unsigned Opc)47010b57cec5SDimitry Andric static unsigned inverseMinMax(unsigned Opc) {
47020b57cec5SDimitry Andric switch (Opc) {
47030b57cec5SDimitry Andric case ISD::FMAXNUM:
47040b57cec5SDimitry Andric return ISD::FMINNUM;
47050b57cec5SDimitry Andric case ISD::FMINNUM:
47060b57cec5SDimitry Andric return ISD::FMAXNUM;
47070b57cec5SDimitry Andric case ISD::FMAXNUM_IEEE:
47080b57cec5SDimitry Andric return ISD::FMINNUM_IEEE;
47090b57cec5SDimitry Andric case ISD::FMINNUM_IEEE:
47100b57cec5SDimitry Andric return ISD::FMAXNUM_IEEE;
47115f757f3fSDimitry Andric case ISD::FMAXIMUM:
47125f757f3fSDimitry Andric return ISD::FMINIMUM;
47135f757f3fSDimitry Andric case ISD::FMINIMUM:
47145f757f3fSDimitry Andric return ISD::FMAXIMUM;
47150b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY:
47160b57cec5SDimitry Andric return AMDGPUISD::FMIN_LEGACY;
47170b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY:
47180b57cec5SDimitry Andric return AMDGPUISD::FMAX_LEGACY;
47190b57cec5SDimitry Andric default:
47200b57cec5SDimitry Andric llvm_unreachable("invalid min/max opcode");
47210b57cec5SDimitry Andric }
47220b57cec5SDimitry Andric }
47230b57cec5SDimitry Andric
472406c3fb27SDimitry Andric /// \return true if it's profitable to try to push an fneg into its source
472506c3fb27SDimitry Andric /// instruction.
shouldFoldFNegIntoSrc(SDNode * N,SDValue N0)472606c3fb27SDimitry Andric bool AMDGPUTargetLowering::shouldFoldFNegIntoSrc(SDNode *N, SDValue N0) {
47270b57cec5SDimitry Andric // If the input has multiple uses and we can either fold the negate down, or
47280b57cec5SDimitry Andric // the other uses cannot, give up. This both prevents unprofitable
47290b57cec5SDimitry Andric // transformations and infinite loops: we won't repeatedly try to fold around
47300b57cec5SDimitry Andric // a negate that has no 'good' form.
47310b57cec5SDimitry Andric if (N0.hasOneUse()) {
47320b57cec5SDimitry Andric // This may be able to fold into the source, but at a code size cost. Don't
47330b57cec5SDimitry Andric // fold if the fold into the user is free.
47340b57cec5SDimitry Andric if (allUsesHaveSourceMods(N, 0))
473506c3fb27SDimitry Andric return false;
47360b57cec5SDimitry Andric } else {
473706c3fb27SDimitry Andric if (fnegFoldsIntoOp(N0.getNode()) &&
47380b57cec5SDimitry Andric (allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode())))
473906c3fb27SDimitry Andric return false;
47400b57cec5SDimitry Andric }
47410b57cec5SDimitry Andric
474206c3fb27SDimitry Andric return true;
474306c3fb27SDimitry Andric }
474406c3fb27SDimitry Andric
performFNegCombine(SDNode * N,DAGCombinerInfo & DCI) const474506c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
474606c3fb27SDimitry Andric DAGCombinerInfo &DCI) const {
474706c3fb27SDimitry Andric SelectionDAG &DAG = DCI.DAG;
474806c3fb27SDimitry Andric SDValue N0 = N->getOperand(0);
474906c3fb27SDimitry Andric EVT VT = N->getValueType(0);
475006c3fb27SDimitry Andric
475106c3fb27SDimitry Andric unsigned Opc = N0.getOpcode();
475206c3fb27SDimitry Andric
475306c3fb27SDimitry Andric if (!shouldFoldFNegIntoSrc(N, N0))
475406c3fb27SDimitry Andric return SDValue();
475506c3fb27SDimitry Andric
47560b57cec5SDimitry Andric SDLoc SL(N);
47570b57cec5SDimitry Andric switch (Opc) {
47580b57cec5SDimitry Andric case ISD::FADD: {
47590b57cec5SDimitry Andric if (!mayIgnoreSignedZero(N0))
47600b57cec5SDimitry Andric return SDValue();
47610b57cec5SDimitry Andric
47620b57cec5SDimitry Andric // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
47630b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0);
47640b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1);
47650b57cec5SDimitry Andric
47660b57cec5SDimitry Andric if (LHS.getOpcode() != ISD::FNEG)
47670b57cec5SDimitry Andric LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
47680b57cec5SDimitry Andric else
47690b57cec5SDimitry Andric LHS = LHS.getOperand(0);
47700b57cec5SDimitry Andric
47710b57cec5SDimitry Andric if (RHS.getOpcode() != ISD::FNEG)
47720b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
47730b57cec5SDimitry Andric else
47740b57cec5SDimitry Andric RHS = RHS.getOperand(0);
47750b57cec5SDimitry Andric
47760b57cec5SDimitry Andric SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags());
47770b57cec5SDimitry Andric if (Res.getOpcode() != ISD::FADD)
47780b57cec5SDimitry Andric return SDValue(); // Op got folded away.
47790b57cec5SDimitry Andric if (!N0.hasOneUse())
47800b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
47810b57cec5SDimitry Andric return Res;
47820b57cec5SDimitry Andric }
47830b57cec5SDimitry Andric case ISD::FMUL:
47840b57cec5SDimitry Andric case AMDGPUISD::FMUL_LEGACY: {
47850b57cec5SDimitry Andric // (fneg (fmul x, y)) -> (fmul x, (fneg y))
47860b57cec5SDimitry Andric // (fneg (fmul_legacy x, y)) -> (fmul_legacy x, (fneg y))
47870b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0);
47880b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1);
47890b57cec5SDimitry Andric
47900b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG)
47910b57cec5SDimitry Andric LHS = LHS.getOperand(0);
47920b57cec5SDimitry Andric else if (RHS.getOpcode() == ISD::FNEG)
47930b57cec5SDimitry Andric RHS = RHS.getOperand(0);
47940b57cec5SDimitry Andric else
47950b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
47960b57cec5SDimitry Andric
47970b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags());
47980b57cec5SDimitry Andric if (Res.getOpcode() != Opc)
47990b57cec5SDimitry Andric return SDValue(); // Op got folded away.
48000b57cec5SDimitry Andric if (!N0.hasOneUse())
48010b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
48020b57cec5SDimitry Andric return Res;
48030b57cec5SDimitry Andric }
48040b57cec5SDimitry Andric case ISD::FMA:
48050b57cec5SDimitry Andric case ISD::FMAD: {
4806e8d8bef9SDimitry Andric // TODO: handle llvm.amdgcn.fma.legacy
48070b57cec5SDimitry Andric if (!mayIgnoreSignedZero(N0))
48080b57cec5SDimitry Andric return SDValue();
48090b57cec5SDimitry Andric
48100b57cec5SDimitry Andric // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
48110b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0);
48120b57cec5SDimitry Andric SDValue MHS = N0.getOperand(1);
48130b57cec5SDimitry Andric SDValue RHS = N0.getOperand(2);
48140b57cec5SDimitry Andric
48150b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG)
48160b57cec5SDimitry Andric LHS = LHS.getOperand(0);
48170b57cec5SDimitry Andric else if (MHS.getOpcode() == ISD::FNEG)
48180b57cec5SDimitry Andric MHS = MHS.getOperand(0);
48190b57cec5SDimitry Andric else
48200b57cec5SDimitry Andric MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS);
48210b57cec5SDimitry Andric
48220b57cec5SDimitry Andric if (RHS.getOpcode() != ISD::FNEG)
48230b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
48240b57cec5SDimitry Andric else
48250b57cec5SDimitry Andric RHS = RHS.getOperand(0);
48260b57cec5SDimitry Andric
48270b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS);
48280b57cec5SDimitry Andric if (Res.getOpcode() != Opc)
48290b57cec5SDimitry Andric return SDValue(); // Op got folded away.
48300b57cec5SDimitry Andric if (!N0.hasOneUse())
48310b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
48320b57cec5SDimitry Andric return Res;
48330b57cec5SDimitry Andric }
48340b57cec5SDimitry Andric case ISD::FMAXNUM:
48350b57cec5SDimitry Andric case ISD::FMINNUM:
48360b57cec5SDimitry Andric case ISD::FMAXNUM_IEEE:
48370b57cec5SDimitry Andric case ISD::FMINNUM_IEEE:
48385f757f3fSDimitry Andric case ISD::FMINIMUM:
48395f757f3fSDimitry Andric case ISD::FMAXIMUM:
48400b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY:
48410b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: {
48420b57cec5SDimitry Andric // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
48430b57cec5SDimitry Andric // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y)
48440b57cec5SDimitry Andric // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y)
48450b57cec5SDimitry Andric // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y)
48460b57cec5SDimitry Andric
48470b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0);
48480b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1);
48490b57cec5SDimitry Andric
48500b57cec5SDimitry Andric // 0 doesn't have a negated inline immediate.
48510b57cec5SDimitry Andric // TODO: This constant check should be generalized to other operations.
48520b57cec5SDimitry Andric if (isConstantCostlierToNegate(RHS))
48530b57cec5SDimitry Andric return SDValue();
48540b57cec5SDimitry Andric
48550b57cec5SDimitry Andric SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
48560b57cec5SDimitry Andric SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
48570b57cec5SDimitry Andric unsigned Opposite = inverseMinMax(Opc);
48580b57cec5SDimitry Andric
48590b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
48600b57cec5SDimitry Andric if (Res.getOpcode() != Opposite)
48610b57cec5SDimitry Andric return SDValue(); // Op got folded away.
48620b57cec5SDimitry Andric if (!N0.hasOneUse())
48630b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
48640b57cec5SDimitry Andric return Res;
48650b57cec5SDimitry Andric }
48660b57cec5SDimitry Andric case AMDGPUISD::FMED3: {
48670b57cec5SDimitry Andric SDValue Ops[3];
48680b57cec5SDimitry Andric for (unsigned I = 0; I < 3; ++I)
48690b57cec5SDimitry Andric Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags());
48700b57cec5SDimitry Andric
48710b57cec5SDimitry Andric SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
48720b57cec5SDimitry Andric if (Res.getOpcode() != AMDGPUISD::FMED3)
48730b57cec5SDimitry Andric return SDValue(); // Op got folded away.
4874e8d8bef9SDimitry Andric
4875e8d8bef9SDimitry Andric if (!N0.hasOneUse()) {
4876e8d8bef9SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Res);
4877e8d8bef9SDimitry Andric DAG.ReplaceAllUsesWith(N0, Neg);
4878e8d8bef9SDimitry Andric
4879e8d8bef9SDimitry Andric for (SDNode *U : Neg->uses())
4880e8d8bef9SDimitry Andric DCI.AddToWorklist(U);
4881e8d8bef9SDimitry Andric }
4882e8d8bef9SDimitry Andric
48830b57cec5SDimitry Andric return Res;
48840b57cec5SDimitry Andric }
48850b57cec5SDimitry Andric case ISD::FP_EXTEND:
48860b57cec5SDimitry Andric case ISD::FTRUNC:
48870b57cec5SDimitry Andric case ISD::FRINT:
48880b57cec5SDimitry Andric case ISD::FNEARBYINT: // XXX - Should fround be handled?
48895f757f3fSDimitry Andric case ISD::FROUNDEVEN:
48900b57cec5SDimitry Andric case ISD::FSIN:
48910b57cec5SDimitry Andric case ISD::FCANONICALIZE:
48920b57cec5SDimitry Andric case AMDGPUISD::RCP:
48930b57cec5SDimitry Andric case AMDGPUISD::RCP_LEGACY:
48940b57cec5SDimitry Andric case AMDGPUISD::RCP_IFLAG:
48950b57cec5SDimitry Andric case AMDGPUISD::SIN_HW: {
48960b57cec5SDimitry Andric SDValue CvtSrc = N0.getOperand(0);
48970b57cec5SDimitry Andric if (CvtSrc.getOpcode() == ISD::FNEG) {
48980b57cec5SDimitry Andric // (fneg (fp_extend (fneg x))) -> (fp_extend x)
48990b57cec5SDimitry Andric // (fneg (rcp (fneg x))) -> (rcp x)
49000b57cec5SDimitry Andric return DAG.getNode(Opc, SL, VT, CvtSrc.getOperand(0));
49010b57cec5SDimitry Andric }
49020b57cec5SDimitry Andric
49030b57cec5SDimitry Andric if (!N0.hasOneUse())
49040b57cec5SDimitry Andric return SDValue();
49050b57cec5SDimitry Andric
49060b57cec5SDimitry Andric // (fneg (fp_extend x)) -> (fp_extend (fneg x))
49070b57cec5SDimitry Andric // (fneg (rcp x)) -> (rcp (fneg x))
49080b57cec5SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
49090b57cec5SDimitry Andric return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags());
49100b57cec5SDimitry Andric }
49110b57cec5SDimitry Andric case ISD::FP_ROUND: {
49120b57cec5SDimitry Andric SDValue CvtSrc = N0.getOperand(0);
49130b57cec5SDimitry Andric
49140b57cec5SDimitry Andric if (CvtSrc.getOpcode() == ISD::FNEG) {
49150b57cec5SDimitry Andric // (fneg (fp_round (fneg x))) -> (fp_round x)
49160b57cec5SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT,
49170b57cec5SDimitry Andric CvtSrc.getOperand(0), N0.getOperand(1));
49180b57cec5SDimitry Andric }
49190b57cec5SDimitry Andric
49200b57cec5SDimitry Andric if (!N0.hasOneUse())
49210b57cec5SDimitry Andric return SDValue();
49220b57cec5SDimitry Andric
49230b57cec5SDimitry Andric // (fneg (fp_round x)) -> (fp_round (fneg x))
49240b57cec5SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
49250b57cec5SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1));
49260b57cec5SDimitry Andric }
49270b57cec5SDimitry Andric case ISD::FP16_TO_FP: {
49280b57cec5SDimitry Andric // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal
49290b57cec5SDimitry Andric // f16, but legalization of f16 fneg ends up pulling it out of the source.
49300b57cec5SDimitry Andric // Put the fneg back as a legal source operation that can be matched later.
49310b57cec5SDimitry Andric SDLoc SL(N);
49320b57cec5SDimitry Andric
49330b57cec5SDimitry Andric SDValue Src = N0.getOperand(0);
49340b57cec5SDimitry Andric EVT SrcVT = Src.getValueType();
49350b57cec5SDimitry Andric
49360b57cec5SDimitry Andric // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000)
49370b57cec5SDimitry Andric SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src,
49380b57cec5SDimitry Andric DAG.getConstant(0x8000, SL, SrcVT));
49390b57cec5SDimitry Andric return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);
49400b57cec5SDimitry Andric }
494106c3fb27SDimitry Andric case ISD::SELECT: {
494206c3fb27SDimitry Andric // fneg (select c, a, b) -> select c, (fneg a), (fneg b)
494306c3fb27SDimitry Andric // TODO: Invert conditions of foldFreeOpFromSelect
494406c3fb27SDimitry Andric return SDValue();
494506c3fb27SDimitry Andric }
494606c3fb27SDimitry Andric case ISD::BITCAST: {
494706c3fb27SDimitry Andric SDLoc SL(N);
494806c3fb27SDimitry Andric SDValue BCSrc = N0.getOperand(0);
494906c3fb27SDimitry Andric if (BCSrc.getOpcode() == ISD::BUILD_VECTOR) {
495006c3fb27SDimitry Andric SDValue HighBits = BCSrc.getOperand(BCSrc.getNumOperands() - 1);
495106c3fb27SDimitry Andric if (HighBits.getValueType().getSizeInBits() != 32 ||
495206c3fb27SDimitry Andric !fnegFoldsIntoOp(HighBits.getNode()))
495306c3fb27SDimitry Andric return SDValue();
495406c3fb27SDimitry Andric
495506c3fb27SDimitry Andric // f64 fneg only really needs to operate on the high half of of the
495606c3fb27SDimitry Andric // register, so try to force it to an f32 operation to help make use of
495706c3fb27SDimitry Andric // source modifiers.
495806c3fb27SDimitry Andric //
495906c3fb27SDimitry Andric //
496006c3fb27SDimitry Andric // fneg (f64 (bitcast (build_vector x, y))) ->
496106c3fb27SDimitry Andric // f64 (bitcast (build_vector (bitcast i32:x to f32),
496206c3fb27SDimitry Andric // (fneg (bitcast i32:y to f32)))
496306c3fb27SDimitry Andric
496406c3fb27SDimitry Andric SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, MVT::f32, HighBits);
496506c3fb27SDimitry Andric SDValue NegHi = DAG.getNode(ISD::FNEG, SL, MVT::f32, CastHi);
496606c3fb27SDimitry Andric SDValue CastBack =
496706c3fb27SDimitry Andric DAG.getNode(ISD::BITCAST, SL, HighBits.getValueType(), NegHi);
496806c3fb27SDimitry Andric
496906c3fb27SDimitry Andric SmallVector<SDValue, 8> Ops(BCSrc->op_begin(), BCSrc->op_end());
497006c3fb27SDimitry Andric Ops.back() = CastBack;
497106c3fb27SDimitry Andric DCI.AddToWorklist(NegHi.getNode());
497206c3fb27SDimitry Andric SDValue Build =
497306c3fb27SDimitry Andric DAG.getNode(ISD::BUILD_VECTOR, SL, BCSrc.getValueType(), Ops);
497406c3fb27SDimitry Andric SDValue Result = DAG.getNode(ISD::BITCAST, SL, VT, Build);
497506c3fb27SDimitry Andric
497606c3fb27SDimitry Andric if (!N0.hasOneUse())
497706c3fb27SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Result));
497806c3fb27SDimitry Andric return Result;
497906c3fb27SDimitry Andric }
498006c3fb27SDimitry Andric
498106c3fb27SDimitry Andric if (BCSrc.getOpcode() == ISD::SELECT && VT == MVT::f32 &&
498206c3fb27SDimitry Andric BCSrc.hasOneUse()) {
498306c3fb27SDimitry Andric // fneg (bitcast (f32 (select cond, i32:lhs, i32:rhs))) ->
498406c3fb27SDimitry Andric // select cond, (bitcast i32:lhs to f32), (bitcast i32:rhs to f32)
498506c3fb27SDimitry Andric
498606c3fb27SDimitry Andric // TODO: Cast back result for multiple uses is beneficial in some cases.
498706c3fb27SDimitry Andric
498806c3fb27SDimitry Andric SDValue LHS =
498906c3fb27SDimitry Andric DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(1));
499006c3fb27SDimitry Andric SDValue RHS =
499106c3fb27SDimitry Andric DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(2));
499206c3fb27SDimitry Andric
499306c3fb27SDimitry Andric SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, LHS);
499406c3fb27SDimitry Andric SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, RHS);
499506c3fb27SDimitry Andric
499606c3fb27SDimitry Andric return DAG.getNode(ISD::SELECT, SL, MVT::f32, BCSrc.getOperand(0), NegLHS,
499706c3fb27SDimitry Andric NegRHS);
499806c3fb27SDimitry Andric }
499906c3fb27SDimitry Andric
500006c3fb27SDimitry Andric return SDValue();
500106c3fb27SDimitry Andric }
50020b57cec5SDimitry Andric default:
50030b57cec5SDimitry Andric return SDValue();
50040b57cec5SDimitry Andric }
50050b57cec5SDimitry Andric }
50060b57cec5SDimitry Andric
performFAbsCombine(SDNode * N,DAGCombinerInfo & DCI) const50070b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N,
50080b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
50090b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
50100b57cec5SDimitry Andric SDValue N0 = N->getOperand(0);
50110b57cec5SDimitry Andric
50120b57cec5SDimitry Andric if (!N0.hasOneUse())
50130b57cec5SDimitry Andric return SDValue();
50140b57cec5SDimitry Andric
50150b57cec5SDimitry Andric switch (N0.getOpcode()) {
50160b57cec5SDimitry Andric case ISD::FP16_TO_FP: {
50170b57cec5SDimitry Andric assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal");
50180b57cec5SDimitry Andric SDLoc SL(N);
50190b57cec5SDimitry Andric SDValue Src = N0.getOperand(0);
50200b57cec5SDimitry Andric EVT SrcVT = Src.getValueType();
50210b57cec5SDimitry Andric
50220b57cec5SDimitry Andric // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff)
50230b57cec5SDimitry Andric SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src,
50240b57cec5SDimitry Andric DAG.getConstant(0x7fff, SL, SrcVT));
50250b57cec5SDimitry Andric return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs);
50260b57cec5SDimitry Andric }
50270b57cec5SDimitry Andric default:
50280b57cec5SDimitry Andric return SDValue();
50290b57cec5SDimitry Andric }
50300b57cec5SDimitry Andric }
50310b57cec5SDimitry Andric
performRcpCombine(SDNode * N,DAGCombinerInfo & DCI) const50320b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
50330b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
50340b57cec5SDimitry Andric const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
50350b57cec5SDimitry Andric if (!CFP)
50360b57cec5SDimitry Andric return SDValue();
50370b57cec5SDimitry Andric
50380b57cec5SDimitry Andric // XXX - Should this flush denormals?
50390b57cec5SDimitry Andric const APFloat &Val = CFP->getValueAPF();
50400b57cec5SDimitry Andric APFloat One(Val.getSemantics(), "1.0");
50410b57cec5SDimitry Andric return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
50420b57cec5SDimitry Andric }
50430b57cec5SDimitry Andric
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const50440b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
50450b57cec5SDimitry Andric DAGCombinerInfo &DCI) const {
50460b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG;
50470b57cec5SDimitry Andric SDLoc DL(N);
50480b57cec5SDimitry Andric
50490b57cec5SDimitry Andric switch(N->getOpcode()) {
50500b57cec5SDimitry Andric default:
50510b57cec5SDimitry Andric break;
50520b57cec5SDimitry Andric case ISD::BITCAST: {
50530b57cec5SDimitry Andric EVT DestVT = N->getValueType(0);
50540b57cec5SDimitry Andric
50550b57cec5SDimitry Andric // Push casts through vector builds. This helps avoid emitting a large
50560b57cec5SDimitry Andric // number of copies when materializing floating point vector constants.
50570b57cec5SDimitry Andric //
50580b57cec5SDimitry Andric // vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) =>
50590b57cec5SDimitry Andric // vnt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y))
50600b57cec5SDimitry Andric if (DestVT.isVector()) {
50610b57cec5SDimitry Andric SDValue Src = N->getOperand(0);
50621db9f3b2SDimitry Andric if (Src.getOpcode() == ISD::BUILD_VECTOR &&
50631db9f3b2SDimitry Andric (DCI.getDAGCombineLevel() < AfterLegalizeDAG ||
50641db9f3b2SDimitry Andric isOperationLegal(ISD::BUILD_VECTOR, DestVT))) {
50650b57cec5SDimitry Andric EVT SrcVT = Src.getValueType();
50660b57cec5SDimitry Andric unsigned NElts = DestVT.getVectorNumElements();
50670b57cec5SDimitry Andric
50680b57cec5SDimitry Andric if (SrcVT.getVectorNumElements() == NElts) {
50690b57cec5SDimitry Andric EVT DestEltVT = DestVT.getVectorElementType();
50700b57cec5SDimitry Andric
50710b57cec5SDimitry Andric SmallVector<SDValue, 8> CastedElts;
50720b57cec5SDimitry Andric SDLoc SL(N);
50730b57cec5SDimitry Andric for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) {
50740b57cec5SDimitry Andric SDValue Elt = Src.getOperand(I);
50750b57cec5SDimitry Andric CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt));
50760b57cec5SDimitry Andric }
50770b57cec5SDimitry Andric
50780b57cec5SDimitry Andric return DAG.getBuildVector(DestVT, SL, CastedElts);
50790b57cec5SDimitry Andric }
50800b57cec5SDimitry Andric }
50810b57cec5SDimitry Andric }
50820b57cec5SDimitry Andric
5083e8d8bef9SDimitry Andric if (DestVT.getSizeInBits() != 64 || !DestVT.isVector())
50840b57cec5SDimitry Andric break;
50850b57cec5SDimitry Andric
50860b57cec5SDimitry Andric // Fold bitcasts of constants.
50870b57cec5SDimitry Andric //
50880b57cec5SDimitry Andric // v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k)
50890b57cec5SDimitry Andric // TODO: Generalize and move to DAGCombiner
50900b57cec5SDimitry Andric SDValue Src = N->getOperand(0);
50910b57cec5SDimitry Andric if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) {
50920b57cec5SDimitry Andric SDLoc SL(N);
50930b57cec5SDimitry Andric uint64_t CVal = C->getZExtValue();
50940b57cec5SDimitry Andric SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
50950b57cec5SDimitry Andric DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
50960b57cec5SDimitry Andric DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
50970b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
50980b57cec5SDimitry Andric }
50990b57cec5SDimitry Andric
51000b57cec5SDimitry Andric if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) {
51010b57cec5SDimitry Andric const APInt &Val = C->getValueAPF().bitcastToAPInt();
51020b57cec5SDimitry Andric SDLoc SL(N);
51030b57cec5SDimitry Andric uint64_t CVal = Val.getZExtValue();
51040b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
51050b57cec5SDimitry Andric DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
51060b57cec5SDimitry Andric DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
51070b57cec5SDimitry Andric
51080b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec);
51090b57cec5SDimitry Andric }
51100b57cec5SDimitry Andric
51110b57cec5SDimitry Andric break;
51120b57cec5SDimitry Andric }
51130b57cec5SDimitry Andric case ISD::SHL: {
51140b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
51150b57cec5SDimitry Andric break;
51160b57cec5SDimitry Andric
51170b57cec5SDimitry Andric return performShlCombine(N, DCI);
51180b57cec5SDimitry Andric }
51190b57cec5SDimitry Andric case ISD::SRL: {
51200b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
51210b57cec5SDimitry Andric break;
51220b57cec5SDimitry Andric
51230b57cec5SDimitry Andric return performSrlCombine(N, DCI);
51240b57cec5SDimitry Andric }
51250b57cec5SDimitry Andric case ISD::SRA: {
51260b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
51270b57cec5SDimitry Andric break;
51280b57cec5SDimitry Andric
51290b57cec5SDimitry Andric return performSraCombine(N, DCI);
51300b57cec5SDimitry Andric }
51310b57cec5SDimitry Andric case ISD::TRUNCATE:
51320b57cec5SDimitry Andric return performTruncateCombine(N, DCI);
51330b57cec5SDimitry Andric case ISD::MUL:
51340b57cec5SDimitry Andric return performMulCombine(N, DCI);
513506c3fb27SDimitry Andric case AMDGPUISD::MUL_U24:
513606c3fb27SDimitry Andric case AMDGPUISD::MUL_I24: {
513706c3fb27SDimitry Andric if (SDValue Simplified = simplifyMul24(N, DCI))
513806c3fb27SDimitry Andric return Simplified;
51390fca6ea1SDimitry Andric break;
514006c3fb27SDimitry Andric }
514106c3fb27SDimitry Andric case AMDGPUISD::MULHI_I24:
514206c3fb27SDimitry Andric case AMDGPUISD::MULHI_U24:
514306c3fb27SDimitry Andric return simplifyMul24(N, DCI);
51444824e7fdSDimitry Andric case ISD::SMUL_LOHI:
51454824e7fdSDimitry Andric case ISD::UMUL_LOHI:
51464824e7fdSDimitry Andric return performMulLoHiCombine(N, DCI);
51470b57cec5SDimitry Andric case ISD::MULHS:
51480b57cec5SDimitry Andric return performMulhsCombine(N, DCI);
51490b57cec5SDimitry Andric case ISD::MULHU:
51500b57cec5SDimitry Andric return performMulhuCombine(N, DCI);
51510b57cec5SDimitry Andric case ISD::SELECT:
51520b57cec5SDimitry Andric return performSelectCombine(N, DCI);
51530b57cec5SDimitry Andric case ISD::FNEG:
51540b57cec5SDimitry Andric return performFNegCombine(N, DCI);
51550b57cec5SDimitry Andric case ISD::FABS:
51560b57cec5SDimitry Andric return performFAbsCombine(N, DCI);
51570b57cec5SDimitry Andric case AMDGPUISD::BFE_I32:
51580b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: {
51590b57cec5SDimitry Andric assert(!N->getValueType(0).isVector() &&
51600b57cec5SDimitry Andric "Vector handling of BFE not implemented");
51610b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
51620b57cec5SDimitry Andric if (!Width)
51630b57cec5SDimitry Andric break;
51640b57cec5SDimitry Andric
51650b57cec5SDimitry Andric uint32_t WidthVal = Width->getZExtValue() & 0x1f;
51660b57cec5SDimitry Andric if (WidthVal == 0)
51670b57cec5SDimitry Andric return DAG.getConstant(0, DL, MVT::i32);
51680b57cec5SDimitry Andric
51690b57cec5SDimitry Andric ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
51700b57cec5SDimitry Andric if (!Offset)
51710b57cec5SDimitry Andric break;
51720b57cec5SDimitry Andric
51730b57cec5SDimitry Andric SDValue BitsFrom = N->getOperand(0);
51740b57cec5SDimitry Andric uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
51750b57cec5SDimitry Andric
51760b57cec5SDimitry Andric bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
51770b57cec5SDimitry Andric
51780b57cec5SDimitry Andric if (OffsetVal == 0) {
51790b57cec5SDimitry Andric // This is already sign / zero extended, so try to fold away extra BFEs.
51800b57cec5SDimitry Andric unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
51810b57cec5SDimitry Andric
51820b57cec5SDimitry Andric unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
51830b57cec5SDimitry Andric if (OpSignBits >= SignBits)
51840b57cec5SDimitry Andric return BitsFrom;
51850b57cec5SDimitry Andric
51860b57cec5SDimitry Andric EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
51870b57cec5SDimitry Andric if (Signed) {
51880b57cec5SDimitry Andric // This is a sign_extend_inreg. Replace it to take advantage of existing
51890b57cec5SDimitry Andric // DAG Combines. If not eliminated, we will match back to BFE during
51900b57cec5SDimitry Andric // selection.
51910b57cec5SDimitry Andric
51920b57cec5SDimitry Andric // TODO: The sext_inreg of extended types ends, although we can could
51930b57cec5SDimitry Andric // handle them in a single BFE.
51940b57cec5SDimitry Andric return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
51950b57cec5SDimitry Andric DAG.getValueType(SmallVT));
51960b57cec5SDimitry Andric }
51970b57cec5SDimitry Andric
51980b57cec5SDimitry Andric return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
51990b57cec5SDimitry Andric }
52000b57cec5SDimitry Andric
52010b57cec5SDimitry Andric if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
52020b57cec5SDimitry Andric if (Signed) {
52030b57cec5SDimitry Andric return constantFoldBFE<int32_t>(DAG,
52040b57cec5SDimitry Andric CVal->getSExtValue(),
52050b57cec5SDimitry Andric OffsetVal,
52060b57cec5SDimitry Andric WidthVal,
52070b57cec5SDimitry Andric DL);
52080b57cec5SDimitry Andric }
52090b57cec5SDimitry Andric
52100b57cec5SDimitry Andric return constantFoldBFE<uint32_t>(DAG,
52110b57cec5SDimitry Andric CVal->getZExtValue(),
52120b57cec5SDimitry Andric OffsetVal,
52130b57cec5SDimitry Andric WidthVal,
52140b57cec5SDimitry Andric DL);
52150b57cec5SDimitry Andric }
52160b57cec5SDimitry Andric
52170b57cec5SDimitry Andric if ((OffsetVal + WidthVal) >= 32 &&
52180b57cec5SDimitry Andric !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) {
52190b57cec5SDimitry Andric SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
52200b57cec5SDimitry Andric return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
52210b57cec5SDimitry Andric BitsFrom, ShiftVal);
52220b57cec5SDimitry Andric }
52230b57cec5SDimitry Andric
52240b57cec5SDimitry Andric if (BitsFrom.hasOneUse()) {
52250b57cec5SDimitry Andric APInt Demanded = APInt::getBitsSet(32,
52260b57cec5SDimitry Andric OffsetVal,
52270b57cec5SDimitry Andric OffsetVal + WidthVal);
52280b57cec5SDimitry Andric
52290b57cec5SDimitry Andric KnownBits Known;
52300b57cec5SDimitry Andric TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
52310b57cec5SDimitry Andric !DCI.isBeforeLegalizeOps());
52320b57cec5SDimitry Andric const TargetLowering &TLI = DAG.getTargetLoweringInfo();
52330b57cec5SDimitry Andric if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) ||
52340b57cec5SDimitry Andric TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) {
52350b57cec5SDimitry Andric DCI.CommitTargetLoweringOpt(TLO);
52360b57cec5SDimitry Andric }
52370b57cec5SDimitry Andric }
52380b57cec5SDimitry Andric
52390b57cec5SDimitry Andric break;
52400b57cec5SDimitry Andric }
52410b57cec5SDimitry Andric case ISD::LOAD:
52420b57cec5SDimitry Andric return performLoadCombine(N, DCI);
52430b57cec5SDimitry Andric case ISD::STORE:
52440b57cec5SDimitry Andric return performStoreCombine(N, DCI);
52450b57cec5SDimitry Andric case AMDGPUISD::RCP:
52460b57cec5SDimitry Andric case AMDGPUISD::RCP_IFLAG:
52470b57cec5SDimitry Andric return performRcpCombine(N, DCI);
52480b57cec5SDimitry Andric case ISD::AssertZext:
52490b57cec5SDimitry Andric case ISD::AssertSext:
52500b57cec5SDimitry Andric return performAssertSZExtCombine(N, DCI);
52518bcb0991SDimitry Andric case ISD::INTRINSIC_WO_CHAIN:
52528bcb0991SDimitry Andric return performIntrinsicWOChainCombine(N, DCI);
52535f757f3fSDimitry Andric case AMDGPUISD::FMAD_FTZ: {
52545f757f3fSDimitry Andric SDValue N0 = N->getOperand(0);
52555f757f3fSDimitry Andric SDValue N1 = N->getOperand(1);
52565f757f3fSDimitry Andric SDValue N2 = N->getOperand(2);
52575f757f3fSDimitry Andric EVT VT = N->getValueType(0);
52585f757f3fSDimitry Andric
52595f757f3fSDimitry Andric // FMAD_FTZ is a FMAD + flush denormals to zero.
52605f757f3fSDimitry Andric // We flush the inputs, the intermediate step, and the output.
52615f757f3fSDimitry Andric ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
52625f757f3fSDimitry Andric ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
52635f757f3fSDimitry Andric ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
52645f757f3fSDimitry Andric if (N0CFP && N1CFP && N2CFP) {
52655f757f3fSDimitry Andric const auto FTZ = [](const APFloat &V) {
52665f757f3fSDimitry Andric if (V.isDenormal()) {
52675f757f3fSDimitry Andric APFloat Zero(V.getSemantics(), 0);
52685f757f3fSDimitry Andric return V.isNegative() ? -Zero : Zero;
52695f757f3fSDimitry Andric }
52705f757f3fSDimitry Andric return V;
52715f757f3fSDimitry Andric };
52725f757f3fSDimitry Andric
52735f757f3fSDimitry Andric APFloat V0 = FTZ(N0CFP->getValueAPF());
52745f757f3fSDimitry Andric APFloat V1 = FTZ(N1CFP->getValueAPF());
52755f757f3fSDimitry Andric APFloat V2 = FTZ(N2CFP->getValueAPF());
52765f757f3fSDimitry Andric V0.multiply(V1, APFloat::rmNearestTiesToEven);
52775f757f3fSDimitry Andric V0 = FTZ(V0);
52785f757f3fSDimitry Andric V0.add(V2, APFloat::rmNearestTiesToEven);
52795f757f3fSDimitry Andric return DAG.getConstantFP(FTZ(V0), DL, VT);
52805f757f3fSDimitry Andric }
52815f757f3fSDimitry Andric break;
52825f757f3fSDimitry Andric }
52830b57cec5SDimitry Andric }
52840b57cec5SDimitry Andric return SDValue();
52850b57cec5SDimitry Andric }
52860b57cec5SDimitry Andric
52870b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
52880b57cec5SDimitry Andric // Helper functions
52890b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
52900b57cec5SDimitry Andric
CreateLiveInRegister(SelectionDAG & DAG,const TargetRegisterClass * RC,Register Reg,EVT VT,const SDLoc & SL,bool RawReg) const52910b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
52920b57cec5SDimitry Andric const TargetRegisterClass *RC,
52935ffd83dbSDimitry Andric Register Reg, EVT VT,
52940b57cec5SDimitry Andric const SDLoc &SL,
52950b57cec5SDimitry Andric bool RawReg) const {
52960b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction();
52970b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
52985ffd83dbSDimitry Andric Register VReg;
52990b57cec5SDimitry Andric
53000b57cec5SDimitry Andric if (!MRI.isLiveIn(Reg)) {
53010b57cec5SDimitry Andric VReg = MRI.createVirtualRegister(RC);
53020b57cec5SDimitry Andric MRI.addLiveIn(Reg, VReg);
53030b57cec5SDimitry Andric } else {
53040b57cec5SDimitry Andric VReg = MRI.getLiveInVirtReg(Reg);
53050b57cec5SDimitry Andric }
53060b57cec5SDimitry Andric
53070b57cec5SDimitry Andric if (RawReg)
53080b57cec5SDimitry Andric return DAG.getRegister(VReg, VT);
53090b57cec5SDimitry Andric
53100b57cec5SDimitry Andric return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT);
53110b57cec5SDimitry Andric }
53120b57cec5SDimitry Andric
53138bcb0991SDimitry Andric // This may be called multiple times, and nothing prevents creating multiple
53148bcb0991SDimitry Andric // objects at the same offset. See if we already defined this object.
getOrCreateFixedStackObject(MachineFrameInfo & MFI,unsigned Size,int64_t Offset)53158bcb0991SDimitry Andric static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size,
53168bcb0991SDimitry Andric int64_t Offset) {
53178bcb0991SDimitry Andric for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
53188bcb0991SDimitry Andric if (MFI.getObjectOffset(I) == Offset) {
53198bcb0991SDimitry Andric assert(MFI.getObjectSize(I) == Size);
53208bcb0991SDimitry Andric return I;
53218bcb0991SDimitry Andric }
53228bcb0991SDimitry Andric }
53238bcb0991SDimitry Andric
53248bcb0991SDimitry Andric return MFI.CreateFixedObject(Size, Offset, true);
53258bcb0991SDimitry Andric }
53268bcb0991SDimitry Andric
loadStackInputValue(SelectionDAG & DAG,EVT VT,const SDLoc & SL,int64_t Offset) const53270b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
53280b57cec5SDimitry Andric EVT VT,
53290b57cec5SDimitry Andric const SDLoc &SL,
53300b57cec5SDimitry Andric int64_t Offset) const {
53310b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction();
53320b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo();
53338bcb0991SDimitry Andric int FI = getOrCreateFixedStackObject(MFI, VT.getStoreSize(), Offset);
53340b57cec5SDimitry Andric
53350b57cec5SDimitry Andric auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset);
53360b57cec5SDimitry Andric SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32);
53370b57cec5SDimitry Andric
5338e8d8bef9SDimitry Andric return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, Align(4),
53390b57cec5SDimitry Andric MachineMemOperand::MODereferenceable |
53400b57cec5SDimitry Andric MachineMemOperand::MOInvariant);
53410b57cec5SDimitry Andric }
53420b57cec5SDimitry Andric
storeStackInputValue(SelectionDAG & DAG,const SDLoc & SL,SDValue Chain,SDValue ArgVal,int64_t Offset) const53430b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
53440b57cec5SDimitry Andric const SDLoc &SL,
53450b57cec5SDimitry Andric SDValue Chain,
53460b57cec5SDimitry Andric SDValue ArgVal,
53470b57cec5SDimitry Andric int64_t Offset) const {
53480b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction();
53490b57cec5SDimitry Andric MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
5350fe6060f1SDimitry Andric const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
53510b57cec5SDimitry Andric
53520b57cec5SDimitry Andric SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
5353fe6060f1SDimitry Andric // Stores to the argument stack area are relative to the stack pointer.
5354fe6060f1SDimitry Andric SDValue SP =
5355fe6060f1SDimitry Andric DAG.getCopyFromReg(Chain, SL, Info->getStackPtrOffsetReg(), MVT::i32);
5356fe6060f1SDimitry Andric Ptr = DAG.getNode(ISD::ADD, SL, MVT::i32, SP, Ptr);
5357e8d8bef9SDimitry Andric SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, Align(4),
53580b57cec5SDimitry Andric MachineMemOperand::MODereferenceable);
53590b57cec5SDimitry Andric return Store;
53600b57cec5SDimitry Andric }
53610b57cec5SDimitry Andric
loadInputValue(SelectionDAG & DAG,const TargetRegisterClass * RC,EVT VT,const SDLoc & SL,const ArgDescriptor & Arg) const53620b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
53630b57cec5SDimitry Andric const TargetRegisterClass *RC,
53640b57cec5SDimitry Andric EVT VT, const SDLoc &SL,
53650b57cec5SDimitry Andric const ArgDescriptor &Arg) const {
53660b57cec5SDimitry Andric assert(Arg && "Attempting to load missing argument");
53670b57cec5SDimitry Andric
53680b57cec5SDimitry Andric SDValue V = Arg.isRegister() ?
53690b57cec5SDimitry Andric CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL) :
53700b57cec5SDimitry Andric loadStackInputValue(DAG, VT, SL, Arg.getStackOffset());
53710b57cec5SDimitry Andric
53720b57cec5SDimitry Andric if (!Arg.isMasked())
53730b57cec5SDimitry Andric return V;
53740b57cec5SDimitry Andric
53750b57cec5SDimitry Andric unsigned Mask = Arg.getMask();
537606c3fb27SDimitry Andric unsigned Shift = llvm::countr_zero<unsigned>(Mask);
53770b57cec5SDimitry Andric V = DAG.getNode(ISD::SRL, SL, VT, V,
53780b57cec5SDimitry Andric DAG.getShiftAmountConstant(Shift, VT, SL));
53790b57cec5SDimitry Andric return DAG.getNode(ISD::AND, SL, VT, V,
53800b57cec5SDimitry Andric DAG.getConstant(Mask >> Shift, SL, VT));
53810b57cec5SDimitry Andric }
53820b57cec5SDimitry Andric
getImplicitParameterOffset(uint64_t ExplicitKernArgSize,const ImplicitParameter Param) const53830b57cec5SDimitry Andric uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
538406c3fb27SDimitry Andric uint64_t ExplicitKernArgSize, const ImplicitParameter Param) const {
538506c3fb27SDimitry Andric unsigned ExplicitArgOffset = Subtarget->getExplicitKernelArgOffset();
538606c3fb27SDimitry Andric const Align Alignment = Subtarget->getAlignmentForImplicitArgPtr();
538706c3fb27SDimitry Andric uint64_t ArgOffset =
538806c3fb27SDimitry Andric alignTo(ExplicitKernArgSize, Alignment) + ExplicitArgOffset;
53890b57cec5SDimitry Andric switch (Param) {
539081ad6265SDimitry Andric case FIRST_IMPLICIT:
53910b57cec5SDimitry Andric return ArgOffset;
539281ad6265SDimitry Andric case PRIVATE_BASE:
539381ad6265SDimitry Andric return ArgOffset + AMDGPU::ImplicitArg::PRIVATE_BASE_OFFSET;
539481ad6265SDimitry Andric case SHARED_BASE:
539581ad6265SDimitry Andric return ArgOffset + AMDGPU::ImplicitArg::SHARED_BASE_OFFSET;
539681ad6265SDimitry Andric case QUEUE_PTR:
539781ad6265SDimitry Andric return ArgOffset + AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET;
53980b57cec5SDimitry Andric }
53990b57cec5SDimitry Andric llvm_unreachable("unexpected implicit parameter type");
54000b57cec5SDimitry Andric }
54010b57cec5SDimitry Andric
getImplicitParameterOffset(const MachineFunction & MF,const ImplicitParameter Param) const540206c3fb27SDimitry Andric uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
540306c3fb27SDimitry Andric const MachineFunction &MF, const ImplicitParameter Param) const {
540406c3fb27SDimitry Andric const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
540506c3fb27SDimitry Andric return getImplicitParameterOffset(MFI->getExplicitKernArgSize(), Param);
540606c3fb27SDimitry Andric }
540706c3fb27SDimitry Andric
54080b57cec5SDimitry Andric #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
54090b57cec5SDimitry Andric
getTargetNodeName(unsigned Opcode) const54100b57cec5SDimitry Andric const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
54110b57cec5SDimitry Andric switch ((AMDGPUISD::NodeType)Opcode) {
54120b57cec5SDimitry Andric case AMDGPUISD::FIRST_NUMBER: break;
54130b57cec5SDimitry Andric // AMDIL DAG nodes
54140b57cec5SDimitry Andric NODE_NAME_CASE(UMUL);
54150b57cec5SDimitry Andric NODE_NAME_CASE(BRANCH_COND);
54160b57cec5SDimitry Andric
54170b57cec5SDimitry Andric // AMDGPU DAG nodes
54180b57cec5SDimitry Andric NODE_NAME_CASE(IF)
54190b57cec5SDimitry Andric NODE_NAME_CASE(ELSE)
54200b57cec5SDimitry Andric NODE_NAME_CASE(LOOP)
54210b57cec5SDimitry Andric NODE_NAME_CASE(CALL)
54220b57cec5SDimitry Andric NODE_NAME_CASE(TC_RETURN)
542306c3fb27SDimitry Andric NODE_NAME_CASE(TC_RETURN_GFX)
54245f757f3fSDimitry Andric NODE_NAME_CASE(TC_RETURN_CHAIN)
54250b57cec5SDimitry Andric NODE_NAME_CASE(TRAP)
542606c3fb27SDimitry Andric NODE_NAME_CASE(RET_GLUE)
54275f757f3fSDimitry Andric NODE_NAME_CASE(WAVE_ADDRESS)
54280b57cec5SDimitry Andric NODE_NAME_CASE(RETURN_TO_EPILOG)
54290b57cec5SDimitry Andric NODE_NAME_CASE(ENDPGM)
543006c3fb27SDimitry Andric NODE_NAME_CASE(ENDPGM_TRAP)
54310fca6ea1SDimitry Andric NODE_NAME_CASE(SIMULATED_TRAP)
54320b57cec5SDimitry Andric NODE_NAME_CASE(DWORDADDR)
54330b57cec5SDimitry Andric NODE_NAME_CASE(FRACT)
54340b57cec5SDimitry Andric NODE_NAME_CASE(SETCC)
54350b57cec5SDimitry Andric NODE_NAME_CASE(SETREG)
54368bcb0991SDimitry Andric NODE_NAME_CASE(DENORM_MODE)
54370b57cec5SDimitry Andric NODE_NAME_CASE(FMA_W_CHAIN)
54380b57cec5SDimitry Andric NODE_NAME_CASE(FMUL_W_CHAIN)
54390b57cec5SDimitry Andric NODE_NAME_CASE(CLAMP)
54400b57cec5SDimitry Andric NODE_NAME_CASE(COS_HW)
54410b57cec5SDimitry Andric NODE_NAME_CASE(SIN_HW)
54420b57cec5SDimitry Andric NODE_NAME_CASE(FMAX_LEGACY)
54430b57cec5SDimitry Andric NODE_NAME_CASE(FMIN_LEGACY)
54440b57cec5SDimitry Andric NODE_NAME_CASE(FMAX3)
54450b57cec5SDimitry Andric NODE_NAME_CASE(SMAX3)
54460b57cec5SDimitry Andric NODE_NAME_CASE(UMAX3)
54470b57cec5SDimitry Andric NODE_NAME_CASE(FMIN3)
54480b57cec5SDimitry Andric NODE_NAME_CASE(SMIN3)
54490b57cec5SDimitry Andric NODE_NAME_CASE(UMIN3)
54500b57cec5SDimitry Andric NODE_NAME_CASE(FMED3)
54510b57cec5SDimitry Andric NODE_NAME_CASE(SMED3)
54520b57cec5SDimitry Andric NODE_NAME_CASE(UMED3)
54535f757f3fSDimitry Andric NODE_NAME_CASE(FMAXIMUM3)
54545f757f3fSDimitry Andric NODE_NAME_CASE(FMINIMUM3)
54550b57cec5SDimitry Andric NODE_NAME_CASE(FDOT2)
54560b57cec5SDimitry Andric NODE_NAME_CASE(URECIP)
54570b57cec5SDimitry Andric NODE_NAME_CASE(DIV_SCALE)
54580b57cec5SDimitry Andric NODE_NAME_CASE(DIV_FMAS)
54590b57cec5SDimitry Andric NODE_NAME_CASE(DIV_FIXUP)
54600b57cec5SDimitry Andric NODE_NAME_CASE(FMAD_FTZ)
54610b57cec5SDimitry Andric NODE_NAME_CASE(RCP)
54620b57cec5SDimitry Andric NODE_NAME_CASE(RSQ)
54630b57cec5SDimitry Andric NODE_NAME_CASE(RCP_LEGACY)
54640b57cec5SDimitry Andric NODE_NAME_CASE(RCP_IFLAG)
546506c3fb27SDimitry Andric NODE_NAME_CASE(LOG)
546606c3fb27SDimitry Andric NODE_NAME_CASE(EXP)
54670b57cec5SDimitry Andric NODE_NAME_CASE(FMUL_LEGACY)
54680b57cec5SDimitry Andric NODE_NAME_CASE(RSQ_CLAMP)
54690b57cec5SDimitry Andric NODE_NAME_CASE(FP_CLASS)
54700b57cec5SDimitry Andric NODE_NAME_CASE(DOT4)
54710b57cec5SDimitry Andric NODE_NAME_CASE(CARRY)
54720b57cec5SDimitry Andric NODE_NAME_CASE(BORROW)
54730b57cec5SDimitry Andric NODE_NAME_CASE(BFE_U32)
54740b57cec5SDimitry Andric NODE_NAME_CASE(BFE_I32)
54750b57cec5SDimitry Andric NODE_NAME_CASE(BFI)
54760b57cec5SDimitry Andric NODE_NAME_CASE(BFM)
54770b57cec5SDimitry Andric NODE_NAME_CASE(FFBH_U32)
54780b57cec5SDimitry Andric NODE_NAME_CASE(FFBH_I32)
54790b57cec5SDimitry Andric NODE_NAME_CASE(FFBL_B32)
54800b57cec5SDimitry Andric NODE_NAME_CASE(MUL_U24)
54810b57cec5SDimitry Andric NODE_NAME_CASE(MUL_I24)
54820b57cec5SDimitry Andric NODE_NAME_CASE(MULHI_U24)
54830b57cec5SDimitry Andric NODE_NAME_CASE(MULHI_I24)
54840b57cec5SDimitry Andric NODE_NAME_CASE(MAD_U24)
54850b57cec5SDimitry Andric NODE_NAME_CASE(MAD_I24)
54860b57cec5SDimitry Andric NODE_NAME_CASE(MAD_I64_I32)
54870b57cec5SDimitry Andric NODE_NAME_CASE(MAD_U64_U32)
54880b57cec5SDimitry Andric NODE_NAME_CASE(PERM)
54890b57cec5SDimitry Andric NODE_NAME_CASE(TEXTURE_FETCH)
54900b57cec5SDimitry Andric NODE_NAME_CASE(R600_EXPORT)
54910b57cec5SDimitry Andric NODE_NAME_CASE(CONST_ADDRESS)
54920b57cec5SDimitry Andric NODE_NAME_CASE(REGISTER_LOAD)
54930b57cec5SDimitry Andric NODE_NAME_CASE(REGISTER_STORE)
54940b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLE)
54950b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLEB)
54960b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLED)
54970b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLEL)
54980b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE0)
54990b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE1)
55000b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE2)
55010b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE3)
55020b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PKRTZ_F16_F32)
55030b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PKNORM_I16_F32)
55040b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PKNORM_U16_F32)
55050b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PK_I16_I32)
55060b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PK_U16_U32)
55070b57cec5SDimitry Andric NODE_NAME_CASE(FP_TO_FP16)
55080b57cec5SDimitry Andric NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
55090b57cec5SDimitry Andric NODE_NAME_CASE(CONST_DATA_PTR)
55100b57cec5SDimitry Andric NODE_NAME_CASE(PC_ADD_REL_OFFSET)
55110b57cec5SDimitry Andric NODE_NAME_CASE(LDS)
551281ad6265SDimitry Andric NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD)
551381ad6265SDimitry Andric NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD)
55140b57cec5SDimitry Andric NODE_NAME_CASE(DUMMY_CHAIN)
55150b57cec5SDimitry Andric case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
55160b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI)
55170b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO)
55180b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI_I8)
55190b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI_U8)
55200b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO_I8)
55210b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO_U8)
55220b57cec5SDimitry Andric NODE_NAME_CASE(STORE_MSKOR)
55230b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_CONSTANT)
55240b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
55250b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
55260b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
55270b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
55280b57cec5SDimitry Andric NODE_NAME_CASE(DS_ORDERED_COUNT)
55290b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_CMP_SWAP)
55300b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD)
55310b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_UBYTE)
55320b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_USHORT)
55330b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_BYTE)
55340b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_SHORT)
55350fca6ea1SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_TFE)
55360fca6ea1SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_UBYTE_TFE)
55370fca6ea1SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_USHORT_TFE)
55380fca6ea1SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_BYTE_TFE)
55390fca6ea1SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_SHORT_TFE)
55400b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
5541bdd1243dSDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_FORMAT_TFE)
55420b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)
55430b57cec5SDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD)
55447a6dacacSDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD_BYTE)
55457a6dacacSDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD_UBYTE)
55467a6dacacSDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD_SHORT)
55477a6dacacSDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD_USHORT)
55480b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE)
55490b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_BYTE)
55500b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_SHORT)
55510b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_FORMAT)
55520b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16)
55530b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SWAP)
55540b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_ADD)
55550b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SUB)
55560b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SMIN)
55570b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_UMIN)
55580b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SMAX)
55590b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_UMAX)
55600b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_AND)
55610b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_OR)
55620b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
55638bcb0991SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_INC)
55648bcb0991SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_DEC)
55650b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
55665ffd83dbSDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_CSUB)
55670b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
5568fe6060f1SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_FMIN)
5569fe6060f1SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_FMAX)
55707a6dacacSDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32)
55710b57cec5SDimitry Andric
55720b57cec5SDimitry Andric case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
55730b57cec5SDimitry Andric }
55740b57cec5SDimitry Andric return nullptr;
55750b57cec5SDimitry Andric }
55760b57cec5SDimitry Andric
getSqrtEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & RefinementSteps,bool & UseOneConstNR,bool Reciprocal) const55770b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand,
55780b57cec5SDimitry Andric SelectionDAG &DAG, int Enabled,
55790b57cec5SDimitry Andric int &RefinementSteps,
55800b57cec5SDimitry Andric bool &UseOneConstNR,
55810b57cec5SDimitry Andric bool Reciprocal) const {
55820b57cec5SDimitry Andric EVT VT = Operand.getValueType();
55830b57cec5SDimitry Andric
55840b57cec5SDimitry Andric if (VT == MVT::f32) {
55850b57cec5SDimitry Andric RefinementSteps = 0;
55860b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
55870b57cec5SDimitry Andric }
55880b57cec5SDimitry Andric
55890b57cec5SDimitry Andric // TODO: There is also f64 rsq instruction, but the documentation is less
55900b57cec5SDimitry Andric // clear on its precision.
55910b57cec5SDimitry Andric
55920b57cec5SDimitry Andric return SDValue();
55930b57cec5SDimitry Andric }
55940b57cec5SDimitry Andric
getRecipEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & RefinementSteps) const55950b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
55960b57cec5SDimitry Andric SelectionDAG &DAG, int Enabled,
55970b57cec5SDimitry Andric int &RefinementSteps) const {
55980b57cec5SDimitry Andric EVT VT = Operand.getValueType();
55990b57cec5SDimitry Andric
56000b57cec5SDimitry Andric if (VT == MVT::f32) {
56010b57cec5SDimitry Andric // Reciprocal, < 1 ulp error.
56020b57cec5SDimitry Andric //
56030b57cec5SDimitry Andric // This reciprocal approximation converges to < 0.5 ulp error with one
56040b57cec5SDimitry Andric // newton rhapson performed with two fused multiple adds (FMAs).
56050b57cec5SDimitry Andric
56060b57cec5SDimitry Andric RefinementSteps = 0;
56070b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
56080b57cec5SDimitry Andric }
56090b57cec5SDimitry Andric
56100b57cec5SDimitry Andric // TODO: There is also f64 rcp instruction, but the documentation is less
56110b57cec5SDimitry Andric // clear on its precision.
56120b57cec5SDimitry Andric
56130b57cec5SDimitry Andric return SDValue();
56140b57cec5SDimitry Andric }
56150b57cec5SDimitry Andric
workitemIntrinsicDim(unsigned ID)561681ad6265SDimitry Andric static unsigned workitemIntrinsicDim(unsigned ID) {
561781ad6265SDimitry Andric switch (ID) {
561881ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_x:
561981ad6265SDimitry Andric return 0;
562081ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_y:
562181ad6265SDimitry Andric return 1;
562281ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_z:
562381ad6265SDimitry Andric return 2;
562481ad6265SDimitry Andric default:
562581ad6265SDimitry Andric llvm_unreachable("not a workitem intrinsic");
562681ad6265SDimitry Andric }
562781ad6265SDimitry Andric }
562881ad6265SDimitry Andric
computeKnownBitsForTargetNode(const SDValue Op,KnownBits & Known,const APInt & DemandedElts,const SelectionDAG & DAG,unsigned Depth) const56290b57cec5SDimitry Andric void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
56300b57cec5SDimitry Andric const SDValue Op, KnownBits &Known,
56310b57cec5SDimitry Andric const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
56320b57cec5SDimitry Andric
56330b57cec5SDimitry Andric Known.resetAll(); // Don't know anything.
56340b57cec5SDimitry Andric
56350b57cec5SDimitry Andric unsigned Opc = Op.getOpcode();
56360b57cec5SDimitry Andric
56370b57cec5SDimitry Andric switch (Opc) {
56380b57cec5SDimitry Andric default:
56390b57cec5SDimitry Andric break;
56400b57cec5SDimitry Andric case AMDGPUISD::CARRY:
56410b57cec5SDimitry Andric case AMDGPUISD::BORROW: {
56420b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(32, 31);
56430b57cec5SDimitry Andric break;
56440b57cec5SDimitry Andric }
56450b57cec5SDimitry Andric
56460b57cec5SDimitry Andric case AMDGPUISD::BFE_I32:
56470b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: {
56480b57cec5SDimitry Andric ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
56490b57cec5SDimitry Andric if (!CWidth)
56500b57cec5SDimitry Andric return;
56510b57cec5SDimitry Andric
56520b57cec5SDimitry Andric uint32_t Width = CWidth->getZExtValue() & 0x1f;
56530b57cec5SDimitry Andric
56540b57cec5SDimitry Andric if (Opc == AMDGPUISD::BFE_U32)
56550b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(32, 32 - Width);
56560b57cec5SDimitry Andric
56570b57cec5SDimitry Andric break;
56580b57cec5SDimitry Andric }
5659fe6060f1SDimitry Andric case AMDGPUISD::FP_TO_FP16: {
56600b57cec5SDimitry Andric unsigned BitWidth = Known.getBitWidth();
56610b57cec5SDimitry Andric
56620b57cec5SDimitry Andric // High bits are zero.
56630b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
56640b57cec5SDimitry Andric break;
56650b57cec5SDimitry Andric }
56660b57cec5SDimitry Andric case AMDGPUISD::MUL_U24:
56670b57cec5SDimitry Andric case AMDGPUISD::MUL_I24: {
56680b57cec5SDimitry Andric KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
56690b57cec5SDimitry Andric KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
56700b57cec5SDimitry Andric unsigned TrailZ = LHSKnown.countMinTrailingZeros() +
56710b57cec5SDimitry Andric RHSKnown.countMinTrailingZeros();
56720b57cec5SDimitry Andric Known.Zero.setLowBits(std::min(TrailZ, 32u));
5673480093f4SDimitry Andric // Skip extra check if all bits are known zeros.
5674480093f4SDimitry Andric if (TrailZ >= 32)
5675480093f4SDimitry Andric break;
56760b57cec5SDimitry Andric
56770b57cec5SDimitry Andric // Truncate to 24 bits.
56780b57cec5SDimitry Andric LHSKnown = LHSKnown.trunc(24);
56790b57cec5SDimitry Andric RHSKnown = RHSKnown.trunc(24);
56800b57cec5SDimitry Andric
56810b57cec5SDimitry Andric if (Opc == AMDGPUISD::MUL_I24) {
568204eeddc0SDimitry Andric unsigned LHSValBits = LHSKnown.countMaxSignificantBits();
568304eeddc0SDimitry Andric unsigned RHSValBits = RHSKnown.countMaxSignificantBits();
568404eeddc0SDimitry Andric unsigned MaxValBits = LHSValBits + RHSValBits;
568504eeddc0SDimitry Andric if (MaxValBits > 32)
56860b57cec5SDimitry Andric break;
568704eeddc0SDimitry Andric unsigned SignBits = 32 - MaxValBits + 1;
56880b57cec5SDimitry Andric bool LHSNegative = LHSKnown.isNegative();
5689480093f4SDimitry Andric bool LHSNonNegative = LHSKnown.isNonNegative();
5690480093f4SDimitry Andric bool LHSPositive = LHSKnown.isStrictlyPositive();
56910b57cec5SDimitry Andric bool RHSNegative = RHSKnown.isNegative();
5692480093f4SDimitry Andric bool RHSNonNegative = RHSKnown.isNonNegative();
5693480093f4SDimitry Andric bool RHSPositive = RHSKnown.isStrictlyPositive();
5694480093f4SDimitry Andric
5695480093f4SDimitry Andric if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative))
569604eeddc0SDimitry Andric Known.Zero.setHighBits(SignBits);
5697480093f4SDimitry Andric else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative))
569804eeddc0SDimitry Andric Known.One.setHighBits(SignBits);
56990b57cec5SDimitry Andric } else {
570004eeddc0SDimitry Andric unsigned LHSValBits = LHSKnown.countMaxActiveBits();
570104eeddc0SDimitry Andric unsigned RHSValBits = RHSKnown.countMaxActiveBits();
570204eeddc0SDimitry Andric unsigned MaxValBits = LHSValBits + RHSValBits;
57030b57cec5SDimitry Andric if (MaxValBits >= 32)
57040b57cec5SDimitry Andric break;
570504eeddc0SDimitry Andric Known.Zero.setBitsFrom(MaxValBits);
57060b57cec5SDimitry Andric }
57070b57cec5SDimitry Andric break;
57080b57cec5SDimitry Andric }
57090b57cec5SDimitry Andric case AMDGPUISD::PERM: {
57100b57cec5SDimitry Andric ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Op.getOperand(2));
57110b57cec5SDimitry Andric if (!CMask)
57120b57cec5SDimitry Andric return;
57130b57cec5SDimitry Andric
57140b57cec5SDimitry Andric KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
57150b57cec5SDimitry Andric KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
57160b57cec5SDimitry Andric unsigned Sel = CMask->getZExtValue();
57170b57cec5SDimitry Andric
57180b57cec5SDimitry Andric for (unsigned I = 0; I < 32; I += 8) {
57190b57cec5SDimitry Andric unsigned SelBits = Sel & 0xff;
57200b57cec5SDimitry Andric if (SelBits < 4) {
57210b57cec5SDimitry Andric SelBits *= 8;
57220b57cec5SDimitry Andric Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
57230b57cec5SDimitry Andric Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
57240b57cec5SDimitry Andric } else if (SelBits < 7) {
57250b57cec5SDimitry Andric SelBits = (SelBits & 3) * 8;
57260b57cec5SDimitry Andric Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
57270b57cec5SDimitry Andric Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
57280b57cec5SDimitry Andric } else if (SelBits == 0x0c) {
57298bcb0991SDimitry Andric Known.Zero |= 0xFFull << I;
57300b57cec5SDimitry Andric } else if (SelBits > 0x0c) {
57318bcb0991SDimitry Andric Known.One |= 0xFFull << I;
57320b57cec5SDimitry Andric }
57330b57cec5SDimitry Andric Sel >>= 8;
57340b57cec5SDimitry Andric }
57350b57cec5SDimitry Andric break;
57360b57cec5SDimitry Andric }
57370b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_UBYTE: {
57380b57cec5SDimitry Andric Known.Zero.setHighBits(24);
57390b57cec5SDimitry Andric break;
57400b57cec5SDimitry Andric }
57410b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_USHORT: {
57420b57cec5SDimitry Andric Known.Zero.setHighBits(16);
57430b57cec5SDimitry Andric break;
57440b57cec5SDimitry Andric }
57450b57cec5SDimitry Andric case AMDGPUISD::LDS: {
57460b57cec5SDimitry Andric auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode());
57475ffd83dbSDimitry Andric Align Alignment = GA->getGlobal()->getPointerAlignment(DAG.getDataLayout());
57480b57cec5SDimitry Andric
57490b57cec5SDimitry Andric Known.Zero.setHighBits(16);
57505ffd83dbSDimitry Andric Known.Zero.setLowBits(Log2(Alignment));
57510b57cec5SDimitry Andric break;
57520b57cec5SDimitry Andric }
575306c3fb27SDimitry Andric case AMDGPUISD::SMIN3:
575406c3fb27SDimitry Andric case AMDGPUISD::SMAX3:
575506c3fb27SDimitry Andric case AMDGPUISD::SMED3:
575606c3fb27SDimitry Andric case AMDGPUISD::UMIN3:
575706c3fb27SDimitry Andric case AMDGPUISD::UMAX3:
575806c3fb27SDimitry Andric case AMDGPUISD::UMED3: {
575906c3fb27SDimitry Andric KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
576006c3fb27SDimitry Andric if (Known2.isUnknown())
576106c3fb27SDimitry Andric break;
576206c3fb27SDimitry Andric
576306c3fb27SDimitry Andric KnownBits Known1 = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
576406c3fb27SDimitry Andric if (Known1.isUnknown())
576506c3fb27SDimitry Andric break;
576606c3fb27SDimitry Andric
576706c3fb27SDimitry Andric KnownBits Known0 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
576806c3fb27SDimitry Andric if (Known0.isUnknown())
576906c3fb27SDimitry Andric break;
577006c3fb27SDimitry Andric
577106c3fb27SDimitry Andric // TODO: Handle LeadZero/LeadOne from UMIN/UMAX handling.
577206c3fb27SDimitry Andric Known.Zero = Known0.Zero & Known1.Zero & Known2.Zero;
577306c3fb27SDimitry Andric Known.One = Known0.One & Known1.One & Known2.One;
577406c3fb27SDimitry Andric break;
577506c3fb27SDimitry Andric }
57760b57cec5SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: {
5777647cbc5dSDimitry Andric unsigned IID = Op.getConstantOperandVal(0);
57780b57cec5SDimitry Andric switch (IID) {
577981ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_x:
578081ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_y:
578181ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_z: {
578281ad6265SDimitry Andric unsigned MaxValue = Subtarget->getMaxWorkitemID(
578381ad6265SDimitry Andric DAG.getMachineFunction().getFunction(), workitemIntrinsicDim(IID));
578406c3fb27SDimitry Andric Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
578581ad6265SDimitry Andric break;
578681ad6265SDimitry Andric }
57870b57cec5SDimitry Andric default:
57880b57cec5SDimitry Andric break;
57890b57cec5SDimitry Andric }
57900b57cec5SDimitry Andric }
57910b57cec5SDimitry Andric }
57920b57cec5SDimitry Andric }
57930b57cec5SDimitry Andric
ComputeNumSignBitsForTargetNode(SDValue Op,const APInt & DemandedElts,const SelectionDAG & DAG,unsigned Depth) const57940b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
57950b57cec5SDimitry Andric SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
57960b57cec5SDimitry Andric unsigned Depth) const {
57970b57cec5SDimitry Andric switch (Op.getOpcode()) {
57980b57cec5SDimitry Andric case AMDGPUISD::BFE_I32: {
57990b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
58000b57cec5SDimitry Andric if (!Width)
58010b57cec5SDimitry Andric return 1;
58020b57cec5SDimitry Andric
58030b57cec5SDimitry Andric unsigned SignBits = 32 - Width->getZExtValue() + 1;
58040b57cec5SDimitry Andric if (!isNullConstant(Op.getOperand(1)))
58050b57cec5SDimitry Andric return SignBits;
58060b57cec5SDimitry Andric
58070b57cec5SDimitry Andric // TODO: Could probably figure something out with non-0 offsets.
58080b57cec5SDimitry Andric unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
58090b57cec5SDimitry Andric return std::max(SignBits, Op0SignBits);
58100b57cec5SDimitry Andric }
58110b57cec5SDimitry Andric
58120b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: {
58130b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
58140b57cec5SDimitry Andric return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
58150b57cec5SDimitry Andric }
58160b57cec5SDimitry Andric
58170b57cec5SDimitry Andric case AMDGPUISD::CARRY:
58180b57cec5SDimitry Andric case AMDGPUISD::BORROW:
58190b57cec5SDimitry Andric return 31;
58200b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_BYTE:
58210b57cec5SDimitry Andric return 25;
58220b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_SHORT:
58230b57cec5SDimitry Andric return 17;
58240b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_UBYTE:
58250b57cec5SDimitry Andric return 24;
58260b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_USHORT:
58270b57cec5SDimitry Andric return 16;
58280b57cec5SDimitry Andric case AMDGPUISD::FP_TO_FP16:
58290b57cec5SDimitry Andric return 16;
583006c3fb27SDimitry Andric case AMDGPUISD::SMIN3:
583106c3fb27SDimitry Andric case AMDGPUISD::SMAX3:
583206c3fb27SDimitry Andric case AMDGPUISD::SMED3:
583306c3fb27SDimitry Andric case AMDGPUISD::UMIN3:
583406c3fb27SDimitry Andric case AMDGPUISD::UMAX3:
583506c3fb27SDimitry Andric case AMDGPUISD::UMED3: {
583606c3fb27SDimitry Andric unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(2), Depth + 1);
583706c3fb27SDimitry Andric if (Tmp2 == 1)
583806c3fb27SDimitry Andric return 1; // Early out.
583906c3fb27SDimitry Andric
584006c3fb27SDimitry Andric unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1);
584106c3fb27SDimitry Andric if (Tmp1 == 1)
584206c3fb27SDimitry Andric return 1; // Early out.
584306c3fb27SDimitry Andric
584406c3fb27SDimitry Andric unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
584506c3fb27SDimitry Andric if (Tmp0 == 1)
584606c3fb27SDimitry Andric return 1; // Early out.
584706c3fb27SDimitry Andric
58480fca6ea1SDimitry Andric return std::min({Tmp0, Tmp1, Tmp2});
584906c3fb27SDimitry Andric }
58500b57cec5SDimitry Andric default:
58510b57cec5SDimitry Andric return 1;
58520b57cec5SDimitry Andric }
58530b57cec5SDimitry Andric }
58540b57cec5SDimitry Andric
computeNumSignBitsForTargetInstr(GISelKnownBits & Analysis,Register R,const APInt & DemandedElts,const MachineRegisterInfo & MRI,unsigned Depth) const58555ffd83dbSDimitry Andric unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
58565ffd83dbSDimitry Andric GISelKnownBits &Analysis, Register R,
58575ffd83dbSDimitry Andric const APInt &DemandedElts, const MachineRegisterInfo &MRI,
58585ffd83dbSDimitry Andric unsigned Depth) const {
58595ffd83dbSDimitry Andric const MachineInstr *MI = MRI.getVRegDef(R);
58605ffd83dbSDimitry Andric if (!MI)
58615ffd83dbSDimitry Andric return 1;
58625ffd83dbSDimitry Andric
58635ffd83dbSDimitry Andric // TODO: Check range metadata on MMO.
58645ffd83dbSDimitry Andric switch (MI->getOpcode()) {
58655ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
58665ffd83dbSDimitry Andric return 25;
58675ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
58685ffd83dbSDimitry Andric return 17;
58695ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
58705ffd83dbSDimitry Andric return 24;
58715ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
58725ffd83dbSDimitry Andric return 16;
587306c3fb27SDimitry Andric case AMDGPU::G_AMDGPU_SMED3:
587406c3fb27SDimitry Andric case AMDGPU::G_AMDGPU_UMED3: {
587506c3fb27SDimitry Andric auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
587606c3fb27SDimitry Andric unsigned Tmp2 = Analysis.computeNumSignBits(Src2, DemandedElts, Depth + 1);
587706c3fb27SDimitry Andric if (Tmp2 == 1)
587806c3fb27SDimitry Andric return 1;
587906c3fb27SDimitry Andric unsigned Tmp1 = Analysis.computeNumSignBits(Src1, DemandedElts, Depth + 1);
588006c3fb27SDimitry Andric if (Tmp1 == 1)
588106c3fb27SDimitry Andric return 1;
588206c3fb27SDimitry Andric unsigned Tmp0 = Analysis.computeNumSignBits(Src0, DemandedElts, Depth + 1);
588306c3fb27SDimitry Andric if (Tmp0 == 1)
588406c3fb27SDimitry Andric return 1;
58850fca6ea1SDimitry Andric return std::min({Tmp0, Tmp1, Tmp2});
588606c3fb27SDimitry Andric }
58875ffd83dbSDimitry Andric default:
58885ffd83dbSDimitry Andric return 1;
58895ffd83dbSDimitry Andric }
58905ffd83dbSDimitry Andric }
58915ffd83dbSDimitry Andric
isKnownNeverNaNForTargetNode(SDValue Op,const SelectionDAG & DAG,bool SNaN,unsigned Depth) const58920b57cec5SDimitry Andric bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
58930b57cec5SDimitry Andric const SelectionDAG &DAG,
58940b57cec5SDimitry Andric bool SNaN,
58950b57cec5SDimitry Andric unsigned Depth) const {
58960b57cec5SDimitry Andric unsigned Opcode = Op.getOpcode();
58970b57cec5SDimitry Andric switch (Opcode) {
58980b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY:
58990b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: {
59000b57cec5SDimitry Andric if (SNaN)
59010b57cec5SDimitry Andric return true;
59020b57cec5SDimitry Andric
59030b57cec5SDimitry Andric // TODO: Can check no nans on one of the operands for each one, but which
59040b57cec5SDimitry Andric // one?
59050b57cec5SDimitry Andric return false;
59060b57cec5SDimitry Andric }
59070b57cec5SDimitry Andric case AMDGPUISD::FMUL_LEGACY:
59080b57cec5SDimitry Andric case AMDGPUISD::CVT_PKRTZ_F16_F32: {
59090b57cec5SDimitry Andric if (SNaN)
59100b57cec5SDimitry Andric return true;
59110b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
59120b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
59130b57cec5SDimitry Andric }
59140b57cec5SDimitry Andric case AMDGPUISD::FMED3:
59150b57cec5SDimitry Andric case AMDGPUISD::FMIN3:
59160b57cec5SDimitry Andric case AMDGPUISD::FMAX3:
59175f757f3fSDimitry Andric case AMDGPUISD::FMINIMUM3:
59185f757f3fSDimitry Andric case AMDGPUISD::FMAXIMUM3:
59190b57cec5SDimitry Andric case AMDGPUISD::FMAD_FTZ: {
59200b57cec5SDimitry Andric if (SNaN)
59210b57cec5SDimitry Andric return true;
59220b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
59230b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
59240b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
59250b57cec5SDimitry Andric }
59260b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE0:
59270b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE1:
59280b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE2:
59290b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE3:
59300b57cec5SDimitry Andric return true;
59310b57cec5SDimitry Andric
59320b57cec5SDimitry Andric case AMDGPUISD::RCP:
59330b57cec5SDimitry Andric case AMDGPUISD::RSQ:
59340b57cec5SDimitry Andric case AMDGPUISD::RCP_LEGACY:
59350b57cec5SDimitry Andric case AMDGPUISD::RSQ_CLAMP: {
59360b57cec5SDimitry Andric if (SNaN)
59370b57cec5SDimitry Andric return true;
59380b57cec5SDimitry Andric
59390b57cec5SDimitry Andric // TODO: Need is known positive check.
59400b57cec5SDimitry Andric return false;
59410b57cec5SDimitry Andric }
594206c3fb27SDimitry Andric case ISD::FLDEXP:
59430b57cec5SDimitry Andric case AMDGPUISD::FRACT: {
59440b57cec5SDimitry Andric if (SNaN)
59450b57cec5SDimitry Andric return true;
59460b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
59470b57cec5SDimitry Andric }
59480b57cec5SDimitry Andric case AMDGPUISD::DIV_SCALE:
59490b57cec5SDimitry Andric case AMDGPUISD::DIV_FMAS:
59500b57cec5SDimitry Andric case AMDGPUISD::DIV_FIXUP:
59510b57cec5SDimitry Andric // TODO: Refine on operands.
59520b57cec5SDimitry Andric return SNaN;
59530b57cec5SDimitry Andric case AMDGPUISD::SIN_HW:
59540b57cec5SDimitry Andric case AMDGPUISD::COS_HW: {
59550b57cec5SDimitry Andric // TODO: Need check for infinity
59560b57cec5SDimitry Andric return SNaN;
59570b57cec5SDimitry Andric }
59580b57cec5SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: {
5959647cbc5dSDimitry Andric unsigned IntrinsicID = Op.getConstantOperandVal(0);
59600b57cec5SDimitry Andric // TODO: Handle more intrinsics
59610b57cec5SDimitry Andric switch (IntrinsicID) {
59620b57cec5SDimitry Andric case Intrinsic::amdgcn_cubeid:
59630b57cec5SDimitry Andric return true;
59640b57cec5SDimitry Andric
59650b57cec5SDimitry Andric case Intrinsic::amdgcn_frexp_mant: {
59660b57cec5SDimitry Andric if (SNaN)
59670b57cec5SDimitry Andric return true;
59680b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
59690b57cec5SDimitry Andric }
59700b57cec5SDimitry Andric case Intrinsic::amdgcn_cvt_pkrtz: {
59710b57cec5SDimitry Andric if (SNaN)
59720b57cec5SDimitry Andric return true;
59730b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
59740b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
59750b57cec5SDimitry Andric }
59765ffd83dbSDimitry Andric case Intrinsic::amdgcn_rcp:
59775ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq:
59785ffd83dbSDimitry Andric case Intrinsic::amdgcn_rcp_legacy:
59795ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq_legacy:
59805ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq_clamp: {
59815ffd83dbSDimitry Andric if (SNaN)
59825ffd83dbSDimitry Andric return true;
59835ffd83dbSDimitry Andric
59845ffd83dbSDimitry Andric // TODO: Need is known positive check.
59855ffd83dbSDimitry Andric return false;
59865ffd83dbSDimitry Andric }
59875ffd83dbSDimitry Andric case Intrinsic::amdgcn_trig_preop:
59880b57cec5SDimitry Andric case Intrinsic::amdgcn_fdot2:
59890b57cec5SDimitry Andric // TODO: Refine on operand
59900b57cec5SDimitry Andric return SNaN;
5991e8d8bef9SDimitry Andric case Intrinsic::amdgcn_fma_legacy:
5992e8d8bef9SDimitry Andric if (SNaN)
5993e8d8bef9SDimitry Andric return true;
5994e8d8bef9SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
5995e8d8bef9SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1) &&
5996e8d8bef9SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(3), SNaN, Depth + 1);
59970b57cec5SDimitry Andric default:
59980b57cec5SDimitry Andric return false;
59990b57cec5SDimitry Andric }
60000b57cec5SDimitry Andric }
60010b57cec5SDimitry Andric default:
60020b57cec5SDimitry Andric return false;
60030b57cec5SDimitry Andric }
60040b57cec5SDimitry Andric }
60050b57cec5SDimitry Andric
isReassocProfitable(MachineRegisterInfo & MRI,Register N0,Register N1) const600606c3fb27SDimitry Andric bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
600706c3fb27SDimitry Andric Register N0, Register N1) const {
600806c3fb27SDimitry Andric return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
600906c3fb27SDimitry Andric }
601006c3fb27SDimitry Andric
60110b57cec5SDimitry Andric TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst * RMW) const60120b57cec5SDimitry Andric AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
60130b57cec5SDimitry Andric switch (RMW->getOperation()) {
60140b57cec5SDimitry Andric case AtomicRMWInst::Nand:
60150b57cec5SDimitry Andric case AtomicRMWInst::FAdd:
60160b57cec5SDimitry Andric case AtomicRMWInst::FSub:
6017753f127fSDimitry Andric case AtomicRMWInst::FMax:
6018753f127fSDimitry Andric case AtomicRMWInst::FMin:
60190b57cec5SDimitry Andric return AtomicExpansionKind::CmpXChg;
60200fca6ea1SDimitry Andric case AtomicRMWInst::Xchg: {
60210fca6ea1SDimitry Andric const DataLayout &DL = RMW->getFunction()->getDataLayout();
60220fca6ea1SDimitry Andric unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());
60230fca6ea1SDimitry Andric if (ValSize == 32 || ValSize == 64)
60240fca6ea1SDimitry Andric return AtomicExpansionKind::None;
60250fca6ea1SDimitry Andric return AtomicExpansionKind::CmpXChg;
60260fca6ea1SDimitry Andric }
6027bdd1243dSDimitry Andric default: {
6028bdd1243dSDimitry Andric if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
6029bdd1243dSDimitry Andric unsigned Size = IntTy->getBitWidth();
6030bdd1243dSDimitry Andric if (Size == 32 || Size == 64)
60310b57cec5SDimitry Andric return AtomicExpansionKind::None;
60320b57cec5SDimitry Andric }
6033bdd1243dSDimitry Andric
6034bdd1243dSDimitry Andric return AtomicExpansionKind::CmpXChg;
6035bdd1243dSDimitry Andric }
6036bdd1243dSDimitry Andric }
60370b57cec5SDimitry Andric }
6038fe6060f1SDimitry Andric
603906c3fb27SDimitry Andric /// Whether it is profitable to sink the operands of an
604006c3fb27SDimitry Andric /// Instruction I to the basic block of I.
604106c3fb27SDimitry Andric /// This helps using several modifiers (like abs and neg) more often.
shouldSinkOperands(Instruction * I,SmallVectorImpl<Use * > & Ops) const604206c3fb27SDimitry Andric bool AMDGPUTargetLowering::shouldSinkOperands(
604306c3fb27SDimitry Andric Instruction *I, SmallVectorImpl<Use *> &Ops) const {
604406c3fb27SDimitry Andric using namespace PatternMatch;
604506c3fb27SDimitry Andric
604606c3fb27SDimitry Andric for (auto &Op : I->operands()) {
604706c3fb27SDimitry Andric // Ensure we are not already sinking this operand.
604806c3fb27SDimitry Andric if (any_of(Ops, [&](Use *U) { return U->get() == Op.get(); }))
604906c3fb27SDimitry Andric continue;
605006c3fb27SDimitry Andric
605106c3fb27SDimitry Andric if (match(&Op, m_FAbs(m_Value())) || match(&Op, m_FNeg(m_Value())))
605206c3fb27SDimitry Andric Ops.push_back(&Op);
605306c3fb27SDimitry Andric }
605406c3fb27SDimitry Andric
605506c3fb27SDimitry Andric return !Ops.empty();
605606c3fb27SDimitry Andric }
6057