//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARMSelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
#include <cstring>
using namespace llvm;

#define DEBUG_TYPE "arm-selectiondag-info"

// Emit, if possible, a specialized version of the given Libcall. Typically this
// means selecting the appropriately aligned version, but we also convert memset
// of 0 into memclr.
SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
  const ARMSubtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
  const ARMTargetLowering *TLI = Subtarget.getTargetLowering();

  // Only use a specialized AEABI function if the default version of this
  // Libcall is an AEABI function.
  if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
    return SDValue();

  // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
  // able to translate memset to memclr and use the value to index the function
  // name array.
  enum {
    AEABI_MEMCPY = 0,
    AEABI_MEMMOVE,
    AEABI_MEMSET,
    AEABI_MEMCLR
  } AEABILibcall;
  switch (LC) {
  case RTLIB::MEMCPY:
    AEABILibcall = AEABI_MEMCPY;
    break;
  case RTLIB::MEMMOVE:
    AEABILibcall = AEABI_MEMMOVE;
    break;
  case RTLIB::MEMSET:
    AEABILibcall = AEABI_MEMSET;
    if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
      if (ConstantSrc->getZExtValue() == 0)
        AEABILibcall = AEABI_MEMCLR;
    break;
  default:
    return SDValue();
  }

  // Choose the most-aligned libcall variant that we can
  enum {
    ALIGN1 = 0,
    ALIGN4,
    ALIGN8
  } AlignVariant;
  if ((Align & 7) == 0)
    AlignVariant = ALIGN8;
  else if ((Align & 3) == 0)
    AlignVariant = ALIGN4;
  else
    AlignVariant = ALIGN1;

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
  Entry.Node = Dst;
  Args.push_back(Entry);
  if (AEABILibcall == AEABI_MEMCLR) {
    Entry.Node = Size;
    Args.push_back(Entry);
  } else if (AEABILibcall == AEABI_MEMSET) {
    // Adjust parameters for memset, EABI uses format (ptr, size, value),
    // GNU library uses (ptr, value, size)
    // See RTABI section 4.3.4
    Entry.Node = Size;
    Args.push_back(Entry);

    // Extend or truncate the argument to be an i32 value for the call.
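    // (RTABI declares the fill value as int, i.e.
    // __aeabi_memset(void *dest, size_t n, int c), so it is passed as i32.)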
    if (Src.getValueType().bitsGT(MVT::i32))
      Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
    else if (Src.getValueType().bitsLT(MVT::i32))
      Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);

    Entry.Node = Src;
    Entry.Ty = Type::getInt32Ty(*DAG.getContext());
    Entry.IsSExt = false;
    Args.push_back(Entry);
  } else {
    Entry.Node = Src;
    Args.push_back(Entry);

    Entry.Node = Size;
    Args.push_back(Entry);
  }

  char const *FunctionNames[4][3] = {
    { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
    { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
    { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
    { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
  };
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setLibCallee(
          TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
          DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
                                TLI->getPointerTy(DAG.getDataLayout())),
          std::move(Args))
      .setDiscardResult();
  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);

  return CallResult.second;
}

SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  const ARMSubtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
  // Do repeated 4-byte loads and stores. To be improved.
  // This requires 4-byte alignment.
  if (Alignment < Align(4))
    return SDValue();
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
                                  Alignment.value(), RTLIB::MEMCPY);
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
                                  Alignment.value(), RTLIB::MEMCPY);

  unsigned BytesLeft = SizeVal & 3;
  unsigned NumMemOps = SizeVal >> 2;
  unsigned EmittedNumMemOps = 0;
  EVT VT = MVT::i32;
  unsigned VTSize = 4;
  unsigned i = 0;
  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
  const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
  SDValue TFOps[6];
  SDValue Loads[6];
  uint64_t SrcOff = 0, DstOff = 0;

  // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
  // VLDM/VSTM and make this code emit it when appropriate. This would reduce
  // pressure on the general purpose registers. However this seems harder to map
  // onto the register allocator's view of the world.

  // The number of MEMCPY pseudo-instructions to emit. We use up to
  // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
  // later on. This is a lower bound on the number of MEMCPY operations we must
  // emit.
  unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;

  // Code size optimisation: do not inline memcpy if expansion results in
  // more instructions than the library call.
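  // A single MEMCPY pseudo lowers to roughly one LDM/STM pair, which is
  // assumed to be no larger than the libcall's argument setup and call, so
  // only multi-block expansions are rejected when optimising for size.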
  if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
    return SDValue();
  }

  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);

  for (unsigned I = 0; I != NumMEMCPYs; ++I) {
    // Evenly distribute registers among MEMCPY operations to reduce register
    // pressure.
    unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
    unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;

    Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
                      DAG.getConstant(NumRegs, dl, MVT::i32));
    Src = Dst.getValue(1);
    Chain = Dst.getValue(2);

    DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
    SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);

    EmittedNumMemOps = NextEmittedNumMemOps;
  }

  if (BytesLeft == 0)
    return Chain;

  // Issue loads / stores for the trailing (1 - 3) bytes.
  auto getRemainingValueType = [](unsigned BytesLeft) {
    return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
  };
  auto getRemainingSize = [](unsigned BytesLeft) {
    return (BytesLeft >= 2) ? 2 : 1;
  };

  unsigned BytesLeftSave = BytesLeft;
  i = 0;
  while (BytesLeft) {
    VT = getRemainingValueType(BytesLeft);
    VTSize = getRemainingSize(BytesLeft);
    Loads[i] = DAG.getLoad(VT, dl, Chain,
                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                       DAG.getConstant(SrcOff, dl, MVT::i32)),
                           SrcPtrInfo.getWithOffset(SrcOff));
    TFOps[i] = Loads[i].getValue(1);
    ++i;
    SrcOff += VTSize;
    BytesLeft -= VTSize;
  }
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      makeArrayRef(TFOps, i));

  i = 0;
  BytesLeft = BytesLeftSave;
  while (BytesLeft) {
    VT = getRemainingValueType(BytesLeft);
    VTSize = getRemainingSize(BytesLeft);
    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                        DAG.getConstant(DstOff, dl, MVT::i32)),
                            DstPtrInfo.getWithOffset(DstOff));
    ++i;
    DstOff += VTSize;
    BytesLeft -= VTSize;
  }
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                     makeArrayRef(TFOps, i));
}

SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, Align Alignment, bool isVolatile,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
                                Alignment.value(), RTLIB::MEMMOVE);
}

SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, Align Alignment, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
                                Alignment.value(), RTLIB::MEMSET);
}