//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

#define DEBUG_TYPE "lower-mem-intrinsics"

using namespace llvm;

void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
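  // Worked example (editorial sketch, not from the original source): for a
  // 15-byte copy with an i32 loop operand, LoopOpSize is 4, so the main loop
  // runs LoopEndCount = 3 times and the remaining 3 bytes are covered by the
  // residual code emitted further down.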

  if (LoopEndCount != 0) {
    // Split
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

    // Cast the Src and Dst pointers to pointers to the loop operand type (if
    // needed).
    PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
    PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
    if (SrcAddr->getType() != SrcOpType) {
      SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
    }
    if (DstAddr->getType() != DstOpType) {
      DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
    }

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
    // Loop Body
    Value *SrcGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }

  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
                                    : InsertBefore);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);
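    // Editorial note: the target returns a sequence of progressively
    // narrower types that exactly covers the residual; e.g. 7 remaining
    // bytes might decompose into {i32, i16, i8} (illustrative only; the
    // actual decomposition is target-dependent).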

    for (auto *OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      // Calculate the new index
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");
      // Cast source to operand type and load
      PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
      Value *CastedSrc = SrcAddr->getType() == SrcPtrType
                             ? SrcAddr
                             : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }
      // Cast destination to operand type and store.
      PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
      Value *CastedDst = DstAddr->getType() == DstPtrType
                             ? DstAddr
                             : RBuilder.CreateBitCast(DstAddr, DstPtrType);
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}

void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
  PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
  if (SrcAddr->getType() != SrcOpType) {
    SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
  }
  if (DstAddr->getType() != DstOpType) {
    DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
  }

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount = LoopOpIsInt8
                                ? CopyLen
                                : PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
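  // Editorial sketch of the IR this produces for a non-byte loop operand,
  // assuming an i64 size type and a 4-byte LoopOpType (illustrative only):
  //   %count = udiv i64 %n, 4
  // and, when a residual loop is required below:
  //   %rem = urem i64 %n, 4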
  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert((ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1)) &&
           "Store size is expected to match type size");

    // Compute the number of residual bytes and the number of bytes copied by
    // the main loop.
    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(),
                                               PostLoopBB);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Update the pre-loop basic block to branch to the correct place:
    // branch to the main loop if the count is non-zero, to the residual
    // loop if the copy size is smaller than one iteration of the main loop
    // but non-zero, and past the residual loop if the memcpy size is zero.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);
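    // Resulting control flow (editorial sketch):
    //   pre-loop:        count != 0    ? main loop     : residual header
    //   main loop:       index < count ? main loop     : residual header
    //   residual header: residual != 0 ? residual loop : post-loop
    //   residual loop:   index < rem   ? residual loop : post-loop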

    // Copy the residual with a single-element (byte or atomic element)
    // load/store loop.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
        SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
    Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
        DstAddr, PointerType::get(ResLoopOpType, DstAS));
    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, SrcAsResLoopOpType, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, DstAsResLoopOpType, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type was a byte, and there is no need
    // for a residual loop to copy the remaining memory after the main loop.
    // We do however need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}

// Lower memmove to IR. memmove is required to correctly copy overlapping
// memory regions; therefore, it has to check the relative positions of the
// source and destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile,
                              const TargetTransformInfo &TTI) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  // TODO: Use different element type if possible?
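  // Editorial note extending the TODO: a wider element type would need the
  // same alignment handling and residual copy that createMemCpyLoop* perform
  // above; the current expansion keeps memmove byte-wise for simplicity.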
  Type *EltTy = Type::getInt8Ty(F->getContext());

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the
  // backwards-copy part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
                                      SrcAddr, DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
                                &ElseTerm);

  // Each part of the function consists of two blocks:
  //   copy_backwards:      used to skip the loop when n == 0
  //   copy_backwards_loop: the actual backwards loop BB
  //   copy_forward:        used to skip the loop when n == 0
  //   copy_forward_loop:   the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // Initial comparison of n == 0 that lets us skip the loops altogether.
  // Shared between both backwards and forward copy clauses.
  ICmpInst *CompareN =
      new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
                   ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");

  // Copying backwards.
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "copy_backwards_loop", F,
                         CopyForwardBB);
  IRBuilder<> LoopBuilder(LoopBB);

  PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  LoopBuilder.CreateAlignedStore(
      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
      PartDstAlign);
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  LoopPhi->addIncoming(IndexPtr, LoopBB);
  LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
  BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
  ThenTerm->eraseFromParent();
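  // Editorial note: the backwards loop decrements the index before each
  // access, so for n == 3 it copies indices 2, 1, 0 and exits once the
  // decremented index reaches 0.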

  // Copying forward.
  BasicBlock *FwdLoopBB =
      BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
  IRBuilder<> FwdLoopBuilder(FwdLoopBB);
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
  Value *FwdElement =
      FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
  Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
  FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
                              ExitBB, FwdLoopBB);
  FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);

  BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
  ElseTerm->eraseFromParent();
}

static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  // Cast pointer to the type of value getting stored
  unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
  DstAddr = Builder.CreateBitCast(DstAddr,
                                  PointerType::get(SetValue->getType(), dstAS));

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
                           NewBB);
}

template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}

void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  }
}
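// Usage sketch (editorial): a pass that holds TTI, and optionally SE, can
// expand a memcpy in place and then drop the intrinsic:
//   if (auto *MC = dyn_cast<MemCpyInst>(&I)) {
//     expandMemCpyAsLoop(MC, TTI, SE);
//     MC->eraseFromParent();
//   }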

bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
                               const TargetTransformInfo &TTI) {
  Value *CopyLen = Memmove->getLength();
  Value *SrcAddr = Memmove->getRawSource();
  Value *DstAddr = Memmove->getRawDest();
  Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
  Align DstAlign = Memmove->getDestAlign().valueOrOne();
  bool SrcIsVolatile = Memmove->isVolatile();
  bool DstIsVolatile = SrcIsVolatile;
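  // Editorial note: the memmove intrinsic carries a single volatile flag, so
  // source and destination volatility are necessarily the same here.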
  IRBuilder<> CastBuilder(Memmove);

  unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
  unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
  if (SrcAS != DstAS) {
    if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
      // We may not be able to emit a pointer comparison, but we don't have
      // to. Expand as memcpy.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
        createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                  CI, SrcAlign, DstAlign, SrcIsVolatile,
                                  DstIsVolatile,
                                  /*CanOverlap=*/false, TTI);
      } else {
        createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                    CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
                                    DstIsVolatile,
                                    /*CanOverlap=*/false, TTI);
      }

      return true;
    }

    if (TTI.isValidAddrSpaceCast(DstAS, SrcAS))
      DstAddr = CastBuilder.CreateAddrSpaceCast(DstAddr, SrcAddr->getType());
    else if (TTI.isValidAddrSpaceCast(SrcAS, DstAS))
      SrcAddr = CastBuilder.CreateAddrSpaceCast(SrcAddr, DstAddr->getType());
    else {
      // We don't know generically if it's legal to introduce an
      // addrspacecast. We need to know either if it's legal to insert an
      // addrspacecast, or if the address spaces cannot alias.
      LLVM_DEBUG(
          dbgs() << "Do not know how to expand memmove between different "
                    "address spaces\n");
      return false;
    }
  }

  createMemMoveLoop(
      /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
      SrcIsVolatile, DstIsVolatile, TTI);
  return true;
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}