//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

using namespace llvm;

void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
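
  // The copy is emitted as a loop of LoopEndCount wide accesses plus
  // straight-line code for the leftover bytes. For example (illustrative; the
  // loop type is chosen by the target's TTI hook), CopyLen = 15 with an i32
  // loop type yields LoopEndCount = 3: twelve bytes are copied by the loop
  // and the remaining three by the residual code below.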
  if (LoopEndCount != 0) {
    // Split the original block and insert the copy loop between the halves.
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

    // Cast the Src and Dst pointers to pointers to the loop operand type (if
    // needed).
    PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
    PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
    if (SrcAddr->getType() != SrcOpType) {
      SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
    }
    if (DstAddr->getType() != DstOpType) {
      DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
    }

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
    // Loop Body
    Value *SrcGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }

  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
                                    : InsertBefore);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);
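
    // The residual is covered by a sequence of progressively narrower
    // operations rather than a loop. As an illustration (the exact sequence
    // is up to the target's TTI hook): 7 remaining bytes with sufficient
    // alignment would typically be lowered as one i32, one i16, and one i8
    // access.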
    for (auto *OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      // Calculate the new index
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");
      // Cast source to operand type and load
      PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
      Value *CastedSrc = SrcAddr->getType() == SrcPtrType
                             ? SrcAddr
                             : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }
      // Cast destination to operand type and store.
      PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
      Value *CastedDst = DstAddr->getType() == DstPtrType
                             ? DstAddr
                             : RBuilder.CreateBitCast(DstAddr, DstPtrType);
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}

void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
  PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
  if (SrcAddr->getType() != SrcOpType) {
    SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
  }
  if (DstAddr->getType() != DstOpType) {
    DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
  }

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount = LoopOpIsInt8 ?
                            CopyLen :
                            PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");

    // Compute the number of residual bytes and the byte count already covered
    // by the main loop.
    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(),
                                               PostLoopBB);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
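
    // The control flow stitched together below is, schematically:
    //
    //   pre-loop:   count != 0       ? loop-memcpy-expansion : residual-header
    //   loop:       index < count    ? loop-memcpy-expansion : residual-header
    //   res-header: residual != 0    ? loop-memcpy-residual  : post-loop
    //   res-loop:   index < residual ? loop-memcpy-residual  : post-loop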
    // Update the pre-loop basic block to branch to the correct place: branch
    // to the main loop if the trip count is non-zero, branch to the residual
    // loop if the copy size is smaller than one iteration of the main loop
    // but non-zero, and finally branch past the residual loop if the memcpy
    // size is zero.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual with a loop of single-byte (or, for the atomic case,
    // element-sized) loads and stores.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
        SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
    Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
        DstAddr, PointerType::get(ResLoopOpType, DstAS));
    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, SrcAsResLoopOpType, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, DstAsResLoopOpType, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type is a single byte (or, for atomic
    // memcpy, a single atomic element), so there is no need for a residual
    // loop to copy the remaining memory after the main loop.
    // We do however need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}

// Lower memmove to IR. memmove is required to correctly copy overlapping
// memory regions; therefore, it has to check the relative positions of the
// source and destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
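//
// Schematically, the CFG built below mirrors that if-else:
//
//              compare_src_dst
//               /           \
//     copy_backwards      copy_forward      (each skips its loop if n == 0)
//          |                   |
//   copy_backwards_loop  copy_forward_loop
//               \           /
//               memmove_done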
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  // TODO: Use different element type if possible?
  IRBuilder<> CastBuilder(InsertBefore);
  Type *EltTy = CastBuilder.getInt8Ty();
  Type *PtrTy =
      CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace());
  SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy);
  DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy);

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the
  // backwards-copy part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
                                      SrcAddr, DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
                                &ElseTerm);

  // Each part of the function consists of two blocks:
  //   copy_backwards:      used to skip the loop when n == 0
  //   copy_backwards_loop: the actual backwards loop BB
  //   copy_forward:        used to skip the loop when n == 0
  //   copy_forward_loop:   the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // Initial comparison of n == 0 that lets us skip the loops altogether.
  // Shared between both backwards and forward copy clauses.
  ICmpInst *CompareN =
      new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
                   ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
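
  // When the regions overlap and dst is above src, the copy must proceed from
  // the highest address downwards so that every source byte is read before
  // the destination write can clobber it; the forward loop handles the
  // src >= dst case.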
  // Copying backwards.
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "copy_backwards_loop", F,
                         CopyForwardBB);
  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  LoopBuilder.CreateAlignedStore(
      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
      PartDstAlign);
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  LoopPhi->addIncoming(IndexPtr, LoopBB);
  LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
  BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
  ThenTerm->eraseFromParent();

  // Copying forward.
  BasicBlock *FwdLoopBB =
      BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
  IRBuilder<> FwdLoopBuilder(FwdLoopBB);
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
  Value *FwdElement =
      FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
  Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
  FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
                              ExitBB, FwdLoopBB);
  FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);

  BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
  ElseTerm->eraseFromParent();
}

static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  // Cast the pointer to the type of the value being stored.
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
  DstAddr = Builder.CreateBitCast(
      DstAddr, PointerType::get(SetValue->getType(), DstAS));

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
                           NewBB);
}
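
// memcpy's contract requires the source and destination either to be exactly
// equal or not to overlap at all, so proving the two pointers unequal via
// SCEV is enough to rule out any partial overlap; when it succeeds, the
// expansion can attach noalias scopes to the generated loops.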
template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}

void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  }
}

void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
  createMemMoveLoop(/* InsertBefore */ Memmove,
                    /* SrcAddr */ Memmove->getRawSource(),
                    /* DstAddr */ Memmove->getRawDest(),
                    /* CopyLen */ Memmove->getLength(),
                    /* SrcAlign */ Memmove->getSourceAlign().valueOrOne(),
                    /* DestAlign */ Memmove->getDestAlign().valueOrOne(),
                    /* SrcIsVolatile */ Memmove->isVolatile(),
                    /* DstIsVolatile */ Memmove->isVolatile());
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}