//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                                     Value *DstAddr, ConstantInt *CopyLen,
                                     Align SrcAlign, Align DstAlign,
                                     bool SrcIsVolatile, bool DstIsVolatile,
                                     bool CanOverlap,
                                     const TargetTransformInfo &TTI,
                                     Optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;

  if (LoopEndCount != 0) {
    // Split the original block so the copy loop can be inserted before
    // InsertBefore.
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
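
    // As an illustration of the structure emitted below (a sketch, not
    // verbatim output; the concrete values are assumptions for the example):
    // for CopyLen == 260 and an i128 loop operand type, LoopOpSize == 16 and
    // LoopEndCount == 16, so the main loop becomes roughly
    //
    //   load-store-loop:
    //     %loop-index = phi i64 [ 0, %entry ], [ %next, %load-store-loop ]
    //     %s = getelementptr inbounds i128, i128* %src, i64 %loop-index
    //     %v = load i128, i128* %s
    //     %d = getelementptr inbounds i128, i128* %dst, i64 %loop-index
    //     store i128 %v, i128* %d
    //     %next = add i64 %loop-index, 1
    //     %cond = icmp ult i64 %next, 16
    //     br i1 %cond, label %load-store-loop, label %memcpy-split
    //
    // while the remaining 260 - 256 == 4 bytes are handled by the
    // straight-line residual code emitted further down.
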
    // Cast the Src and Dst pointers to pointers to the loop operand type (if
    // needed).
    PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
    PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
    if (SrcAddr->getType() != SrcOpType) {
      SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
    }
    if (DstAddr->getType() != DstOpType) {
      DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
    }

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
    // Loop Body
    Value *SrcGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }

  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
                                    : InsertBefore);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      // Calculate the new index.
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");
      // Cast source to operand type and load.
      PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
      Value *CastedSrc = SrcAddr->getType() == SrcPtrType
                             ? SrcAddr
                             : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }
      // Cast destination to operand type and store.
      PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
      Value *CastedDst = DstAddr->getType() == DstPtrType
                             ? DstAddr
                             : RBuilder.CreateBitCast(DstAddr, DstPtrType);
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP,
                                                     PartDstAlign, DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}
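
// Lower a memcpy whose length is only known at run time. As a sketch, the
// emitted control flow is (block names match those created below):
//
//   pre-loop:
//     RuntimeLoopCount = CopyLen / LoopOpSize
//     branch to loop-memcpy-expansion when the count is non-zero, otherwise
//     to loop-memcpy-residual-header (or directly to the post block when no
//     residual is required)
//   loop-memcpy-expansion:        the main wide-type copy loop
//   loop-memcpy-residual-header:  taken only when CopyLen % LoopOpSize != 0
//   loop-memcpy-residual:         byte (or atomic-element) copy loop
//   post-loop-memcpy-expansion:   rejoin point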
void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
                                       Value *SrcAddr, Value *DstAddr,
                                       Value *CopyLen, Align SrcAlign,
                                       Align DstAlign, bool SrcIsVolatile,
                                       bool DstIsVolatile, bool CanOverlap,
                                       const TargetTransformInfo &TTI,
                                       Optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
  PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
  if (SrcAddr->getType() != SrcOpType) {
    SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
  }
  if (DstAddr->getType() != DstOpType) {
    DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
  }

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
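  // The division below computes the main-loop trip count; when the loop
  // operand type is a single byte it would just reproduce CopyLen, so it is
  // skipped. Worked example (illustrative values only): CopyLen == 10 with an
  // i32 loop operand gives 10 / 4 == 2 main-loop iterations plus a 2-byte
  // residual handled further down.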
  Value *RuntimeLoopCount = LoopOpIsInt8
                                ? CopyLen
                                : PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");

    // Compute the residual byte count and the number of bytes copied by the
    // main loop.
    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(),
                                               PostLoopBB);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Need to update the pre-loop basic block to branch to the correct place:
    // branch to the main loop if the count is non-zero, to the residual loop
    // if the copy size is smaller than one iteration of the main loop but
    // non-zero, and to the block after the residual loop if the memcpy size is
    // zero.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);
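
    // Note: RuntimeResidual is CopyLen % LoopOpSize and hence strictly less
    // than LoopOpSize, so the residual loop below executes at most
    // LoopOpSize / ResLoopOpSize - 1 iterations.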
    // Copy the residual with single byte load/store loop.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
        SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
    Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
        DstAddr, PointerType::get(ResLoopOpType, DstAS));
    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, SrcAsResLoopOpType, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, DstAsResLoopOpType, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP,
                                                     PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type is a single byte (or, for an atomic
    // element-wise memcpy, a single atomic element), so there is no need for
    // a residual loop to copy the remaining memory after the main loop.
    // We do however need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}

// Lower memmove to IR. memmove is required to correctly copy overlapping memory
// regions; therefore, it has to check the relative positions of the source and
// destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
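//
// As a sketch, the emitted control flow is (block names match those set
// below):
//
//   entry:               compare src with dst, then n with 0
//   copy_backwards:      guard block, skips the loop when n == 0
//   copy_backwards_loop: copies d[n-1], d[n-2], ..., d[0]
//   copy_forward:        guard block, skips the loop when n == 0
//   copy_forward_loop:   copies d[0], d[1], ..., d[n-1]
//   memmove_done:        exit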
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  // TODO: Use different element type if possible?
  IRBuilder<> CastBuilder(InsertBefore);
  Type *EltTy = CastBuilder.getInt8Ty();
  Type *PtrTy =
      CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace());
  SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy);
  DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy);

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the
  // backwards-copy part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
                                      SrcAddr, DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
                                &ElseTerm);

  // Each part of the function consists of two blocks:
  //   copy_backwards:      used to skip the loop when n == 0
  //   copy_backwards_loop: the actual backwards loop BB
  //   copy_forward:        used to skip the loop when n == 0
  //   copy_forward_loop:   the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // Initial comparison of n == 0 that lets us skip the loops altogether.
  // Shared between both backwards and forward copy clauses.
  ICmpInst *CompareN =
      new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
                   ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");

  // Copying backwards.
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "copy_backwards_loop", F,
                         CopyForwardBB);
  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  LoopBuilder.CreateAlignedStore(
      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
      PartDstAlign);
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  LoopPhi->addIncoming(IndexPtr, LoopBB);
  LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
  BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
  ThenTerm->eraseFromParent();
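
  // In the else case (src >= dst) the regions either do not overlap or the
  // destination precedes the source, so copying forward reads each byte
  // before the copy can overwrite it.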
  // Copying forward.
  BasicBlock *FwdLoopBB =
      BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
  IRBuilder<> FwdLoopBuilder(FwdLoopBB);
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
  Value *FwdElement =
      FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
  Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
  FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
                              ExitBB, FwdLoopBB);
  FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);

  BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
  ElseTerm->eraseFromParent();
}

static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  // Cast pointer to the type of value getting stored.
  unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
  DstAddr = Builder.CreateBitCast(DstAddr,
                                  PointerType::get(SetValue->getType(), dstAS));

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
                           NewBB);
}

template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}
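
// When canOverlap() proves the source and destination pointers distinct, the
// expansion tags the emitted loads and stores with alias.scope/noalias
// metadata in the same scope, which lets later passes reorder them across
// each other.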
void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  }
}

void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
  createMemMoveLoop(/* InsertBefore */ Memmove,
                    /* SrcAddr */ Memmove->getRawSource(),
                    /* DstAddr */ Memmove->getRawDest(),
                    /* CopyLen */ Memmove->getLength(),
                    /* SrcAlign */ Memmove->getSourceAlign().valueOrOne(),
                    /* DestAlign */ Memmove->getDestAlign().valueOrOne(),
                    /* SrcIsVolatile */ Memmove->isVolatile(),
                    /* DstIsVolatile */ Memmove->isVolatile());
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}