//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

#define DEBUG_TYPE "lower-mem-intrinsics"

using namespace llvm;

void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;

  if (LoopEndCount != 0) {
    // Split
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
    // Loop Body
    Value *SrcGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }

  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
                                    : InsertBefore);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto *OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      // Calculate the new index
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");

      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, SrcAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, DstAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}

// \returns \p Len udiv \p OpSize, checking for optimization opportunities.
static Value *getRuntimeLoopCount(const DataLayout &DL, IRBuilderBase &B,
                                  Value *Len, Value *OpSize,
                                  unsigned OpSizeVal) {
  // For powers of 2, we can lshr by log2 instead of using udiv.
  if (isPowerOf2_32(OpSizeVal))
    return B.CreateLShr(Len, Log2_32(OpSizeVal));
  return B.CreateUDiv(Len, OpSize);
}

// \returns \p Len urem \p OpSize, checking for optimization opportunities.
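// For power-of-two operand sizes the remainder is computed with a mask
// (Len & (OpSize - 1)) rather than a urem, mirroring getRuntimeLoopCount.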
static Value *getRuntimeLoopRemainder(const DataLayout &DL, IRBuilderBase &B,
                                      Value *Len, Value *OpSize,
                                      unsigned OpSizeVal) {
  // For powers of 2, we can and by (OpSizeVal - 1) instead of using urem.
  if (isPowerOf2_32(OpSizeVal))
    return B.CreateAnd(Len, OpSizeVal - 1);
  return B.CreateURem(Len, OpSize);
}

void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount = LoopOpIsInt8
                                ? CopyLen
                                : getRuntimeLoopCount(DL, PLBuilder, CopyLen,
                                                      CILoopOpSize, LoopOpSize);

  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  bool RequiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (RequiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");

    Align ResSrcAlign(commonAlignment(PartSrcAlign, ResLoopOpSize));
    Align ResDstAlign(commonAlignment(PartDstAlign, ResLoopOpSize));

    Value *RuntimeResidual = getRuntimeLoopRemainder(DL, PLBuilder, CopyLen,
                                                     CILoopOpSize, LoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(),
                                               PostLoopBB);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Need to update the pre-loop basic block to branch to the correct place:
    // branch to the main loop if the count is non-zero, branch to the residual
    // loop if the copy size is smaller than one iteration of the main loop but
    // non-zero, and finally branch to after the residual loop if the memcpy
    // size is zero.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual with a single-byte load/store loop.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, SrcAddr, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  ResSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, DstAddr, FullOffset);
    StoreInst *Store =
        ResBuilder.CreateAlignedStore(Load, DstGEP, ResDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type was a byte, and there is no need for
    // a residual loop to copy the remaining memory after the main loop.
    // We do however need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}

// Lower memmove to IR. memmove is required to correctly copy overlapping
// memory regions; therefore, it has to check the relative positions of the
// source and destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile,
                              const TargetTransformInfo &TTI) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  // TODO: Use different element type if possible?
  Type *EltTy = Type::getInt8Ty(F->getContext());

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the
  // backwards-copy part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  ICmpInst *PtrCompare =
      new ICmpInst(InsertBefore->getIterator(), ICmpInst::ICMP_ULT, SrcAddr,
                   DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore->getIterator(),
                                &ThenTerm, &ElseTerm);

  // Each part of the function consists of two blocks:
  //   copy_backwards:      used to skip the loop when n == 0
  //   copy_backwards_loop: the actual backwards loop BB
  //   copy_forward:        used to skip the loop when n == 0
  //   copy_forward_loop:   the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // Initial comparison of n == 0 that lets us skip the loops altogether.
  // Shared between both backwards and forward copy clauses.
  ICmpInst *CompareN =
      new ICmpInst(OrigBB->getTerminator()->getIterator(), ICmpInst::ICMP_EQ,
                   CopyLen, ConstantInt::get(TypeOfCopyLen, 0),
                   "compare_n_to_0");

  // Copying backwards.
  BasicBlock *LoopBB = BasicBlock::Create(F->getContext(),
                                          "copy_backwards_loop", F,
                                          CopyForwardBB);
  IRBuilder<> LoopBuilder(LoopBB);

  PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, SrcIsVolatile, "element");
  LoopBuilder.CreateAlignedStore(
      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
      PartDstAlign, DstIsVolatile);
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  LoopPhi->addIncoming(IndexPtr, LoopBB);
  LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
  BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm->getIterator());
  ThenTerm->eraseFromParent();

  // Copying forward.
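  // The forward clause mirrors the backward one: an index phi that starts at
  // zero, one element copied per iteration, and a conditional branch back to
  // the loop until the index reaches CopyLen.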
  BasicBlock *FwdLoopBB =
      BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
  IRBuilder<> FwdLoopBuilder(FwdLoopBB);
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
  Value *FwdElement = FwdLoopBuilder.CreateAlignedLoad(
      EltTy, SrcGEP, PartSrcAlign, SrcIsVolatile, "element");
  Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
  FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign,
                                    DstIsVolatile);
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  FwdLoopBuilder.CreateCondBr(
      FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), ExitBB, FwdLoopBB);
  FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);

  BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm->getIterator());
  ElseTerm->eraseFromParent();
}

static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
                           NewBB);
}

template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}

void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  }
}

bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
                               const TargetTransformInfo &TTI) {
  Value *CopyLen = Memmove->getLength();
  Value *SrcAddr = Memmove->getRawSource();
  Value *DstAddr = Memmove->getRawDest();
  Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
  Align DstAlign = Memmove->getDestAlign().valueOrOne();
  bool SrcIsVolatile = Memmove->isVolatile();
  bool DstIsVolatile = SrcIsVolatile;
  IRBuilder<> CastBuilder(Memmove);

  unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
  unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
  if (SrcAS != DstAS) {
    if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
      // We may not be able to emit a pointer comparison, but we don't have
      // to. Expand as memcpy.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
        createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                  CI, SrcAlign, DstAlign, SrcIsVolatile,
                                  DstIsVolatile,
                                  /*CanOverlap=*/false, TTI);
      } else {
        createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                    CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
                                    DstIsVolatile,
                                    /*CanOverlap=*/false, TTI);
      }

      return true;
    }

    if (TTI.isValidAddrSpaceCast(DstAS, SrcAS))
      DstAddr = CastBuilder.CreateAddrSpaceCast(DstAddr, SrcAddr->getType());
    else if (TTI.isValidAddrSpaceCast(SrcAS, DstAS))
      SrcAddr = CastBuilder.CreateAddrSpaceCast(SrcAddr, DstAddr->getType());
    else {
      // We don't know generically if it's legal to introduce an
      // addrspacecast. We need to know either if it's legal to insert an
      // addrspacecast, or if the address spaces cannot alias.
      LLVM_DEBUG(
          dbgs() << "Do not know how to expand memmove between different "
                    "address spaces\n");
      return false;
    }
  }

  createMemMoveLoop(
      /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
      SrcIsVolatile, DstIsVolatile, TTI);
  return true;
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}