//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

#define DEBUG_TYPE "lower-mem-intrinsics"

using namespace llvm;

void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;

  if (LoopEndCount != 0) {
    // Split
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
    // Loop Body
    Value *SrcGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
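      // The matching store below gets !noalias on the same scope, so later
      // passes may assume the loads and stores in this loop do not overlap.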
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }

  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
                                    : InsertBefore);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto *OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      // Calculate the new index
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");

      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, SrcAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, DstAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}

void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount = LoopOpIsInt8
                                ? CopyLen
                                : PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert((ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1)) &&
           "Store size is expected to match type size");

    // Compute the residual byte count and the number of bytes copied by the
    // main loop.
    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(),
                                               PostLoopBB);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Need to update the pre-loop basic block to branch to the correct place:
    // branch to the main loop if the count is non-zero, branch to the residual
    // loop if the copy size is smaller than one iteration of the main loop but
    // non-zero, and finally branch past the residual loop if the memcpy size
    // is zero.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual with a single byte load/store loop.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, SrcAddr, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, DstAddr, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
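    // Keep iterating while the residual index is still below the number of
    // leftover bytes computed by the URem above.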
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type was a byte, and there is no need for
    // a residual loop to copy the remaining memory after the main loop.
    // We do however need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}

// Lower memmove to IR. memmove is required to correctly copy overlapping
// memory regions; therefore, it has to check the relative positions of the
// source and destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile,
                              const TargetTransformInfo &TTI) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  // TODO: Use different element type if possible?
  Type *EltTy = Type::getInt8Ty(F->getContext());

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the
  // backwards-copy part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
                                      SrcAddr, DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
                                &ElseTerm);

  // Each part of the function consists of two blocks:
  //   copy_backwards:      used to skip the loop when n == 0
  //   copy_backwards_loop: the actual backwards loop BB
  //   copy_forward:        used to skip the loop when n == 0
  //   copy_forward_loop:   the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // Initial comparison of n == 0 that lets us skip the loops altogether.
  // Shared between both backwards and forward copy clauses.
  ICmpInst *CompareN =
      new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
                   ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");

  // Copying backwards.
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "copy_backwards_loop", F,
                         CopyForwardBB);
  IRBuilder<> LoopBuilder(LoopBB);

  PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  LoopBuilder.CreateAlignedStore(
      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
      PartDstAlign);
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  LoopPhi->addIncoming(IndexPtr, LoopBB);
  LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
  BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
  ThenTerm->eraseFromParent();

  // Copying forward.
  BasicBlock *FwdLoopBB =
      BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
  IRBuilder<> FwdLoopBuilder(FwdLoopBB);
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
  Value *FwdElement =
      FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
  Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
  FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
                              ExitBB, FwdLoopBB);
  FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);

  BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
  ElseTerm->eraseFromParent();
}

static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

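  // Loop back until the store index reaches the requested length.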
  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
                           NewBB);
}

template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}

void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  }
}

bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
                               const TargetTransformInfo &TTI) {
  Value *CopyLen = Memmove->getLength();
  Value *SrcAddr = Memmove->getRawSource();
  Value *DstAddr = Memmove->getRawDest();
  Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
  Align DstAlign = Memmove->getDestAlign().valueOrOne();
  bool SrcIsVolatile = Memmove->isVolatile();
  bool DstIsVolatile = SrcIsVolatile;
  IRBuilder<> CastBuilder(Memmove);

  unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
  unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
  if (SrcAS != DstAS) {
    if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
      // We may not be able to emit a pointer comparison, but we don't have
      // to. Expand as memcpy.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
        createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                  CI, SrcAlign, DstAlign, SrcIsVolatile,
                                  DstIsVolatile,
                                  /*CanOverlap=*/false, TTI);
      } else {
        createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                    CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
                                    DstIsVolatile,
                                    /*CanOverlap=*/false, TTI);
      }

      return true;
    }

    if (TTI.isValidAddrSpaceCast(DstAS, SrcAS))
      DstAddr = CastBuilder.CreateAddrSpaceCast(DstAddr, SrcAddr->getType());
    else if (TTI.isValidAddrSpaceCast(SrcAS, DstAS))
      SrcAddr = CastBuilder.CreateAddrSpaceCast(SrcAddr, DstAddr->getType());
    else {
      // We don't know generically if it's legal to introduce an
      // addrspacecast. We need to know either if it's legal to insert an
      // addrspacecast, or if the address spaces cannot alias.
      LLVM_DEBUG(
          dbgs() << "Do not know how to expand memmove between different "
                    "address spaces\n");
      return false;
    }
  }

  createMemMoveLoop(
      /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
      SrcIsVolatile, DstIsVolatile, TTI);
  return true;
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}