1 //===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass tries to expand memcmp() calls into optimally-sized loads and 10 // compares for the target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/CodeGen/ExpandMemCmp.h" 15 #include "llvm/ADT/Statistic.h" 16 #include "llvm/Analysis/ConstantFolding.h" 17 #include "llvm/Analysis/DomTreeUpdater.h" 18 #include "llvm/Analysis/LazyBlockFrequencyInfo.h" 19 #include "llvm/Analysis/ProfileSummaryInfo.h" 20 #include "llvm/Analysis/TargetLibraryInfo.h" 21 #include "llvm/Analysis/TargetTransformInfo.h" 22 #include "llvm/Analysis/ValueTracking.h" 23 #include "llvm/CodeGen/TargetPassConfig.h" 24 #include "llvm/CodeGen/TargetSubtargetInfo.h" 25 #include "llvm/IR/Dominators.h" 26 #include "llvm/IR/IRBuilder.h" 27 #include "llvm/IR/PatternMatch.h" 28 #include "llvm/InitializePasses.h" 29 #include "llvm/Target/TargetMachine.h" 30 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 31 #include "llvm/Transforms/Utils/Local.h" 32 #include "llvm/Transforms/Utils/SizeOpts.h" 33 #include <optional> 34 35 using namespace llvm; 36 using namespace llvm::PatternMatch; 37 38 namespace llvm { 39 class TargetLowering; 40 } 41 42 #define DEBUG_TYPE "expand-memcmp" 43 44 STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); 45 STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size"); 46 STATISTIC(NumMemCmpGreaterThanMax, 47 "Number of memcmp calls with size greater than max size"); 48 STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); 49 50 static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock( 51 "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), 52 cl::desc("The number of loads per basic block for inline expansion of " 53 "memcmp that is only being compared against zero.")); 54 55 static cl::opt<unsigned> MaxLoadsPerMemcmp( 56 "max-loads-per-memcmp", cl::Hidden, 57 cl::desc("Set maximum number of loads used in expanded memcmp")); 58 59 static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize( 60 "max-loads-per-memcmp-opt-size", cl::Hidden, 61 cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz")); 62 63 namespace { 64 65 66 // This class provides helper functions to expand a memcmp library call into an 67 // inline expansion. 68 class MemCmpExpansion { 69 struct ResultBlock { 70 BasicBlock *BB = nullptr; 71 PHINode *PhiSrc1 = nullptr; 72 PHINode *PhiSrc2 = nullptr; 73 74 ResultBlock() = default; 75 }; 76 77 CallInst *const CI = nullptr; 78 ResultBlock ResBlock; 79 const uint64_t Size; 80 unsigned MaxLoadSize = 0; 81 uint64_t NumLoadsNonOneByte = 0; 82 const uint64_t NumLoadsPerBlockForZeroCmp; 83 std::vector<BasicBlock *> LoadCmpBlocks; 84 BasicBlock *EndBlock = nullptr; 85 PHINode *PhiRes = nullptr; 86 const bool IsUsedForZeroCmp; 87 const DataLayout &DL; 88 DomTreeUpdater *DTU = nullptr; 89 IRBuilder<> Builder; 90 // Represents the decomposition in blocks of the expansion. For example, 91 // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and 92 // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}. 93 struct LoadEntry { 94 LoadEntry(unsigned LoadSize, uint64_t Offset) 95 : LoadSize(LoadSize), Offset(Offset) { 96 } 97 98 // The size of the load for this block, in bytes. 99 unsigned LoadSize; 100 // The offset of this load from the base pointer, in bytes. 101 uint64_t Offset; 102 }; 103 using LoadEntryVector = SmallVector<LoadEntry, 8>; 104 LoadEntryVector LoadSequence; 105 106 void createLoadCmpBlocks(); 107 void createResultBlock(); 108 void setupResultBlockPHINodes(); 109 void setupEndBlockPHINodes(); 110 Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex); 111 void emitLoadCompareBlock(unsigned BlockIndex); 112 void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, 113 unsigned &LoadIndex); 114 void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes); 115 void emitMemCmpResultBlock(); 116 Value *getMemCmpExpansionZeroCase(); 117 Value *getMemCmpEqZeroOneBlock(); 118 Value *getMemCmpOneBlock(); 119 struct LoadPair { 120 Value *Lhs = nullptr; 121 Value *Rhs = nullptr; 122 }; 123 LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType, 124 Type *CmpSizeType, unsigned OffsetBytes); 125 126 static LoadEntryVector 127 computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes, 128 unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte); 129 static LoadEntryVector 130 computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize, 131 unsigned MaxNumLoads, 132 unsigned &NumLoadsNonOneByte); 133 134 static void optimiseLoadSequence( 135 LoadEntryVector &LoadSequence, 136 const TargetTransformInfo::MemCmpExpansionOptions &Options, 137 bool IsUsedForZeroCmp); 138 139 public: 140 MemCmpExpansion(CallInst *CI, uint64_t Size, 141 const TargetTransformInfo::MemCmpExpansionOptions &Options, 142 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout, 143 DomTreeUpdater *DTU); 144 145 unsigned getNumBlocks(); 146 uint64_t getNumLoads() const { return LoadSequence.size(); } 147 148 Value *getMemCmpExpansion(); 149 }; 150 151 MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence( 152 uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes, 153 const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) { 154 NumLoadsNonOneByte = 0; 155 LoadEntryVector LoadSequence; 156 uint64_t Offset = 0; 157 while (Size && !LoadSizes.empty()) { 158 const unsigned LoadSize = LoadSizes.front(); 159 const uint64_t NumLoadsForThisSize = Size / LoadSize; 160 if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { 161 // Do not expand if the total number of loads is larger than what the 162 // target allows. Note that it's important that we exit before completing 163 // the expansion to avoid using a ton of memory to store the expansion for 164 // large sizes. 165 return {}; 166 } 167 if (NumLoadsForThisSize > 0) { 168 for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { 169 LoadSequence.push_back({LoadSize, Offset}); 170 Offset += LoadSize; 171 } 172 if (LoadSize > 1) 173 ++NumLoadsNonOneByte; 174 Size = Size % LoadSize; 175 } 176 LoadSizes = LoadSizes.drop_front(); 177 } 178 return LoadSequence; 179 } 180 181 MemCmpExpansion::LoadEntryVector 182 MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size, 183 const unsigned MaxLoadSize, 184 const unsigned MaxNumLoads, 185 unsigned &NumLoadsNonOneByte) { 186 // These are already handled by the greedy approach. 187 if (Size < 2 || MaxLoadSize < 2) 188 return {}; 189 190 // We try to do as many non-overlapping loads as possible starting from the 191 // beginning. 192 const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize; 193 assert(NumNonOverlappingLoads && "there must be at least one load"); 194 // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with 195 // an overlapping load. 196 Size = Size - NumNonOverlappingLoads * MaxLoadSize; 197 // Bail if we do not need an overloapping store, this is already handled by 198 // the greedy approach. 199 if (Size == 0) 200 return {}; 201 // Bail if the number of loads (non-overlapping + potential overlapping one) 202 // is larger than the max allowed. 203 if ((NumNonOverlappingLoads + 1) > MaxNumLoads) 204 return {}; 205 206 // Add non-overlapping loads. 207 LoadEntryVector LoadSequence; 208 uint64_t Offset = 0; 209 for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) { 210 LoadSequence.push_back({MaxLoadSize, Offset}); 211 Offset += MaxLoadSize; 212 } 213 214 // Add the last overlapping load. 215 assert(Size > 0 && Size < MaxLoadSize && "broken invariant"); 216 LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)}); 217 NumLoadsNonOneByte = 1; 218 return LoadSequence; 219 } 220 221 void MemCmpExpansion::optimiseLoadSequence( 222 LoadEntryVector &LoadSequence, 223 const TargetTransformInfo::MemCmpExpansionOptions &Options, 224 bool IsUsedForZeroCmp) { 225 // This part of code attempts to optimize the LoadSequence by merging allowed 226 // subsequences into single loads of allowed sizes from 227 // `MemCmpExpansionOptions::AllowedTailExpansions`. If it is for zero 228 // comparison or if no allowed tail expansions are specified, we exit early. 229 if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty()) 230 return; 231 232 while (LoadSequence.size() >= 2) { 233 auto Last = LoadSequence[LoadSequence.size() - 1]; 234 auto PreLast = LoadSequence[LoadSequence.size() - 2]; 235 236 // Exit the loop if the two sequences are not contiguous 237 if (PreLast.Offset + PreLast.LoadSize != Last.Offset) 238 break; 239 240 auto LoadSize = Last.LoadSize + PreLast.LoadSize; 241 if (find(Options.AllowedTailExpansions, LoadSize) == 242 Options.AllowedTailExpansions.end()) 243 break; 244 245 // Remove the last two sequences and replace with the combined sequence 246 LoadSequence.pop_back(); 247 LoadSequence.pop_back(); 248 LoadSequence.emplace_back(PreLast.Offset, LoadSize); 249 } 250 } 251 252 // Initialize the basic block structure required for expansion of memcmp call 253 // with given maximum load size and memcmp size parameter. 254 // This structure includes: 255 // 1. A list of load compare blocks - LoadCmpBlocks. 256 // 2. An EndBlock, split from original instruction point, which is the block to 257 // return from. 258 // 3. ResultBlock, block to branch to for early exit when a 259 // LoadCmpBlock finds a difference. 260 MemCmpExpansion::MemCmpExpansion( 261 CallInst *const CI, uint64_t Size, 262 const TargetTransformInfo::MemCmpExpansionOptions &Options, 263 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout, 264 DomTreeUpdater *DTU) 265 : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock), 266 IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU), 267 Builder(CI) { 268 assert(Size > 0 && "zero blocks"); 269 // Scale the max size down if the target can load more bytes than we need. 270 llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes); 271 while (!LoadSizes.empty() && LoadSizes.front() > Size) { 272 LoadSizes = LoadSizes.drop_front(); 273 } 274 assert(!LoadSizes.empty() && "cannot load Size bytes"); 275 MaxLoadSize = LoadSizes.front(); 276 // Compute the decomposition. 277 unsigned GreedyNumLoadsNonOneByte = 0; 278 LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads, 279 GreedyNumLoadsNonOneByte); 280 NumLoadsNonOneByte = GreedyNumLoadsNonOneByte; 281 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant"); 282 // If we allow overlapping loads and the load sequence is not already optimal, 283 // use overlapping loads. 284 if (Options.AllowOverlappingLoads && 285 (LoadSequence.empty() || LoadSequence.size() > 2)) { 286 unsigned OverlappingNumLoadsNonOneByte = 0; 287 auto OverlappingLoads = computeOverlappingLoadSequence( 288 Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte); 289 if (!OverlappingLoads.empty() && 290 (LoadSequence.empty() || 291 OverlappingLoads.size() < LoadSequence.size())) { 292 LoadSequence = OverlappingLoads; 293 NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte; 294 } 295 } 296 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant"); 297 optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp); 298 } 299 300 unsigned MemCmpExpansion::getNumBlocks() { 301 if (IsUsedForZeroCmp) 302 return getNumLoads() / NumLoadsPerBlockForZeroCmp + 303 (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0); 304 return getNumLoads(); 305 } 306 307 void MemCmpExpansion::createLoadCmpBlocks() { 308 for (unsigned i = 0; i < getNumBlocks(); i++) { 309 BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", 310 EndBlock->getParent(), EndBlock); 311 LoadCmpBlocks.push_back(BB); 312 } 313 } 314 315 void MemCmpExpansion::createResultBlock() { 316 ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", 317 EndBlock->getParent(), EndBlock); 318 } 319 320 MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType, 321 Type *BSwapSizeType, 322 Type *CmpSizeType, 323 unsigned OffsetBytes) { 324 // Get the memory source at offset `OffsetBytes`. 325 Value *LhsSource = CI->getArgOperand(0); 326 Value *RhsSource = CI->getArgOperand(1); 327 Align LhsAlign = LhsSource->getPointerAlignment(DL); 328 Align RhsAlign = RhsSource->getPointerAlignment(DL); 329 if (OffsetBytes > 0) { 330 auto *ByteType = Type::getInt8Ty(CI->getContext()); 331 LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes); 332 RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes); 333 LhsAlign = commonAlignment(LhsAlign, OffsetBytes); 334 RhsAlign = commonAlignment(RhsAlign, OffsetBytes); 335 } 336 337 // Create a constant or a load from the source. 338 Value *Lhs = nullptr; 339 if (auto *C = dyn_cast<Constant>(LhsSource)) 340 Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL); 341 if (!Lhs) 342 Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign); 343 344 Value *Rhs = nullptr; 345 if (auto *C = dyn_cast<Constant>(RhsSource)) 346 Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL); 347 if (!Rhs) 348 Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign); 349 350 // Zero extend if Byte Swap intrinsic has different type 351 if (BSwapSizeType && LoadSizeType != BSwapSizeType) { 352 Lhs = Builder.CreateZExt(Lhs, BSwapSizeType); 353 Rhs = Builder.CreateZExt(Rhs, BSwapSizeType); 354 } 355 356 // Swap bytes if required. 357 if (BSwapSizeType) { 358 Function *Bswap = Intrinsic::getDeclaration( 359 CI->getModule(), Intrinsic::bswap, BSwapSizeType); 360 Lhs = Builder.CreateCall(Bswap, Lhs); 361 Rhs = Builder.CreateCall(Bswap, Rhs); 362 } 363 364 // Zero extend if required. 365 if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) { 366 Lhs = Builder.CreateZExt(Lhs, CmpSizeType); 367 Rhs = Builder.CreateZExt(Rhs, CmpSizeType); 368 } 369 return {Lhs, Rhs}; 370 } 371 372 // This function creates the IR instructions for loading and comparing 1 byte. 373 // It loads 1 byte from each source of the memcmp parameters with the given 374 // GEPIndex. It then subtracts the two loaded values and adds this result to the 375 // final phi node for selecting the memcmp result. 376 void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, 377 unsigned OffsetBytes) { 378 BasicBlock *BB = LoadCmpBlocks[BlockIndex]; 379 Builder.SetInsertPoint(BB); 380 const LoadPair Loads = 381 getLoadPair(Type::getInt8Ty(CI->getContext()), nullptr, 382 Type::getInt32Ty(CI->getContext()), OffsetBytes); 383 Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs); 384 385 PhiRes->addIncoming(Diff, BB); 386 387 if (BlockIndex < (LoadCmpBlocks.size() - 1)) { 388 // Early exit branch if difference found to EndBlock. Otherwise, continue to 389 // next LoadCmpBlock, 390 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, 391 ConstantInt::get(Diff->getType(), 0)); 392 BranchInst *CmpBr = 393 BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp); 394 Builder.Insert(CmpBr); 395 if (DTU) 396 DTU->applyUpdates( 397 {{DominatorTree::Insert, BB, EndBlock}, 398 {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}}); 399 } else { 400 // The last block has an unconditional branch to EndBlock. 401 BranchInst *CmpBr = BranchInst::Create(EndBlock); 402 Builder.Insert(CmpBr); 403 if (DTU) 404 DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}}); 405 } 406 } 407 408 /// Generate an equality comparison for one or more pairs of loaded values. 409 /// This is used in the case where the memcmp() call is compared equal or not 410 /// equal to zero. 411 Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, 412 unsigned &LoadIndex) { 413 assert(LoadIndex < getNumLoads() && 414 "getCompareLoadPairs() called with no remaining loads"); 415 std::vector<Value *> XorList, OrList; 416 Value *Diff = nullptr; 417 418 const unsigned NumLoads = 419 std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp); 420 421 // For a single-block expansion, start inserting before the memcmp call. 422 if (LoadCmpBlocks.empty()) 423 Builder.SetInsertPoint(CI); 424 else 425 Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); 426 427 Value *Cmp = nullptr; 428 // If we have multiple loads per block, we need to generate a composite 429 // comparison using xor+or. The type for the combinations is the largest load 430 // type. 431 IntegerType *const MaxLoadType = 432 NumLoads == 1 ? nullptr 433 : IntegerType::get(CI->getContext(), MaxLoadSize * 8); 434 435 for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { 436 const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; 437 const LoadPair Loads = getLoadPair( 438 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), nullptr, 439 MaxLoadType, CurLoadEntry.Offset); 440 441 if (NumLoads != 1) { 442 // If we have multiple loads per block, we need to generate a composite 443 // comparison using xor+or. 444 Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs); 445 Diff = Builder.CreateZExt(Diff, MaxLoadType); 446 XorList.push_back(Diff); 447 } else { 448 // If there's only one load per block, we just compare the loaded values. 449 Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs); 450 } 451 } 452 453 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { 454 std::vector<Value *> OutList; 455 for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { 456 Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); 457 OutList.push_back(Or); 458 } 459 if (InList.size() % 2 != 0) 460 OutList.push_back(InList.back()); 461 return OutList; 462 }; 463 464 if (!Cmp) { 465 // Pairwise OR the XOR results. 466 OrList = pairWiseOr(XorList); 467 468 // Pairwise OR the OR results until one result left. 469 while (OrList.size() != 1) { 470 OrList = pairWiseOr(OrList); 471 } 472 473 assert(Diff && "Failed to find comparison diff"); 474 Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); 475 } 476 477 return Cmp; 478 } 479 480 void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, 481 unsigned &LoadIndex) { 482 Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex); 483 484 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) 485 ? EndBlock 486 : LoadCmpBlocks[BlockIndex + 1]; 487 // Early exit branch if difference found to ResultBlock. Otherwise, 488 // continue to next LoadCmpBlock or EndBlock. 489 BasicBlock *BB = Builder.GetInsertBlock(); 490 BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); 491 Builder.Insert(CmpBr); 492 if (DTU) 493 DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB}, 494 {DominatorTree::Insert, BB, NextBB}}); 495 496 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 497 // since early exit to ResultBlock was not taken (no difference was found in 498 // any of the bytes). 499 if (BlockIndex == LoadCmpBlocks.size() - 1) { 500 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); 501 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); 502 } 503 } 504 505 // This function creates the IR intructions for loading and comparing using the 506 // given LoadSize. It loads the number of bytes specified by LoadSize from each 507 // source of the memcmp parameters. It then does a subtract to see if there was 508 // a difference in the loaded values. If a difference is found, it branches 509 // with an early exit to the ResultBlock for calculating which source was 510 // larger. Otherwise, it falls through to the either the next LoadCmpBlock or 511 // the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with 512 // a special case through emitLoadCompareByteBlock. The special handling can 513 // simply subtract the loaded values and add it to the result phi node. 514 void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { 515 // There is one load per block in this case, BlockIndex == LoadIndex. 516 const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex]; 517 518 if (CurLoadEntry.LoadSize == 1) { 519 MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset); 520 return; 521 } 522 523 Type *LoadSizeType = 524 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); 525 Type *BSwapSizeType = 526 DL.isLittleEndian() 527 ? IntegerType::get(CI->getContext(), 528 PowerOf2Ceil(CurLoadEntry.LoadSize * 8)) 529 : nullptr; 530 Type *MaxLoadType = IntegerType::get( 531 CI->getContext(), 532 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8); 533 assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type"); 534 535 Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); 536 537 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType, 538 CurLoadEntry.Offset); 539 540 // Add the loaded values to the phi nodes for calculating memcmp result only 541 // if result is not used in a zero equality. 542 if (!IsUsedForZeroCmp) { 543 ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]); 544 ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]); 545 } 546 547 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs); 548 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) 549 ? EndBlock 550 : LoadCmpBlocks[BlockIndex + 1]; 551 // Early exit branch if difference found to ResultBlock. Otherwise, continue 552 // to next LoadCmpBlock or EndBlock. 553 BasicBlock *BB = Builder.GetInsertBlock(); 554 BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); 555 Builder.Insert(CmpBr); 556 if (DTU) 557 DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB}, 558 {DominatorTree::Insert, BB, ResBlock.BB}}); 559 560 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 561 // since early exit to ResultBlock was not taken (no difference was found in 562 // any of the bytes). 563 if (BlockIndex == LoadCmpBlocks.size() - 1) { 564 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); 565 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); 566 } 567 } 568 569 // This function populates the ResultBlock with a sequence to calculate the 570 // memcmp result. It compares the two loaded source values and returns -1 if 571 // src1 < src2 and 1 if src1 > src2. 572 void MemCmpExpansion::emitMemCmpResultBlock() { 573 // Special case: if memcmp result is used in a zero equality, result does not 574 // need to be calculated and can simply return 1. 575 if (IsUsedForZeroCmp) { 576 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); 577 Builder.SetInsertPoint(ResBlock.BB, InsertPt); 578 Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); 579 PhiRes->addIncoming(Res, ResBlock.BB); 580 BranchInst *NewBr = BranchInst::Create(EndBlock); 581 Builder.Insert(NewBr); 582 if (DTU) 583 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}}); 584 return; 585 } 586 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); 587 Builder.SetInsertPoint(ResBlock.BB, InsertPt); 588 589 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, 590 ResBlock.PhiSrc2); 591 592 Value *Res = 593 Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), 594 ConstantInt::get(Builder.getInt32Ty(), 1)); 595 596 PhiRes->addIncoming(Res, ResBlock.BB); 597 BranchInst *NewBr = BranchInst::Create(EndBlock); 598 Builder.Insert(NewBr); 599 if (DTU) 600 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}}); 601 } 602 603 void MemCmpExpansion::setupResultBlockPHINodes() { 604 Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); 605 Builder.SetInsertPoint(ResBlock.BB); 606 // Note: this assumes one load per block. 607 ResBlock.PhiSrc1 = 608 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1"); 609 ResBlock.PhiSrc2 = 610 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2"); 611 } 612 613 void MemCmpExpansion::setupEndBlockPHINodes() { 614 Builder.SetInsertPoint(EndBlock, EndBlock->begin()); 615 PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); 616 } 617 618 Value *MemCmpExpansion::getMemCmpExpansionZeroCase() { 619 unsigned LoadIndex = 0; 620 // This loop populates each of the LoadCmpBlocks with the IR sequence to 621 // handle multiple loads per block. 622 for (unsigned I = 0; I < getNumBlocks(); ++I) { 623 emitLoadCompareBlockMultipleLoads(I, LoadIndex); 624 } 625 626 emitMemCmpResultBlock(); 627 return PhiRes; 628 } 629 630 /// A memcmp expansion that compares equality with 0 and only has one block of 631 /// load and compare can bypass the compare, branch, and phi IR that is required 632 /// in the general case. 633 Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { 634 unsigned LoadIndex = 0; 635 Value *Cmp = getCompareLoadPairs(0, LoadIndex); 636 assert(LoadIndex == getNumLoads() && "some entries were not consumed"); 637 return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); 638 } 639 640 /// A memcmp expansion that only has one block of load and compare can bypass 641 /// the compare, branch, and phi IR that is required in the general case. 642 /// This function also analyses users of memcmp, and if there is only one user 643 /// from which we can conclude that only 2 out of 3 memcmp outcomes really 644 /// matter, then it generates more efficient code with only one comparison. 645 Value *MemCmpExpansion::getMemCmpOneBlock() { 646 bool NeedsBSwap = DL.isLittleEndian() && Size != 1; 647 Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); 648 Type *BSwapSizeType = 649 NeedsBSwap ? IntegerType::get(CI->getContext(), PowerOf2Ceil(Size * 8)) 650 : nullptr; 651 Type *MaxLoadType = 652 IntegerType::get(CI->getContext(), 653 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(Size)) * 8); 654 655 // The i8 and i16 cases don't need compares. We zext the loaded values and 656 // subtract them to get the suitable negative, zero, or positive i32 result. 657 if (Size == 1 || Size == 2) { 658 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, 659 Builder.getInt32Ty(), /*Offset*/ 0); 660 return Builder.CreateSub(Loads.Lhs, Loads.Rhs); 661 } 662 663 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType, 664 /*Offset*/ 0); 665 666 // If a user of memcmp cares only about two outcomes, for example: 667 // bool result = memcmp(a, b, NBYTES) > 0; 668 // We can generate more optimal code with a smaller number of operations 669 if (CI->hasOneUser()) { 670 auto *UI = cast<Instruction>(*CI->user_begin()); 671 ICmpInst::Predicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE; 672 uint64_t Shift; 673 bool NeedsZExt = false; 674 // This is a special case because instead of checking if the result is less 675 // than zero: 676 // bool result = memcmp(a, b, NBYTES) < 0; 677 // Compiler is clever enough to generate the following code: 678 // bool result = memcmp(a, b, NBYTES) >> 31; 679 if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) && 680 Shift == (CI->getType()->getIntegerBitWidth() - 1)) { 681 Pred = ICmpInst::ICMP_SLT; 682 NeedsZExt = true; 683 } else { 684 // In case of a successful match this call will set `Pred` variable 685 match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero())); 686 } 687 // Generate new code and remove the original memcmp call and the user 688 if (ICmpInst::isSigned(Pred)) { 689 Value *Cmp = Builder.CreateICmp(CmpInst::getUnsignedPredicate(Pred), 690 Loads.Lhs, Loads.Rhs); 691 auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, UI->getType()) : Cmp; 692 UI->replaceAllUsesWith(Result); 693 UI->eraseFromParent(); 694 CI->eraseFromParent(); 695 return nullptr; 696 } 697 } 698 699 // The result of memcmp is negative, zero, or positive, so produce that by 700 // subtracting 2 extended compare bits: sub (ugt, ult). 701 // If a target prefers to use selects to get -1/0/1, they should be able 702 // to transform this later. The inverse transform (going from selects to math) 703 // may not be possible in the DAG because the selects got converted into 704 // branches before we got there. 705 Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs); 706 Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs); 707 Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty()); 708 Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty()); 709 return Builder.CreateSub(ZextUGT, ZextULT); 710 } 711 712 // This function expands the memcmp call into an inline expansion and returns 713 // the memcmp result. Returns nullptr if the memcmp is already replaced. 714 Value *MemCmpExpansion::getMemCmpExpansion() { 715 // Create the basic block framework for a multi-block expansion. 716 if (getNumBlocks() != 1) { 717 BasicBlock *StartBlock = CI->getParent(); 718 EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr, 719 /*MSSAU=*/nullptr, "endblock"); 720 setupEndBlockPHINodes(); 721 createResultBlock(); 722 723 // If return value of memcmp is not used in a zero equality, we need to 724 // calculate which source was larger. The calculation requires the 725 // two loaded source values of each load compare block. 726 // These will be saved in the phi nodes created by setupResultBlockPHINodes. 727 if (!IsUsedForZeroCmp) setupResultBlockPHINodes(); 728 729 // Create the number of required load compare basic blocks. 730 createLoadCmpBlocks(); 731 732 // Update the terminator added by SplitBlock to branch to the first 733 // LoadCmpBlock. 734 StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); 735 if (DTU) 736 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]}, 737 {DominatorTree::Delete, StartBlock, EndBlock}}); 738 } 739 740 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 741 742 if (IsUsedForZeroCmp) 743 return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock() 744 : getMemCmpExpansionZeroCase(); 745 746 if (getNumBlocks() == 1) 747 return getMemCmpOneBlock(); 748 749 for (unsigned I = 0; I < getNumBlocks(); ++I) { 750 emitLoadCompareBlock(I); 751 } 752 753 emitMemCmpResultBlock(); 754 return PhiRes; 755 } 756 757 // This function checks to see if an expansion of memcmp can be generated. 758 // It checks for constant compare size that is less than the max inline size. 759 // If an expansion cannot occur, returns false to leave as a library call. 760 // Otherwise, the library call is replaced with a new IR instruction sequence. 761 /// We want to transform: 762 /// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) 763 /// To: 764 /// loadbb: 765 /// %0 = bitcast i32* %buffer2 to i8* 766 /// %1 = bitcast i32* %buffer1 to i8* 767 /// %2 = bitcast i8* %1 to i64* 768 /// %3 = bitcast i8* %0 to i64* 769 /// %4 = load i64, i64* %2 770 /// %5 = load i64, i64* %3 771 /// %6 = call i64 @llvm.bswap.i64(i64 %4) 772 /// %7 = call i64 @llvm.bswap.i64(i64 %5) 773 /// %8 = sub i64 %6, %7 774 /// %9 = icmp ne i64 %8, 0 775 /// br i1 %9, label %res_block, label %loadbb1 776 /// res_block: ; preds = %loadbb2, 777 /// %loadbb1, %loadbb 778 /// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] 779 /// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] 780 /// %10 = icmp ult i64 %phi.src1, %phi.src2 781 /// %11 = select i1 %10, i32 -1, i32 1 782 /// br label %endblock 783 /// loadbb1: ; preds = %loadbb 784 /// %12 = bitcast i32* %buffer2 to i8* 785 /// %13 = bitcast i32* %buffer1 to i8* 786 /// %14 = bitcast i8* %13 to i32* 787 /// %15 = bitcast i8* %12 to i32* 788 /// %16 = getelementptr i32, i32* %14, i32 2 789 /// %17 = getelementptr i32, i32* %15, i32 2 790 /// %18 = load i32, i32* %16 791 /// %19 = load i32, i32* %17 792 /// %20 = call i32 @llvm.bswap.i32(i32 %18) 793 /// %21 = call i32 @llvm.bswap.i32(i32 %19) 794 /// %22 = zext i32 %20 to i64 795 /// %23 = zext i32 %21 to i64 796 /// %24 = sub i64 %22, %23 797 /// %25 = icmp ne i64 %24, 0 798 /// br i1 %25, label %res_block, label %loadbb2 799 /// loadbb2: ; preds = %loadbb1 800 /// %26 = bitcast i32* %buffer2 to i8* 801 /// %27 = bitcast i32* %buffer1 to i8* 802 /// %28 = bitcast i8* %27 to i16* 803 /// %29 = bitcast i8* %26 to i16* 804 /// %30 = getelementptr i16, i16* %28, i16 6 805 /// %31 = getelementptr i16, i16* %29, i16 6 806 /// %32 = load i16, i16* %30 807 /// %33 = load i16, i16* %31 808 /// %34 = call i16 @llvm.bswap.i16(i16 %32) 809 /// %35 = call i16 @llvm.bswap.i16(i16 %33) 810 /// %36 = zext i16 %34 to i64 811 /// %37 = zext i16 %35 to i64 812 /// %38 = sub i64 %36, %37 813 /// %39 = icmp ne i64 %38, 0 814 /// br i1 %39, label %res_block, label %loadbb3 815 /// loadbb3: ; preds = %loadbb2 816 /// %40 = bitcast i32* %buffer2 to i8* 817 /// %41 = bitcast i32* %buffer1 to i8* 818 /// %42 = getelementptr i8, i8* %41, i8 14 819 /// %43 = getelementptr i8, i8* %40, i8 14 820 /// %44 = load i8, i8* %42 821 /// %45 = load i8, i8* %43 822 /// %46 = zext i8 %44 to i32 823 /// %47 = zext i8 %45 to i32 824 /// %48 = sub i32 %46, %47 825 /// br label %endblock 826 /// endblock: ; preds = %res_block, 827 /// %loadbb3 828 /// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] 829 /// ret i32 %phi.res 830 static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, 831 const TargetLowering *TLI, const DataLayout *DL, 832 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, 833 DomTreeUpdater *DTU, const bool IsBCmp) { 834 NumMemCmpCalls++; 835 836 // Early exit from expansion if -Oz. 837 if (CI->getFunction()->hasMinSize()) 838 return false; 839 840 // Early exit from expansion if size is not a constant. 841 ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); 842 if (!SizeCast) { 843 NumMemCmpNotConstant++; 844 return false; 845 } 846 const uint64_t SizeVal = SizeCast->getZExtValue(); 847 848 if (SizeVal == 0) { 849 return false; 850 } 851 // TTI call to check if target would like to expand memcmp. Also, get the 852 // available load sizes. 853 const bool IsUsedForZeroCmp = 854 IsBCmp || isOnlyUsedInZeroEqualityComparison(CI); 855 bool OptForSize = CI->getFunction()->hasOptSize() || 856 llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); 857 auto Options = TTI->enableMemCmpExpansion(OptForSize, 858 IsUsedForZeroCmp); 859 if (!Options) return false; 860 861 if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()) 862 Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock; 863 864 if (OptForSize && 865 MaxLoadsPerMemcmpOptSize.getNumOccurrences()) 866 Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize; 867 868 if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences()) 869 Options.MaxNumLoads = MaxLoadsPerMemcmp; 870 871 MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU); 872 873 // Don't expand if this will require more loads than desired by the target. 874 if (Expansion.getNumLoads() == 0) { 875 NumMemCmpGreaterThanMax++; 876 return false; 877 } 878 879 NumMemCmpInlined++; 880 881 if (Value *Res = Expansion.getMemCmpExpansion()) { 882 // Replace call with result of expansion and erase call. 883 CI->replaceAllUsesWith(Res); 884 CI->eraseFromParent(); 885 } 886 887 return true; 888 } 889 890 // Returns true if a change was made. 891 static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, 892 const TargetTransformInfo *TTI, const TargetLowering *TL, 893 const DataLayout &DL, ProfileSummaryInfo *PSI, 894 BlockFrequencyInfo *BFI, DomTreeUpdater *DTU); 895 896 static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, 897 const TargetTransformInfo *TTI, 898 const TargetLowering *TL, 899 ProfileSummaryInfo *PSI, 900 BlockFrequencyInfo *BFI, DominatorTree *DT); 901 902 class ExpandMemCmpLegacyPass : public FunctionPass { 903 public: 904 static char ID; 905 906 ExpandMemCmpLegacyPass() : FunctionPass(ID) { 907 initializeExpandMemCmpLegacyPassPass(*PassRegistry::getPassRegistry()); 908 } 909 910 bool runOnFunction(Function &F) override { 911 if (skipFunction(F)) return false; 912 913 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 914 if (!TPC) { 915 return false; 916 } 917 const TargetLowering* TL = 918 TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering(); 919 920 const TargetLibraryInfo *TLI = 921 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 922 const TargetTransformInfo *TTI = 923 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 924 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 925 auto *BFI = (PSI && PSI->hasProfileSummary()) ? 926 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : 927 nullptr; 928 DominatorTree *DT = nullptr; 929 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) 930 DT = &DTWP->getDomTree(); 931 auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT); 932 return !PA.areAllPreserved(); 933 } 934 935 private: 936 void getAnalysisUsage(AnalysisUsage &AU) const override { 937 AU.addRequired<TargetLibraryInfoWrapperPass>(); 938 AU.addRequired<TargetTransformInfoWrapperPass>(); 939 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 940 AU.addPreserved<DominatorTreeWrapperPass>(); 941 LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); 942 FunctionPass::getAnalysisUsage(AU); 943 } 944 }; 945 946 bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, 947 const TargetTransformInfo *TTI, const TargetLowering *TL, 948 const DataLayout &DL, ProfileSummaryInfo *PSI, 949 BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) { 950 for (Instruction &I : BB) { 951 CallInst *CI = dyn_cast<CallInst>(&I); 952 if (!CI) { 953 continue; 954 } 955 LibFunc Func; 956 if (TLI->getLibFunc(*CI, Func) && 957 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && 958 expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) { 959 return true; 960 } 961 } 962 return false; 963 } 964 965 PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, 966 const TargetTransformInfo *TTI, 967 const TargetLowering *TL, ProfileSummaryInfo *PSI, 968 BlockFrequencyInfo *BFI, DominatorTree *DT) { 969 std::optional<DomTreeUpdater> DTU; 970 if (DT) 971 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy); 972 973 const DataLayout& DL = F.getParent()->getDataLayout(); 974 bool MadeChanges = false; 975 for (auto BBIt = F.begin(); BBIt != F.end();) { 976 if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) { 977 MadeChanges = true; 978 // If changes were made, restart the function from the beginning, since 979 // the structure of the function was changed. 980 BBIt = F.begin(); 981 } else { 982 ++BBIt; 983 } 984 } 985 if (MadeChanges) 986 for (BasicBlock &BB : F) 987 SimplifyInstructionsInBlock(&BB); 988 if (!MadeChanges) 989 return PreservedAnalyses::all(); 990 PreservedAnalyses PA; 991 PA.preserve<DominatorTreeAnalysis>(); 992 return PA; 993 } 994 995 } // namespace 996 997 PreservedAnalyses ExpandMemCmpPass::run(Function &F, 998 FunctionAnalysisManager &FAM) { 999 const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering(); 1000 const auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); 1001 const auto &TTI = FAM.getResult<TargetIRAnalysis>(F); 1002 auto *PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F) 1003 .getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 1004 BlockFrequencyInfo *BFI = (PSI && PSI->hasProfileSummary()) 1005 ? &FAM.getResult<BlockFrequencyAnalysis>(F) 1006 : nullptr; 1007 auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); 1008 1009 return runImpl(F, &TLI, &TTI, TL, PSI, BFI, DT); 1010 } 1011 1012 char ExpandMemCmpLegacyPass::ID = 0; 1013 INITIALIZE_PASS_BEGIN(ExpandMemCmpLegacyPass, DEBUG_TYPE, 1014 "Expand memcmp() to load/stores", false, false) 1015 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 1016 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 1017 INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) 1018 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 1019 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1020 INITIALIZE_PASS_END(ExpandMemCmpLegacyPass, DEBUG_TYPE, 1021 "Expand memcmp() to load/stores", false, false) 1022 1023 FunctionPass *llvm::createExpandMemCmpLegacyPass() { 1024 return new ExpandMemCmpLegacyPass(); 1025 } 1026