//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target-specific instructions which implement
// the same semantics in a way that better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {

class AtomicExpandImpl {
  const TargetLowering *TLI = nullptr;
  const DataLayout *DL = nullptr;

private:
  void handleFailure(Instruction &FailedInst, const Twine &Msg) const {
    LLVMContext &Ctx = FailedInst.getContext();

    // TODO: Do not use generic error type.
72 Ctx.emitError(&FailedInst, Msg); 73 74 if (!FailedInst.getType()->isVoidTy()) 75 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType())); 76 FailedInst.eraseFromParent(); 77 } 78 79 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); 80 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); 81 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); 82 bool tryExpandAtomicLoad(LoadInst *LI); 83 bool expandAtomicLoadToLL(LoadInst *LI); 84 bool expandAtomicLoadToCmpXchg(LoadInst *LI); 85 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); 86 bool tryExpandAtomicStore(StoreInst *SI); 87 void expandAtomicStore(StoreInst *SI); 88 bool tryExpandAtomicRMW(AtomicRMWInst *AI); 89 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI); 90 Value * 91 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr, 92 Align AddrAlign, AtomicOrdering MemOpOrder, 93 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp); 94 void expandAtomicOpToLLSC( 95 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign, 96 AtomicOrdering MemOpOrder, 97 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp); 98 void expandPartwordAtomicRMW( 99 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind); 100 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); 101 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I); 102 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); 103 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); 104 105 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); 106 static Value *insertRMWCmpXchgLoop( 107 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign, 108 AtomicOrdering MemOpOrder, SyncScope::ID SSID, 109 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp, 110 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc); 111 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); 112 113 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); 114 bool isIdempotentRMW(AtomicRMWInst *RMWI); 115 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); 116 117 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment, 118 Value *PointerOperand, Value *ValueOperand, 119 Value *CASExpected, AtomicOrdering Ordering, 120 AtomicOrdering Ordering2, 121 ArrayRef<RTLIB::Libcall> Libcalls); 122 void expandAtomicLoadToLibcall(LoadInst *LI); 123 void expandAtomicStoreToLibcall(StoreInst *LI); 124 void expandAtomicRMWToLibcall(AtomicRMWInst *I); 125 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I); 126 127 friend bool 128 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, 129 CreateCmpXchgInstFun CreateCmpXchg); 130 131 bool processAtomicInstr(Instruction *I); 132 133 public: 134 bool run(Function &F, const TargetMachine *TM); 135 }; 136 137 class AtomicExpandLegacy : public FunctionPass { 138 public: 139 static char ID; // Pass identification, replacement for typeid 140 141 AtomicExpandLegacy() : FunctionPass(ID) { 142 initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry()); 143 } 144 145 bool runOnFunction(Function &F) override; 146 }; 147 148 // IRBuilder to be used for replacement atomic instructions. 149 struct ReplacementIRBuilder 150 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> { 151 MDNode *MMRAMD = nullptr; 152 153 // Preserves the DebugLoc from I, and preserves still valid metadata. 154 // Enable StrictFP builder mode when appropriate. 
155 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL) 156 : IRBuilder(I->getContext(), InstSimplifyFolder(DL), 157 IRBuilderCallbackInserter( 158 [this](Instruction *I) { addMMRAMD(I); })) { 159 SetInsertPoint(I); 160 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections}); 161 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP)) 162 this->setIsFPConstrained(true); 163 164 MMRAMD = I->getMetadata(LLVMContext::MD_mmra); 165 } 166 167 void addMMRAMD(Instruction *I) { 168 if (canInstructionHaveMMRAs(*I)) 169 I->setMetadata(LLVMContext::MD_mmra, MMRAMD); 170 } 171 }; 172 173 } // end anonymous namespace 174 175 char AtomicExpandLegacy::ID = 0; 176 177 char &llvm::AtomicExpandID = AtomicExpandLegacy::ID; 178 179 INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE, 180 "Expand Atomic instructions", false, false) 181 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 182 INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE, 183 "Expand Atomic instructions", false, false) 184 185 // Helper functions to retrieve the size of atomic instructions. 186 static unsigned getAtomicOpSize(LoadInst *LI) { 187 const DataLayout &DL = LI->getDataLayout(); 188 return DL.getTypeStoreSize(LI->getType()); 189 } 190 191 static unsigned getAtomicOpSize(StoreInst *SI) { 192 const DataLayout &DL = SI->getDataLayout(); 193 return DL.getTypeStoreSize(SI->getValueOperand()->getType()); 194 } 195 196 static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) { 197 const DataLayout &DL = RMWI->getDataLayout(); 198 return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); 199 } 200 201 static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) { 202 const DataLayout &DL = CASI->getDataLayout(); 203 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); 204 } 205 206 /// Copy metadata that's safe to preserve when widening atomics. 207 static void copyMetadataForAtomic(Instruction &Dest, 208 const Instruction &Source) { 209 SmallVector<std::pair<unsigned, MDNode *>, 8> MD; 210 Source.getAllMetadata(MD); 211 LLVMContext &Ctx = Dest.getContext(); 212 MDBuilder MDB(Ctx); 213 214 for (auto [ID, N] : MD) { 215 switch (ID) { 216 case LLVMContext::MD_dbg: 217 case LLVMContext::MD_tbaa: 218 case LLVMContext::MD_tbaa_struct: 219 case LLVMContext::MD_alias_scope: 220 case LLVMContext::MD_noalias: 221 case LLVMContext::MD_noalias_addrspace: 222 case LLVMContext::MD_access_group: 223 case LLVMContext::MD_mmra: 224 Dest.setMetadata(ID, N); 225 break; 226 default: 227 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory")) 228 Dest.setMetadata(ID, N); 229 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory")) 230 Dest.setMetadata(ID, N); 231 232 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current 233 // uses. 234 break; 235 } 236 } 237 } 238 239 // Determine if a particular atomic operation has a supported size, 240 // and is of appropriate alignment, to be passed through for target 241 // lowering. 
(Versus turning into a __atomic libcall) 242 template <typename Inst> 243 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { 244 unsigned Size = getAtomicOpSize(I); 245 Align Alignment = I->getAlign(); 246 return Alignment >= Size && 247 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8; 248 } 249 250 bool AtomicExpandImpl::processAtomicInstr(Instruction *I) { 251 auto *LI = dyn_cast<LoadInst>(I); 252 auto *SI = dyn_cast<StoreInst>(I); 253 auto *RMWI = dyn_cast<AtomicRMWInst>(I); 254 auto *CASI = dyn_cast<AtomicCmpXchgInst>(I); 255 256 bool MadeChange = false; 257 258 // If the Size/Alignment is not supported, replace with a libcall. 259 if (LI) { 260 if (!LI->isAtomic()) 261 return false; 262 263 if (!atomicSizeSupported(TLI, LI)) { 264 expandAtomicLoadToLibcall(LI); 265 return true; 266 } 267 268 if (TLI->shouldCastAtomicLoadInIR(LI) == 269 TargetLoweringBase::AtomicExpansionKind::CastToInteger) { 270 I = LI = convertAtomicLoadToIntegerType(LI); 271 MadeChange = true; 272 } 273 } else if (SI) { 274 if (!SI->isAtomic()) 275 return false; 276 277 if (!atomicSizeSupported(TLI, SI)) { 278 expandAtomicStoreToLibcall(SI); 279 return true; 280 } 281 282 if (TLI->shouldCastAtomicStoreInIR(SI) == 283 TargetLoweringBase::AtomicExpansionKind::CastToInteger) { 284 I = SI = convertAtomicStoreToIntegerType(SI); 285 MadeChange = true; 286 } 287 } else if (RMWI) { 288 if (!atomicSizeSupported(TLI, RMWI)) { 289 expandAtomicRMWToLibcall(RMWI); 290 return true; 291 } 292 293 if (TLI->shouldCastAtomicRMWIInIR(RMWI) == 294 TargetLoweringBase::AtomicExpansionKind::CastToInteger) { 295 I = RMWI = convertAtomicXchgToIntegerType(RMWI); 296 MadeChange = true; 297 } 298 } else if (CASI) { 299 if (!atomicSizeSupported(TLI, CASI)) { 300 expandAtomicCASToLibcall(CASI); 301 return true; 302 } 303 304 // TODO: when we're ready to make the change at the IR level, we can 305 // extend convertCmpXchgToInteger for floating point too. 306 if (CASI->getCompareOperand()->getType()->isPointerTy()) { 307 // TODO: add a TLI hook to control this so that each target can 308 // convert to lowering the original type one at a time. 309 I = CASI = convertCmpXchgToIntegerType(CASI); 310 MadeChange = true; 311 } 312 } else 313 return false; 314 315 if (TLI->shouldInsertFencesForAtomic(I)) { 316 auto FenceOrdering = AtomicOrdering::Monotonic; 317 if (LI && isAcquireOrStronger(LI->getOrdering())) { 318 FenceOrdering = LI->getOrdering(); 319 LI->setOrdering(AtomicOrdering::Monotonic); 320 } else if (SI && isReleaseOrStronger(SI->getOrdering())) { 321 FenceOrdering = SI->getOrdering(); 322 SI->setOrdering(AtomicOrdering::Monotonic); 323 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) || 324 isAcquireOrStronger(RMWI->getOrdering()))) { 325 FenceOrdering = RMWI->getOrdering(); 326 RMWI->setOrdering(AtomicOrdering::Monotonic); 327 } else if (CASI && 328 TLI->shouldExpandAtomicCmpXchgInIR(CASI) == 329 TargetLoweringBase::AtomicExpansionKind::None && 330 (isReleaseOrStronger(CASI->getSuccessOrdering()) || 331 isAcquireOrStronger(CASI->getSuccessOrdering()) || 332 isAcquireOrStronger(CASI->getFailureOrdering()))) { 333 // If a compare and swap is lowered to LL/SC, we can do smarter fence 334 // insertion, with a stronger one on the success path than on the 335 // failure path. As a result, fence insertion is directly done by 336 // expandAtomicCmpXchg in that case. 
337 FenceOrdering = CASI->getMergedOrdering(); 338 auto CASOrdering = TLI->atomicOperationOrderAfterFenceSplit(CASI); 339 340 CASI->setSuccessOrdering(CASOrdering); 341 CASI->setFailureOrdering(CASOrdering); 342 } 343 344 if (FenceOrdering != AtomicOrdering::Monotonic) { 345 MadeChange |= bracketInstWithFences(I, FenceOrdering); 346 } 347 } else if (I->hasAtomicStore() && 348 TLI->shouldInsertTrailingFenceForAtomicStore(I)) { 349 auto FenceOrdering = AtomicOrdering::Monotonic; 350 if (SI) 351 FenceOrdering = SI->getOrdering(); 352 else if (RMWI) 353 FenceOrdering = RMWI->getOrdering(); 354 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) != 355 TargetLoweringBase::AtomicExpansionKind::LLSC) 356 // LLSC is handled in expandAtomicCmpXchg(). 357 FenceOrdering = CASI->getSuccessOrdering(); 358 359 IRBuilder Builder(I); 360 if (auto TrailingFence = 361 TLI->emitTrailingFence(Builder, I, FenceOrdering)) { 362 TrailingFence->moveAfter(I); 363 MadeChange = true; 364 } 365 } 366 367 if (LI) 368 MadeChange |= tryExpandAtomicLoad(LI); 369 else if (SI) 370 MadeChange |= tryExpandAtomicStore(SI); 371 else if (RMWI) { 372 // There are two different ways of expanding RMW instructions: 373 // - into a load if it is idempotent 374 // - into a Cmpxchg/LL-SC loop otherwise 375 // we try them in that order. 376 377 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { 378 MadeChange = true; 379 380 } else { 381 MadeChange |= tryExpandAtomicRMW(RMWI); 382 } 383 } else if (CASI) 384 MadeChange |= tryExpandAtomicCmpXchg(CASI); 385 386 return MadeChange; 387 } 388 389 bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) { 390 const auto *Subtarget = TM->getSubtargetImpl(F); 391 if (!Subtarget->enableAtomicExpand()) 392 return false; 393 TLI = Subtarget->getTargetLowering(); 394 DL = &F.getDataLayout(); 395 396 bool MadeChange = false; 397 398 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { 399 BasicBlock *BB = &*BBI; 400 401 BasicBlock::reverse_iterator Next; 402 403 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E; 404 I = Next) { 405 Instruction &Inst = *I; 406 Next = std::next(I); 407 408 if (processAtomicInstr(&Inst)) { 409 MadeChange = true; 410 411 // New blocks may have been inserted. 412 BBE = F.end(); 413 } 414 } 415 } 416 417 return MadeChange; 418 } 419 420 bool AtomicExpandLegacy::runOnFunction(Function &F) { 421 422 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 423 if (!TPC) 424 return false; 425 auto *TM = &TPC->getTM<TargetMachine>(); 426 AtomicExpandImpl AE; 427 return AE.run(F, TM); 428 } 429 430 FunctionPass *llvm::createAtomicExpandLegacyPass() { 431 return new AtomicExpandLegacy(); 432 } 433 434 PreservedAnalyses AtomicExpandPass::run(Function &F, 435 FunctionAnalysisManager &AM) { 436 AtomicExpandImpl AE; 437 438 bool Changed = AE.run(F, TM); 439 if (!Changed) 440 return PreservedAnalyses::all(); 441 442 return PreservedAnalyses::none(); 443 } 444 445 bool AtomicExpandImpl::bracketInstWithFences(Instruction *I, 446 AtomicOrdering Order) { 447 ReplacementIRBuilder Builder(I, *DL); 448 449 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order); 450 451 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order); 452 // We have a guard here because not every atomic operation generates a 453 // trailing fence. 454 if (TrailingFence) 455 TrailingFence->moveAfter(I); 456 457 return (LeadingFence || TrailingFence); 458 } 459 460 /// Get the iX type with the same bitwidth as T. 
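/// For example, with a typical DataLayout: float -> i32, double -> i64,
/// <2 x i16> -> i32, and a pointer maps to an integer of its in-memory
/// (pointer-sized) width.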
461 IntegerType * 462 AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) { 463 EVT VT = TLI->getMemValueType(DL, T); 464 unsigned BitWidth = VT.getStoreSizeInBits(); 465 assert(BitWidth == VT.getSizeInBits() && "must be a power of two"); 466 return IntegerType::get(T->getContext(), BitWidth); 467 } 468 469 /// Convert an atomic load of a non-integral type to an integer load of the 470 /// equivalent bitwidth. See the function comment on 471 /// convertAtomicStoreToIntegerType for background. 472 LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) { 473 auto *M = LI->getModule(); 474 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout()); 475 476 ReplacementIRBuilder Builder(LI, *DL); 477 478 Value *Addr = LI->getPointerOperand(); 479 480 auto *NewLI = Builder.CreateLoad(NewTy, Addr); 481 NewLI->setAlignment(LI->getAlign()); 482 NewLI->setVolatile(LI->isVolatile()); 483 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); 484 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); 485 486 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); 487 LI->replaceAllUsesWith(NewVal); 488 LI->eraseFromParent(); 489 return NewLI; 490 } 491 492 AtomicRMWInst * 493 AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { 494 assert(RMWI->getOperation() == AtomicRMWInst::Xchg); 495 496 auto *M = RMWI->getModule(); 497 Type *NewTy = 498 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout()); 499 500 ReplacementIRBuilder Builder(RMWI, *DL); 501 502 Value *Addr = RMWI->getPointerOperand(); 503 Value *Val = RMWI->getValOperand(); 504 Value *NewVal = Val->getType()->isPointerTy() 505 ? Builder.CreatePtrToInt(Val, NewTy) 506 : Builder.CreateBitCast(Val, NewTy); 507 508 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal, 509 RMWI->getAlign(), RMWI->getOrdering(), 510 RMWI->getSyncScopeID()); 511 NewRMWI->setVolatile(RMWI->isVolatile()); 512 copyMetadataForAtomic(*NewRMWI, *RMWI); 513 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n"); 514 515 Value *NewRVal = RMWI->getType()->isPointerTy() 516 ? 
Builder.CreateIntToPtr(NewRMWI, RMWI->getType()) 517 : Builder.CreateBitCast(NewRMWI, RMWI->getType()); 518 RMWI->replaceAllUsesWith(NewRVal); 519 RMWI->eraseFromParent(); 520 return NewRMWI; 521 } 522 523 bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) { 524 switch (TLI->shouldExpandAtomicLoadInIR(LI)) { 525 case TargetLoweringBase::AtomicExpansionKind::None: 526 return false; 527 case TargetLoweringBase::AtomicExpansionKind::LLSC: 528 expandAtomicOpToLLSC( 529 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(), 530 LI->getOrdering(), 531 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; }); 532 return true; 533 case TargetLoweringBase::AtomicExpansionKind::LLOnly: 534 return expandAtomicLoadToLL(LI); 535 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: 536 return expandAtomicLoadToCmpXchg(LI); 537 case TargetLoweringBase::AtomicExpansionKind::NotAtomic: 538 LI->setAtomic(AtomicOrdering::NotAtomic); 539 return true; 540 default: 541 llvm_unreachable("Unhandled case in tryExpandAtomicLoad"); 542 } 543 } 544 545 bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) { 546 switch (TLI->shouldExpandAtomicStoreInIR(SI)) { 547 case TargetLoweringBase::AtomicExpansionKind::None: 548 return false; 549 case TargetLoweringBase::AtomicExpansionKind::Expand: 550 expandAtomicStore(SI); 551 return true; 552 case TargetLoweringBase::AtomicExpansionKind::NotAtomic: 553 SI->setAtomic(AtomicOrdering::NotAtomic); 554 return true; 555 default: 556 llvm_unreachable("Unhandled case in tryExpandAtomicStore"); 557 } 558 } 559 560 bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) { 561 ReplacementIRBuilder Builder(LI, *DL); 562 563 // On some architectures, load-linked instructions are atomic for larger 564 // sizes than normal loads. For example, the only 64-bit load guaranteed 565 // to be single-copy atomic by ARM is an ldrexd (A3.5.3). 566 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(), 567 LI->getPointerOperand(), LI->getOrdering()); 568 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); 569 570 LI->replaceAllUsesWith(Val); 571 LI->eraseFromParent(); 572 573 return true; 574 } 575 576 bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) { 577 ReplacementIRBuilder Builder(LI, *DL); 578 AtomicOrdering Order = LI->getOrdering(); 579 if (Order == AtomicOrdering::Unordered) 580 Order = AtomicOrdering::Monotonic; 581 582 Value *Addr = LI->getPointerOperand(); 583 Type *Ty = LI->getType(); 584 Constant *DummyVal = Constant::getNullValue(Ty); 585 586 Value *Pair = Builder.CreateAtomicCmpXchg( 587 Addr, DummyVal, DummyVal, LI->getAlign(), Order, 588 AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); 589 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); 590 591 LI->replaceAllUsesWith(Loaded); 592 LI->eraseFromParent(); 593 594 return true; 595 } 596 597 /// Convert an atomic store of a non-integral type to an integer store of the 598 /// equivalent bitwidth. We used to not support floating point or vector 599 /// atomics in the IR at all. The backends learned to deal with the bitcast 600 /// idiom because that was the only way of expressing the notion of a atomic 601 /// float or vector store. The long term plan is to teach each backend to 602 /// instruction select from the original atomic store, but as a migration 603 /// mechanism, we convert back to the old format which the backends understand. 604 /// Each backend will need individual work to recognize the new format. 
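///
/// As an illustrative sketch (not lifted from a test), the rewrite turns
///   store atomic float %f, ptr %p release, align 4
/// into
///   %1 = bitcast float %f to i32
///   store atomic i32 %1, ptr %p release, align 4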
605 StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) { 606 ReplacementIRBuilder Builder(SI, *DL); 607 auto *M = SI->getModule(); 608 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), 609 M->getDataLayout()); 610 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); 611 612 Value *Addr = SI->getPointerOperand(); 613 614 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr); 615 NewSI->setAlignment(SI->getAlign()); 616 NewSI->setVolatile(SI->isVolatile()); 617 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); 618 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); 619 SI->eraseFromParent(); 620 return NewSI; 621 } 622 623 void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) { 624 // This function is only called on atomic stores that are too large to be 625 // atomic if implemented as a native store. So we replace them by an 626 // atomic swap, that can be implemented for example as a ldrex/strex on ARM 627 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. 628 // It is the responsibility of the target to only signal expansion via 629 // shouldExpandAtomicRMW in cases where this is required and possible. 630 ReplacementIRBuilder Builder(SI, *DL); 631 AtomicOrdering Ordering = SI->getOrdering(); 632 assert(Ordering != AtomicOrdering::NotAtomic); 633 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered 634 ? AtomicOrdering::Monotonic 635 : Ordering; 636 AtomicRMWInst *AI = Builder.CreateAtomicRMW( 637 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(), 638 SI->getAlign(), RMWOrdering); 639 SI->eraseFromParent(); 640 641 // Now we have an appropriate swap instruction, lower it as usual. 642 tryExpandAtomicRMW(AI); 643 } 644 645 static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, 646 Value *Loaded, Value *NewVal, Align AddrAlign, 647 AtomicOrdering MemOpOrder, SyncScope::ID SSID, 648 Value *&Success, Value *&NewLoaded, 649 Instruction *MetadataSrc) { 650 Type *OrigTy = NewVal->getType(); 651 652 // This code can go away when cmpxchg supports FP and vector types. 
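  // For instance, when an f32 atomicrmw expansion reaches here, both the
  // expected and the new value are bitcast to i32, an integer cmpxchg is
  // issued, and the loaded result is bitcast back to f32 below.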
653 assert(!OrigTy->isPointerTy()); 654 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy(); 655 if (NeedBitcast) { 656 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); 657 NewVal = Builder.CreateBitCast(NewVal, IntTy); 658 Loaded = Builder.CreateBitCast(Loaded, IntTy); 659 } 660 661 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( 662 Addr, Loaded, NewVal, AddrAlign, MemOpOrder, 663 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); 664 if (MetadataSrc) 665 copyMetadataForAtomic(*Pair, *MetadataSrc); 666 667 Success = Builder.CreateExtractValue(Pair, 1, "success"); 668 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); 669 670 if (NeedBitcast) 671 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy); 672 } 673 674 bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) { 675 LLVMContext &Ctx = AI->getModule()->getContext(); 676 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI); 677 switch (Kind) { 678 case TargetLoweringBase::AtomicExpansionKind::None: 679 return false; 680 case TargetLoweringBase::AtomicExpansionKind::LLSC: { 681 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; 682 unsigned ValueSize = getAtomicOpSize(AI); 683 if (ValueSize < MinCASSize) { 684 expandPartwordAtomicRMW(AI, 685 TargetLoweringBase::AtomicExpansionKind::LLSC); 686 } else { 687 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) { 688 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded, 689 AI->getValOperand()); 690 }; 691 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(), 692 AI->getAlign(), AI->getOrdering(), PerformOp); 693 } 694 return true; 695 } 696 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: { 697 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; 698 unsigned ValueSize = getAtomicOpSize(AI); 699 if (ValueSize < MinCASSize) { 700 expandPartwordAtomicRMW(AI, 701 TargetLoweringBase::AtomicExpansionKind::CmpXChg); 702 } else { 703 SmallVector<StringRef> SSNs; 704 Ctx.getSyncScopeNames(SSNs); 705 auto MemScope = SSNs[AI->getSyncScopeID()].empty() 706 ? "system" 707 : SSNs[AI->getSyncScopeID()]; 708 OptimizationRemarkEmitter ORE(AI->getFunction()); 709 ORE.emit([&]() { 710 return OptimizationRemark(DEBUG_TYPE, "Passed", AI) 711 << "A compare and swap loop was generated for an atomic " 712 << AI->getOperationName(AI->getOperation()) << " operation at " 713 << MemScope << " memory scope"; 714 }); 715 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); 716 } 717 return true; 718 } 719 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: { 720 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; 721 unsigned ValueSize = getAtomicOpSize(AI); 722 if (ValueSize < MinCASSize) { 723 AtomicRMWInst::BinOp Op = AI->getOperation(); 724 // Widen And/Or/Xor and give the target another chance at expanding it. 
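      // These bitwise ops are safe to perform on the whole word: the widened
      // operand is padded with 0 bits (for or/xor) or 1 bits (for and), so
      // the neighbouring bytes are left unchanged and no masking loop is
      // needed.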
725 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || 726 Op == AtomicRMWInst::And) { 727 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI)); 728 return true; 729 } 730 } 731 expandAtomicRMWToMaskedIntrinsic(AI); 732 return true; 733 } 734 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: { 735 TLI->emitBitTestAtomicRMWIntrinsic(AI); 736 return true; 737 } 738 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: { 739 TLI->emitCmpArithAtomicRMWIntrinsic(AI); 740 return true; 741 } 742 case TargetLoweringBase::AtomicExpansionKind::NotAtomic: 743 return lowerAtomicRMWInst(AI); 744 case TargetLoweringBase::AtomicExpansionKind::Expand: 745 TLI->emitExpandAtomicRMW(AI); 746 return true; 747 default: 748 llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); 749 } 750 } 751 752 namespace { 753 754 struct PartwordMaskValues { 755 // These three fields are guaranteed to be set by createMaskInstrs. 756 Type *WordType = nullptr; 757 Type *ValueType = nullptr; 758 Type *IntValueType = nullptr; 759 Value *AlignedAddr = nullptr; 760 Align AlignedAddrAlignment; 761 // The remaining fields can be null. 762 Value *ShiftAmt = nullptr; 763 Value *Mask = nullptr; 764 Value *Inv_Mask = nullptr; 765 }; 766 767 LLVM_ATTRIBUTE_UNUSED 768 raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { 769 auto PrintObj = [&O](auto *V) { 770 if (V) 771 O << *V; 772 else 773 O << "nullptr"; 774 O << '\n'; 775 }; 776 O << "PartwordMaskValues {\n"; 777 O << " WordType: "; 778 PrintObj(PMV.WordType); 779 O << " ValueType: "; 780 PrintObj(PMV.ValueType); 781 O << " AlignedAddr: "; 782 PrintObj(PMV.AlignedAddr); 783 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n'; 784 O << " ShiftAmt: "; 785 PrintObj(PMV.ShiftAmt); 786 O << " Mask: "; 787 PrintObj(PMV.Mask); 788 O << " Inv_Mask: "; 789 PrintObj(PMV.Inv_Mask); 790 O << "}\n"; 791 return O; 792 } 793 794 } // end anonymous namespace 795 796 /// This is a helper function which builds instructions to provide 797 /// values necessary for partword atomic operations. It takes an 798 /// incoming address, Addr, and ValueType, and constructs the address, 799 /// shift-amounts and masks needed to work with a larger value of size 800 /// WordSize. 801 /// 802 /// AlignedAddr: Addr rounded down to a multiple of WordSize 803 /// 804 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded 805 /// from AlignAddr for it to have the same value as if 806 /// ValueType was loaded from Addr. 807 /// 808 /// Mask: Value to mask with the value loaded from AlignAddr to 809 /// include only the part that would've been loaded from Addr. 810 /// 811 /// Inv_Mask: The inverse of Mask. 812 static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, 813 Instruction *I, Type *ValueType, 814 Value *Addr, Align AddrAlign, 815 unsigned MinWordSize) { 816 PartwordMaskValues PMV; 817 818 Module *M = I->getModule(); 819 LLVMContext &Ctx = M->getContext(); 820 const DataLayout &DL = M->getDataLayout(); 821 unsigned ValueSize = DL.getTypeStoreSize(ValueType); 822 823 PMV.ValueType = PMV.IntValueType = ValueType; 824 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy()) 825 PMV.IntValueType = 826 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits()); 827 828 PMV.WordType = MinWordSize > ValueSize ? 
Type::getIntNTy(Ctx, MinWordSize * 8) 829 : ValueType; 830 if (PMV.ValueType == PMV.WordType) { 831 PMV.AlignedAddr = Addr; 832 PMV.AlignedAddrAlignment = AddrAlign; 833 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0); 834 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true); 835 return PMV; 836 } 837 838 PMV.AlignedAddrAlignment = Align(MinWordSize); 839 840 assert(ValueSize < MinWordSize); 841 842 PointerType *PtrTy = cast<PointerType>(Addr->getType()); 843 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace()); 844 Value *PtrLSB; 845 846 if (AddrAlign < MinWordSize) { 847 PMV.AlignedAddr = Builder.CreateIntrinsic( 848 Intrinsic::ptrmask, {PtrTy, IntTy}, 849 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr, 850 "AlignedAddr"); 851 852 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy); 853 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); 854 } else { 855 // If the alignment is high enough, the LSB are known 0. 856 PMV.AlignedAddr = Addr; 857 PtrLSB = ConstantInt::getNullValue(IntTy); 858 } 859 860 if (DL.isLittleEndian()) { 861 // turn bytes into bits 862 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3); 863 } else { 864 // turn bytes into bits, and count from the other side. 865 PMV.ShiftAmt = Builder.CreateShl( 866 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3); 867 } 868 869 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt"); 870 PMV.Mask = Builder.CreateShl( 871 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt, 872 "Mask"); 873 874 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask"); 875 876 return PMV; 877 } 878 879 static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, 880 const PartwordMaskValues &PMV) { 881 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); 882 if (PMV.WordType == PMV.ValueType) 883 return WideWord; 884 885 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted"); 886 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted"); 887 return Builder.CreateBitCast(Trunc, PMV.ValueType); 888 } 889 890 static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, 891 Value *Updated, const PartwordMaskValues &PMV) { 892 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); 893 assert(Updated->getType() == PMV.ValueType && "Value type mismatch"); 894 if (PMV.WordType == PMV.ValueType) 895 return Updated; 896 897 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType); 898 899 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended"); 900 Value *Shift = 901 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true); 902 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked"); 903 Value *Or = Builder.CreateOr(And, Shift, "inserted"); 904 return Or; 905 } 906 907 /// Emit IR to implement a masked version of a given atomicrmw 908 /// operation. (That is, only the bits under the Mask should be 909 /// affected by the operation) 910 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, 911 IRBuilderBase &Builder, Value *Loaded, 912 Value *Shifted_Inc, Value *Inc, 913 const PartwordMaskValues &PMV) { 914 // TODO: update to use 915 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order 916 // to merge bits from two values without requiring PMV.Inv_Mask. 
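  // (Concretely, the masked-merge form would be
  //    Final = Loaded ^ ((Loaded ^ Shifted_Inc) & Mask)
  //  which takes the new bits under Mask and keeps Loaded elsewhere.)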
917 switch (Op) { 918 case AtomicRMWInst::Xchg: { 919 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); 920 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc); 921 return FinalVal; 922 } 923 case AtomicRMWInst::Or: 924 case AtomicRMWInst::Xor: 925 case AtomicRMWInst::And: 926 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW"); 927 case AtomicRMWInst::Add: 928 case AtomicRMWInst::Sub: 929 case AtomicRMWInst::Nand: { 930 // The other arithmetic ops need to be masked into place. 931 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc); 932 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask); 933 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); 934 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked); 935 return FinalVal; 936 } 937 case AtomicRMWInst::Max: 938 case AtomicRMWInst::Min: 939 case AtomicRMWInst::UMax: 940 case AtomicRMWInst::UMin: 941 case AtomicRMWInst::FAdd: 942 case AtomicRMWInst::FSub: 943 case AtomicRMWInst::FMin: 944 case AtomicRMWInst::FMax: 945 case AtomicRMWInst::FMaximum: 946 case AtomicRMWInst::FMinimum: 947 case AtomicRMWInst::UIncWrap: 948 case AtomicRMWInst::UDecWrap: 949 case AtomicRMWInst::USubCond: 950 case AtomicRMWInst::USubSat: { 951 // Finally, other ops will operate on the full value, so truncate down to 952 // the original size, and expand out again after doing the 953 // operation. Bitcasts will be inserted for FP values. 954 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV); 955 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc); 956 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV); 957 return FinalVal; 958 } 959 default: 960 llvm_unreachable("Unknown atomic op"); 961 } 962 } 963 964 /// Expand a sub-word atomicrmw operation into an appropriate 965 /// word-sized operation. 966 /// 967 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same 968 /// way as a typical atomicrmw expansion. The only difference here is 969 /// that the operation inside of the loop may operate upon only a 970 /// part of the value. 971 void AtomicExpandImpl::expandPartwordAtomicRMW( 972 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) { 973 // Widen And/Or/Xor and give the target another chance at expanding it. 
974 AtomicRMWInst::BinOp Op = AI->getOperation(); 975 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || 976 Op == AtomicRMWInst::And) { 977 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI)); 978 return; 979 } 980 AtomicOrdering MemOpOrder = AI->getOrdering(); 981 SyncScope::ID SSID = AI->getSyncScopeID(); 982 983 ReplacementIRBuilder Builder(AI, *DL); 984 985 PartwordMaskValues PMV = 986 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), 987 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); 988 989 Value *ValOperand_Shifted = nullptr; 990 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add || 991 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) { 992 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType); 993 ValOperand_Shifted = 994 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt, 995 "ValOperand_Shifted"); 996 } 997 998 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) { 999 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted, 1000 AI->getValOperand(), PMV); 1001 }; 1002 1003 Value *OldResult; 1004 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { 1005 OldResult = insertRMWCmpXchgLoop( 1006 Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment, 1007 MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI); 1008 } else { 1009 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); 1010 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, 1011 PMV.AlignedAddrAlignment, MemOpOrder, 1012 PerformPartwordOp); 1013 } 1014 1015 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); 1016 AI->replaceAllUsesWith(FinalOldResult); 1017 AI->eraseFromParent(); 1018 } 1019 1020 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width. 1021 AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) { 1022 ReplacementIRBuilder Builder(AI, *DL); 1023 AtomicRMWInst::BinOp Op = AI->getOperation(); 1024 1025 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || 1026 Op == AtomicRMWInst::And) && 1027 "Unable to widen operation"); 1028 1029 PartwordMaskValues PMV = 1030 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), 1031 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); 1032 1033 Value *ValOperand_Shifted = 1034 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), 1035 PMV.ShiftAmt, "ValOperand_Shifted"); 1036 1037 Value *NewOperand; 1038 1039 if (Op == AtomicRMWInst::And) 1040 NewOperand = 1041 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand"); 1042 else 1043 NewOperand = ValOperand_Shifted; 1044 1045 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW( 1046 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment, 1047 AI->getOrdering(), AI->getSyncScopeID()); 1048 1049 copyMetadataForAtomic(*NewAI, *AI); 1050 1051 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); 1052 AI->replaceAllUsesWith(FinalOldResult); 1053 AI->eraseFromParent(); 1054 return NewAI; 1055 } 1056 1057 bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { 1058 // The basic idea here is that we're expanding a cmpxchg of a 1059 // smaller memory size up to a word-sized cmpxchg. To do this, we 1060 // need to add a retry-loop for strong cmpxchg, so that 1061 // modifications to other parts of the word don't cause a spurious 1062 // failure. 
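  // For a weak cmpxchg no retry loop is required, since a spurious failure
  // is permitted; the expansion below therefore branches straight to the end
  // block after a single wide cmpxchg attempt.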
1063 1064 // This generates code like the following: 1065 // [[Setup mask values PMV.*]] 1066 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt 1067 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt 1068 // %InitLoaded = load i32* %addr 1069 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask 1070 // br partword.cmpxchg.loop 1071 // partword.cmpxchg.loop: 1072 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ], 1073 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ] 1074 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted 1075 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted 1076 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp, 1077 // i32 %FullWord_NewVal success_ordering failure_ordering 1078 // %OldVal = extractvalue { i32, i1 } %NewCI, 0 1079 // %Success = extractvalue { i32, i1 } %NewCI, 1 1080 // br i1 %Success, label %partword.cmpxchg.end, 1081 // label %partword.cmpxchg.failure 1082 // partword.cmpxchg.failure: 1083 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask 1084 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut 1085 // br i1 %ShouldContinue, label %partword.cmpxchg.loop, 1086 // label %partword.cmpxchg.end 1087 // partword.cmpxchg.end: 1088 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt 1089 // %FinalOldVal = trunc i32 %tmp1 to i8 1090 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0 1091 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1 1092 1093 Value *Addr = CI->getPointerOperand(); 1094 Value *Cmp = CI->getCompareOperand(); 1095 Value *NewVal = CI->getNewValOperand(); 1096 1097 BasicBlock *BB = CI->getParent(); 1098 Function *F = BB->getParent(); 1099 ReplacementIRBuilder Builder(CI, *DL); 1100 LLVMContext &Ctx = Builder.getContext(); 1101 1102 BasicBlock *EndBB = 1103 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end"); 1104 auto FailureBB = 1105 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB); 1106 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB); 1107 1108 // The split call above "helpfully" added a branch at the end of BB 1109 // (to the wrong place). 1110 std::prev(BB->end())->eraseFromParent(); 1111 Builder.SetInsertPoint(BB); 1112 1113 PartwordMaskValues PMV = 1114 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, 1115 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); 1116 1117 // Shift the incoming values over, into the right location in the word. 1118 Value *NewVal_Shifted = 1119 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt); 1120 Value *Cmp_Shifted = 1121 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt); 1122 1123 // Load the entire current word, and mask into place the expected and new 1124 // values 1125 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr); 1126 InitLoaded->setVolatile(CI->isVolatile()); 1127 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask); 1128 Builder.CreateBr(LoopBB); 1129 1130 // partword.cmpxchg.loop: 1131 Builder.SetInsertPoint(LoopBB); 1132 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2); 1133 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB); 1134 1135 // Mask/Or the expected and new values into place in the loaded word. 
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When building a strong cmpxchg we need a loop, so it might seem a weak
  // cmpxchg could be used inside it. But using a strong inner cmpxchg allows
  // the ShouldContinue comparison below, and the underlying cmpxchg is
  // expected to be a machine instruction, which is strong anyway.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, check whether the masked-out part of the loaded value has
  // been modified. If it has not, the comparison must have failed on the
  // masked-in part, so give up and exit the loop. Otherwise another thread
  // touched the rest of the word and we should retry.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above.
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}

void AtomicExpandImpl::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  ReplacementIRBuilder Builder(I, *DL);
  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                    MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}

void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise, just
  // zero-ext.
1204 Instruction::CastOps CastOp = Instruction::ZExt; 1205 AtomicRMWInst::BinOp RMWOp = AI->getOperation(); 1206 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min) 1207 CastOp = Instruction::SExt; 1208 1209 Value *ValOperand_Shifted = Builder.CreateShl( 1210 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType), 1211 PMV.ShiftAmt, "ValOperand_Shifted"); 1212 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic( 1213 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt, 1214 AI->getOrdering()); 1215 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); 1216 AI->replaceAllUsesWith(FinalOldResult); 1217 AI->eraseFromParent(); 1218 } 1219 1220 void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic( 1221 AtomicCmpXchgInst *CI) { 1222 ReplacementIRBuilder Builder(CI, *DL); 1223 1224 PartwordMaskValues PMV = createMaskInstrs( 1225 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(), 1226 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); 1227 1228 Value *CmpVal_Shifted = Builder.CreateShl( 1229 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt, 1230 "CmpVal_Shifted"); 1231 Value *NewVal_Shifted = Builder.CreateShl( 1232 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt, 1233 "NewVal_Shifted"); 1234 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( 1235 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, 1236 CI->getMergedOrdering()); 1237 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); 1238 Value *Res = PoisonValue::get(CI->getType()); 1239 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); 1240 Value *Success = Builder.CreateICmpEQ( 1241 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success"); 1242 Res = Builder.CreateInsertValue(Res, Success, 1); 1243 1244 CI->replaceAllUsesWith(Res); 1245 CI->eraseFromParent(); 1246 } 1247 1248 Value *AtomicExpandImpl::insertRMWLLSCLoop( 1249 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, 1250 AtomicOrdering MemOpOrder, 1251 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) { 1252 LLVMContext &Ctx = Builder.getContext(); 1253 BasicBlock *BB = Builder.GetInsertBlock(); 1254 Function *F = BB->getParent(); 1255 1256 assert(AddrAlign >= 1257 F->getDataLayout().getTypeStoreSize(ResultTy) && 1258 "Expected at least natural alignment at this point."); 1259 1260 // Given: atomicrmw some_op iN* %addr, iN %incr ordering 1261 // 1262 // The standard expansion we produce is: 1263 // [...] 1264 // atomicrmw.start: 1265 // %loaded = @load.linked(%addr) 1266 // %new = some_op iN %loaded, %incr 1267 // %stored = @store_conditional(%new, %addr) 1268 // %try_again = icmp i32 ne %stored, 0 1269 // br i1 %try_again, label %loop, label %atomicrmw.end 1270 // atomicrmw.end: 1271 // [...] 1272 BasicBlock *ExitBB = 1273 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); 1274 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); 1275 1276 // The split call above "helpfully" added a branch at the end of BB (to the 1277 // wrong place). 1278 std::prev(BB->end())->eraseFromParent(); 1279 Builder.SetInsertPoint(BB); 1280 Builder.CreateBr(LoopBB); 1281 1282 // Start the main loop block now that we've taken care of the preliminaries. 
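  // The status value produced by emitStoreConditional follows the usual LL/SC
  // convention: 0 means the conditional store succeeded, and any other value
  // sends control back around the loop via the TryAgain branch below.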
1283 Builder.SetInsertPoint(LoopBB); 1284 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder); 1285 1286 Value *NewVal = PerformOp(Builder, Loaded); 1287 1288 Value *StoreSuccess = 1289 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); 1290 Value *TryAgain = Builder.CreateICmpNE( 1291 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); 1292 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); 1293 1294 Builder.SetInsertPoint(ExitBB, ExitBB->begin()); 1295 return Loaded; 1296 } 1297 1298 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of 1299 /// the equivalent bitwidth. We used to not support pointer cmpxchg in the 1300 /// IR. As a migration step, we convert back to what use to be the standard 1301 /// way to represent a pointer cmpxchg so that we can update backends one by 1302 /// one. 1303 AtomicCmpXchgInst * 1304 AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { 1305 auto *M = CI->getModule(); 1306 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(), 1307 M->getDataLayout()); 1308 1309 ReplacementIRBuilder Builder(CI, *DL); 1310 1311 Value *Addr = CI->getPointerOperand(); 1312 1313 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy); 1314 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy); 1315 1316 auto *NewCI = Builder.CreateAtomicCmpXchg( 1317 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(), 1318 CI->getFailureOrdering(), CI->getSyncScopeID()); 1319 NewCI->setVolatile(CI->isVolatile()); 1320 NewCI->setWeak(CI->isWeak()); 1321 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); 1322 1323 Value *OldVal = Builder.CreateExtractValue(NewCI, 0); 1324 Value *Succ = Builder.CreateExtractValue(NewCI, 1); 1325 1326 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType()); 1327 1328 Value *Res = PoisonValue::get(CI->getType()); 1329 Res = Builder.CreateInsertValue(Res, OldVal, 0); 1330 Res = Builder.CreateInsertValue(Res, Succ, 1); 1331 1332 CI->replaceAllUsesWith(Res); 1333 CI->eraseFromParent(); 1334 return NewCI; 1335 } 1336 1337 bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { 1338 AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); 1339 AtomicOrdering FailureOrder = CI->getFailureOrdering(); 1340 Value *Addr = CI->getPointerOperand(); 1341 BasicBlock *BB = CI->getParent(); 1342 Function *F = BB->getParent(); 1343 LLVMContext &Ctx = F->getContext(); 1344 // If shouldInsertFencesForAtomic() returns true, then the target does not 1345 // want to deal with memory orders, and emitLeading/TrailingFence should take 1346 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we 1347 // should preserve the ordering. 1348 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI); 1349 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic 1350 ? AtomicOrdering::Monotonic 1351 : CI->getMergedOrdering(); 1352 1353 // In implementations which use a barrier to achieve release semantics, we can 1354 // delay emitting this barrier until we know a store is actually going to be 1355 // attempted. The cost of this delay is that we need 2 copies of the block 1356 // emitting the load-linked, affecting code size. 1357 // 1358 // Ideally, this logic would be unconditional except for the minsize check 1359 // since in other cases the extra blocks naturally collapse down to the 1360 // minimal loop. 
Unfortunately, this puts too much stress on later 1361 // optimisations so we avoid emitting the extra logic in those cases too. 1362 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic && 1363 SuccessOrder != AtomicOrdering::Monotonic && 1364 SuccessOrder != AtomicOrdering::Acquire && 1365 !F->hasMinSize(); 1366 1367 // There's no overhead for sinking the release barrier in a weak cmpxchg, so 1368 // do it even on minsize. 1369 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak(); 1370 1371 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord 1372 // 1373 // The full expansion we produce is: 1374 // [...] 1375 // %aligned.addr = ... 1376 // cmpxchg.start: 1377 // %unreleasedload = @load.linked(%aligned.addr) 1378 // %unreleasedload.extract = extract value from %unreleasedload 1379 // %should_store = icmp eq %unreleasedload.extract, %desired 1380 // br i1 %should_store, label %cmpxchg.releasingstore, 1381 // label %cmpxchg.nostore 1382 // cmpxchg.releasingstore: 1383 // fence? 1384 // br label cmpxchg.trystore 1385 // cmpxchg.trystore: 1386 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore], 1387 // [%releasedload, %cmpxchg.releasedload] 1388 // %updated.new = insert %new into %loaded.trystore 1389 // %stored = @store_conditional(%updated.new, %aligned.addr) 1390 // %success = icmp eq i32 %stored, 0 1391 // br i1 %success, label %cmpxchg.success, 1392 // label %cmpxchg.releasedload/%cmpxchg.failure 1393 // cmpxchg.releasedload: 1394 // %releasedload = @load.linked(%aligned.addr) 1395 // %releasedload.extract = extract value from %releasedload 1396 // %should_store = icmp eq %releasedload.extract, %desired 1397 // br i1 %should_store, label %cmpxchg.trystore, 1398 // label %cmpxchg.failure 1399 // cmpxchg.success: 1400 // fence? 1401 // br label %cmpxchg.end 1402 // cmpxchg.nostore: 1403 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start], 1404 // [%releasedload, 1405 // %cmpxchg.releasedload/%cmpxchg.trystore] 1406 // @load_linked_fail_balance()? 1407 // br label %cmpxchg.failure 1408 // cmpxchg.failure: 1409 // fence? 1410 // br label %cmpxchg.end 1411 // cmpxchg.end: 1412 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure], 1413 // [%loaded.trystore, %cmpxchg.trystore] 1414 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] 1415 // %loaded = extract value from %loaded.exit 1416 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 1417 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 1418 // [...] 1419 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end"); 1420 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); 1421 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); 1422 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); 1423 auto ReleasedLoadBB = 1424 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB); 1425 auto TryStoreBB = 1426 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB); 1427 auto ReleasingStoreBB = 1428 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB); 1429 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB); 1430 1431 ReplacementIRBuilder Builder(CI, *DL); 1432 1433 // The split call above "helpfully" added a branch at the end of BB (to the 1434 // wrong place), but we might want a fence too. It's easiest to just remove 1435 // the branch entirely. 
1436 std::prev(BB->end())->eraseFromParent(); 1437 Builder.SetInsertPoint(BB); 1438 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) 1439 TLI->emitLeadingFence(Builder, CI, SuccessOrder); 1440 1441 PartwordMaskValues PMV = 1442 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, 1443 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); 1444 Builder.CreateBr(StartBB); 1445 1446 // Start the main loop block now that we've taken care of the preliminaries. 1447 Builder.SetInsertPoint(StartBB); 1448 Value *UnreleasedLoad = 1449 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); 1450 Value *UnreleasedLoadExtract = 1451 extractMaskedValue(Builder, UnreleasedLoad, PMV); 1452 Value *ShouldStore = Builder.CreateICmpEQ( 1453 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store"); 1454 1455 // If the cmpxchg doesn't actually need any ordering when it fails, we can 1456 // jump straight past that fence instruction (if it exists). 1457 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB); 1458 1459 Builder.SetInsertPoint(ReleasingStoreBB); 1460 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier) 1461 TLI->emitLeadingFence(Builder, CI, SuccessOrder); 1462 Builder.CreateBr(TryStoreBB); 1463 1464 Builder.SetInsertPoint(TryStoreBB); 1465 PHINode *LoadedTryStore = 1466 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore"); 1467 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB); 1468 Value *NewValueInsert = 1469 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); 1470 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert, 1471 PMV.AlignedAddr, MemOpOrder); 1472 StoreSuccess = Builder.CreateICmpEQ( 1473 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); 1474 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; 1475 Builder.CreateCondBr(StoreSuccess, SuccessBB, 1476 CI->isWeak() ? FailureBB : RetryBB); 1477 1478 Builder.SetInsertPoint(ReleasedLoadBB); 1479 Value *SecondLoad; 1480 if (HasReleasedLoadBB) { 1481 SecondLoad = 1482 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); 1483 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV); 1484 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract, 1485 CI->getCompareOperand(), "should_store"); 1486 1487 // If the cmpxchg doesn't actually need any ordering when it fails, we can 1488 // jump straight past that fence instruction (if it exists). 1489 Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); 1490 // Update PHI node in TryStoreBB. 1491 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB); 1492 } else 1493 Builder.CreateUnreachable(); 1494 1495 // Make sure later instructions don't get reordered with a fence if 1496 // necessary. 
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
      TLI->shouldInsertTrailingFenceForAtomicStore(CI))
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;

  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
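    // (This covers uses other than extractvalue, e.g. the whole { iN, i1 }
    // aggregate being stored or passed to a call; the aggregate is rebuilt
    // here from the expansion's own results.)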
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

// An atomicrmw is idempotent when it always stores back the value it loaded,
// e.g. add/sub/or/xor with 0, and with -1, or min with the maximum value; such
// operations can potentially be lowered to a fenced load.
bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
  // TODO: Add floating point support.
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  switch (RMWI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  case AtomicRMWInst::Min:
    return C->isMaxValue(true);
  case AtomicRMWInst::Max:
    return C->isMinValue(true);
  case AtomicRMWInst::UMin:
    return C->isMaxValue(false);
  case AtomicRMWInst::UMax:
    return C->isMinValue(false);
  default:
    return false;
  }
}

bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}

Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove the branch
  // entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded, MetadataSrc);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}

bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  case TargetLoweringBase::AtomicExpansionKind::None:
    if (ValueSize < MinCASSize)
      return expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    return expandAtomicCmpXchg(CI);
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicCmpXchgInst(CI);
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    TLI->emitExpandAtomicCmpXchg(CI);
    return true;
  }
}

// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  ReplacementIRBuilder Builder(AI, AI->getDataLayout());
  Builder.setIsFPConstrained(
      AI->getFunction()->hasFnAttribute(Attribute::StrictFP));

  // FIXME: If FP exceptions are observable, we should force them off for the
  // loop for the FP atomics.
  Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
      AI->getOrdering(), AI->getSyncScopeID(),
      [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      },
      CreateCmpXchg, /*MetadataSrc=*/AI);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}

// In order to use one of the sized library calls such as
// __atomic_fetch_add_4, the alignment must be sufficient, the size
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined.)
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                  const DataLayout &DL) {
  // TODO: "LargestSize" is an approximation for "largest type that
  // you can express in C". It seems to be the case that int128 is
  // supported on all 64-bit platforms, otherwise only up to 64-bit
  // integers are supported. If we get this wrong, then we'll try to
  // call a sized libcall that doesn't actually exist. There should
  // really be some more reliable way in LLVM of determining integer
  // sizes which are valid in the target's C ABI...
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}

void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    handleFailure(*I, "unsupported atomic load");
}

void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    handleFailure(*I, "unsupported atomic store");
}

void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  if (!expanded)
    handleFailure(*I, "unsupported cmpxchg");
}

static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return ArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return ArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return ArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return ArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return ArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return ArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return ArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::FMaximum:
  case AtomicRMWInst::FMinimum:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
  case AtomicRMWInst::USubCond:
  case AtomicRMWInst::USubSat:
    // No atomic libcalls are available for these.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}

void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
                  Instruction *MetadataSrc) {
          // Create the CAS instruction normally...
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
          if (MetadataSrc)
            copyMetadataForAtomic(*Pair, *MetadataSrc);

          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          // ...and then expand the CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}

// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
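//
// As an illustrative sketch (the exact libcall depends on the target's
// support): an 'atomicrmw min' has no __atomic_fetch_* libcall at all, so
// expandAtomicRMWToLibcall above first rewrites it as a cmpxchg loop, and each
// cmpxchg emitted by that loop is then routed through this helper, becoming
// something like
//     call zeroext i1 @__atomic_compare_exchange_4(ptr %p, ptr %expected,
//                                                  i32 %desired, i32 5, i32 5)
// for a naturally aligned i32 with seq_cst ordering (5 in the C ABI encoding).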
bool AtomicExpandImpl::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1:
      RTLibType = Libcalls[1];
      break;
    case 2:
      RTLibType = Libcalls[2];
      break;
    case 4:
      RTLibType = Libcalls[3];
      break;
    case 8:
      RTLibType = Libcalls[4];
      break;
    case 16:
      RTLibType = Libcalls[5];
      break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use a sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  if (!TLI->getLibcallName(RTLibType)) {
    // This target does not implement the requested atomic libcall so give up.
    return false;
  }

  // Build up the function call. There are two kinds. First, the sized
  // variants. These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN __atomic_load_N(iN *ptr, int ordering)
  //  void __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                   int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations: the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants. They look like the following:
  //  void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                         int ordering)
  //  bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                 void *desired, int success_order,
  //                                 int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.
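  //
  // For instance (values and names purely illustrative), an under-aligned or
  // oversized atomic load that cannot use a sized call is assembled below as
  //     call void @__atomic_load(i64 16, ptr %p, ptr %ret.addr, i32 2)
  // i.e. a size_t size, the source pointer, a temporary 'ret' alloca placed in
  // the entry block, and the C ABI ordering (2 == acquire); the result is then
  // reloaded from %ret.addr once the call returns.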

  AllocaInst *AllocaCASExpected = nullptr;
  AllocaInst *AllocaValue = nullptr;
  AllocaInst *AllocaResult = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeList Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  // Note: This assumes all address spaces share a common libfunc
  // implementation and that addresses are convertible. For systems without
  // that property, we'd need to extend this mechanism to support AS-specific
  // families of atomic intrinsics.
  Value *PtrVal = PointerOperand;
  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
    Args.push_back(AllocaResult);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
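  // There are three result shapes: for a CAS, the updated 'expected' value is
  // reloaded from its alloca and paired with the call's i1 into { iN, i1 };
  // for a sized call with a result, the returned integer is cast back to the
  // original type; for a generic call with a result, the value is reloaded
  // from the 'ret' alloca.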
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}
    Type *FinalResultTy = I->getType();
    Value *V = PoisonValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}
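
// Illustrative end-to-end example (assuming the target reports no native
// support and the __atomic_* library is available): a naturally aligned
//     %old = atomicrmw add ptr %p, i32 %v monotonic
// takes the sized path here and becomes roughly
//     %old = call i32 @__atomic_fetch_add_4(ptr %p, i32 %v, i32 0)
// whereas an i128 add on a target whose largest legal integer type is narrower
// than 64 bits fails canUseSizedAtomicCall, finds UNKNOWN_LIBCALL in slot 0,
// and instead goes through the cmpxchg-loop fallback in
// expandAtomicRMWToLibcall.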