//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target specific instruction which implement the
// same semantics in a way which better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {

/// Function pass that rewrites atomic loads, stores, atomicrmw and cmpxchg
/// instructions. For each atomic instruction, runOnFunction either replaces
/// it with a libcall (unsupported size/alignment) or applies the casts,
/// fences and expansions that the TargetLowering hooks request.
class AtomicExpand : public FunctionPass {
  // Both pointers are assigned at the start of every runOnFunction call and
  // are used by the member functions invoked from there.
  const TargetLowering *TLI = nullptr;
  const DataLayout *DL = nullptr;

public:
  static char ID; // Pass identification, replacement for typeid

  AtomicExpand() : FunctionPass(ID) {
    initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

private:
  // Fence insertion around a single atomic instruction.
  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);

  // Integer-type coercion and expansion of atomic loads and stores.
  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
  bool tryExpandAtomicStore(StoreInst *SI);
  void expandAtomicStore(StoreInst *SI);

  // Expansion of atomicrmw, including the sub-word ("partword") variants.
  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
  Value *
  insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
                    Align AddrAlign, AtomicOrdering MemOpOrder,
                    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandAtomicOpToLLSC(
      Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandPartwordAtomicRMW(
      AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);

  // Expansion of cmpxchg and the cmpxchg-loop form of atomicrmw.
  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
  static Value *insertRMWCmpXchgLoop(
      IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder, SyncScope::ID SSID,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
      CreateCmpXchgInstFun CreateCmpXchg);
  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);

  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
  bool isIdempotentRMW(AtomicRMWInst *RMWI);
  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);

  // Lowering to __atomic_* library calls; runOnFunction uses these when
  // atomicSizeSupported() rejects the instruction.
  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
                               Value *PointerOperand, Value *ValueOperand,
                               Value *CASExpected, AtomicOrdering Ordering,
                               AtomicOrdering Ordering2,
                               ArrayRef<RTLIB::Libcall> Libcalls);
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *LI);
  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

  // The free function needs access to the private insertRMWCmpXchgLoop.
  // NOTE(review): inferred from the friend declaration; its definition is
  // outside this part of the file.
  friend bool
  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                 CreateCmpXchgInstFun CreateCmpXchg);
};

// IRBuilder to be used for replacement atomic instructions.
struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
  // Preserves the DebugLoc from I, and preserves still valid metadata.
  // The builder is positioned at I, so new instructions are inserted
  // immediately before the instruction being replaced.
  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
      : IRBuilder(I->getContext(), DL) {
    SetInsertPoint(I);
    this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
  }
};

} // end anonymous namespace

char AtomicExpand::ID = 0;

char &llvm::AtomicExpandID = AtomicExpand::ID;

INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
                false)

FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }

// Helper functions to retrieve the size of atomic instructions.
// Each overload returns DataLayout::getTypeStoreSize of the accessed value
// type; callers in this file treat the result as a byte count.

// Size of the loaded type.
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

// Size of the stored value's type.
static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

// Size of the atomicrmw value operand's type.
static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
  const DataLayout &DL = RMWI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

// Size of the cmpxchg compare operand's type.
static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
  const DataLayout &DL = CASI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering.
// (Versus turning into a __atomic libcall)
//
// Returns true when the access is at least naturally aligned (alignment >=
// size) and no larger than the target's maximum supported atomic size.
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  Align Alignment = I->getAlign();
  return Alignment >= Size &&
         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}

/// Pass entry point. Collects every non-fence atomic instruction in \p F and,
/// for each one in turn: (1) replaces it with a libcall if its size/alignment
/// is unsupported, (2) casts non-integer operands to integers if the target
/// asks for it, (3) inserts fences if the target asks for them, and (4) runs
/// the per-instruction-kind expansion. Returns true if the IR was changed.
bool AtomicExpand::runOnFunction(Function &F) {
  // Without a TargetPassConfig there is no target to query; do nothing.
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  auto &TM = TPC->getTM<TargetMachine>();
  const auto *Subtarget = TM.getSubtargetImpl(F);
  if (!Subtarget->enableAtomicExpand())
    return false;
  // Cache per-function state used by the member helpers.
  TLI = Subtarget->getTargetLowering();
  DL = &F.getParent()->getDataLayout();

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather a
  // list of all atomic instructions before we start.
  for (Instruction &I : instructions(F))
    if (I.isAtomic() && !isa<FenceInst>(&I))
      AtomicInsts.push_back(&I);

  bool MadeChange = false;
  for (auto *I : AtomicInsts) {
    // Exactly one of these is non-null; the assert below enforces that at
    // least one is.
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");

    // If the Size/Alignment is not supported, replace with a libcall.
    if (LI) {
      if (!atomicSizeSupported(TLI, LI)) {
        expandAtomicLoadToLibcall(LI);
        MadeChange = true;
        continue;
      }
    } else if (SI) {
      if (!atomicSizeSupported(TLI, SI)) {
        expandAtomicStoreToLibcall(SI);
        MadeChange = true;
        continue;
      }
    } else if (RMWI) {
      if (!atomicSizeSupported(TLI, RMWI)) {
        expandAtomicRMWToLibcall(RMWI);
        MadeChange = true;
        continue;
      }
    } else if (CASI) {
      if (!atomicSizeSupported(TLI, CASI)) {
        expandAtomicCASToLibcall(CASI);
        MadeChange = true;
        continue;
      }
    }

    // Integer coercion. Each conversion erases the old instruction, so both I
    // and the typed pointer are rebound to the replacement.
    if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
                  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    } else if (SI &&
               TLI->shouldCastAtomicStoreInIR(SI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    } else if (RMWI &&
               TLI->shouldCastAtomicRMWIInIR(RMWI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    } else if (CASI) {
      // TODO: when we're ready to make the change at the IR level, we can
      // extend convertCmpXchgToInteger for floating point too.
      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        I = CASI = convertCmpXchgToIntegerType(CASI);
        MadeChange = true;
      }
    }

    if (TLI->shouldInsertFencesForAtomic(I)) {
      // The instruction's own ordering is weakened to monotonic and the
      // original ordering is carried by the surrounding fences instead.
      // Monotonic here doubles as the "no fence needed" marker.
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (LI && isAcquireOrStronger(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(AtomicOrdering::Monotonic);
      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(AtomicOrdering::Monotonic);
      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                          isAcquireOrStronger(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(AtomicOrdering::Monotonic);
      } else if (CASI &&
                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                     TargetLoweringBase::AtomicExpansionKind::None &&
                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getFailureOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path. As a result, fence insertion is directly done by
        // expandAtomicCmpXchg in that case.
        FenceOrdering = CASI->getMergedOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
      }

      if (FenceOrdering != AtomicOrdering::Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering);
      }
    } else if (I->hasAtomicStore() &&
               TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
      // Target wants only a trailing fence after storing atomics; the
      // instruction's own ordering is left untouched in this path.
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (SI)
        FenceOrdering = SI->getOrdering();
      else if (RMWI)
        FenceOrdering = RMWI->getOrdering();
      else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
                           TargetLoweringBase::AtomicExpansionKind::LLSC)
        // LLSC is handled in expandAtomicCmpXchg().
        FenceOrdering = CASI->getSuccessOrdering();

      IRBuilder Builder(I);
      if (auto TrailingFence =
              TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
        // The builder inserts before I, so relocate the fence behind it.
        TrailingFence->moveAfter(I);
        MadeChange = true;
      }
    }

    // Per-kind expansion of the (possibly converted) instruction.
    if (LI)
      MadeChange |= tryExpandAtomicLoad(LI);
    else if (SI)
      MadeChange |= tryExpandAtomicStore(SI);
    else if (RMWI) {
      // There are two different ways of expanding RMW instructions:
      // - into a load if it is idempotent
      // - into a Cmpxchg/LL-SC loop otherwise
      // we try them in that order.

      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
        MadeChange = true;
      } else {
        AtomicRMWInst::BinOp Op = RMWI->getOperation();
        unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
        unsigned ValueSize = getAtomicOpSize(RMWI);
        // Sub-word bitwise ops can be widened to a full-word atomicrmw
        // directly, without a loop; do that before the generic expansion.
        if (ValueSize < MinCASSize &&
            (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
             Op == AtomicRMWInst::And)) {
          RMWI = widenPartwordAtomicRMW(RMWI);
          MadeChange = true;
        }

        MadeChange |= tryExpandAtomicRMW(RMWI);
      }
    } else if (CASI)
      MadeChange |= tryExpandAtomicCmpXchg(CASI);
  }
  return MadeChange;
}

/// Ask the target to emit a leading fence before \p I and a trailing fence
/// after it for ordering \p Order. Returns true if either fence was emitted.
bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
  ReplacementIRBuilder Builder(I, *DL);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}

/// Get the iX type with the same bitwidth as T.
358 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, 359 const DataLayout &DL) { 360 EVT VT = TLI->getMemValueType(DL, T); 361 unsigned BitWidth = VT.getStoreSizeInBits(); 362 assert(BitWidth == VT.getSizeInBits() && "must be a power of two"); 363 return IntegerType::get(T->getContext(), BitWidth); 364 } 365 366 /// Convert an atomic load of a non-integral type to an integer load of the 367 /// equivalent bitwidth. See the function comment on 368 /// convertAtomicStoreToIntegerType for background. 369 LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { 370 auto *M = LI->getModule(); 371 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout()); 372 373 ReplacementIRBuilder Builder(LI, *DL); 374 375 Value *Addr = LI->getPointerOperand(); 376 Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); 377 Value *NewAddr = Builder.CreateBitCast(Addr, PT); 378 379 auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); 380 NewLI->setAlignment(LI->getAlign()); 381 NewLI->setVolatile(LI->isVolatile()); 382 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); 383 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); 384 385 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); 386 LI->replaceAllUsesWith(NewVal); 387 LI->eraseFromParent(); 388 return NewLI; 389 } 390 391 AtomicRMWInst * 392 AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { 393 auto *M = RMWI->getModule(); 394 Type *NewTy = 395 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout()); 396 397 ReplacementIRBuilder Builder(RMWI, *DL); 398 399 Value *Addr = RMWI->getPointerOperand(); 400 Value *Val = RMWI->getValOperand(); 401 Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace()); 402 Value *NewAddr = Builder.CreateBitCast(Addr, PT); 403 Value *NewVal = Val->getType()->isPointerTy() 404 ? 
Builder.CreatePtrToInt(Val, NewTy) 405 : Builder.CreateBitCast(Val, NewTy); 406 407 auto *NewRMWI = 408 Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal, 409 RMWI->getAlign(), RMWI->getOrdering()); 410 NewRMWI->setVolatile(RMWI->isVolatile()); 411 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n"); 412 413 Value *NewRVal = RMWI->getType()->isPointerTy() 414 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType()) 415 : Builder.CreateBitCast(NewRMWI, RMWI->getType()); 416 RMWI->replaceAllUsesWith(NewRVal); 417 RMWI->eraseFromParent(); 418 return NewRMWI; 419 } 420 421 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { 422 switch (TLI->shouldExpandAtomicLoadInIR(LI)) { 423 case TargetLoweringBase::AtomicExpansionKind::None: 424 return false; 425 case TargetLoweringBase::AtomicExpansionKind::LLSC: 426 expandAtomicOpToLLSC( 427 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(), 428 LI->getOrdering(), 429 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; }); 430 return true; 431 case TargetLoweringBase::AtomicExpansionKind::LLOnly: 432 return expandAtomicLoadToLL(LI); 433 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: 434 return expandAtomicLoadToCmpXchg(LI); 435 case TargetLoweringBase::AtomicExpansionKind::NotAtomic: 436 LI->setAtomic(AtomicOrdering::NotAtomic); 437 return true; 438 default: 439 llvm_unreachable("Unhandled case in tryExpandAtomicLoad"); 440 } 441 } 442 443 bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) { 444 switch (TLI->shouldExpandAtomicStoreInIR(SI)) { 445 case TargetLoweringBase::AtomicExpansionKind::None: 446 return false; 447 case TargetLoweringBase::AtomicExpansionKind::Expand: 448 expandAtomicStore(SI); 449 return true; 450 case TargetLoweringBase::AtomicExpansionKind::NotAtomic: 451 SI->setAtomic(AtomicOrdering::NotAtomic); 452 return true; 453 default: 454 llvm_unreachable("Unhandled case in tryExpandAtomicStore"); 455 } 456 } 457 458 bool 
AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { 459 ReplacementIRBuilder Builder(LI, *DL); 460 461 // On some architectures, load-linked instructions are atomic for larger 462 // sizes than normal loads. For example, the only 64-bit load guaranteed 463 // to be single-copy atomic by ARM is an ldrexd (A3.5.3). 464 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(), 465 LI->getPointerOperand(), LI->getOrdering()); 466 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); 467 468 LI->replaceAllUsesWith(Val); 469 LI->eraseFromParent(); 470 471 return true; 472 } 473 474 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { 475 ReplacementIRBuilder Builder(LI, *DL); 476 AtomicOrdering Order = LI->getOrdering(); 477 if (Order == AtomicOrdering::Unordered) 478 Order = AtomicOrdering::Monotonic; 479 480 Value *Addr = LI->getPointerOperand(); 481 Type *Ty = LI->getType(); 482 Constant *DummyVal = Constant::getNullValue(Ty); 483 484 Value *Pair = Builder.CreateAtomicCmpXchg( 485 Addr, DummyVal, DummyVal, LI->getAlign(), Order, 486 AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); 487 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); 488 489 LI->replaceAllUsesWith(Loaded); 490 LI->eraseFromParent(); 491 492 return true; 493 } 494 495 /// Convert an atomic store of a non-integral type to an integer store of the 496 /// equivalent bitwidth. We used to not support floating point or vector 497 /// atomics in the IR at all. The backends learned to deal with the bitcast 498 /// idiom because that was the only way of expressing the notion of a atomic 499 /// float or vector store. The long term plan is to teach each backend to 500 /// instruction select from the original atomic store, but as a migration 501 /// mechanism, we convert back to the old format which the backends understand. 502 /// Each backend will need individual work to recognize the new format. 
503 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { 504 ReplacementIRBuilder Builder(SI, *DL); 505 auto *M = SI->getModule(); 506 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), 507 M->getDataLayout()); 508 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); 509 510 Value *Addr = SI->getPointerOperand(); 511 Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); 512 Value *NewAddr = Builder.CreateBitCast(Addr, PT); 513 514 StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); 515 NewSI->setAlignment(SI->getAlign()); 516 NewSI->setVolatile(SI->isVolatile()); 517 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); 518 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); 519 SI->eraseFromParent(); 520 return NewSI; 521 } 522 523 void AtomicExpand::expandAtomicStore(StoreInst *SI) { 524 // This function is only called on atomic stores that are too large to be 525 // atomic if implemented as a native store. So we replace them by an 526 // atomic swap, that can be implemented for example as a ldrex/strex on ARM 527 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. 528 // It is the responsibility of the target to only signal expansion via 529 // shouldExpandAtomicRMW in cases where this is required and possible. 530 ReplacementIRBuilder Builder(SI, *DL); 531 AtomicOrdering Ordering = SI->getOrdering(); 532 assert(Ordering != AtomicOrdering::NotAtomic); 533 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered 534 ? AtomicOrdering::Monotonic 535 : Ordering; 536 AtomicRMWInst *AI = Builder.CreateAtomicRMW( 537 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(), 538 SI->getAlign(), RMWOrdering); 539 SI->eraseFromParent(); 540 541 // Now we have an appropriate swap instruction, lower it as usual. 
542 tryExpandAtomicRMW(AI); 543 } 544 545 static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, 546 Value *Loaded, Value *NewVal, Align AddrAlign, 547 AtomicOrdering MemOpOrder, SyncScope::ID SSID, 548 Value *&Success, Value *&NewLoaded) { 549 Type *OrigTy = NewVal->getType(); 550 551 // This code can go away when cmpxchg supports FP types. 552 assert(!OrigTy->isPointerTy()); 553 bool NeedBitcast = OrigTy->isFloatingPointTy(); 554 if (NeedBitcast) { 555 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); 556 unsigned AS = Addr->getType()->getPointerAddressSpace(); 557 Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS)); 558 NewVal = Builder.CreateBitCast(NewVal, IntTy); 559 Loaded = Builder.CreateBitCast(Loaded, IntTy); 560 } 561 562 Value *Pair = Builder.CreateAtomicCmpXchg( 563 Addr, Loaded, NewVal, AddrAlign, MemOpOrder, 564 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); 565 Success = Builder.CreateExtractValue(Pair, 1, "success"); 566 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); 567 568 if (NeedBitcast) 569 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy); 570 } 571 572 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { 573 LLVMContext &Ctx = AI->getModule()->getContext(); 574 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI); 575 switch (Kind) { 576 case TargetLoweringBase::AtomicExpansionKind::None: 577 return false; 578 case TargetLoweringBase::AtomicExpansionKind::LLSC: { 579 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; 580 unsigned ValueSize = getAtomicOpSize(AI); 581 if (ValueSize < MinCASSize) { 582 expandPartwordAtomicRMW(AI, 583 TargetLoweringBase::AtomicExpansionKind::LLSC); 584 } else { 585 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) { 586 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded, 587 AI->getValOperand()); 588 }; 589 expandAtomicOpToLLSC(AI, AI->getType(), 
AI->getPointerOperand(), 590 AI->getAlign(), AI->getOrdering(), PerformOp); 591 } 592 return true; 593 } 594 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: { 595 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; 596 unsigned ValueSize = getAtomicOpSize(AI); 597 if (ValueSize < MinCASSize) { 598 expandPartwordAtomicRMW(AI, 599 TargetLoweringBase::AtomicExpansionKind::CmpXChg); 600 } else { 601 SmallVector<StringRef> SSNs; 602 Ctx.getSyncScopeNames(SSNs); 603 auto MemScope = SSNs[AI->getSyncScopeID()].empty() 604 ? "system" 605 : SSNs[AI->getSyncScopeID()]; 606 OptimizationRemarkEmitter ORE(AI->getFunction()); 607 ORE.emit([&]() { 608 return OptimizationRemark(DEBUG_TYPE, "Passed", AI) 609 << "A compare and swap loop was generated for an atomic " 610 << AI->getOperationName(AI->getOperation()) << " operation at " 611 << MemScope << " memory scope"; 612 }); 613 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); 614 } 615 return true; 616 } 617 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: { 618 expandAtomicRMWToMaskedIntrinsic(AI); 619 return true; 620 } 621 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: { 622 TLI->emitBitTestAtomicRMWIntrinsic(AI); 623 return true; 624 } 625 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: { 626 TLI->emitCmpArithAtomicRMWIntrinsic(AI); 627 return true; 628 } 629 case TargetLoweringBase::AtomicExpansionKind::NotAtomic: 630 return lowerAtomicRMWInst(AI); 631 case TargetLoweringBase::AtomicExpansionKind::Expand: 632 TLI->emitExpandAtomicRMW(AI); 633 return true; 634 default: 635 llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); 636 } 637 } 638 639 namespace { 640 641 struct PartwordMaskValues { 642 // These three fields are guaranteed to be set by createMaskInstrs. 643 Type *WordType = nullptr; 644 Type *ValueType = nullptr; 645 Type *IntValueType = nullptr; 646 Value *AlignedAddr = nullptr; 647 Align AlignedAddrAlignment; 648 // The remaining fields can be null. 
649 Value *ShiftAmt = nullptr; 650 Value *Mask = nullptr; 651 Value *Inv_Mask = nullptr; 652 }; 653 654 LLVM_ATTRIBUTE_UNUSED 655 raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { 656 auto PrintObj = [&O](auto *V) { 657 if (V) 658 O << *V; 659 else 660 O << "nullptr"; 661 O << '\n'; 662 }; 663 O << "PartwordMaskValues {\n"; 664 O << " WordType: "; 665 PrintObj(PMV.WordType); 666 O << " ValueType: "; 667 PrintObj(PMV.ValueType); 668 O << " AlignedAddr: "; 669 PrintObj(PMV.AlignedAddr); 670 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n'; 671 O << " ShiftAmt: "; 672 PrintObj(PMV.ShiftAmt); 673 O << " Mask: "; 674 PrintObj(PMV.Mask); 675 O << " Inv_Mask: "; 676 PrintObj(PMV.Inv_Mask); 677 O << "}\n"; 678 return O; 679 } 680 681 } // end anonymous namespace 682 683 /// This is a helper function which builds instructions to provide 684 /// values necessary for partword atomic operations. It takes an 685 /// incoming address, Addr, and ValueType, and constructs the address, 686 /// shift-amounts and masks needed to work with a larger value of size 687 /// WordSize. 688 /// 689 /// AlignedAddr: Addr rounded down to a multiple of WordSize 690 /// 691 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded 692 /// from AlignAddr for it to have the same value as if 693 /// ValueType was loaded from Addr. 694 /// 695 /// Mask: Value to mask with the value loaded from AlignAddr to 696 /// include only the part that would've been loaded from Addr. 697 /// 698 /// Inv_Mask: The inverse of Mask. 
699 static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, 700 Instruction *I, Type *ValueType, 701 Value *Addr, Align AddrAlign, 702 unsigned MinWordSize) { 703 PartwordMaskValues PMV; 704 705 Module *M = I->getModule(); 706 LLVMContext &Ctx = M->getContext(); 707 const DataLayout &DL = M->getDataLayout(); 708 unsigned ValueSize = DL.getTypeStoreSize(ValueType); 709 710 PMV.ValueType = PMV.IntValueType = ValueType; 711 if (PMV.ValueType->isFloatingPointTy()) 712 PMV.IntValueType = 713 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits()); 714 715 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8) 716 : ValueType; 717 if (PMV.ValueType == PMV.WordType) { 718 PMV.AlignedAddr = Addr; 719 PMV.AlignedAddrAlignment = AddrAlign; 720 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0); 721 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true); 722 return PMV; 723 } 724 725 PMV.AlignedAddrAlignment = Align(MinWordSize); 726 727 assert(ValueSize < MinWordSize); 728 729 PointerType *PtrTy = cast<PointerType>(Addr->getType()); 730 Type *WordPtrType = PMV.WordType->getPointerTo(PtrTy->getAddressSpace()); 731 IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace()); 732 Value *PtrLSB; 733 734 if (AddrAlign < MinWordSize) { 735 PMV.AlignedAddr = Builder.CreateIntrinsic( 736 Intrinsic::ptrmask, {PtrTy, IntTy}, 737 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr, 738 "AlignedAddr"); 739 740 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy); 741 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); 742 } else { 743 // If the alignment is high enough, the LSB are known 0. 744 PMV.AlignedAddr = Addr; 745 PtrLSB = ConstantInt::getNullValue(IntTy); 746 } 747 748 if (DL.isLittleEndian()) { 749 // turn bytes into bits 750 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3); 751 } else { 752 // turn bytes into bits, and count from the other side. 
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }

  // Truncate the shift amount to the word type so it can be used directly as
  // the shift operand of word-sized shifts.
  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  // Mask has ones in the ValueSize * 8 bits that hold the narrow value, moved
  // into position by ShiftAmt.
  // NOTE(review): the constant (1 << (ValueSize * 8)) - 1 is computed in host
  // integer arithmetic; this presumably relies on ValueSize being small enough
  // that the shift stays within the width of `int` -- confirm against callers.
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
      "Mask");

  // Inv_Mask selects every bit of the word that is NOT part of the value.
  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");

  // Cast for typed pointers.
  PMV.AlignedAddr =
      Builder.CreateBitCast(PMV.AlignedAddr, WordPtrType, "AlignedAddr");

  return PMV;
}

/// Emit IR that pulls the narrow value out of the wide word \p WideWord.
///
/// The word is shifted right by PMV.ShiftAmt, truncated to PMV.IntValueType
/// and bitcast to PMV.ValueType. When the word type and the value type are
/// the same no IR is emitted and \p WideWord is returned as is.
static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                 const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return WideWord;

  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
  return Builder.CreateBitCast(Trunc, PMV.ValueType);
}

/// Emit IR that replaces the narrow value inside \p WideWord with \p Updated
/// and returns the resulting wide word.
///
/// \p Updated is bitcast to PMV.IntValueType, zero-extended to the word type
/// and shifted left by PMV.ShiftAmt; the bits of \p WideWord outside the mask
/// are kept by and-ing with PMV.Inv_Mask. When the word type and the value
/// type are the same no IR is emitted and \p Updated is returned as is.
static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                Value *Updated, const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return Updated;

  Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);

  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
  // The shift is created with the no-unsigned-wrap flag set.
  Value *Shift =
      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
  Value *Or = Builder.CreateOr(And, Shift, "inserted");
  return Or;
}

/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
///
/// \p Loaded is the current word-sized value. \p Shifted_Inc is the operand
/// already widened and shifted into position; it is only read for
/// Xchg/Add/Sub/Nand. \p Inc is the operand in its original (narrow) type; it
/// is only read for the ops that work on the extracted value. Returns the new
/// word-sized value to store.
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    // Clear the value's bits in the loaded word and or-in the new value.
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::And:
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    // The op is applied to the whole word, then only the bits under the mask
    // are taken from the result; the remaining bits come from Loaded.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap: {
    // Finally, other ops will operate on the full value, so truncate down to
    // the original size, and expand out again after doing the
    // operation. Bitcasts will be inserted for FP values.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpand::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Only the ops that performMaskedAtomicOp applies to the whole word need
  // the operand pre-shifted into position; for every other op this stays
  // null and the narrow operand is used instead.
  Value *ValOperand_Shifted = nullptr;
  if (AI->getOperation() == AtomicRMWInst::Xchg ||
      AI->getOperation() == AtomicRMWInst::Add ||
      AI->getOperation() == AtomicRMWInst::Sub ||
      AI->getOperation() == AtomicRMWInst::Nand) {
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                          PMV.ShiftAmt, "ValOperand_Shifted");
  }

  // Callback run inside the loop: computes the new word from the loaded one.
  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
                                 ValOperand_Shifted, AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                     PMV.AlignedAddrAlignment, MemOpOrder, SSID,
                                     PerformPartwordOp, createCmpXchgInstFun);
  } else {
assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); 892 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, 893 PMV.AlignedAddrAlignment, MemOpOrder, 894 PerformPartwordOp); 895 } 896 897 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); 898 AI->replaceAllUsesWith(FinalOldResult); 899 AI->eraseFromParent(); 900 } 901 902 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width. 903 AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { 904 ReplacementIRBuilder Builder(AI, *DL); 905 AtomicRMWInst::BinOp Op = AI->getOperation(); 906 907 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || 908 Op == AtomicRMWInst::And) && 909 "Unable to widen operation"); 910 911 PartwordMaskValues PMV = 912 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), 913 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); 914 915 Value *ValOperand_Shifted = 916 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), 917 PMV.ShiftAmt, "ValOperand_Shifted"); 918 919 Value *NewOperand; 920 921 if (Op == AtomicRMWInst::And) 922 NewOperand = 923 Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand"); 924 else 925 NewOperand = ValOperand_Shifted; 926 927 AtomicRMWInst *NewAI = 928 Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand, 929 PMV.AlignedAddrAlignment, AI->getOrdering()); 930 931 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); 932 AI->replaceAllUsesWith(FinalOldResult); 933 AI->eraseFromParent(); 934 return NewAI; 935 } 936 937 bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { 938 // The basic idea here is that we're expanding a cmpxchg of a 939 // smaller memory size up to a word-sized cmpxchg. To do this, we 940 // need to add a retry-loop for strong cmpxchg, so that 941 // modifications to other parts of the word don't cause a spurious 942 // failure. 

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //         label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //    %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //    %FinalOldVal = trunc i32 %tmp1 to i8
  //    %tmp2 = insertvalue { i8, i1 } poison, i8 %FinalOldVal, 0
  //    %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  // EndBB starts at CI; the new blocks are inserted before it, in the order
  // loop, failure, end.
  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  // Holds the bits of the word outside the value, as last observed. The
  // second incoming value is added once FailureBB has been filled in.
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  // A weak cmpxchg goes straight to the end block without retrying; only the
  // strong form branches to the failure block.
  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  // Rebuild the { value, success } pair in the original narrow type.
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}

/// Replace instruction \p I with a load-linked/store-conditional loop.
///
/// The loop is built by insertRMWLLSCLoop with \p PerformOp computing the
/// value to store; all uses of \p I are replaced by the loaded value returned
/// from that loop and \p I is erased.
void AtomicExpand::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  ReplacementIRBuilder Builder(I, *DL);
  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                    MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}

/// Expand a sub-word atomicrmw through the target's masked atomicrmw
/// intrinsic hook (TLI->emitMaskedAtomicRMWIntrinsic).
void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise, just
  // zero-ext.
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  // The target hook emits the actual operation; its result is treated as the
  // old word-sized value, from which the narrow result is extracted.
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

/// Expand a sub-word cmpxchg through the target's masked cmpxchg intrinsic
/// hook (TLI->emitMaskedAtomicCmpXchgIntrinsic).
///
/// Both the expected and the new value are zero-extended and shifted into
/// position. The { value, success } result is rebuilt from the word returned
/// by the hook: the value is extracted from it, and success is computed by
/// comparing its masked bits with the shifted expected value.
void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}

/// Emit a load-linked/store-conditional loop at the builder's insertion
/// point.
///
/// The current block is split at the insertion point and a loop block is
/// placed in between. Inside the loop the value is load-linked from \p Addr,
/// \p PerformOp computes the value to store from it, and the
/// store-conditional is retried until it reports success (a result of 0).
/// On return the builder is positioned at the start of the exit block.
/// Returns the value produced by the load-linked.
Value *AtomicExpand::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >=
             F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp ne i32 %stored, 0
  //     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  // A non-zero store-conditional result means the store did not happen.
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}

/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth.  We used to not support pointer cmpxchg in the
/// IR.  As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
///
/// The original instruction is replaced and erased; the new integer cmpxchg
/// is returned.
AtomicCmpXchgInst *
AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  ReplacementIRBuilder Builder(CI, *DL);

  Value *Addr = CI->getPointerOperand();
  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  // The replacement keeps alignment, both orderings, sync scope, volatility
  // and weakness of the original.
  auto *NewCI = Builder.CreateAtomicCmpXchg(
      NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  // Convert the loaded integer back to the original operand type.
  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}

/// Expand a cmpxchg into an explicit load-linked/store-conditional control
/// flow, using the target's emitLoadLinked/emitStoreConditional hooks.
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // In implementations which use a barrier to achieve release semantics, we can
  // delay emitting this barrier until we know a store is actually going to be
  // attempted. The cost of this delay is that we need 2 copies of the block
  // emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     %aligned.addr = ...
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%aligned.addr)
  //     %unreleasedload.extract = extract value from %unreleasedload
  //     %should_store = icmp eq %unreleasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.fencedstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.fencedstore:
  //     fence?
  //     br label cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.fencedstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %updated.new = insert %new into %loaded.trystore
  //     %stored = @store_conditional(%updated.new, %aligned.addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.start/
  //                           %cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%aligned.addr)
  //     %releasedload.extract = extract value from %releasedload
  //     %should_store = icmp eq %releasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.nostore
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload, %cmpxchg.releasedload]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     %loaded.failure = phi [%loaded.nostore, %cmpxchg.nostore],
  //                           [%loaded.trystore, %cmpxchg.trystore] (weak only)
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded.exit = phi [%loaded.trystore, %cmpxchg.success],
  //                        [%loaded.failure, %cmpxchg.failure]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %loaded = extract value from %loaded.exit
  //     %restmp = insertvalue { iN, i1 } poison, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  // Each new block is created in front of the previously created one, which
  // yields the layout: start, fencedstore, trystore, releasedload, success,
  // nostore, failure, end.
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  // The leading fence is emitted here only when it was not already emitted
  // unconditionally in BB above.
  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  // A failed store-conditional gives up for a weak cmpxchg; a strong one
  // retries, either through the second load-linked block or from the start.
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
    // Update PHI node in TryStoreBB.
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    // Nothing branches to this block in this configuration; it only gets a
    // terminator. SecondLoad stays unset and is not read below in that case.
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
      TLI->shouldInsertTrailingFenceForAtomicStore(CI))
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  // Only a weak cmpxchg reaches the failure block directly from trystore.
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;

  // Continue emitting right after the two PHIs.
  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    // Index 0 is the loaded value, index 1 the success flag.
    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

/// Return true when \p RMWI has a constant integer operand that leaves the
/// stored value unchanged: 0 for add/sub/or/xor, all-ones for and.
bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch (Op) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
  default:
    return false;
  }
}

/// Ask the target to turn an idempotent atomicrmw into a fenced load.
///
/// When TLI->lowerIdempotentRMWIntoFencedLoad returns a load, that load is
/// passed on to tryExpandAtomicLoad and true is returned; otherwise nothing
/// is changed here and false is returned.
bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}

/// Emit a compare-exchange retry loop at the builder's insertion point.
///
/// The current block is split at the insertion point; an initial plain load
/// of \p Addr feeds a loop in which \p PerformOp computes the new value and
/// \p CreateCmpXchg emits the compare-exchange, repeating until it reports
/// success. On return the builder is positioned at the start of the exit
/// block. Returns the "loaded" value produced by the last \p CreateCmpXchg.
Value *AtomicExpand::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  // (The loop block created below is actually named "atomicrmw.start".)
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  // Both are output arguments filled in by CreateCmpXchg.
  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  // An Unordered request is passed on as Monotonic; every other ordering is
  // passed through unchanged.
  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}

/// Expand \p CI according to the kind requested by
/// TLI->shouldExpandAtomicCmpXchgInIR. Returns true if the IR was changed.
bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  case TargetLoweringBase::AtomicExpansionKind::None:
    // Even without a requested expansion, an operation narrower than the
    // minimum cmpxchg size is widened to a word-sized cmpxchg.
    if (ValueSize < MinCASSize)
      return expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    return expandAtomicCmpXchg(CI);
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicCmpXchgInst(CI);
  }
}

// Note: This function is exposed externally by AtomicExpandUtils.h
//
// Replaces AI with a compare-exchange loop built by insertRMWCmpXchgLoop,
// using CreateCmpXchg to emit the compare-exchange itself.
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
      AI->getOrdering(), AI->getSyncScopeID(),
      [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      },
      CreateCmpXchg);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}

// In order to use one of the sized library calls such as
// __atomic_fetch_add_4, the alignment must be sufficient, the size
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined.)
//
// Size is in bytes. Returns true only if all three conditions hold.
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                  const DataLayout &DL) {
  // TODO: "LargestSize" is an approximation for "largest type that
  // you can express in C". It seems to be the case that int128 is
  // supported on all 64-bit platforms, otherwise only up to 64-bit
  // integers are supported. If we get this wrong, then we'll try to
  // call a sized libcall that doesn't actually exist. There should
  // really be some more reliable way in LLVM of determining integer
  // sizes which are valid in the target's C ABI...
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}

/// Replace an atomic load with a call to the matching __atomic_load libcall.
/// Aborts with a fatal error if expandAtomicOpToLibcall reports failure.
void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
  // Entry 0 is the generic (unsized) call, followed by the 1/2/4/8/16-byte
  // variants.
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);

  // A load has no value operand and no compare operand, and no second
  // ordering.
  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}

/// Replace an atomic store with a call to the matching __atomic_store
/// libcall. Aborts with a fatal error if expandAtomicOpToLibcall reports
/// failure.
void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
  // Entry 0 is the generic (unsized) call, followed by the 1/2/4/8/16-byte
  // variants.
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);

  // A store passes the stored value but has no compare operand and no second
  // ordering.
  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}

/// Replace a cmpxchg with a call to the matching __atomic_compare_exchange
/// libcall. Aborts with a fatal error if expandAtomicOpToLibcall reports
/// failure.
void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  // Entry 0 is the generic (unsized) call, followed by the 1/2/4/8/16-byte
  // variants.
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);

  // A cmpxchg passes both the new and the expected value and both orderings.
  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}

// Return the libcall table for the given atomicrmw operation. Only xchg has
// a generic (unsized) entry in slot 0; the fetch-and-op tables use
// UNKNOWN_LIBCALL there.
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return ArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return ArrayRef(LibcallsAdd);
1691 case AtomicRMWInst::Sub: 1692 return ArrayRef(LibcallsSub); 1693 case AtomicRMWInst::And: 1694 return ArrayRef(LibcallsAnd); 1695 case AtomicRMWInst::Or: 1696 return ArrayRef(LibcallsOr); 1697 case AtomicRMWInst::Xor: 1698 return ArrayRef(LibcallsXor); 1699 case AtomicRMWInst::Nand: 1700 return ArrayRef(LibcallsNand); 1701 case AtomicRMWInst::Max: 1702 case AtomicRMWInst::Min: 1703 case AtomicRMWInst::UMax: 1704 case AtomicRMWInst::UMin: 1705 case AtomicRMWInst::FMax: 1706 case AtomicRMWInst::FMin: 1707 case AtomicRMWInst::FAdd: 1708 case AtomicRMWInst::FSub: 1709 case AtomicRMWInst::UIncWrap: 1710 case AtomicRMWInst::UDecWrap: 1711 // No atomic libcalls are available for max/min/umax/umin. 1712 return {}; 1713 } 1714 llvm_unreachable("Unexpected AtomicRMW operation."); 1715 } 1716 1717 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { 1718 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation()); 1719 1720 unsigned Size = getAtomicOpSize(I); 1721 1722 bool Success = false; 1723 if (!Libcalls.empty()) 1724 Success = expandAtomicOpToLibcall( 1725 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(), 1726 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); 1727 1728 // The expansion failed: either there were no libcalls at all for 1729 // the operation (min/max), or there were only size-specialized 1730 // libcalls (add/sub/etc) and we needed a generic. So, expand to a 1731 // CAS libcall, via a CAS loop, instead. 1732 if (!Success) { 1733 expandAtomicRMWToCmpXchg( 1734 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded, 1735 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder, 1736 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { 1737 // Create the CAS instruction normally... 
1738 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( 1739 Addr, Loaded, NewVal, Alignment, MemOpOrder, 1740 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); 1741 Success = Builder.CreateExtractValue(Pair, 1, "success"); 1742 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); 1743 1744 // ...and then expand the CAS into a libcall. 1745 expandAtomicCASToLibcall(Pair); 1746 }); 1747 } 1748 } 1749 1750 // A helper routine for the above expandAtomic*ToLibcall functions. 1751 // 1752 // 'Libcalls' contains an array of enum values for the particular 1753 // ATOMIC libcalls to be emitted. All of the other arguments besides 1754 // 'I' are extracted from the Instruction subclass by the 1755 // caller. Depending on the particular call, some will be null. 1756 bool AtomicExpand::expandAtomicOpToLibcall( 1757 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand, 1758 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering, 1759 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) { 1760 assert(Libcalls.size() == 6); 1761 1762 LLVMContext &Ctx = I->getContext(); 1763 Module *M = I->getModule(); 1764 const DataLayout &DL = M->getDataLayout(); 1765 IRBuilder<> Builder(I); 1766 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front()); 1767 1768 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL); 1769 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8); 1770 1771 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy); 1772 1773 // TODO: the "order" argument type is "int", not int32. So 1774 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints. 
1775 ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size); 1776 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO"); 1777 Constant *OrderingVal = 1778 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering)); 1779 Constant *Ordering2Val = nullptr; 1780 if (CASExpected) { 1781 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO"); 1782 Ordering2Val = 1783 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2)); 1784 } 1785 bool HasResult = I->getType() != Type::getVoidTy(Ctx); 1786 1787 RTLIB::Libcall RTLibType; 1788 if (UseSizedLibcall) { 1789 switch (Size) { 1790 case 1: 1791 RTLibType = Libcalls[1]; 1792 break; 1793 case 2: 1794 RTLibType = Libcalls[2]; 1795 break; 1796 case 4: 1797 RTLibType = Libcalls[3]; 1798 break; 1799 case 8: 1800 RTLibType = Libcalls[4]; 1801 break; 1802 case 16: 1803 RTLibType = Libcalls[5]; 1804 break; 1805 } 1806 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) { 1807 RTLibType = Libcalls[0]; 1808 } else { 1809 // Can't use sized function, and there's no generic for this 1810 // operation, so give up. 1811 return false; 1812 } 1813 1814 if (!TLI->getLibcallName(RTLibType)) { 1815 // This target does not implement the requested atomic libcall so give up. 1816 return false; 1817 } 1818 1819 // Build up the function call. There's two kinds. First, the sized 1820 // variants. These calls are going to be one of the following (with 1821 // N=1,2,4,8,16): 1822 // iN __atomic_load_N(iN *ptr, int ordering) 1823 // void __atomic_store_N(iN *ptr, iN val, int ordering) 1824 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering) 1825 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired, 1826 // int success_order, int failure_order) 1827 // 1828 // Note that these functions can be used for non-integer atomic 1829 // operations, the values just need to be bitcast to integers on the 1830 // way in and out. 1831 // 1832 // And, then, the generic variants. 
They look like the following: 1833 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering) 1834 // void __atomic_store(size_t size, void *ptr, void *val, int ordering) 1835 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, 1836 // int ordering) 1837 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, 1838 // void *desired, int success_order, 1839 // int failure_order) 1840 // 1841 // The different signatures are built up depending on the 1842 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult' 1843 // variables. 1844 1845 AllocaInst *AllocaCASExpected = nullptr; 1846 Value *AllocaCASExpected_i8 = nullptr; 1847 AllocaInst *AllocaValue = nullptr; 1848 Value *AllocaValue_i8 = nullptr; 1849 AllocaInst *AllocaResult = nullptr; 1850 Value *AllocaResult_i8 = nullptr; 1851 1852 Type *ResultTy; 1853 SmallVector<Value *, 6> Args; 1854 AttributeList Attr; 1855 1856 // 'size' argument. 1857 if (!UseSizedLibcall) { 1858 // Note, getIntPtrType is assumed equivalent to size_t. 1859 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size)); 1860 } 1861 1862 // 'ptr' argument. 1863 // note: This assumes all address spaces share a common libfunc 1864 // implementation and that addresses are convertable. For systems without 1865 // that property, we'd need to extend this mechanism to support AS-specific 1866 // families of atomic intrinsics. 1867 auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace(); 1868 Value *PtrVal = 1869 Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS)); 1870 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx)); 1871 Args.push_back(PtrVal); 1872 1873 // 'expected' argument, if present. 
1874 if (CASExpected) { 1875 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); 1876 AllocaCASExpected->setAlignment(AllocaAlignment); 1877 unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); 1878 1879 AllocaCASExpected_i8 = Builder.CreateBitCast( 1880 AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS)); 1881 Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64); 1882 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment); 1883 Args.push_back(AllocaCASExpected_i8); 1884 } 1885 1886 // 'val' argument ('desired' for cas), if present. 1887 if (ValueOperand) { 1888 if (UseSizedLibcall) { 1889 Value *IntValue = 1890 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy); 1891 Args.push_back(IntValue); 1892 } else { 1893 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); 1894 AllocaValue->setAlignment(AllocaAlignment); 1895 AllocaValue_i8 = 1896 Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx)); 1897 Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64); 1898 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment); 1899 Args.push_back(AllocaValue_i8); 1900 } 1901 } 1902 1903 // 'ret' argument. 1904 if (!CASExpected && HasResult && !UseSizedLibcall) { 1905 AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); 1906 AllocaResult->setAlignment(AllocaAlignment); 1907 unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); 1908 AllocaResult_i8 = 1909 Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); 1910 Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64); 1911 Args.push_back(AllocaResult_i8); 1912 } 1913 1914 // 'ordering' ('success_order' for cas) argument. 1915 Args.push_back(OrderingVal); 1916 1917 // 'failure_order' argument, if present. 1918 if (Ordering2Val) 1919 Args.push_back(Ordering2Val); 1920 1921 // Now, the return type. 
1922 if (CASExpected) { 1923 ResultTy = Type::getInt1Ty(Ctx); 1924 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt); 1925 } else if (HasResult && UseSizedLibcall) 1926 ResultTy = SizedIntTy; 1927 else 1928 ResultTy = Type::getVoidTy(Ctx); 1929 1930 // Done with setting up arguments and return types, create the call: 1931 SmallVector<Type *, 6> ArgTys; 1932 for (Value *Arg : Args) 1933 ArgTys.push_back(Arg->getType()); 1934 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false); 1935 FunctionCallee LibcallFn = 1936 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr); 1937 CallInst *Call = Builder.CreateCall(LibcallFn, Args); 1938 Call->setAttributes(Attr); 1939 Value *Result = Call; 1940 1941 // And then, extract the results... 1942 if (ValueOperand && !UseSizedLibcall) 1943 Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64); 1944 1945 if (CASExpected) { 1946 // The final result from the CAS is {load of 'expected' alloca, bool result 1947 // from call} 1948 Type *FinalResultTy = I->getType(); 1949 Value *V = PoisonValue::get(FinalResultTy); 1950 Value *ExpectedOut = Builder.CreateAlignedLoad( 1951 CASExpected->getType(), AllocaCASExpected, AllocaAlignment); 1952 Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64); 1953 V = Builder.CreateInsertValue(V, ExpectedOut, 0); 1954 V = Builder.CreateInsertValue(V, Result, 1); 1955 I->replaceAllUsesWith(V); 1956 } else if (HasResult) { 1957 Value *V; 1958 if (UseSizedLibcall) 1959 V = Builder.CreateBitOrPointerCast(Result, I->getType()); 1960 else { 1961 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, 1962 AllocaAlignment); 1963 Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64); 1964 } 1965 I->replaceAllUsesWith(V); 1966 } 1967 I->eraseFromParent(); 1968 return true; 1969 } 1970