//===--- CGAtomic.cpp - Emit LLVM IR for atomic operations ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the code for emitting atomic operations.
//
//===----------------------------------------------------------------------===//

#include "CGCall.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"

using namespace clang;
using namespace CodeGen;

namespace {
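  /// Gathers the layout facts (sizes, alignments, evaluation kind) needed to
  /// lower an atomic access to a given lvalue, normalizing bit-field,
  /// vector-element, and ext-vector-element lvalues into a form the emission
  /// helpers below can handle uniformly.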
  class AtomicInfo {
    CodeGenFunction &CGF;
    QualType AtomicTy;
    QualType ValueTy;
    uint64_t AtomicSizeInBits;
    uint64_t ValueSizeInBits;
    CharUnits AtomicAlign;
    CharUnits ValueAlign;
    TypeEvaluationKind EvaluationKind;
    bool UseLibcall;
    LValue LVal;
    CGBitFieldInfo BFI;
  public:
    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue)
        : CGF(CGF), AtomicSizeInBits(0), ValueSizeInBits(0),
          EvaluationKind(TEK_Scalar), UseLibcall(true) {
      assert(!lvalue.isGlobalReg());
      ASTContext &C = CGF.getContext();
      if (lvalue.isSimple()) {
        AtomicTy = lvalue.getType();
        if (auto *ATy = AtomicTy->getAs<AtomicType>())
          ValueTy = ATy->getValueType();
        else
          ValueTy = AtomicTy;
        EvaluationKind = CGF.getEvaluationKind(ValueTy);

        uint64_t ValueAlignInBits;
        uint64_t AtomicAlignInBits;
        TypeInfo ValueTI = C.getTypeInfo(ValueTy);
        ValueSizeInBits = ValueTI.Width;
        ValueAlignInBits = ValueTI.Align;

        TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
        AtomicSizeInBits = AtomicTI.Width;
        AtomicAlignInBits = AtomicTI.Align;

        assert(ValueSizeInBits <= AtomicSizeInBits);
        assert(ValueAlignInBits <= AtomicAlignInBits);

        AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
        ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
        if (lvalue.getAlignment().isZero())
          lvalue.setAlignment(AtomicAlign);

        LVal = lvalue;
      } else if (lvalue.isBitField()) {
        ValueTy = lvalue.getType();
        ValueSizeInBits = C.getTypeSize(ValueTy);
        auto &OrigBFI = lvalue.getBitFieldInfo();
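        // Widen the access to the smallest aligned storage unit that covers
        // the bit-field: compute the field's offset within that unit, then
        // round the covered bits up to a multiple of the lvalue's alignment.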
        auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment());
        AtomicSizeInBits = C.toBits(
            C.toCharUnitsFromBits(Offset + OrigBFI.Size + C.getCharWidth() - 1)
                .alignTo(lvalue.getAlignment()));
        llvm::Value *BitFieldPtr = lvalue.getBitFieldPointer();
        auto OffsetInChars =
            (C.toCharUnitsFromBits(OrigBFI.Offset) / lvalue.getAlignment()) *
            lvalue.getAlignment();
        llvm::Value *StoragePtr = CGF.Builder.CreateConstGEP1_64(
            CGF.Int8Ty, BitFieldPtr, OffsetInChars.getQuantity());
        StoragePtr = CGF.Builder.CreateAddrSpaceCast(
            StoragePtr, CGF.UnqualPtrTy, "atomic_bitfield_base");
        BFI = OrigBFI;
        BFI.Offset = Offset;
        BFI.StorageSize = AtomicSizeInBits;
        BFI.StorageOffset += OffsetInChars;
        llvm::Type *StorageTy = CGF.Builder.getIntNTy(AtomicSizeInBits);
        LVal = LValue::MakeBitfield(
            Address(StoragePtr, StorageTy, lvalue.getAlignment()), BFI,
            lvalue.getType(), lvalue.getBaseInfo(), lvalue.getTBAAInfo());
        AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned);
        if (AtomicTy.isNull()) {
          llvm::APInt Size(
              /*numBits=*/32,
              C.toCharUnitsFromBits(AtomicSizeInBits).getQuantity());
          AtomicTy = C.getConstantArrayType(C.CharTy, Size, nullptr,
                                            ArraySizeModifier::Normal,
                                            /*IndexTypeQuals=*/0);
        }
        AtomicAlign = ValueAlign = lvalue.getAlignment();
      } else if (lvalue.isVectorElt()) {
        ValueTy = lvalue.getType()->castAs<VectorType>()->getElementType();
        ValueSizeInBits = C.getTypeSize(ValueTy);
        AtomicTy = lvalue.getType();
        AtomicSizeInBits = C.getTypeSize(AtomicTy);
        AtomicAlign = ValueAlign = lvalue.getAlignment();
        LVal = lvalue;
      } else {
        assert(lvalue.isExtVectorElt());
        ValueTy = lvalue.getType();
        ValueSizeInBits = C.getTypeSize(ValueTy);
        AtomicTy = ValueTy = CGF.getContext().getExtVectorType(
            lvalue.getType(), cast<llvm::FixedVectorType>(
                                  lvalue.getExtVectorAddress().getElementType())
                                  ->getNumElements());
        AtomicSizeInBits = C.getTypeSize(AtomicTy);
        AtomicAlign = ValueAlign = lvalue.getAlignment();
        LVal = lvalue;
      }
      UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
          AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
    }

    QualType getAtomicType() const { return AtomicTy; }
    QualType getValueType() const { return ValueTy; }
    CharUnits getAtomicAlignment() const { return AtomicAlign; }
    uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
    uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
    TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
    bool shouldUseLibcall() const { return UseLibcall; }
    const LValue &getAtomicLValue() const { return LVal; }
    llvm::Value *getAtomicPointer() const {
      if (LVal.isSimple())
        return LVal.getPointer(CGF);
      else if (LVal.isBitField())
        return LVal.getBitFieldPointer();
      else if (LVal.isVectorElt())
        return LVal.getVectorPointer();
      assert(LVal.isExtVectorElt());
      return LVal.getExtVectorPointer();
    }
    Address getAtomicAddress() const {
      llvm::Type *ElTy;
      if (LVal.isSimple())
        ElTy = LVal.getAddress(CGF).getElementType();
      else if (LVal.isBitField())
        ElTy = LVal.getBitFieldAddress().getElementType();
      else if (LVal.isVectorElt())
        ElTy = LVal.getVectorAddress().getElementType();
      else
        ElTy = LVal.getExtVectorAddress().getElementType();
      return Address(getAtomicPointer(), ElTy, getAtomicAlignment());
    }

    Address getAtomicAddressAsAtomicIntPointer() const {
      return castToAtomicIntPointer(getAtomicAddress());
    }

    /// Is the atomic size larger than the underlying value type?
    ///
    /// Note that the absence of padding does not mean that atomic
    /// objects are completely interchangeable with non-atomic
    /// objects: we might have promoted the alignment of a type
    /// without making it bigger.
    bool hasPadding() const {
      return (ValueSizeInBits != AtomicSizeInBits);
    }

    bool emitMemSetZeroIfNecessary() const;

    llvm::Value *getAtomicSizeValue() const {
      CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
      return CGF.CGM.getSize(size);
    }

    /// Cast the given pointer to an integer pointer suitable for atomic
    /// operations.
    Address castToAtomicIntPointer(Address Addr) const;

    /// If Addr is compatible with the iN that will be used for an atomic
    /// operation, bitcast it. Otherwise, create a temporary that is suitable
    /// and copy the value across.
    Address convertToAtomicIntPointer(Address Addr) const;

    /// Turn an atomic-layout object into an r-value.
    RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot,
                                     SourceLocation loc, bool AsValue) const;

    /// Converts an r-value to an integer value.
    llvm::Value *convertRValueToInt(RValue RVal) const;

    /// Converts an integer value back to an r-value, either of the value type
    /// or of the full atomic type, depending on \p AsValue.
    RValue ConvertIntToValueOrAtomic(llvm::Value *IntVal,
                                     AggValueSlot ResultSlot,
                                     SourceLocation Loc, bool AsValue) const;

    /// Copy an atomic r-value into atomic-layout memory.
    void emitCopyIntoMemory(RValue rvalue) const;

    /// Project an l-value down to the value field.
    LValue projectValue() const {
      assert(LVal.isSimple());
      Address addr = getAtomicAddress();
      if (hasPadding())
        addr = CGF.Builder.CreateStructGEP(addr, 0);

      return LValue::MakeAddr(addr, getValueType(), CGF.getContext(),
                              LVal.getBaseInfo(), LVal.getTBAAInfo());
    }

    /// Emits atomic load.
    /// \returns Loaded value.
    RValue EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
                          bool AsValue, llvm::AtomicOrdering AO,
                          bool IsVolatile);

    /// Emits atomic compare-and-exchange sequence.
    /// \param Expected Expected value.
    /// \param Desired Desired value.
    /// \param Success Atomic ordering on success.
    /// \param Failure Atomic ordering on failure.
    /// \param IsWeak true if the atomic operation is weak, false otherwise.
    /// \returns Pair of values: the previous value from storage (value type)
    /// and a boolean flag (i1 type) that is true on success and false
    /// otherwise.
    std::pair<RValue, llvm::Value *>
    EmitAtomicCompareExchange(RValue Expected, RValue Desired,
                              llvm::AtomicOrdering Success =
                                  llvm::AtomicOrdering::SequentiallyConsistent,
                              llvm::AtomicOrdering Failure =
                                  llvm::AtomicOrdering::SequentiallyConsistent,
                              bool IsWeak = false);

    /// Emits atomic update.
    /// \param AO Atomic ordering.
    /// \param UpdateOp Update operation for the current lvalue.
    void EmitAtomicUpdate(llvm::AtomicOrdering AO,
                          const llvm::function_ref<RValue(RValue)> &UpdateOp,
                          bool IsVolatile);
    /// Emits atomic update.
    /// \param AO Atomic ordering.
    void EmitAtomicUpdate(llvm::AtomicOrdering AO, RValue UpdateRVal,
                          bool IsVolatile);

    /// Materialize an atomic r-value in atomic-layout memory.
    Address materializeRValue(RValue rvalue) const;

    /// Creates a temp alloca for intermediate operations on the atomic value.
    Address CreateTempAlloca() const;
  private:
    bool requiresMemSetZero(llvm::Type *type) const;

    /// Emits atomic load as a libcall.
    void EmitAtomicLoadLibcall(llvm::Value *AddrForLoaded,
                               llvm::AtomicOrdering AO, bool IsVolatile);
    /// Emits atomic load as an LLVM instruction.
    llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile);
    /// Emits atomic compare-and-exchange op as a libcall.
    llvm::Value *EmitAtomicCompareExchangeLibcall(
        llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
        llvm::AtomicOrdering Success =
            llvm::AtomicOrdering::SequentiallyConsistent,
        llvm::AtomicOrdering Failure =
            llvm::AtomicOrdering::SequentiallyConsistent);
    /// Emits atomic compare-and-exchange op as an LLVM instruction.
    std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp(
        llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
        llvm::AtomicOrdering Success =
            llvm::AtomicOrdering::SequentiallyConsistent,
        llvm::AtomicOrdering Failure =
            llvm::AtomicOrdering::SequentiallyConsistent,
        bool IsWeak = false);
    /// Emit atomic update as libcalls.
    void
    EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO,
                            const llvm::function_ref<RValue(RValue)> &UpdateOp,
                            bool IsVolatile);
    /// Emit atomic update as LLVM instructions.
    void EmitAtomicUpdateOp(llvm::AtomicOrdering AO,
                            const llvm::function_ref<RValue(RValue)> &UpdateOp,
                            bool IsVolatile);
    /// Emit atomic update as libcalls.
    void EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO, RValue UpdateRVal,
                                 bool IsVolatile);
    /// Emit atomic update as LLVM instructions.
    void EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal,
                            bool IsVolatile);
  };
}

Address AtomicInfo::CreateTempAlloca() const {
  Address TempAlloca = CGF.CreateMemTemp(
      (LVal.isBitField() && ValueSizeInBits > AtomicSizeInBits) ? ValueTy
                                                                : AtomicTy,
      getAtomicAlignment(),
      "atomic-temp");
  // Cast to a pointer to the value type for bit-fields.
  if (LVal.isBitField())
    return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TempAlloca, getAtomicAddress().getType(),
        getAtomicAddress().getElementType());
  return TempAlloca;
}

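/// Emit a call to a runtime routine from the __atomic_* libcall family,
/// marking the callee nounwind and willreturn.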
static RValue emitAtomicLibcall(CodeGenFunction &CGF,
                                StringRef fnName,
                                QualType resultType,
                                CallArgList &args) {
  const CGFunctionInfo &fnInfo =
    CGF.CGM.getTypes().arrangeBuiltinFunctionCall(resultType, args);
  llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo);
  llvm::AttrBuilder fnAttrB(CGF.getLLVMContext());
  fnAttrB.addAttribute(llvm::Attribute::NoUnwind);
  fnAttrB.addAttribute(llvm::Attribute::WillReturn);
  llvm::AttributeList fnAttrs = llvm::AttributeList::get(
      CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, fnAttrB);

  llvm::FunctionCallee fn =
      CGF.CGM.CreateRuntimeFunction(fnTy, fnName, fnAttrs);
  auto callee = CGCallee::forDirect(fn);
  return CGF.EmitCall(fnInfo, callee, ReturnValueSlot(), args);
}

/// Does a store of the given IR type modify the full expected width?
static bool isFullSizeType(CodeGenModule &CGM, llvm::Type *type,
                           uint64_t expectedSize) {
  return (CGM.getDataLayout().getTypeStoreSize(type) * 8 == expectedSize);
}

/// Does the atomic type require memsetting to zero before initialization?
///
/// The IR type is provided as a way of making certain queries faster.
bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const {
  // If the atomic type has size padding, we definitely need a memset.
  if (hasPadding()) return true;

  // Otherwise, do some simple heuristics to try to avoid it:
  switch (getEvaluationKind()) {
  // For scalars and complexes, check whether the store size of the
  // type uses the full size.
  case TEK_Scalar:
    return !isFullSizeType(CGF.CGM, type, AtomicSizeInBits);
  case TEK_Complex:
    return !isFullSizeType(CGF.CGM, type->getStructElementType(0),
                           AtomicSizeInBits / 2);

  // Padding in structs has an undefined bit pattern.  User beware.
  case TEK_Aggregate:
    return false;
  }
  llvm_unreachable("bad evaluation kind");
}

bool AtomicInfo::emitMemSetZeroIfNecessary() const {
  assert(LVal.isSimple());
  Address addr = LVal.getAddress(CGF);
  if (!requiresMemSetZero(addr.getElementType()))
    return false;

  CGF.Builder.CreateMemSet(
      addr.getPointer(), llvm::ConstantInt::get(CGF.Int8Ty, 0),
      CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits).getQuantity(),
      LVal.getAlignment().getAsAlign());
  return true;
}

static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
                              Address Dest, Address Ptr,
                              Address Val1, Address Val2,
                              uint64_t Size,
                              llvm::AtomicOrdering SuccessOrder,
                              llvm::AtomicOrdering FailureOrder,
                              llvm::SyncScope::ID Scope) {
  llvm::Value *Expected = CGF.Builder.CreateLoad(Val1);
  llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);

  llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
      Ptr, Expected, Desired, SuccessOrder, FailureOrder, Scope);
  Pair->setVolatile(E->isVolatile());
  Pair->setWeak(IsWeak);

  // Cmp holds the result of the compare-exchange operation: true on success,
  // false on failure.
  llvm::Value *Old = CGF.Builder.CreateExtractValue(Pair, 0);
  llvm::Value *Cmp = CGF.Builder.CreateExtractValue(Pair, 1);

  // This basic block is used to hold the store instruction if the operation
  // failed.
  llvm::BasicBlock *StoreExpectedBB =
      CGF.createBasicBlock("cmpxchg.store_expected", CGF.CurFn);

  // This basic block is the exit point of the operation, we should end up
  // here regardless of whether or not the operation succeeded.
  llvm::BasicBlock *ContinueBB =
      CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn);

  // If the exchange failed, branch to StoreExpectedBB to write the observed
  // value back to Expected; otherwise branch straight to the exit point.
  CGF.Builder.CreateCondBr(Cmp, ContinueBB, StoreExpectedBB);

  CGF.Builder.SetInsertPoint(StoreExpectedBB);
  // Update the memory at Expected with Old's value.
  CGF.Builder.CreateStore(Old, Val1);
  // Finally, branch to the exit point.
  CGF.Builder.CreateBr(ContinueBB);

  CGF.Builder.SetInsertPoint(ContinueBB);
  // Update the memory at Dest with Cmp's value.
  CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType()));
}

/// Given an ordering required on success, emit all possible cmpxchg
/// instructions to cope with the provided (but possibly only dynamically
/// known) FailureOrder.
static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
                                        bool IsWeak, Address Dest, Address Ptr,
                                        Address Val1, Address Val2,
                                        llvm::Value *FailureOrderVal,
                                        uint64_t Size,
                                        llvm::AtomicOrdering SuccessOrder,
                                        llvm::SyncScope::ID Scope) {
  llvm::AtomicOrdering FailureOrder;
  if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
    auto FOS = FO->getSExtValue();
    if (!llvm::isValidAtomicOrderingCABI(FOS))
      FailureOrder = llvm::AtomicOrdering::Monotonic;
    else
      switch ((llvm::AtomicOrderingCABI)FOS) {
      case llvm::AtomicOrderingCABI::relaxed:
      // 31.7.2.18: "The failure argument shall not be memory_order_release
      // nor memory_order_acq_rel". Fall back to monotonic.
      case llvm::AtomicOrderingCABI::release:
      case llvm::AtomicOrderingCABI::acq_rel:
        FailureOrder = llvm::AtomicOrdering::Monotonic;
        break;
      case llvm::AtomicOrderingCABI::consume:
      case llvm::AtomicOrderingCABI::acquire:
        FailureOrder = llvm::AtomicOrdering::Acquire;
        break;
      case llvm::AtomicOrderingCABI::seq_cst:
        FailureOrder = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      }
    // Prior to C++17, "the failure argument shall be no stronger than the
    // success argument". This condition has been lifted and the only
    // precondition is 31.7.2.18. Effectively treat this as a DR and skip
    // language version checks.
    emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
                      FailureOrder, Scope);
    return;
  }

  // Create all the relevant basic blocks.
  auto *MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn);
  auto *AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn);
  auto *SeqCstBB = CGF.createBasicBlock("seqcst_fail", CGF.CurFn);
  auto *ContBB = CGF.createBasicBlock("atomic.continue", CGF.CurFn);

  // MonotonicBB is arbitrarily chosen as the default case; in practice, this
  // doesn't matter unless someone is crazy enough to use something that
  // doesn't fold to a constant for the ordering.
  llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(FailureOrderVal, MonotonicBB);
  // consume is implemented as acquire, since it's the closest in LLVM.
  SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
              AcquireBB);
  SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire),
              AcquireBB);
  SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
              SeqCstBB);

  // Emit all the different atomics.
  CGF.Builder.SetInsertPoint(MonotonicBB);
  emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
                    Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
  CGF.Builder.CreateBr(ContBB);

  CGF.Builder.SetInsertPoint(AcquireBB);
  emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
                    llvm::AtomicOrdering::Acquire, Scope);
  CGF.Builder.CreateBr(ContBB);

  CGF.Builder.SetInsertPoint(SeqCstBB);
  emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
                    llvm::AtomicOrdering::SequentiallyConsistent, Scope);
  CGF.Builder.CreateBr(ContBB);

  CGF.Builder.SetInsertPoint(ContBB);
}

/// Duplicate the atomic min/max operation in conventional IR for the builtin
/// variants that return the new rather than the original value.
static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder,
                                         AtomicExpr::AtomicOp Op,
                                         bool IsSigned,
                                         llvm::Value *OldVal,
                                         llvm::Value *RHS) {
  llvm::CmpInst::Predicate Pred;
  switch (Op) {
  default:
    llvm_unreachable("Unexpected min/max operation");
  case AtomicExpr::AO__atomic_max_fetch:
  case AtomicExpr::AO__scoped_atomic_max_fetch:
    Pred = IsSigned ? llvm::CmpInst::ICMP_SGT : llvm::CmpInst::ICMP_UGT;
    break;
  case AtomicExpr::AO__atomic_min_fetch:
  case AtomicExpr::AO__scoped_atomic_min_fetch:
    Pred = IsSigned ? llvm::CmpInst::ICMP_SLT : llvm::CmpInst::ICMP_ULT;
    break;
  }
  llvm::Value *Cmp = Builder.CreateICmp(Pred, OldVal, RHS, "tst");
  return Builder.CreateSelect(Cmp, OldVal, RHS, "newval");
}

static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
                         Address Ptr, Address Val1, Address Val2,
                         llvm::Value *IsWeak, llvm::Value *FailureOrder,
                         uint64_t Size, llvm::AtomicOrdering Order,
                         llvm::SyncScope::ID Scope) {
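  // Op selects the atomicrmw opcode for the read-modify-write cases below.
  // For the *_fetch builtins, PostOp / PostOpMinMax record an operation that
  // is re-applied to the loaded value afterwards to produce the new rather
  // than the original value.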
  llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
  bool PostOpMinMax = false;
  unsigned PostOp = 0;

  switch (E->getOp()) {
  case AtomicExpr::AO__c11_atomic_init:
  case AtomicExpr::AO__opencl_atomic_init:
    llvm_unreachable("Already handled!");

  case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
  case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
    emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
                                FailureOrder, Size, Order, Scope);
    return;
  case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
  case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
    emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
                                FailureOrder, Size, Order, Scope);
    return;
  case AtomicExpr::AO__atomic_compare_exchange:
  case AtomicExpr::AO__atomic_compare_exchange_n:
  case AtomicExpr::AO__scoped_atomic_compare_exchange:
  case AtomicExpr::AO__scoped_atomic_compare_exchange_n: {
    if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
      emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
                                  Val1, Val2, FailureOrder, Size, Order, Scope);
    } else {
      // Create all the relevant basic blocks.
      llvm::BasicBlock *StrongBB =
          CGF.createBasicBlock("cmpxchg.strong", CGF.CurFn);
      llvm::BasicBlock *WeakBB =
          CGF.createBasicBlock("cmpxchg.weak", CGF.CurFn);
      llvm::BasicBlock *ContBB =
          CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn);

      llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(IsWeak, WeakBB);
      SI->addCase(CGF.Builder.getInt1(false), StrongBB);

      CGF.Builder.SetInsertPoint(StrongBB);
      emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
                                  FailureOrder, Size, Order, Scope);
      CGF.Builder.CreateBr(ContBB);

      CGF.Builder.SetInsertPoint(WeakBB);
      emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
                                  FailureOrder, Size, Order, Scope);
      CGF.Builder.CreateBr(ContBB);

      CGF.Builder.SetInsertPoint(ContBB);
    }
    return;
  }
  case AtomicExpr::AO__c11_atomic_load:
  case AtomicExpr::AO__opencl_atomic_load:
  case AtomicExpr::AO__hip_atomic_load:
  case AtomicExpr::AO__atomic_load_n:
  case AtomicExpr::AO__atomic_load:
  case AtomicExpr::AO__scoped_atomic_load_n:
  case AtomicExpr::AO__scoped_atomic_load: {
    llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
    Load->setAtomic(Order, Scope);
    Load->setVolatile(E->isVolatile());
    CGF.Builder.CreateStore(Load, Dest);
    return;
  }

  case AtomicExpr::AO__c11_atomic_store:
  case AtomicExpr::AO__opencl_atomic_store:
  case AtomicExpr::AO__hip_atomic_store:
  case AtomicExpr::AO__atomic_store:
  case AtomicExpr::AO__atomic_store_n:
  case AtomicExpr::AO__scoped_atomic_store:
  case AtomicExpr::AO__scoped_atomic_store_n: {
    llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
    llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
    Store->setAtomic(Order, Scope);
    Store->setVolatile(E->isVolatile());
    return;
  }

  case AtomicExpr::AO__c11_atomic_exchange:
  case AtomicExpr::AO__hip_atomic_exchange:
  case AtomicExpr::AO__opencl_atomic_exchange:
  case AtomicExpr::AO__atomic_exchange_n:
  case AtomicExpr::AO__atomic_exchange:
  case AtomicExpr::AO__scoped_atomic_exchange_n:
  case AtomicExpr::AO__scoped_atomic_exchange:
    Op = llvm::AtomicRMWInst::Xchg;
    break;

  case AtomicExpr::AO__atomic_add_fetch:
  case AtomicExpr::AO__scoped_atomic_add_fetch:
    PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd
                                                 : llvm::Instruction::Add;
    [[fallthrough]];
  case AtomicExpr::AO__c11_atomic_fetch_add:
  case AtomicExpr::AO__hip_atomic_fetch_add:
  case AtomicExpr::AO__opencl_atomic_fetch_add:
  case AtomicExpr::AO__atomic_fetch_add:
  case AtomicExpr::AO__scoped_atomic_fetch_add:
    Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd
                                             : llvm::AtomicRMWInst::Add;
    break;

  case AtomicExpr::AO__atomic_sub_fetch:
  case AtomicExpr::AO__scoped_atomic_sub_fetch:
    PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub
                                                 : llvm::Instruction::Sub;
    [[fallthrough]];
  case AtomicExpr::AO__c11_atomic_fetch_sub:
  case AtomicExpr::AO__hip_atomic_fetch_sub:
  case AtomicExpr::AO__opencl_atomic_fetch_sub:
  case AtomicExpr::AO__atomic_fetch_sub:
  case AtomicExpr::AO__scoped_atomic_fetch_sub:
    Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FSub
                                             : llvm::AtomicRMWInst::Sub;
    break;

  case AtomicExpr::AO__atomic_min_fetch:
  case AtomicExpr::AO__scoped_atomic_min_fetch:
    PostOpMinMax = true;
    [[fallthrough]];
  case AtomicExpr::AO__c11_atomic_fetch_min:
  case AtomicExpr::AO__hip_atomic_fetch_min:
  case AtomicExpr::AO__opencl_atomic_fetch_min:
  case AtomicExpr::AO__atomic_fetch_min:
  case AtomicExpr::AO__scoped_atomic_fetch_min:
    Op = E->getValueType()->isFloatingType()
             ? llvm::AtomicRMWInst::FMin
             : (E->getValueType()->isSignedIntegerType()
                    ? llvm::AtomicRMWInst::Min
                    : llvm::AtomicRMWInst::UMin);
    break;

  case AtomicExpr::AO__atomic_max_fetch:
  case AtomicExpr::AO__scoped_atomic_max_fetch:
    PostOpMinMax = true;
    [[fallthrough]];
  case AtomicExpr::AO__c11_atomic_fetch_max:
  case AtomicExpr::AO__hip_atomic_fetch_max:
  case AtomicExpr::AO__opencl_atomic_fetch_max:
  case AtomicExpr::AO__atomic_fetch_max:
  case AtomicExpr::AO__scoped_atomic_fetch_max:
    Op = E->getValueType()->isFloatingType()
             ? llvm::AtomicRMWInst::FMax
             : (E->getValueType()->isSignedIntegerType()
                    ? llvm::AtomicRMWInst::Max
                    : llvm::AtomicRMWInst::UMax);
    break;

  case AtomicExpr::AO__atomic_and_fetch:
  case AtomicExpr::AO__scoped_atomic_and_fetch:
    PostOp = llvm::Instruction::And;
    [[fallthrough]];
  case AtomicExpr::AO__c11_atomic_fetch_and:
  case AtomicExpr::AO__hip_atomic_fetch_and:
  case AtomicExpr::AO__opencl_atomic_fetch_and:
  case AtomicExpr::AO__atomic_fetch_and:
  case AtomicExpr::AO__scoped_atomic_fetch_and:
    Op = llvm::AtomicRMWInst::And;
    break;

  case AtomicExpr::AO__atomic_or_fetch:
  case AtomicExpr::AO__scoped_atomic_or_fetch:
    PostOp = llvm::Instruction::Or;
    [[fallthrough]];
  case AtomicExpr::AO__c11_atomic_fetch_or:
  case AtomicExpr::AO__hip_atomic_fetch_or:
  case AtomicExpr::AO__opencl_atomic_fetch_or:
  case AtomicExpr::AO__atomic_fetch_or:
  case AtomicExpr::AO__scoped_atomic_fetch_or:
    Op = llvm::AtomicRMWInst::Or;
    break;

  case AtomicExpr::AO__atomic_xor_fetch:
  case AtomicExpr::AO__scoped_atomic_xor_fetch:
    PostOp = llvm::Instruction::Xor;
    [[fallthrough]];
  case AtomicExpr::AO__c11_atomic_fetch_xor:
  case AtomicExpr::AO__hip_atomic_fetch_xor:
  case AtomicExpr::AO__opencl_atomic_fetch_xor:
  case AtomicExpr::AO__atomic_fetch_xor:
  case AtomicExpr::AO__scoped_atomic_fetch_xor:
    Op = llvm::AtomicRMWInst::Xor;
    break;

  case AtomicExpr::AO__atomic_nand_fetch:
  case AtomicExpr::AO__scoped_atomic_nand_fetch:
    PostOp = llvm::Instruction::And; // the NOT is special-cased below
    [[fallthrough]];
  case AtomicExpr::AO__c11_atomic_fetch_nand:
  case AtomicExpr::AO__atomic_fetch_nand:
  case AtomicExpr::AO__scoped_atomic_fetch_nand:
    Op = llvm::AtomicRMWInst::Nand;
    break;
  }

  llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
  llvm::AtomicRMWInst *RMWI =
      CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order, Scope);
  RMWI->setVolatile(E->isVolatile());

  // For __atomic_*_fetch operations, perform the operation again to
  // determine the value which was written.
  llvm::Value *Result = RMWI;
  if (PostOpMinMax)
    Result = EmitPostAtomicMinMax(CGF.Builder, E->getOp(),
                                  E->getValueType()->isSignedIntegerType(),
                                  RMWI, LoadVal1);
  else if (PostOp)
    Result = CGF.Builder.CreateBinOp((llvm::Instruction::BinaryOps)PostOp, RMWI,
                                     LoadVal1);
  if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch ||
      E->getOp() == AtomicExpr::AO__scoped_atomic_nand_fetch)
    Result = CGF.Builder.CreateNot(Result);
  CGF.Builder.CreateStore(Result, Dest);
}

// This function emits any expression (scalar, complex, or aggregate)
// into a temporary alloca.
static Address
EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
  Address DeclPtr = CGF.CreateMemTemp(E->getType(), ".atomictmp");
  CGF.EmitAnyExprToMem(E, DeclPtr, E->getType().getQualifiers(),
                       /*Init*/ true);
  return DeclPtr;
}

static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
                         Address Ptr, Address Val1, Address Val2,
                         llvm::Value *IsWeak, llvm::Value *FailureOrder,
                         uint64_t Size, llvm::AtomicOrdering Order,
                         llvm::Value *Scope) {
  auto ScopeModel = Expr->getScopeModel();

  // LLVM atomic instructions always have a sync scope. If the Clang atomic
  // expression has no scope operand, use the default LLVM sync scope.
  if (!ScopeModel) {
    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
                 Order, CGF.CGM.getLLVMContext().getOrInsertSyncScopeID(""));
    return;
  }

  // Handle constant scope.
  if (auto SC = dyn_cast<llvm::ConstantInt>(Scope)) {
    auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID(
        CGF.CGM.getLangOpts(), ScopeModel->map(SC->getZExtValue()),
        Order, CGF.CGM.getLLVMContext());
    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
                 Order, SCID);
    return;
  }

  // Handle non-constant scope.
  auto &Builder = CGF.Builder;
  auto Scopes = ScopeModel->getRuntimeValues();
  llvm::DenseMap<unsigned, llvm::BasicBlock *> BB;
  for (auto S : Scopes)
    BB[S] = CGF.createBasicBlock(getAsString(ScopeModel->map(S)), CGF.CurFn);

  llvm::BasicBlock *ContBB =
      CGF.createBasicBlock("atomic.scope.continue", CGF.CurFn);

  auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
  // If an unsupported sync scope is encountered at run time, assume the
  // model's fallback sync scope value.
  auto FallBack = ScopeModel->getFallBackValue();
  llvm::SwitchInst *SI = Builder.CreateSwitch(SC, BB[FallBack]);
  for (auto S : Scopes) {
    auto *B = BB[S];
    if (S != FallBack)
      SI->addCase(Builder.getInt32(S), B);

    Builder.SetInsertPoint(B);
    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
                 Order,
                 CGF.getTargetHooks().getLLVMSyncScopeID(CGF.CGM.getLangOpts(),
                                                         ScopeModel->map(S),
                                                         Order,
                                                         CGF.getLLVMContext()));
    Builder.CreateBr(ContBB);
  }

  Builder.SetInsertPoint(ContBB);
}

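/// Add \p Val to \p Args for an atomic libcall: loaded and passed by value
/// (coerced to an integer of the appropriate width) for the optimized,
/// size-suffixed libcalls, or as an opaque pointer for the generic ones.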
static void
AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
                  bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
                  SourceLocation Loc, CharUnits SizeInChars) {
  if (UseOptimizedLibcall) {
    // Load the value and pass it to the function directly.
    CharUnits Align = CGF.getContext().getTypeAlignInChars(ValTy);
    int64_t SizeInBits = CGF.getContext().toBits(SizeInChars);
    ValTy =
        CGF.getContext().getIntTypeForBitwidth(SizeInBits, /*Signed=*/false);
    llvm::Type *ITy = llvm::IntegerType::get(CGF.getLLVMContext(), SizeInBits);
    Address Ptr = Address(Val, ITy, Align);
    Val = CGF.EmitLoadOfScalar(Ptr, false,
                               CGF.getContext().getPointerType(ValTy),
                               Loc);
    // Coerce the value into an appropriately sized integer type.
    Args.add(RValue::get(Val), ValTy);
  } else {
    // Non-optimized functions always take a reference.
    Args.add(RValue::get(Val), CGF.getContext().VoidPtrTy);
  }
}

RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
  QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
  QualType MemTy = AtomicTy;
  if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
    MemTy = AT->getValueType();
  llvm::Value *IsWeak = nullptr, *OrderFail = nullptr;

  Address Val1 = Address::invalid();
  Address Val2 = Address::invalid();
  Address Dest = Address::invalid();
  Address Ptr = EmitPointerWithAlignment(E->getPtr());

  if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
      E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
    LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
    EmitAtomicInit(E->getVal1(), lvalue);
    return RValue::get(nullptr);
  }

  auto TInfo = getContext().getTypeInfoInChars(AtomicTy);
  uint64_t Size = TInfo.Width.getQuantity();
  unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();

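  // Decide between inline atomics and a libcall: objects wider than the
  // target's maximum inline atomic width, or whose alignment is not a
  // multiple of their size, must go through the runtime library.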
  bool Oversized = getContext().toBits(TInfo.Width) > MaxInlineWidthInBits;
  bool Misaligned = (Ptr.getAlignment() % TInfo.Width) != 0;
  bool UseLibcall = Misaligned | Oversized;
  bool ShouldCastToIntPtrTy = true;

  CharUnits MaxInlineWidth =
      getContext().toCharUnitsFromBits(MaxInlineWidthInBits);

  DiagnosticsEngine &Diags = CGM.getDiags();

  if (Misaligned) {
    Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned)
        << (int)TInfo.Width.getQuantity()
        << (int)Ptr.getAlignment().getQuantity();
  }

  if (Oversized) {
    Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_oversized)
        << (int)TInfo.Width.getQuantity() << (int)MaxInlineWidth.getQuantity();
  }

  llvm::Value *Order = EmitScalarExpr(E->getOrder());
  llvm::Value *Scope =
      E->getScopeModel() ? EmitScalarExpr(E->getScope()) : nullptr;

  switch (E->getOp()) {
  case AtomicExpr::AO__c11_atomic_init:
  case AtomicExpr::AO__opencl_atomic_init:
    llvm_unreachable("Already handled above with EmitAtomicInit!");

  case AtomicExpr::AO__atomic_load_n:
  case AtomicExpr::AO__scoped_atomic_load_n:
  case AtomicExpr::AO__c11_atomic_load:
  case AtomicExpr::AO__opencl_atomic_load:
  case AtomicExpr::AO__hip_atomic_load:
    break;

  case AtomicExpr::AO__atomic_load:
  case AtomicExpr::AO__scoped_atomic_load:
    Dest = EmitPointerWithAlignment(E->getVal1());
    break;

  case AtomicExpr::AO__atomic_store:
  case AtomicExpr::AO__scoped_atomic_store:
    Val1 = EmitPointerWithAlignment(E->getVal1());
    break;

  case AtomicExpr::AO__atomic_exchange:
  case AtomicExpr::AO__scoped_atomic_exchange:
    Val1 = EmitPointerWithAlignment(E->getVal1());
    Dest = EmitPointerWithAlignment(E->getVal2());
    break;

  case AtomicExpr::AO__atomic_compare_exchange:
  case AtomicExpr::AO__atomic_compare_exchange_n:
  case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
  case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
  case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
  case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
  case AtomicExpr::AO__scoped_atomic_compare_exchange:
  case AtomicExpr::AO__scoped_atomic_compare_exchange_n:
    Val1 = EmitPointerWithAlignment(E->getVal1());
    if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange ||
        E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange)
      Val2 = EmitPointerWithAlignment(E->getVal2());
    else
      Val2 = EmitValToTemp(*this, E->getVal2());
    OrderFail = EmitScalarExpr(E->getOrderFail());
    if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n ||
        E->getOp() == AtomicExpr::AO__atomic_compare_exchange ||
        E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange_n ||
        E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange)
      IsWeak = EmitScalarExpr(E->getWeak());
    break;

  case AtomicExpr::AO__c11_atomic_fetch_add:
  case AtomicExpr::AO__c11_atomic_fetch_sub:
  case AtomicExpr::AO__hip_atomic_fetch_add:
  case AtomicExpr::AO__hip_atomic_fetch_sub:
  case AtomicExpr::AO__opencl_atomic_fetch_add:
  case AtomicExpr::AO__opencl_atomic_fetch_sub:
    if (MemTy->isPointerType()) {
      // For pointer arithmetic, we're required to do a bit of math:
      // adding 1 to an int* is not the same as adding 1 to a uintptr_t.
      // ... but only for the C11 builtins. The GNU builtins expect the
      // user to multiply by sizeof(T).
      QualType Val1Ty = E->getVal1()->getType();
      llvm::Value *Val1Scalar = EmitScalarExpr(E->getVal1());
      CharUnits PointeeIncAmt =
          getContext().getTypeSizeInChars(MemTy->getPointeeType());
      Val1Scalar = Builder.CreateMul(Val1Scalar, CGM.getSize(PointeeIncAmt));
      auto Temp = CreateMemTemp(Val1Ty, ".atomictmp");
      Val1 = Temp;
      EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Temp, Val1Ty));
      break;
    }
    [[fallthrough]];
  case AtomicExpr::AO__atomic_fetch_add:
  case AtomicExpr::AO__atomic_fetch_max:
  case AtomicExpr::AO__atomic_fetch_min:
  case AtomicExpr::AO__atomic_fetch_sub:
  case AtomicExpr::AO__atomic_add_fetch:
  case AtomicExpr::AO__atomic_max_fetch:
  case AtomicExpr::AO__atomic_min_fetch:
  case AtomicExpr::AO__atomic_sub_fetch:
  case AtomicExpr::AO__c11_atomic_fetch_max:
  case AtomicExpr::AO__c11_atomic_fetch_min:
  case AtomicExpr::AO__opencl_atomic_fetch_max:
  case AtomicExpr::AO__opencl_atomic_fetch_min:
  case AtomicExpr::AO__hip_atomic_fetch_max:
  case AtomicExpr::AO__hip_atomic_fetch_min:
  case AtomicExpr::AO__scoped_atomic_fetch_add:
  case AtomicExpr::AO__scoped_atomic_fetch_max:
  case AtomicExpr::AO__scoped_atomic_fetch_min:
  case AtomicExpr::AO__scoped_atomic_fetch_sub:
  case AtomicExpr::AO__scoped_atomic_add_fetch:
  case AtomicExpr::AO__scoped_atomic_max_fetch:
  case AtomicExpr::AO__scoped_atomic_min_fetch:
  case AtomicExpr::AO__scoped_atomic_sub_fetch:
    ShouldCastToIntPtrTy = !MemTy->isFloatingType();
    [[fallthrough]];

  case AtomicExpr::AO__atomic_fetch_and:
  case AtomicExpr::AO__atomic_fetch_nand:
  case AtomicExpr::AO__atomic_fetch_or:
  case AtomicExpr::AO__atomic_fetch_xor:
  case AtomicExpr::AO__atomic_and_fetch:
  case AtomicExpr::AO__atomic_nand_fetch:
  case AtomicExpr::AO__atomic_or_fetch:
  case AtomicExpr::AO__atomic_xor_fetch:
  case AtomicExpr::AO__atomic_store_n:
  case AtomicExpr::AO__atomic_exchange_n:
  case AtomicExpr::AO__c11_atomic_fetch_and:
  case AtomicExpr::AO__c11_atomic_fetch_nand:
  case AtomicExpr::AO__c11_atomic_fetch_or:
  case AtomicExpr::AO__c11_atomic_fetch_xor:
  case AtomicExpr::AO__c11_atomic_store:
  case AtomicExpr::AO__c11_atomic_exchange:
  case AtomicExpr::AO__hip_atomic_fetch_and:
  case AtomicExpr::AO__hip_atomic_fetch_or:
  case AtomicExpr::AO__hip_atomic_fetch_xor:
  case AtomicExpr::AO__hip_atomic_store:
  case AtomicExpr::AO__hip_atomic_exchange:
  case AtomicExpr::AO__opencl_atomic_fetch_and:
  case AtomicExpr::AO__opencl_atomic_fetch_or:
  case AtomicExpr::AO__opencl_atomic_fetch_xor:
  case AtomicExpr::AO__opencl_atomic_store:
  case AtomicExpr::AO__opencl_atomic_exchange:
  case AtomicExpr::AO__scoped_atomic_fetch_and:
  case AtomicExpr::AO__scoped_atomic_fetch_nand:
  case AtomicExpr::AO__scoped_atomic_fetch_or:
  case AtomicExpr::AO__scoped_atomic_fetch_xor:
  case AtomicExpr::AO__scoped_atomic_and_fetch:
  case AtomicExpr::AO__scoped_atomic_nand_fetch:
  case AtomicExpr::AO__scoped_atomic_or_fetch:
  case AtomicExpr::AO__scoped_atomic_xor_fetch:
  case AtomicExpr::AO__scoped_atomic_store_n:
  case AtomicExpr::AO__scoped_atomic_exchange_n:
    Val1 = EmitValToTemp(*this, E->getVal1());
    break;
  }

  QualType RValTy = E->getType().getUnqualifiedType();

  // The inline atomics only work on iN types, where N is a power of 2. We
  // need to make sure (via temporaries if necessary) that all incoming values
  // are compatible.
  LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy);
  AtomicInfo Atomics(*this, AtomicVal);

  if (ShouldCastToIntPtrTy) {
    Ptr = Atomics.castToAtomicIntPointer(Ptr);
    if (Val1.isValid())
      Val1 = Atomics.convertToAtomicIntPointer(Val1);
    if (Val2.isValid())
      Val2 = Atomics.convertToAtomicIntPointer(Val2);
  }
  if (Dest.isValid()) {
    if (ShouldCastToIntPtrTy)
      Dest = Atomics.castToAtomicIntPointer(Dest);
  } else if (E->isCmpXChg())
    Dest = CreateMemTemp(RValTy, "cmpxchg.bool");
  else if (!RValTy->isVoidType()) {
    Dest = Atomics.CreateTempAlloca();
    if (ShouldCastToIntPtrTy)
      Dest = Atomics.castToAtomicIntPointer(Dest);
  }

  // Use a library call.  See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
  if (UseLibcall) {
    bool UseOptimizedLibcall = false;
    switch (E->getOp()) {
    case AtomicExpr::AO__c11_atomic_init:
    case AtomicExpr::AO__opencl_atomic_init:
      llvm_unreachable("Already handled above with EmitAtomicInit!");

    case AtomicExpr::AO__atomic_fetch_add:
    case AtomicExpr::AO__atomic_fetch_and:
    case AtomicExpr::AO__atomic_fetch_max:
    case AtomicExpr::AO__atomic_fetch_min:
    case AtomicExpr::AO__atomic_fetch_nand:
    case AtomicExpr::AO__atomic_fetch_or:
    case AtomicExpr::AO__atomic_fetch_sub:
    case AtomicExpr::AO__atomic_fetch_xor:
    case AtomicExpr::AO__atomic_add_fetch:
    case AtomicExpr::AO__atomic_and_fetch:
    case AtomicExpr::AO__atomic_max_fetch:
    case AtomicExpr::AO__atomic_min_fetch:
    case AtomicExpr::AO__atomic_nand_fetch:
    case AtomicExpr::AO__atomic_or_fetch:
    case AtomicExpr::AO__atomic_sub_fetch:
    case AtomicExpr::AO__atomic_xor_fetch:
    case AtomicExpr::AO__c11_atomic_fetch_add:
    case AtomicExpr::AO__c11_atomic_fetch_and:
    case AtomicExpr::AO__c11_atomic_fetch_max:
    case AtomicExpr::AO__c11_atomic_fetch_min:
    case AtomicExpr::AO__c11_atomic_fetch_nand:
    case AtomicExpr::AO__c11_atomic_fetch_or:
    case AtomicExpr::AO__c11_atomic_fetch_sub:
    case AtomicExpr::AO__c11_atomic_fetch_xor:
    case AtomicExpr::AO__hip_atomic_fetch_add:
    case AtomicExpr::AO__hip_atomic_fetch_and:
    case AtomicExpr::AO__hip_atomic_fetch_max:
    case AtomicExpr::AO__hip_atomic_fetch_min:
    case AtomicExpr::AO__hip_atomic_fetch_or:
    case AtomicExpr::AO__hip_atomic_fetch_sub:
    case AtomicExpr::AO__hip_atomic_fetch_xor:
    case AtomicExpr::AO__opencl_atomic_fetch_add:
    case AtomicExpr::AO__opencl_atomic_fetch_and:
    case AtomicExpr::AO__opencl_atomic_fetch_max:
    case AtomicExpr::AO__opencl_atomic_fetch_min:
    case AtomicExpr::AO__opencl_atomic_fetch_or:
    case AtomicExpr::AO__opencl_atomic_fetch_sub:
    case AtomicExpr::AO__opencl_atomic_fetch_xor:
    case AtomicExpr::AO__scoped_atomic_fetch_add:
    case AtomicExpr::AO__scoped_atomic_fetch_and:
    case AtomicExpr::AO__scoped_atomic_fetch_max:
    case AtomicExpr::AO__scoped_atomic_fetch_min:
    case AtomicExpr::AO__scoped_atomic_fetch_nand:
    case AtomicExpr::AO__scoped_atomic_fetch_or:
    case AtomicExpr::AO__scoped_atomic_fetch_sub:
    case AtomicExpr::AO__scoped_atomic_fetch_xor:
    case AtomicExpr::AO__scoped_atomic_add_fetch:
    case AtomicExpr::AO__scoped_atomic_and_fetch:
    case AtomicExpr::AO__scoped_atomic_max_fetch:
    case AtomicExpr::AO__scoped_atomic_min_fetch:
    case AtomicExpr::AO__scoped_atomic_nand_fetch:
    case AtomicExpr::AO__scoped_atomic_or_fetch:
    case AtomicExpr::AO__scoped_atomic_sub_fetch:
    case AtomicExpr::AO__scoped_atomic_xor_fetch:
      // For these, only library calls for certain sizes exist.
      UseOptimizedLibcall = true;
      break;

    case AtomicExpr::AO__atomic_load:
    case AtomicExpr::AO__atomic_store:
    case AtomicExpr::AO__atomic_exchange:
    case AtomicExpr::AO__atomic_compare_exchange:
    case AtomicExpr::AO__scoped_atomic_load:
    case AtomicExpr::AO__scoped_atomic_store:
    case AtomicExpr::AO__scoped_atomic_exchange:
    case AtomicExpr::AO__scoped_atomic_compare_exchange:
      // Use the generic version if we don't know that the operand will be
      // suitably aligned for the optimized version.
      if (Misaligned)
        break;
      [[fallthrough]];
    case AtomicExpr::AO__atomic_load_n:
    case AtomicExpr::AO__atomic_store_n:
    case AtomicExpr::AO__atomic_exchange_n:
    case AtomicExpr::AO__atomic_compare_exchange_n:
    case AtomicExpr::AO__c11_atomic_load:
    case AtomicExpr::AO__c11_atomic_store:
    case AtomicExpr::AO__c11_atomic_exchange:
    case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
    case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
    case AtomicExpr::AO__hip_atomic_load:
    case AtomicExpr::AO__hip_atomic_store:
    case AtomicExpr::AO__hip_atomic_exchange:
    case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
    case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
    case AtomicExpr::AO__opencl_atomic_load:
    case AtomicExpr::AO__opencl_atomic_store:
    case AtomicExpr::AO__opencl_atomic_exchange:
    case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
    case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
    case AtomicExpr::AO__scoped_atomic_load_n:
    case AtomicExpr::AO__scoped_atomic_store_n:
    case AtomicExpr::AO__scoped_atomic_exchange_n:
    case AtomicExpr::AO__scoped_atomic_compare_exchange_n:
      // Only use optimized library calls for sizes for which they exist.
      // FIXME: Size == 16 optimized library functions exist too.
      if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
        UseOptimizedLibcall = true;
      break;
    }

    CallArgList Args;
    if (!UseOptimizedLibcall) {
      // For non-optimized library calls, the size is the first parameter.
      Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
               getContext().getSizeType());
    }
    // The atomic address is the first or second parameter. The OpenCL atomic
    // library functions only accept pointer arguments to the generic address
    // space.
    auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) {
      if (!E->isOpenCL())
        return V;
      auto AS = PT->castAs<PointerType>()->getPointeeType().getAddressSpace();
      if (AS == LangAS::opencl_generic)
        return V;
      auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic);
      auto *DestType = llvm::PointerType::get(getLLVMContext(), DestAS);

      return getTargetHooks().performAddrSpaceCast(
          *this, V, AS, LangAS::opencl_generic, DestType, false);
    };

    Args.add(RValue::get(CastToGenericAddrSpace(Ptr.getPointer(),
                                                E->getPtr()->getType())),
             getContext().VoidPtrTy);

    std::string LibCallName;
    QualType LoweredMemTy =
      MemTy->isPointerType() ? getContext().getIntPtrType() : MemTy;
    QualType RetTy;
    bool HaveRetTy = false;
    llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
    bool PostOpMinMax = false;
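    // As with the inline path above, PostOp / PostOpMinMax request that the
    // operation be re-applied to the libcall's result to implement the
    // *_fetch variants.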
    switch (E->getOp()) {
    case AtomicExpr::AO__c11_atomic_init:
    case AtomicExpr::AO__opencl_atomic_init:
      llvm_unreachable("Already handled!");

    // There is only one libcall for compare and exchange, because there is no
    // optimisation benefit possible from a libcall version of a weak compare
    // and exchange.
    // bool __atomic_compare_exchange(size_t size, void *mem, void *expected,
    //                                void *desired, int success, int failure)
    // bool __atomic_compare_exchange_N(T *mem, T *expected, T desired,
    //                                  int success, int failure)
    case AtomicExpr::AO__atomic_compare_exchange:
    case AtomicExpr::AO__atomic_compare_exchange_n:
    case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
    case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
    case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
    case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
    case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
    case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
    case AtomicExpr::AO__scoped_atomic_compare_exchange:
    case AtomicExpr::AO__scoped_atomic_compare_exchange_n:
      LibCallName = "__atomic_compare_exchange";
      RetTy = getContext().BoolTy;
      HaveRetTy = true;
      Args.add(RValue::get(CastToGenericAddrSpace(Val1.getPointer(),
                                                  E->getVal1()->getType())),
               getContext().VoidPtrTy);
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(),
                        MemTy, E->getExprLoc(), TInfo.Width);
      Args.add(RValue::get(Order), getContext().IntTy);
      Order = OrderFail;
      break;
    // void __atomic_exchange(size_t size, void *mem, void *val, void *return,
    //                        int order)
    // T __atomic_exchange_N(T *mem, T val, int order)
    case AtomicExpr::AO__atomic_exchange:
    case AtomicExpr::AO__atomic_exchange_n:
    case AtomicExpr::AO__c11_atomic_exchange:
    case AtomicExpr::AO__hip_atomic_exchange:
    case AtomicExpr::AO__opencl_atomic_exchange:
    case AtomicExpr::AO__scoped_atomic_exchange:
    case AtomicExpr::AO__scoped_atomic_exchange_n:
      LibCallName = "__atomic_exchange";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        MemTy, E->getExprLoc(), TInfo.Width);
      break;
    // void __atomic_store(size_t size, void *mem, void *val, int order)
    // void __atomic_store_N(T *mem, T val, int order)
    case AtomicExpr::AO__atomic_store:
    case AtomicExpr::AO__atomic_store_n:
    case AtomicExpr::AO__c11_atomic_store:
    case AtomicExpr::AO__hip_atomic_store:
    case AtomicExpr::AO__opencl_atomic_store:
    case AtomicExpr::AO__scoped_atomic_store:
    case AtomicExpr::AO__scoped_atomic_store_n:
      LibCallName = "__atomic_store";
      RetTy = getContext().VoidTy;
      HaveRetTy = true;
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        MemTy, E->getExprLoc(), TInfo.Width);
      break;
    // void __atomic_load(size_t size, void *mem, void *return, int order)
    // T __atomic_load_N(T *mem, int order)
    case AtomicExpr::AO__atomic_load:
    case AtomicExpr::AO__atomic_load_n:
    case AtomicExpr::AO__c11_atomic_load:
    case AtomicExpr::AO__hip_atomic_load:
    case AtomicExpr::AO__opencl_atomic_load:
    case AtomicExpr::AO__scoped_atomic_load:
    case AtomicExpr::AO__scoped_atomic_load_n:
      LibCallName = "__atomic_load";
      break;
    // T __atomic_add_fetch_N(T *mem, T val, int order)
    // T __atomic_fetch_add_N(T *mem, T val, int order)
    case AtomicExpr::AO__atomic_add_fetch:
    case AtomicExpr::AO__scoped_atomic_add_fetch:
      PostOp = llvm::Instruction::Add;
      [[fallthrough]];
    case AtomicExpr::AO__atomic_fetch_add:
    case AtomicExpr::AO__c11_atomic_fetch_add:
    case AtomicExpr::AO__hip_atomic_fetch_add:
    case AtomicExpr::AO__opencl_atomic_fetch_add:
    case AtomicExpr::AO__scoped_atomic_fetch_add:
      LibCallName = "__atomic_fetch_add";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
      break;
    // T __atomic_and_fetch_N(T *mem, T val, int order)
    // T __atomic_fetch_and_N(T *mem, T val, int order)
    case AtomicExpr::AO__atomic_and_fetch:
    case AtomicExpr::AO__scoped_atomic_and_fetch:
      PostOp = llvm::Instruction::And;
      [[fallthrough]];
    case AtomicExpr::AO__atomic_fetch_and:
    case AtomicExpr::AO__c11_atomic_fetch_and:
    case AtomicExpr::AO__hip_atomic_fetch_and:
    case AtomicExpr::AO__opencl_atomic_fetch_and:
    case AtomicExpr::AO__scoped_atomic_fetch_and:
      LibCallName = "__atomic_fetch_and";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        MemTy, E->getExprLoc(), TInfo.Width);
      break;
    // T __atomic_or_fetch_N(T *mem, T val, int order)
    // T __atomic_fetch_or_N(T *mem, T val, int order)
1297     case AtomicExpr::AO__atomic_or_fetch:
1298     case AtomicExpr::AO__scoped_atomic_or_fetch:
1299       PostOp = llvm::Instruction::Or;
1300       [[fallthrough]];
1301     case AtomicExpr::AO__atomic_fetch_or:
1302     case AtomicExpr::AO__c11_atomic_fetch_or:
1303     case AtomicExpr::AO__hip_atomic_fetch_or:
1304     case AtomicExpr::AO__opencl_atomic_fetch_or:
1305     case AtomicExpr::AO__scoped_atomic_fetch_or:
1306       LibCallName = "__atomic_fetch_or";
1307       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1308                         MemTy, E->getExprLoc(), TInfo.Width);
1309       break;
1310     // T __atomic_sub_fetch_N(T *mem, T val, int order)
1311     // T __atomic_fetch_sub_N(T *mem, T val, int order)
1312     case AtomicExpr::AO__atomic_sub_fetch:
1313     case AtomicExpr::AO__scoped_atomic_sub_fetch:
1314       PostOp = llvm::Instruction::Sub;
1315       [[fallthrough]];
1316     case AtomicExpr::AO__atomic_fetch_sub:
1317     case AtomicExpr::AO__c11_atomic_fetch_sub:
1318     case AtomicExpr::AO__hip_atomic_fetch_sub:
1319     case AtomicExpr::AO__opencl_atomic_fetch_sub:
1320     case AtomicExpr::AO__scoped_atomic_fetch_sub:
1321       LibCallName = "__atomic_fetch_sub";
1322       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1323                         LoweredMemTy, E->getExprLoc(), TInfo.Width);
1324       break;
1325     // T __atomic_xor_fetch_N(T *mem, T val, int order)
1326     // T __atomic_fetch_xor_N(T *mem, T val, int order)
1327     case AtomicExpr::AO__atomic_xor_fetch:
1328     case AtomicExpr::AO__scoped_atomic_xor_fetch:
1329       PostOp = llvm::Instruction::Xor;
1330       [[fallthrough]];
1331     case AtomicExpr::AO__atomic_fetch_xor:
1332     case AtomicExpr::AO__c11_atomic_fetch_xor:
1333     case AtomicExpr::AO__hip_atomic_fetch_xor:
1334     case AtomicExpr::AO__opencl_atomic_fetch_xor:
1335     case AtomicExpr::AO__scoped_atomic_fetch_xor:
1336       LibCallName = "__atomic_fetch_xor";
1337       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1338                         MemTy, E->getExprLoc(), TInfo.Width);
1339       break;
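    // T __atomic_min_fetch_N(T *mem, T val, int order)
    // T __atomic_fetch_min_N(T *mem, T val, int order)
    // (unsigned types use the __atomic_fetch_umin_N name instead)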
1340     case AtomicExpr::AO__atomic_min_fetch:
1341     case AtomicExpr::AO__scoped_atomic_min_fetch:
1342       PostOpMinMax = true;
1343       [[fallthrough]];
1344     case AtomicExpr::AO__atomic_fetch_min:
1345     case AtomicExpr::AO__c11_atomic_fetch_min:
1346     case AtomicExpr::AO__scoped_atomic_fetch_min:
1347     case AtomicExpr::AO__hip_atomic_fetch_min:
1348     case AtomicExpr::AO__opencl_atomic_fetch_min:
1349       LibCallName = E->getValueType()->isSignedIntegerType()
1350                         ? "__atomic_fetch_min"
1351                         : "__atomic_fetch_umin";
1352       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1353                         LoweredMemTy, E->getExprLoc(), TInfo.Width);
1354       break;
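    // T __atomic_max_fetch_N(T *mem, T val, int order)
    // T __atomic_fetch_max_N(T *mem, T val, int order)
    // (unsigned types use the __atomic_fetch_umax_N name instead)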
1355     case AtomicExpr::AO__atomic_max_fetch:
1356     case AtomicExpr::AO__scoped_atomic_max_fetch:
1357       PostOpMinMax = true;
1358       [[fallthrough]];
1359     case AtomicExpr::AO__atomic_fetch_max:
1360     case AtomicExpr::AO__c11_atomic_fetch_max:
1361     case AtomicExpr::AO__hip_atomic_fetch_max:
1362     case AtomicExpr::AO__opencl_atomic_fetch_max:
1363     case AtomicExpr::AO__scoped_atomic_fetch_max:
1364       LibCallName = E->getValueType()->isSignedIntegerType()
1365                         ? "__atomic_fetch_max"
1366                         : "__atomic_fetch_umax";
1367       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1368                         LoweredMemTy, E->getExprLoc(), TInfo.Width);
1369       break;
1370     // T __atomic_nand_fetch_N(T *mem, T val, int order)
1371     // T __atomic_fetch_nand_N(T *mem, T val, int order)
1372     case AtomicExpr::AO__atomic_nand_fetch:
1373     case AtomicExpr::AO__scoped_atomic_nand_fetch:
1374       PostOp = llvm::Instruction::And; // the NOT is special cased below
1375       [[fallthrough]];
1376     case AtomicExpr::AO__atomic_fetch_nand:
1377     case AtomicExpr::AO__c11_atomic_fetch_nand:
1378     case AtomicExpr::AO__scoped_atomic_fetch_nand:
1379       LibCallName = "__atomic_fetch_nand";
1380       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1381                         MemTy, E->getExprLoc(), TInfo.Width);
1382       break;
1383     }
1384 
1385     if (E->isOpenCL()) {
1386       LibCallName = std::string("__opencl") +
1387           StringRef(LibCallName).drop_front(1).str();
1388     }
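    // E.g. (sketch): "__atomic_fetch_add" is renamed here to
    // "__opencl_atomic_fetch_add", before any size suffix is appended.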
1390     // Optimized functions have the size in their name.
1391     if (UseOptimizedLibcall)
1392       LibCallName += "_" + llvm::utostr(Size);
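    // E.g. (sketch): with Size == 4, "__atomic_fetch_add" becomes
    // "__atomic_fetch_add_4".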
1393     // By default, assume we return a value of the atomic type.
1394     if (!HaveRetTy) {
1395       if (UseOptimizedLibcall) {
1396         // Value is returned directly.
1397         // The function returns an appropriately sized integer type.
1398         RetTy = getContext().getIntTypeForBitwidth(
1399             getContext().toBits(TInfo.Width), /*Signed=*/false);
1400       } else {
1401         // Value is returned through parameter before the order.
1402         RetTy = getContext().VoidTy;
1403         Args.add(RValue::get(Dest.getPointer()), getContext().VoidPtrTy);
1404       }
1405     }
1406     // The order is the last parameter (OpenCL appends a scope after it).
1407     Args.add(RValue::get(Order),
1408              getContext().IntTy);
1409     if (E->isOpenCL())
1410       Args.add(RValue::get(Scope), getContext().IntTy);
1411 
1412     // PostOp is only needed for the atomic_*_fetch operations, and
1413     // thus is only required by, and implemented in, the
1414     // UseOptimizedLibcall codepath.
1415     assert(UseOptimizedLibcall || (!PostOp && !PostOpMinMax));
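    // Sketch of that fixup (IR names assumed): for __atomic_add_fetch_4,
    //   %old = call i32 @__atomic_fetch_add_4(ptr %mem, i32 %val, i32 %order)
    //   %res = add i32 %old, %val
    // and nand_fetch additionally applies a NOT to the And result.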
1416 
1417     RValue Res = emitAtomicLibcall(*this, LibCallName, RetTy, Args);
1418     // The value is returned directly from the libcall.
1419     if (E->isCmpXChg())
1420       return Res;
1421 
1422     // For optimized libcalls the value is returned directly, but the expr
1423     // provided an out-param, so store the result into it.
1424     if (UseOptimizedLibcall && Res.getScalarVal()) {
1425       llvm::Value *ResVal = Res.getScalarVal();
1426       if (PostOpMinMax) {
1427         llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
1428         ResVal = EmitPostAtomicMinMax(Builder, E->getOp(),
1429                                       E->getValueType()->isSignedIntegerType(),
1430                                       ResVal, LoadVal1);
1431       } else if (PostOp) {
1432         llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
1433         ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
1434       }
1435       if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch ||
1436           E->getOp() == AtomicExpr::AO__scoped_atomic_nand_fetch)
1437         ResVal = Builder.CreateNot(ResVal);
1438 
1439       Builder.CreateStore(ResVal, Dest.withElementType(ResVal->getType()));
1440     }
1441 
1442     if (RValTy->isVoidType())
1443       return RValue::get(nullptr);
1444 
1445     return convertTempToRValue(Dest.withElementType(ConvertTypeForMem(RValTy)),
1446                                RValTy, E->getExprLoc());
1447   }
1448 
1449   bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
1450                  E->getOp() == AtomicExpr::AO__opencl_atomic_store ||
1451                  E->getOp() == AtomicExpr::AO__hip_atomic_store ||
1452                  E->getOp() == AtomicExpr::AO__atomic_store ||
1453                  E->getOp() == AtomicExpr::AO__atomic_store_n ||
1454                  E->getOp() == AtomicExpr::AO__scoped_atomic_store ||
1455                  E->getOp() == AtomicExpr::AO__scoped_atomic_store_n;
1456   bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
1457                 E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
1458                 E->getOp() == AtomicExpr::AO__hip_atomic_load ||
1459                 E->getOp() == AtomicExpr::AO__atomic_load ||
1460                 E->getOp() == AtomicExpr::AO__atomic_load_n ||
1461                 E->getOp() == AtomicExpr::AO__scoped_atomic_load ||
1462                 E->getOp() == AtomicExpr::AO__scoped_atomic_load_n;
1463 
1464   if (isa<llvm::ConstantInt>(Order)) {
1465     auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1466     // We should not ever get to a case where the ordering isn't a valid C ABI
1467     // value, but it's hard to enforce that in general.
1468     if (llvm::isValidAtomicOrderingCABI(ord))
1469       switch ((llvm::AtomicOrderingCABI)ord) {
1470       case llvm::AtomicOrderingCABI::relaxed:
1471         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1472                      llvm::AtomicOrdering::Monotonic, Scope);
1473         break;
1474       case llvm::AtomicOrderingCABI::consume:
1475       case llvm::AtomicOrderingCABI::acquire:
1476         if (IsStore)
1477           break; // Avoid crashing on code with undefined behavior
1478         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1479                      llvm::AtomicOrdering::Acquire, Scope);
1480         break;
1481       case llvm::AtomicOrderingCABI::release:
1482         if (IsLoad)
1483           break; // Avoid crashing on code with undefined behavior
1484         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1485                      llvm::AtomicOrdering::Release, Scope);
1486         break;
1487       case llvm::AtomicOrderingCABI::acq_rel:
1488         if (IsLoad || IsStore)
1489           break; // Avoid crashing on code with undefined behavior
1490         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1491                      llvm::AtomicOrdering::AcquireRelease, Scope);
1492         break;
1493       case llvm::AtomicOrderingCABI::seq_cst:
1494         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1495                      llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1496         break;
1497       }
1498     if (RValTy->isVoidType())
1499       return RValue::get(nullptr);
1500 
1501     return convertTempToRValue(Dest.withElementType(ConvertTypeForMem(RValTy)),
1502                                RValTy, E->getExprLoc());
1503   }
1504 
1505   // Long case, when Order isn't obviously constant.
1506 
1507   // Create all the relevant basic blocks.
1508   llvm::BasicBlock *MonotonicBB = nullptr, *AcquireBB = nullptr,
1509                    *ReleaseBB = nullptr, *AcqRelBB = nullptr,
1510                    *SeqCstBB = nullptr;
1511   MonotonicBB = createBasicBlock("monotonic", CurFn);
1512   if (!IsStore)
1513     AcquireBB = createBasicBlock("acquire", CurFn);
1514   if (!IsLoad)
1515     ReleaseBB = createBasicBlock("release", CurFn);
1516   if (!IsLoad && !IsStore)
1517     AcqRelBB = createBasicBlock("acqrel", CurFn);
1518   SeqCstBB = createBasicBlock("seqcst", CurFn);
1519   llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1520 
1521   // Create the switch for the split
1522   // MonotonicBB is arbitrarily chosen as the default case; in practice, this
1523   // doesn't matter unless someone is crazy enough to use something that
1524   // doesn't fold to a constant for the ordering.
1525   Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1526   llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
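  // In sketch form, the IR built below is:
  //   switch i32 %order, label %monotonic [ i32 1, label %acquire
  //                                         i32 2, label %acquire
  //                                         i32 3, label %release ... ]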
1527 
1528   // Emit all the different atomics
1529   Builder.SetInsertPoint(MonotonicBB);
1530   EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1531                llvm::AtomicOrdering::Monotonic, Scope);
1532   Builder.CreateBr(ContBB);
1533   if (!IsStore) {
1534     Builder.SetInsertPoint(AcquireBB);
1535     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1536                  llvm::AtomicOrdering::Acquire, Scope);
1537     Builder.CreateBr(ContBB);
1538     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
1539                 AcquireBB);
1540     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire),
1541                 AcquireBB);
1542   }
1543   if (!IsLoad) {
1544     Builder.SetInsertPoint(ReleaseBB);
1545     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1546                  llvm::AtomicOrdering::Release, Scope);
1547     Builder.CreateBr(ContBB);
1548     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
1549                 ReleaseBB);
1550   }
1551   if (!IsLoad && !IsStore) {
1552     Builder.SetInsertPoint(AcqRelBB);
1553     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1554                  llvm::AtomicOrdering::AcquireRelease, Scope);
1555     Builder.CreateBr(ContBB);
1556     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
1557                 AcqRelBB);
1558   }
1559   Builder.SetInsertPoint(SeqCstBB);
1560   EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1561                llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1562   Builder.CreateBr(ContBB);
1563   SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
1564               SeqCstBB);
1565 
1566   // Cleanup and return
1567   Builder.SetInsertPoint(ContBB);
1568   if (RValTy->isVoidType())
1569     return RValue::get(nullptr);
1570 
1571   assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits());
1572   return convertTempToRValue(Dest.withElementType(ConvertTypeForMem(RValTy)),
1573                              RValTy, E->getExprLoc());
1574 }
1575 
1576 Address AtomicInfo::castToAtomicIntPointer(Address addr) const {
1577   llvm::IntegerType *ty =
1578     llvm::IntegerType::get(CGF.getLLVMContext(), AtomicSizeInBits);
1579   return addr.withElementType(ty);
1580 }
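// For example (sketch): an _Atomic(float) with a 32-bit float is accessed
// through this cast as an i32, so the atomic instructions always see a
// plain integer of AtomicSizeInBits width.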
1581 
1582 Address AtomicInfo::convertToAtomicIntPointer(Address Addr) const {
1583   llvm::Type *Ty = Addr.getElementType();
1584   uint64_t SourceSizeInBits = CGF.CGM.getDataLayout().getTypeSizeInBits(Ty);
1585   if (SourceSizeInBits != AtomicSizeInBits) {
1586     Address Tmp = CreateTempAlloca();
1587     CGF.Builder.CreateMemCpy(Tmp, Addr,
1588                              std::min(AtomicSizeInBits, SourceSizeInBits) / 8);
1589     Addr = Tmp;
1590   }
1591 
1592   return castToAtomicIntPointer(Addr);
1593 }
1594 
1595 RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
1596                                              AggValueSlot resultSlot,
1597                                              SourceLocation loc,
1598                                              bool asValue) const {
1599   if (LVal.isSimple()) {
1600     if (EvaluationKind == TEK_Aggregate)
1601       return resultSlot.asRValue();
1602 
1603     // Drill into the padding structure if we have one.
1604     if (hasPadding())
1605       addr = CGF.Builder.CreateStructGEP(addr, 0);
1606 
1607     // Otherwise, just convert the temporary to an r-value using the
1608     // normal conversion routine.
1609     return CGF.convertTempToRValue(addr, getValueType(), loc);
1610   }
1611   if (!asValue)
1612     // Get RValue from temp memory as atomic for non-simple lvalues
1613     return RValue::get(CGF.Builder.CreateLoad(addr));
1614   if (LVal.isBitField())
1615     return CGF.EmitLoadOfBitfieldLValue(
1616         LValue::MakeBitfield(addr, LVal.getBitFieldInfo(), LVal.getType(),
1617                              LVal.getBaseInfo(), TBAAAccessInfo()), loc);
1618   if (LVal.isVectorElt())
1619     return CGF.EmitLoadOfLValue(
1620         LValue::MakeVectorElt(addr, LVal.getVectorIdx(), LVal.getType(),
1621                               LVal.getBaseInfo(), TBAAAccessInfo()), loc);
1622   assert(LVal.isExtVectorElt());
1623   return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt(
1624       addr, LVal.getExtVectorElts(), LVal.getType(),
1625       LVal.getBaseInfo(), TBAAAccessInfo()));
1626 }
1627 
1628 RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
1629                                              AggValueSlot ResultSlot,
1630                                              SourceLocation Loc,
1631                                              bool AsValue) const {
1632   // Try to avoid going through memory in some easy cases.
1633   assert(IntVal->getType()->isIntegerTy() && "Expected integer value");
1634   if (getEvaluationKind() == TEK_Scalar &&
1635       (((!LVal.isBitField() ||
1636          LVal.getBitFieldInfo().Size == ValueSizeInBits) &&
1637         !hasPadding()) ||
1638        !AsValue)) {
1639     auto *ValTy = AsValue
1640                       ? CGF.ConvertTypeForMem(ValueTy)
1641                       : getAtomicAddress().getElementType();
1642     if (ValTy->isIntegerTy()) {
1643       assert(IntVal->getType() == ValTy && "Different integer types.");
1644       return RValue::get(CGF.EmitFromMemory(IntVal, ValueTy));
1645     } else if (ValTy->isPointerTy())
1646       return RValue::get(CGF.Builder.CreateIntToPtr(IntVal, ValTy));
1647     else if (llvm::CastInst::isBitCastable(IntVal->getType(), ValTy))
1648       return RValue::get(CGF.Builder.CreateBitCast(IntVal, ValTy));
1649   }
1650 
1651   // Create a temporary.  This needs to be big enough to hold the
1652   // atomic integer.
1653   Address Temp = Address::invalid();
1654   bool TempIsVolatile = false;
1655   if (AsValue && getEvaluationKind() == TEK_Aggregate) {
1656     assert(!ResultSlot.isIgnored());
1657     Temp = ResultSlot.getAddress();
1658     TempIsVolatile = ResultSlot.isVolatile();
1659   } else {
1660     Temp = CreateTempAlloca();
1661   }
1662 
1663   // Slam the integer into the temporary.
1664   Address CastTemp = castToAtomicIntPointer(Temp);
1665   CGF.Builder.CreateStore(IntVal, CastTemp)
1666       ->setVolatile(TempIsVolatile);
1667 
1668   return convertAtomicTempToRValue(Temp, ResultSlot, Loc, AsValue);
1669 }
1670 
1671 void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddrForLoaded,
1672                                        llvm::AtomicOrdering AO, bool) {
1673   // void __atomic_load(size_t size, void *mem, void *return, int order);
1674   CallArgList Args;
1675   Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType());
1676   Args.add(RValue::get(getAtomicPointer()), CGF.getContext().VoidPtrTy);
1677   Args.add(RValue::get(AddrForLoaded), CGF.getContext().VoidPtrTy);
1678   Args.add(
1679       RValue::get(llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(AO))),
1680       CGF.getContext().IntTy);
1681   emitAtomicLibcall(CGF, "__atomic_load", CGF.getContext().VoidTy, Args);
1682 }
1683 
1684 llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
1685                                           bool IsVolatile) {
1686   // Okay, we're doing this natively.
1687   Address Addr = getAtomicAddressAsAtomicIntPointer();
1688   llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load");
1689   Load->setAtomic(AO);
1690 
1691   // Other decoration.
1692   if (IsVolatile)
1693     Load->setVolatile(true);
1694   CGF.CGM.DecorateInstructionWithTBAA(Load, LVal.getTBAAInfo());
1695   return Load;
1696 }
1697 
1698 /// An LValue is a candidate for having its loads and stores be made atomic if
1699 /// we are operating under /volatile:ms *and* the LValue itself is volatile and
1700 /// such an operation can be performed without a libcall.
1701 bool CodeGenFunction::LValueIsSuitableForInlineAtomic(LValue LV) {
1702   if (!CGM.getLangOpts().MSVolatile) return false;
1703   AtomicInfo AI(*this, LV);
1704   bool IsVolatile = LV.isVolatile() || hasVolatileMember(LV.getType());
1705   // An atomic is inline if we don't need to use a libcall.
1706   bool AtomicIsInline = !AI.shouldUseLibcall();
1707   // MSVC doesn't seem to do this for types wider than a pointer.
1708   if (getContext().getTypeSize(LV.getType()) >
1709       getContext().getTypeSize(getContext().getIntPtrType()))
1710     return false;
1711   return IsVolatile && AtomicIsInline;
1712 }
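// Illustrative sketch: under clang-cl's /volatile:ms (cc1 -fms-volatile),
//   volatile int g; void set() { g = 1; }
// the store to g is emitted as an atomic release store, since int is no
// wider than a pointer and needs no libcall.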
1713 
1714 RValue CodeGenFunction::EmitAtomicLoad(LValue LV, SourceLocation SL,
1715                                        AggValueSlot Slot) {
1716   llvm::AtomicOrdering AO;
1717   bool IsVolatile = LV.isVolatileQualified();
1718   if (LV.getType()->isAtomicType()) {
1719     AO = llvm::AtomicOrdering::SequentiallyConsistent;
1720   } else {
1721     AO = llvm::AtomicOrdering::Acquire;
1722     IsVolatile = true;
1723   }
1724   return EmitAtomicLoad(LV, SL, AO, IsVolatile, Slot);
1725 }
1726 
1727 RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
1728                                   bool AsValue, llvm::AtomicOrdering AO,
1729                                   bool IsVolatile) {
1730   // Check whether we should use a library call.
1731   if (shouldUseLibcall()) {
1732     Address TempAddr = Address::invalid();
1733     if (LVal.isSimple() && !ResultSlot.isIgnored()) {
1734       assert(getEvaluationKind() == TEK_Aggregate);
1735       TempAddr = ResultSlot.getAddress();
1736     } else
1737       TempAddr = CreateTempAlloca();
1738 
1739     EmitAtomicLoadLibcall(TempAddr.getPointer(), AO, IsVolatile);
1740 
1741     // Okay, turn that back into a value of the original type (or of the
1742     // whole atomic type, for non-simple lvalues).
1743     return convertAtomicTempToRValue(TempAddr, ResultSlot, Loc, AsValue);
1744   }
1745 
1746   // Okay, we're doing this natively.
1747   auto *Load = EmitAtomicLoadOp(AO, IsVolatile);
1748 
1749   // If we're ignoring an aggregate return, don't do anything.
1750   if (getEvaluationKind() == TEK_Aggregate && ResultSlot.isIgnored())
1751     return RValue::getAggregate(Address::invalid(), false);
1752 
1753   // Okay, turn that back into a value of the original type (or of the
1754   // atomic type, for non-simple lvalues).
1755   return ConvertIntToValueOrAtomic(Load, ResultSlot, Loc, AsValue);
1756 }
1757 
1758 /// Emit a load from an l-value of atomic type.  Note that the r-value
1759 /// we produce is an r-value of the atomic *value* type.
1760 RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
1761                                        llvm::AtomicOrdering AO, bool IsVolatile,
1762                                        AggValueSlot resultSlot) {
1763   AtomicInfo Atomics(*this, src);
1764   return Atomics.EmitAtomicLoad(resultSlot, loc, /*AsValue=*/true, AO,
1765                                 IsVolatile);
1766 }
1767 
1768 /// Copy an r-value into memory as part of storing to an atomic type.
1769 /// This needs to create a bit-pattern suitable for atomic operations.
1770 void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const {
1771   assert(LVal.isSimple());
1772   // If we have an r-value, the rvalue should be of the atomic type,
1773   // which means that the caller is responsible for having zeroed
1774   // any padding.  Just do an aggregate copy of that type.
1775   if (rvalue.isAggregate()) {
1776     LValue Dest = CGF.MakeAddrLValue(getAtomicAddress(), getAtomicType());
1777     LValue Src = CGF.MakeAddrLValue(rvalue.getAggregateAddress(),
1778                                     getAtomicType());
1779     bool IsVolatile = rvalue.isVolatileQualified() ||
1780                       LVal.isVolatileQualified();
1781     CGF.EmitAggregateCopy(Dest, Src, getAtomicType(),
1782                           AggValueSlot::DoesNotOverlap, IsVolatile);
1783     return;
1784   }
1785 
1786   // Okay, otherwise we're copying a scalar or complex value into the buffer.
1787 
1788   // Zero out the buffer if necessary.
1789   emitMemSetZeroIfNecessary();
1790 
1791   // Drill past the padding if present.
1792   LValue TempLVal = projectValue();
1793 
1794   // Okay, store the rvalue in.
1795   if (rvalue.isScalar()) {
1796     CGF.EmitStoreOfScalar(rvalue.getScalarVal(), TempLVal, /*init*/ true);
1797   } else {
1798     CGF.EmitStoreOfComplex(rvalue.getComplexVal(), TempLVal, /*init*/ true);
1799   }
1800 }
1801 
1803 /// Materialize an r-value into memory for the purposes of storing it
1804 /// to an atomic type.
1805 Address AtomicInfo::materializeRValue(RValue rvalue) const {
1806   // Aggregate r-values are already in memory, and EmitAtomicStore
1807   // requires them to be values of the atomic type.
1808   if (rvalue.isAggregate())
1809     return rvalue.getAggregateAddress();
1810 
1811   // Otherwise, make a temporary and materialize into it.
1812   LValue TempLV = CGF.MakeAddrLValue(CreateTempAlloca(), getAtomicType());
1813   AtomicInfo Atomics(CGF, TempLV);
1814   Atomics.emitCopyIntoMemory(rvalue);
1815   return TempLV.getAddress(CGF);
1816 }
1817 
1818 llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal) const {
1819   // If we've got a scalar value of the right size, try to avoid going
1820   // through memory.
1821   if (RVal.isScalar() && (!hasPadding() || !LVal.isSimple())) {
1822     llvm::Value *Value = RVal.getScalarVal();
1823     if (isa<llvm::IntegerType>(Value->getType()))
1824       return CGF.EmitToMemory(Value, ValueTy);
1825     else {
1826       llvm::IntegerType *InputIntTy = llvm::IntegerType::get(
1827           CGF.getLLVMContext(),
1828           LVal.isSimple() ? getValueSizeInBits() : getAtomicSizeInBits());
1829       if (isa<llvm::PointerType>(Value->getType()))
1830         return CGF.Builder.CreatePtrToInt(Value, InputIntTy);
1831       else if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
1832         return CGF.Builder.CreateBitCast(Value, InputIntTy);
1833     }
1834   }
1835   // Otherwise, we need to go through memory.
1836   // Put the r-value in memory.
1837   Address Addr = materializeRValue(RVal);
1838 
1839   // Cast the temporary to the atomic int type and pull a value out.
1840   Addr = castToAtomicIntPointer(Addr);
1841   return CGF.Builder.CreateLoad(Addr);
1842 }
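// For example (sketch): a float being stored into an _Atomic(float) takes
// the isBitCastable path above and becomes a single float-to-i32 bitcast,
// avoiding the materialize-to-memory round trip.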
1843 
1844 std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
1845     llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
1846     llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak) {
1847   // Do the atomic compare-and-exchange.
1848   Address Addr = getAtomicAddressAsAtomicIntPointer();
1849   auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr, ExpectedVal, DesiredVal,
1850                                                Success, Failure);
1851   // Other decoration.
1852   Inst->setVolatile(LVal.isVolatileQualified());
1853   Inst->setWeak(IsWeak);
1854 
1855   // Okay, turn that back into the original value type.
1856   auto *PreviousVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/0);
1857   auto *SuccessFailureVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/1);
1858   return std::make_pair(PreviousVal, SuccessFailureVal);
1859 }
1860 
1861 llvm::Value *
1862 AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr,
1863                                              llvm::Value *DesiredAddr,
1864                                              llvm::AtomicOrdering Success,
1865                                              llvm::AtomicOrdering Failure) {
1866   // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
1867   // void *desired, int success, int failure);
1868   CallArgList Args;
1869   Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType());
1870   Args.add(RValue::get(getAtomicPointer()), CGF.getContext().VoidPtrTy);
1871   Args.add(RValue::get(ExpectedAddr), CGF.getContext().VoidPtrTy);
1872   Args.add(RValue::get(DesiredAddr), CGF.getContext().VoidPtrTy);
1873   Args.add(RValue::get(
1874                llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Success))),
1875            CGF.getContext().IntTy);
1876   Args.add(RValue::get(
1877                llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Failure))),
1878            CGF.getContext().IntTy);
1879   auto SuccessFailureRVal = emitAtomicLibcall(CGF, "__atomic_compare_exchange",
1880                                               CGF.getContext().BoolTy, Args);
1881 
1882   return SuccessFailureRVal.getScalarVal();
1883 }
1884 
1885 std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
1886     RValue Expected, RValue Desired, llvm::AtomicOrdering Success,
1887     llvm::AtomicOrdering Failure, bool IsWeak) {
1888   // Check whether we should use a library call.
1889   if (shouldUseLibcall()) {
1890     // Produce a source address.
1891     Address ExpectedAddr = materializeRValue(Expected);
1892     Address DesiredAddr = materializeRValue(Desired);
1893     auto *Res = EmitAtomicCompareExchangeLibcall(ExpectedAddr.getPointer(),
1894                                                  DesiredAddr.getPointer(),
1895                                                  Success, Failure);
1896     return std::make_pair(
1897         convertAtomicTempToRValue(ExpectedAddr, AggValueSlot::ignored(),
1898                                   SourceLocation(), /*AsValue=*/false),
1899         Res);
1900   }
1901 
1902   // If we've got a scalar value of the right size, try to avoid going
1903   // through memory.
1904   auto *ExpectedVal = convertRValueToInt(Expected);
1905   auto *DesiredVal = convertRValueToInt(Desired);
1906   auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
1907                                          Failure, IsWeak);
1908   return std::make_pair(
1909       ConvertIntToValueOrAtomic(Res.first, AggValueSlot::ignored(),
1910                                 SourceLocation(), /*AsValue=*/false),
1911       Res.second);
1912 }
1913 
1914 static void
1915 EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal,
1916                       const llvm::function_ref<RValue(RValue)> &UpdateOp,
1917                       Address DesiredAddr) {
1918   RValue UpRVal;
1919   LValue AtomicLVal = Atomics.getAtomicLValue();
1920   LValue DesiredLVal;
1921   if (AtomicLVal.isSimple()) {
1922     UpRVal = OldRVal;
1923     DesiredLVal = CGF.MakeAddrLValue(DesiredAddr, AtomicLVal.getType());
1924   } else {
1925     // Build new lvalue for temp address.
1926     Address Ptr = Atomics.materializeRValue(OldRVal);
1927     LValue UpdateLVal;
1928     if (AtomicLVal.isBitField()) {
1929       UpdateLVal =
1930           LValue::MakeBitfield(Ptr, AtomicLVal.getBitFieldInfo(),
1931                                AtomicLVal.getType(),
1932                                AtomicLVal.getBaseInfo(),
1933                                AtomicLVal.getTBAAInfo());
1934       DesiredLVal =
1935           LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
1936                                AtomicLVal.getType(), AtomicLVal.getBaseInfo(),
1937                                AtomicLVal.getTBAAInfo());
1938     } else if (AtomicLVal.isVectorElt()) {
1939       UpdateLVal = LValue::MakeVectorElt(Ptr, AtomicLVal.getVectorIdx(),
1940                                          AtomicLVal.getType(),
1941                                          AtomicLVal.getBaseInfo(),
1942                                          AtomicLVal.getTBAAInfo());
1943       DesiredLVal = LValue::MakeVectorElt(
1944           DesiredAddr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(),
1945           AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo());
1946     } else {
1947       assert(AtomicLVal.isExtVectorElt());
1948       UpdateLVal = LValue::MakeExtVectorElt(Ptr, AtomicLVal.getExtVectorElts(),
1949                                             AtomicLVal.getType(),
1950                                             AtomicLVal.getBaseInfo(),
1951                                             AtomicLVal.getTBAAInfo());
1952       DesiredLVal = LValue::MakeExtVectorElt(
1953           DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
1954           AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo());
1955     }
1956     UpRVal = CGF.EmitLoadOfLValue(UpdateLVal, SourceLocation());
1957   }
1958   // Store new value in the corresponding memory area.
1959   RValue NewRVal = UpdateOp(UpRVal);
1960   if (NewRVal.isScalar()) {
1961     CGF.EmitStoreThroughLValue(NewRVal, DesiredLVal);
1962   } else {
1963     assert(NewRVal.isComplex());
1964     CGF.EmitStoreOfComplex(NewRVal.getComplexVal(), DesiredLVal,
1965                            /*isInit=*/false);
1966   }
1967 }
1968 
1969 void AtomicInfo::EmitAtomicUpdateLibcall(
1970     llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp,
1971     bool IsVolatile) {
1972   auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
1973 
1974   Address ExpectedAddr = CreateTempAlloca();
1975 
1976   EmitAtomicLoadLibcall(ExpectedAddr.getPointer(), AO, IsVolatile);
1977   auto *ContBB = CGF.createBasicBlock("atomic_cont");
1978   auto *ExitBB = CGF.createBasicBlock("atomic_exit");
1979   CGF.EmitBlock(ContBB);
1980   Address DesiredAddr = CreateTempAlloca();
1981   if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
1982       requiresMemSetZero(getAtomicAddress().getElementType())) {
1983     auto *OldVal = CGF.Builder.CreateLoad(ExpectedAddr);
1984     CGF.Builder.CreateStore(OldVal, DesiredAddr);
1985   }
1986   auto OldRVal = convertAtomicTempToRValue(ExpectedAddr,
1987                                            AggValueSlot::ignored(),
1988                                            SourceLocation(), /*AsValue=*/false);
1989   EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, DesiredAddr);
1990   auto *Res =
1991       EmitAtomicCompareExchangeLibcall(ExpectedAddr.getPointer(),
1992                                        DesiredAddr.getPointer(),
1993                                        AO, Failure);
1994   CGF.Builder.CreateCondBr(Res, ExitBB, ContBB);
1995   CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
1996 }
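// The loop built above has, in sketch form:
//   expected = __atomic_load(size, obj, ...)
// atomic_cont:
//   desired = UpdateOp(expected)
//   ok = __atomic_compare_exchange(size, obj, &expected, &desired, AO, Failure)
//   br ok, atomic_exit, atomic_cont   ; a failed call refreshed *expected
// atomic_exit: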
1997 
1998 void AtomicInfo::EmitAtomicUpdateOp(
1999     llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp,
2000     bool IsVolatile) {
2001   auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
2002 
2003   // Do the atomic load.
2004   auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
2005   // For non-simple lvalues, perform the compare-and-swap procedure.
2006   auto *ContBB = CGF.createBasicBlock("atomic_cont");
2007   auto *ExitBB = CGF.createBasicBlock("atomic_exit");
2008   auto *CurBB = CGF.Builder.GetInsertBlock();
2009   CGF.EmitBlock(ContBB);
2010   llvm::PHINode *PHI = CGF.Builder.CreatePHI(OldVal->getType(),
2011                                              /*NumReservedValues=*/2);
2012   PHI->addIncoming(OldVal, CurBB);
2013   Address NewAtomicAddr = CreateTempAlloca();
2014   Address NewAtomicIntAddr = castToAtomicIntPointer(NewAtomicAddr);
2015   if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
2016       requiresMemSetZero(getAtomicAddress().getElementType())) {
2017     CGF.Builder.CreateStore(PHI, NewAtomicIntAddr);
2018   }
2019   auto OldRVal = ConvertIntToValueOrAtomic(PHI, AggValueSlot::ignored(),
2020                                            SourceLocation(), /*AsValue=*/false);
2021   EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr);
2022   auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr);
2023   // Try to write new value using cmpxchg operation.
2024   auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure);
2025   PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock());
2026   CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB);
2027   CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2028 }
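// Unlike the libcall loop above, this inline form threads the value observed
// by a failed cmpxchg back through the PHI, so no reload from memory is
// needed on each iteration.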
2029 
2030 static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics,
2031                                   RValue UpdateRVal, Address DesiredAddr) {
2032   LValue AtomicLVal = Atomics.getAtomicLValue();
2033   LValue DesiredLVal;
2034   // Build new lvalue for temp address.
2035   if (AtomicLVal.isBitField()) {
2036     DesiredLVal =
2037         LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
2038                              AtomicLVal.getType(), AtomicLVal.getBaseInfo(),
2039                              AtomicLVal.getTBAAInfo());
2040   } else if (AtomicLVal.isVectorElt()) {
2041     DesiredLVal =
2042         LValue::MakeVectorElt(DesiredAddr, AtomicLVal.getVectorIdx(),
2043                               AtomicLVal.getType(), AtomicLVal.getBaseInfo(),
2044                               AtomicLVal.getTBAAInfo());
2045   } else {
2046     assert(AtomicLVal.isExtVectorElt());
2047     DesiredLVal = LValue::MakeExtVectorElt(
2048         DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
2049         AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo());
2050   }
2051   // Store new value in the corresponding memory area.
2052   assert(UpdateRVal.isScalar());
2053   CGF.EmitStoreThroughLValue(UpdateRVal, DesiredLVal);
2054 }
2055 
2056 void AtomicInfo::EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO,
2057                                          RValue UpdateRVal, bool IsVolatile) {
2058   auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
2059 
2060   Address ExpectedAddr = CreateTempAlloca();
2061 
2062   EmitAtomicLoadLibcall(ExpectedAddr.getPointer(), AO, IsVolatile);
2063   auto *ContBB = CGF.createBasicBlock("atomic_cont");
2064   auto *ExitBB = CGF.createBasicBlock("atomic_exit");
2065   CGF.EmitBlock(ContBB);
2066   Address DesiredAddr = CreateTempAlloca();
2067   if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
2068       requiresMemSetZero(getAtomicAddress().getElementType())) {
2069     auto *OldVal = CGF.Builder.CreateLoad(ExpectedAddr);
2070     CGF.Builder.CreateStore(OldVal, DesiredAddr);
2071   }
2072   EmitAtomicUpdateValue(CGF, *this, UpdateRVal, DesiredAddr);
2073   auto *Res =
2074       EmitAtomicCompareExchangeLibcall(ExpectedAddr.getPointer(),
2075                                        DesiredAddr.getPointer(),
2076                                        AO, Failure);
2077   CGF.Builder.CreateCondBr(Res, ExitBB, ContBB);
2078   CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2079 }
2080 
2081 void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal,
2082                                     bool IsVolatile) {
2083   auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
2084 
2085   // Do the atomic load.
2086   auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
2087   // For non-simple lvalues, perform the compare-and-swap procedure.
2088   auto *ContBB = CGF.createBasicBlock("atomic_cont");
2089   auto *ExitBB = CGF.createBasicBlock("atomic_exit");
2090   auto *CurBB = CGF.Builder.GetInsertBlock();
2091   CGF.EmitBlock(ContBB);
2092   llvm::PHINode *PHI = CGF.Builder.CreatePHI(OldVal->getType(),
2093                                              /*NumReservedValues=*/2);
2094   PHI->addIncoming(OldVal, CurBB);
2095   Address NewAtomicAddr = CreateTempAlloca();
2096   Address NewAtomicIntAddr = castToAtomicIntPointer(NewAtomicAddr);
2097   if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
2098       requiresMemSetZero(getAtomicAddress().getElementType())) {
2099     CGF.Builder.CreateStore(PHI, NewAtomicIntAddr);
2100   }
2101   EmitAtomicUpdateValue(CGF, *this, UpdateRVal, NewAtomicAddr);
2102   auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr);
2103   // Try to write new value using cmpxchg operation.
2104   auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure);
2105   PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock());
2106   CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB);
2107   CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2108 }
2109 
2110 void AtomicInfo::EmitAtomicUpdate(
2111     llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp,
2112     bool IsVolatile) {
2113   if (shouldUseLibcall()) {
2114     EmitAtomicUpdateLibcall(AO, UpdateOp, IsVolatile);
2115   } else {
2116     EmitAtomicUpdateOp(AO, UpdateOp, IsVolatile);
2117   }
2118 }
2119 
2120 void AtomicInfo::EmitAtomicUpdate(llvm::AtomicOrdering AO, RValue UpdateRVal,
2121                                   bool IsVolatile) {
2122   if (shouldUseLibcall()) {
2123     EmitAtomicUpdateLibcall(AO, UpdateRVal, IsVolatile);
2124   } else {
2125     EmitAtomicUpdateOp(AO, UpdateRVal, IsVolatile);
2126   }
2127 }
2128 
2129 void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue lvalue,
2130                                       bool isInit) {
2131   bool IsVolatile = lvalue.isVolatileQualified();
2132   llvm::AtomicOrdering AO;
2133   if (lvalue.getType()->isAtomicType()) {
2134     AO = llvm::AtomicOrdering::SequentiallyConsistent;
2135   } else {
2136     AO = llvm::AtomicOrdering::Release;
2137     IsVolatile = true;
2138   }
2139   return EmitAtomicStore(rvalue, lvalue, AO, IsVolatile, isInit);
2140 }
2141 
2142 /// Emit a store to an l-value of atomic type.
2143 ///
2144 /// Note that the r-value is expected to be an r-value *of the atomic
2145 /// type*; this means that for aggregate r-values, it should include
2146 /// storage for any padding that was necessary.
2147 void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
2148                                       llvm::AtomicOrdering AO, bool IsVolatile,
2149                                       bool isInit) {
2150   // If this is an aggregate r-value, it should agree in type except
2151   // maybe for address-space qualification.
2152   assert(!rvalue.isAggregate() ||
2153          rvalue.getAggregateAddress().getElementType() ==
2154              dest.getAddress(*this).getElementType());
2155 
2156   AtomicInfo atomics(*this, dest);
2157   LValue LVal = atomics.getAtomicLValue();
2158 
2159   // If this is an initialization, just put the value there normally.
2160   if (LVal.isSimple()) {
2161     if (isInit) {
2162       atomics.emitCopyIntoMemory(rvalue);
2163       return;
2164     }
2165 
2166     // Check whether we should use a library call.
2167     if (atomics.shouldUseLibcall()) {
2168       // Produce a source address.
2169       Address srcAddr = atomics.materializeRValue(rvalue);
2170 
2171       // void __atomic_store(size_t size, void *mem, void *val, int order)
2172       CallArgList args;
2173       args.add(RValue::get(atomics.getAtomicSizeValue()),
2174                getContext().getSizeType());
2175       args.add(RValue::get(atomics.getAtomicPointer()), getContext().VoidPtrTy);
2176       args.add(RValue::get(srcAddr.getPointer()), getContext().VoidPtrTy);
2177       args.add(
2178           RValue::get(llvm::ConstantInt::get(IntTy, (int)llvm::toCABI(AO))),
2179           getContext().IntTy);
2180       emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
2181       return;
2182     }
2183 
2184     // Okay, we're doing this natively.
2185     llvm::Value *intValue = atomics.convertRValueToInt(rvalue);
2186 
2187     // Do the atomic store.
2188     Address addr = atomics.castToAtomicIntPointer(atomics.getAtomicAddress());
2189     intValue = Builder.CreateIntCast(
2190         intValue, addr.getElementType(), /*isSigned=*/false);
2191     llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
2192 
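    // A plain store has no acquire component, so degrade orderings that
    // include one (acquire -> monotonic, acq_rel -> release) rather than
    // emitting an invalid atomic store.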
2193     if (AO == llvm::AtomicOrdering::Acquire)
2194       AO = llvm::AtomicOrdering::Monotonic;
2195     else if (AO == llvm::AtomicOrdering::AcquireRelease)
2196       AO = llvm::AtomicOrdering::Release;
2197     // Initializations don't need to be atomic.
2198     if (!isInit)
2199       store->setAtomic(AO);
2200 
2201     // Other decoration.
2202     if (IsVolatile)
2203       store->setVolatile(true);
2204     CGM.DecorateInstructionWithTBAA(store, dest.getTBAAInfo());
2205     return;
2206   }
2207 
2208   // Emit simple atomic update operation.
2209   atomics.EmitAtomicUpdate(AO, rvalue, IsVolatile);
2210 }
2211 
2212 /// Emit a compare-and-exchange op for an atomic type.
2213 ///
2214 std::pair<RValue, llvm::Value *> CodeGenFunction::EmitAtomicCompareExchange(
2215     LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc,
2216     llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak,
2217     AggValueSlot Slot) {
2218   // If this is an aggregate r-value, it should agree in type except
2219   // maybe for address-space qualification.
2220   assert(!Expected.isAggregate() ||
2221          Expected.getAggregateAddress().getElementType() ==
2222              Obj.getAddress(*this).getElementType());
2223   assert(!Desired.isAggregate() ||
2224          Desired.getAggregateAddress().getElementType() ==
2225              Obj.getAddress(*this).getElementType());
2226   AtomicInfo Atomics(*this, Obj);
2227 
2228   return Atomics.EmitAtomicCompareExchange(Expected, Desired, Success, Failure,
2229                                            IsWeak);
2230 }
2231 
2232 void CodeGenFunction::EmitAtomicUpdate(
2233     LValue LVal, llvm::AtomicOrdering AO,
2234     const llvm::function_ref<RValue(RValue)> &UpdateOp, bool IsVolatile) {
2235   AtomicInfo Atomics(*this, LVal);
2236   Atomics.EmitAtomicUpdate(AO, UpdateOp, IsVolatile);
2237 }
2238 
2239 void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
2240   AtomicInfo atomics(*this, dest);
2241 
2242   switch (atomics.getEvaluationKind()) {
2243   case TEK_Scalar: {
2244     llvm::Value *value = EmitScalarExpr(init);
2245     atomics.emitCopyIntoMemory(RValue::get(value));
2246     return;
2247   }
2248 
2249   case TEK_Complex: {
2250     ComplexPairTy value = EmitComplexExpr(init);
2251     atomics.emitCopyIntoMemory(RValue::getComplex(value));
2252     return;
2253   }
2254 
2255   case TEK_Aggregate: {
2256     // Fix up the destination if the initializer isn't an expression
2257     // of atomic type.
2258     bool Zeroed = false;
2259     if (!init->getType()->isAtomicType()) {
2260       Zeroed = atomics.emitMemSetZeroIfNecessary();
2261       dest = atomics.projectValue();
2262     }
2263 
2264     // Evaluate the expression directly into the destination.
2265     AggValueSlot slot = AggValueSlot::forLValue(
2266         dest, *this, AggValueSlot::IsNotDestructed,
2267         AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased,
2268         AggValueSlot::DoesNotOverlap,
2269         Zeroed ? AggValueSlot::IsZeroed : AggValueSlot::IsNotZeroed);
2270 
2271     EmitAggExpr(init, slot);
2272     return;
2273   }
2274   }
2275   llvm_unreachable("bad evaluation kind");
2276 }
2277