1 //===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This pass builds the coroutine frame and outlines resume and destroy parts
9 // of the coroutine into separate functions.
10 //
11 // We present a coroutine to an LLVM as an ordinary function with suspension
12 // points marked up with intrinsics. We let the optimizer party on the coroutine
13 // as a single function for as long as possible. Shortly before the coroutine is
14 // eligible to be inlined into its callers, we split up the coroutine into parts
15 // corresponding to an initial, resume and destroy invocations of the coroutine,
16 // add them to the current SCC and restart the IPO pipeline to optimize the
17 // coroutine subfunctions we extracted before proceeding to the caller of the
18 // coroutine.
19 //===----------------------------------------------------------------------===//
20
21 #include "llvm/Transforms/Coroutines/CoroSplit.h"
22 #include "CoroInstr.h"
23 #include "CoroInternal.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/PriorityWorklist.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/ADT/Twine.h"
30 #include "llvm/Analysis/CFG.h"
31 #include "llvm/Analysis/CallGraph.h"
32 #include "llvm/Analysis/ConstantFolding.h"
33 #include "llvm/Analysis/LazyCallGraph.h"
34 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
35 #include "llvm/Analysis/TargetTransformInfo.h"
36 #include "llvm/BinaryFormat/Dwarf.h"
37 #include "llvm/IR/Argument.h"
38 #include "llvm/IR/Attributes.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/CFG.h"
41 #include "llvm/IR/CallingConv.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DataLayout.h"
44 #include "llvm/IR/DerivedTypes.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/GlobalValue.h"
48 #include "llvm/IR/GlobalVariable.h"
49 #include "llvm/IR/IRBuilder.h"
50 #include "llvm/IR/InstIterator.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Module.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/IR/Verifier.h"
60 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/PrettyStackTrace.h"
63 #include "llvm/Support/raw_ostream.h"
64 #include "llvm/Transforms/Scalar.h"
65 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
66 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
67 #include "llvm/Transforms/Utils/Cloning.h"
68 #include "llvm/Transforms/Utils/Local.h"
69 #include "llvm/Transforms/Utils/ValueMapper.h"
70 #include <cassert>
71 #include <cstddef>
72 #include <cstdint>
73 #include <initializer_list>
74 #include <iterator>
75
76 using namespace llvm;
77
78 #define DEBUG_TYPE "coro-split"
79
80 namespace {
81
82 /// A little helper class for building
83 class CoroCloner {
84 public:
85 enum class Kind {
86 /// The shared resume function for a switch lowering.
87 SwitchResume,
88
89 /// The shared unwind function for a switch lowering.
90 SwitchUnwind,
91
92 /// The shared cleanup function for a switch lowering.
93 SwitchCleanup,
94
95 /// An individual continuation function.
96 Continuation,
97
98 /// An async resume function.
99 Async,
100 };
101
102 private:
103 Function &OrigF;
104 Function *NewF;
105 const Twine &Suffix;
106 coro::Shape &Shape;
107 Kind FKind;
108 ValueToValueMapTy VMap;
109 IRBuilder<> Builder;
110 Value *NewFramePtr = nullptr;
111
112 /// The active suspend instruction; meaningful only for continuation and async
113 /// ABIs.
114 AnyCoroSuspendInst *ActiveSuspend = nullptr;
115
116 TargetTransformInfo &TTI;
117
118 public:
119 /// Create a cloner for a switch lowering.
CoroCloner(Function & OrigF,const Twine & Suffix,coro::Shape & Shape,Kind FKind,TargetTransformInfo & TTI)120 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
121 Kind FKind, TargetTransformInfo &TTI)
122 : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), FKind(FKind),
123 Builder(OrigF.getContext()), TTI(TTI) {
124 assert(Shape.ABI == coro::ABI::Switch);
125 }
126
127 /// Create a cloner for a continuation lowering.
CoroCloner(Function & OrigF,const Twine & Suffix,coro::Shape & Shape,Function * NewF,AnyCoroSuspendInst * ActiveSuspend,TargetTransformInfo & TTI)128 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
129 Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
130 TargetTransformInfo &TTI)
131 : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
132 FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
133 Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend), TTI(TTI) {
134 assert(Shape.ABI == coro::ABI::Retcon ||
135 Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
136 assert(NewF && "need existing function for continuation");
137 assert(ActiveSuspend && "need active suspend point for continuation");
138 }
139
getFunction() const140 Function *getFunction() const {
141 assert(NewF != nullptr && "declaration not yet set");
142 return NewF;
143 }
144
145 void create();
146
147 private:
isSwitchDestroyFunction()148 bool isSwitchDestroyFunction() {
149 switch (FKind) {
150 case Kind::Async:
151 case Kind::Continuation:
152 case Kind::SwitchResume:
153 return false;
154 case Kind::SwitchUnwind:
155 case Kind::SwitchCleanup:
156 return true;
157 }
158 llvm_unreachable("Unknown CoroCloner::Kind enum");
159 }
160
161 void replaceEntryBlock();
162 Value *deriveNewFramePointer();
163 void replaceRetconOrAsyncSuspendUses();
164 void replaceCoroSuspends();
165 void replaceCoroEnds();
166 void replaceSwiftErrorOps();
167 void salvageDebugInfo();
168 void handleFinalSuspend();
169 };
170
171 } // end anonymous namespace
172
173 // FIXME:
174 // Lower the intrinisc in CoroEarly phase if coroutine frame doesn't escape
175 // and it is known that other transformations, for example, sanitizers
176 // won't lead to incorrect code.
lowerAwaitSuspend(IRBuilder<> & Builder,CoroAwaitSuspendInst * CB,coro::Shape & Shape)177 static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB,
178 coro::Shape &Shape) {
179 auto Wrapper = CB->getWrapperFunction();
180 auto Awaiter = CB->getAwaiter();
181 auto FramePtr = CB->getFrame();
182
183 Builder.SetInsertPoint(CB);
184
185 CallBase *NewCall = nullptr;
186 // await_suspend has only 2 parameters, awaiter and handle.
187 // Copy parameter attributes from the intrinsic call, but remove the last,
188 // because the last parameter now becomes the function that is being called.
189 AttributeList NewAttributes =
190 CB->getAttributes().removeParamAttributes(CB->getContext(), 2);
191
192 if (auto Invoke = dyn_cast<InvokeInst>(CB)) {
193 auto WrapperInvoke =
194 Builder.CreateInvoke(Wrapper, Invoke->getNormalDest(),
195 Invoke->getUnwindDest(), {Awaiter, FramePtr});
196
197 WrapperInvoke->setCallingConv(Invoke->getCallingConv());
198 std::copy(Invoke->bundle_op_info_begin(), Invoke->bundle_op_info_end(),
199 WrapperInvoke->bundle_op_info_begin());
200 WrapperInvoke->setAttributes(NewAttributes);
201 WrapperInvoke->setDebugLoc(Invoke->getDebugLoc());
202 NewCall = WrapperInvoke;
203 } else if (auto Call = dyn_cast<CallInst>(CB)) {
204 auto WrapperCall = Builder.CreateCall(Wrapper, {Awaiter, FramePtr});
205
206 WrapperCall->setAttributes(NewAttributes);
207 WrapperCall->setDebugLoc(Call->getDebugLoc());
208 NewCall = WrapperCall;
209 } else {
210 llvm_unreachable("Unexpected coro_await_suspend invocation method");
211 }
212
213 if (CB->getCalledFunction()->getIntrinsicID() ==
214 Intrinsic::coro_await_suspend_handle) {
215 // Follow the lowered await_suspend call above with a lowered resume call
216 // to the returned coroutine.
217 if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
218 // If the await_suspend call is an invoke, we continue in the next block.
219 Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt());
220 }
221
222 coro::LowererBase LB(*Wrapper->getParent());
223 auto *ResumeAddr = LB.makeSubFnCall(NewCall, CoroSubFnInst::ResumeIndex,
224 &*Builder.GetInsertPoint());
225
226 LLVMContext &Ctx = Builder.getContext();
227 FunctionType *ResumeTy = FunctionType::get(
228 Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx), false);
229 auto *ResumeCall = Builder.CreateCall(ResumeTy, ResumeAddr, {NewCall});
230 ResumeCall->setCallingConv(CallingConv::Fast);
231
232 // We can't insert the 'ret' instruction and adjust the cc until the
233 // function has been split, so remember this for later.
234 Shape.SymmetricTransfers.push_back(ResumeCall);
235
236 NewCall = ResumeCall;
237 }
238
239 CB->replaceAllUsesWith(NewCall);
240 CB->eraseFromParent();
241 }
242
lowerAwaitSuspends(Function & F,coro::Shape & Shape)243 static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) {
244 IRBuilder<> Builder(F.getContext());
245 for (auto *AWS : Shape.CoroAwaitSuspends)
246 lowerAwaitSuspend(Builder, AWS, Shape);
247 }
248
maybeFreeRetconStorage(IRBuilder<> & Builder,const coro::Shape & Shape,Value * FramePtr,CallGraph * CG)249 static void maybeFreeRetconStorage(IRBuilder<> &Builder,
250 const coro::Shape &Shape, Value *FramePtr,
251 CallGraph *CG) {
252 assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
253 if (Shape.RetconLowering.IsFrameInlineInStorage)
254 return;
255
256 Shape.emitDealloc(Builder, FramePtr, CG);
257 }
258
259 /// Replace an llvm.coro.end.async.
260 /// Will inline the must tail call function call if there is one.
261 /// \returns true if cleanup of the coro.end block is needed, false otherwise.
replaceCoroEndAsync(AnyCoroEndInst * End)262 static bool replaceCoroEndAsync(AnyCoroEndInst *End) {
263 IRBuilder<> Builder(End);
264
265 auto *EndAsync = dyn_cast<CoroAsyncEndInst>(End);
266 if (!EndAsync) {
267 Builder.CreateRetVoid();
268 return true /*needs cleanup of coro.end block*/;
269 }
270
271 auto *MustTailCallFunc = EndAsync->getMustTailCallFunction();
272 if (!MustTailCallFunc) {
273 Builder.CreateRetVoid();
274 return true /*needs cleanup of coro.end block*/;
275 }
276
277 // Move the must tail call from the predecessor block into the end block.
278 auto *CoroEndBlock = End->getParent();
279 auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor();
280 assert(MustTailCallFuncBlock && "Must have a single predecessor block");
281 auto It = MustTailCallFuncBlock->getTerminator()->getIterator();
282 auto *MustTailCall = cast<CallInst>(&*std::prev(It));
283 CoroEndBlock->splice(End->getIterator(), MustTailCallFuncBlock,
284 MustTailCall->getIterator());
285
286 // Insert the return instruction.
287 Builder.SetInsertPoint(End);
288 Builder.CreateRetVoid();
289 InlineFunctionInfo FnInfo;
290
291 // Remove the rest of the block, by splitting it into an unreachable block.
292 auto *BB = End->getParent();
293 BB->splitBasicBlock(End);
294 BB->getTerminator()->eraseFromParent();
295
296 auto InlineRes = InlineFunction(*MustTailCall, FnInfo);
297 assert(InlineRes.isSuccess() && "Expected inlining to succeed");
298 (void)InlineRes;
299
300 // We have cleaned up the coro.end block above.
301 return false;
302 }
303
304 /// Replace a non-unwind call to llvm.coro.end.
replaceFallthroughCoroEnd(AnyCoroEndInst * End,const coro::Shape & Shape,Value * FramePtr,bool InResume,CallGraph * CG)305 static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
306 const coro::Shape &Shape, Value *FramePtr,
307 bool InResume, CallGraph *CG) {
308 // Start inserting right before the coro.end.
309 IRBuilder<> Builder(End);
310
311 // Create the return instruction.
312 switch (Shape.ABI) {
313 // The cloned functions in switch-lowering always return void.
314 case coro::ABI::Switch:
315 assert(!cast<CoroEndInst>(End)->hasResults() &&
316 "switch coroutine should not return any values");
317 // coro.end doesn't immediately end the coroutine in the main function
318 // in this lowering, because we need to deallocate the coroutine.
319 if (!InResume)
320 return;
321 Builder.CreateRetVoid();
322 break;
323
324 // In async lowering this returns.
325 case coro::ABI::Async: {
326 bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End);
327 if (!CoroEndBlockNeedsCleanup)
328 return;
329 break;
330 }
331
332 // In unique continuation lowering, the continuations always return void.
333 // But we may have implicitly allocated storage.
334 case coro::ABI::RetconOnce: {
335 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
336 auto *CoroEnd = cast<CoroEndInst>(End);
337 auto *RetTy = Shape.getResumeFunctionType()->getReturnType();
338
339 if (!CoroEnd->hasResults()) {
340 assert(RetTy->isVoidTy());
341 Builder.CreateRetVoid();
342 break;
343 }
344
345 auto *CoroResults = CoroEnd->getResults();
346 unsigned NumReturns = CoroResults->numReturns();
347
348 if (auto *RetStructTy = dyn_cast<StructType>(RetTy)) {
349 assert(RetStructTy->getNumElements() == NumReturns &&
350 "numbers of returns should match resume function singature");
351 Value *ReturnValue = UndefValue::get(RetStructTy);
352 unsigned Idx = 0;
353 for (Value *RetValEl : CoroResults->return_values())
354 ReturnValue = Builder.CreateInsertValue(ReturnValue, RetValEl, Idx++);
355 Builder.CreateRet(ReturnValue);
356 } else if (NumReturns == 0) {
357 assert(RetTy->isVoidTy());
358 Builder.CreateRetVoid();
359 } else {
360 assert(NumReturns == 1);
361 Builder.CreateRet(*CoroResults->retval_begin());
362 }
363 CoroResults->replaceAllUsesWith(
364 ConstantTokenNone::get(CoroResults->getContext()));
365 CoroResults->eraseFromParent();
366 break;
367 }
368
369 // In non-unique continuation lowering, we signal completion by returning
370 // a null continuation.
371 case coro::ABI::Retcon: {
372 assert(!cast<CoroEndInst>(End)->hasResults() &&
373 "retcon coroutine should not return any values");
374 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
375 auto RetTy = Shape.getResumeFunctionType()->getReturnType();
376 auto RetStructTy = dyn_cast<StructType>(RetTy);
377 PointerType *ContinuationTy =
378 cast<PointerType>(RetStructTy ? RetStructTy->getElementType(0) : RetTy);
379
380 Value *ReturnValue = ConstantPointerNull::get(ContinuationTy);
381 if (RetStructTy) {
382 ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy),
383 ReturnValue, 0);
384 }
385 Builder.CreateRet(ReturnValue);
386 break;
387 }
388 }
389
390 // Remove the rest of the block, by splitting it into an unreachable block.
391 auto *BB = End->getParent();
392 BB->splitBasicBlock(End);
393 BB->getTerminator()->eraseFromParent();
394 }
395
396 // Mark a coroutine as done, which implies that the coroutine is finished and
397 // never get resumed.
398 //
399 // In resume-switched ABI, the done state is represented by storing zero in
400 // ResumeFnAddr.
401 //
402 // NOTE: We couldn't omit the argument `FramePtr`. It is necessary because the
403 // pointer to the frame in splitted function is not stored in `Shape`.
markCoroutineAsDone(IRBuilder<> & Builder,const coro::Shape & Shape,Value * FramePtr)404 static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
405 Value *FramePtr) {
406 assert(
407 Shape.ABI == coro::ABI::Switch &&
408 "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
409 auto *GepIndex = Builder.CreateStructGEP(
410 Shape.FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Resume,
411 "ResumeFn.addr");
412 auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
413 Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
414 Builder.CreateStore(NullPtr, GepIndex);
415
416 // If the coroutine don't have unwind coro end, we could omit the store to
417 // the final suspend point since we could infer the coroutine is suspended
418 // at the final suspend point by the nullness of ResumeFnAddr.
419 // However, we can't skip it if the coroutine have unwind coro end. Since
420 // the coroutine reaches unwind coro end is considered suspended at the
421 // final suspend point (the ResumeFnAddr is null) but in fact the coroutine
422 // didn't complete yet. We need the IndexVal for the final suspend point
423 // to make the states clear.
424 if (Shape.SwitchLowering.HasUnwindCoroEnd &&
425 Shape.SwitchLowering.HasFinalSuspend) {
426 assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() &&
427 "The final suspend should only live in the last position of "
428 "CoroSuspends.");
429 ConstantInt *IndexVal = Shape.getIndex(Shape.CoroSuspends.size() - 1);
430 auto *FinalIndex = Builder.CreateStructGEP(
431 Shape.FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
432
433 Builder.CreateStore(IndexVal, FinalIndex);
434 }
435 }
436
437 /// Replace an unwind call to llvm.coro.end.
replaceUnwindCoroEnd(AnyCoroEndInst * End,const coro::Shape & Shape,Value * FramePtr,bool InResume,CallGraph * CG)438 static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
439 Value *FramePtr, bool InResume,
440 CallGraph *CG) {
441 IRBuilder<> Builder(End);
442
443 switch (Shape.ABI) {
444 // In switch-lowering, this does nothing in the main function.
445 case coro::ABI::Switch: {
446 // In C++'s specification, the coroutine should be marked as done
447 // if promise.unhandled_exception() throws. The frontend will
448 // call coro.end(true) along this path.
449 //
450 // FIXME: We should refactor this once there is other language
451 // which uses Switch-Resumed style other than C++.
452 markCoroutineAsDone(Builder, Shape, FramePtr);
453 if (!InResume)
454 return;
455 break;
456 }
457 // In async lowering this does nothing.
458 case coro::ABI::Async:
459 break;
460 // In continuation-lowering, this frees the continuation storage.
461 case coro::ABI::Retcon:
462 case coro::ABI::RetconOnce:
463 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
464 break;
465 }
466
467 // If coro.end has an associated bundle, add cleanupret instruction.
468 if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) {
469 auto *FromPad = cast<CleanupPadInst>(Bundle->Inputs[0]);
470 auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr);
471 End->getParent()->splitBasicBlock(End);
472 CleanupRet->getParent()->getTerminator()->eraseFromParent();
473 }
474 }
475
replaceCoroEnd(AnyCoroEndInst * End,const coro::Shape & Shape,Value * FramePtr,bool InResume,CallGraph * CG)476 static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
477 Value *FramePtr, bool InResume, CallGraph *CG) {
478 if (End->isUnwind())
479 replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
480 else
481 replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);
482
483 auto &Context = End->getContext();
484 End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context)
485 : ConstantInt::getFalse(Context));
486 End->eraseFromParent();
487 }
488
489 // In the resume function, we remove the last case (when coro::Shape is built,
490 // the final suspend point (if present) is always the last element of
491 // CoroSuspends array) since it is an undefined behavior to resume a coroutine
492 // suspended at the final suspend point.
493 // In the destroy function, if it isn't possible that the ResumeFnAddr is NULL
494 // and the coroutine doesn't suspend at the final suspend point actually (this
495 // is possible since the coroutine is considered suspended at the final suspend
496 // point if promise.unhandled_exception() exits via an exception), we can
497 // remove the last case.
handleFinalSuspend()498 void CoroCloner::handleFinalSuspend() {
499 assert(Shape.ABI == coro::ABI::Switch &&
500 Shape.SwitchLowering.HasFinalSuspend);
501
502 if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd)
503 return;
504
505 auto *Switch = cast<SwitchInst>(VMap[Shape.SwitchLowering.ResumeSwitch]);
506 auto FinalCaseIt = std::prev(Switch->case_end());
507 BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
508 Switch->removeCase(FinalCaseIt);
509 if (isSwitchDestroyFunction()) {
510 BasicBlock *OldSwitchBB = Switch->getParent();
511 auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
512 Builder.SetInsertPoint(OldSwitchBB->getTerminator());
513
514 if (NewF->isCoroOnlyDestroyWhenComplete()) {
515 // When the coroutine can only be destroyed when complete, we don't need
516 // to generate code for other cases.
517 Builder.CreateBr(ResumeBB);
518 } else {
519 auto *GepIndex = Builder.CreateStructGEP(
520 Shape.FrameTy, NewFramePtr, coro::Shape::SwitchFieldIndex::Resume,
521 "ResumeFn.addr");
522 auto *Load =
523 Builder.CreateLoad(Shape.getSwitchResumePointerType(), GepIndex);
524 auto *Cond = Builder.CreateIsNull(Load);
525 Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
526 }
527 OldSwitchBB->getTerminator()->eraseFromParent();
528 }
529 }
530
531 static FunctionType *
getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst * Suspend)532 getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
533 auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Suspend);
534 auto *StructTy = cast<StructType>(AsyncSuspend->getType());
535 auto &Context = Suspend->getParent()->getParent()->getContext();
536 auto *VoidTy = Type::getVoidTy(Context);
537 return FunctionType::get(VoidTy, StructTy->elements(), false);
538 }
539
createCloneDeclaration(Function & OrigF,coro::Shape & Shape,const Twine & Suffix,Module::iterator InsertBefore,AnyCoroSuspendInst * ActiveSuspend)540 static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
541 const Twine &Suffix,
542 Module::iterator InsertBefore,
543 AnyCoroSuspendInst *ActiveSuspend) {
544 Module *M = OrigF.getParent();
545 auto *FnTy = (Shape.ABI != coro::ABI::Async)
546 ? Shape.getResumeFunctionType()
547 : getFunctionTypeFromAsyncSuspend(ActiveSuspend);
548
549 Function *NewF =
550 Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
551 OrigF.getName() + Suffix);
552
553 M->getFunctionList().insert(InsertBefore, NewF);
554
555 return NewF;
556 }
557
558 /// Replace uses of the active llvm.coro.suspend.retcon/async call with the
559 /// arguments to the continuation function.
560 ///
561 /// This assumes that the builder has a meaningful insertion point.
replaceRetconOrAsyncSuspendUses()562 void CoroCloner::replaceRetconOrAsyncSuspendUses() {
563 assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
564 Shape.ABI == coro::ABI::Async);
565
566 auto NewS = VMap[ActiveSuspend];
567 if (NewS->use_empty())
568 return;
569
570 // Copy out all the continuation arguments after the buffer pointer into
571 // an easily-indexed data structure for convenience.
572 SmallVector<Value *, 8> Args;
573 // The async ABI includes all arguments -- including the first argument.
574 bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
575 for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(NewF->arg_begin()),
576 E = NewF->arg_end();
577 I != E; ++I)
578 Args.push_back(&*I);
579
580 // If the suspend returns a single scalar value, we can just do a simple
581 // replacement.
582 if (!isa<StructType>(NewS->getType())) {
583 assert(Args.size() == 1);
584 NewS->replaceAllUsesWith(Args.front());
585 return;
586 }
587
588 // Try to peephole extracts of an aggregate return.
589 for (Use &U : llvm::make_early_inc_range(NewS->uses())) {
590 auto *EVI = dyn_cast<ExtractValueInst>(U.getUser());
591 if (!EVI || EVI->getNumIndices() != 1)
592 continue;
593
594 EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]);
595 EVI->eraseFromParent();
596 }
597
598 // If we have no remaining uses, we're done.
599 if (NewS->use_empty())
600 return;
601
602 // Otherwise, we need to create an aggregate.
603 Value *Agg = PoisonValue::get(NewS->getType());
604 for (size_t I = 0, E = Args.size(); I != E; ++I)
605 Agg = Builder.CreateInsertValue(Agg, Args[I], I);
606
607 NewS->replaceAllUsesWith(Agg);
608 }
609
replaceCoroSuspends()610 void CoroCloner::replaceCoroSuspends() {
611 Value *SuspendResult;
612
613 switch (Shape.ABI) {
614 // In switch lowering, replace coro.suspend with the appropriate value
615 // for the type of function we're extracting.
616 // Replacing coro.suspend with (0) will result in control flow proceeding to
617 // a resume label associated with a suspend point, replacing it with (1) will
618 // result in control flow proceeding to a cleanup label associated with this
619 // suspend point.
620 case coro::ABI::Switch:
621 SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0);
622 break;
623
624 // In async lowering there are no uses of the result.
625 case coro::ABI::Async:
626 return;
627
628 // In returned-continuation lowering, the arguments from earlier
629 // continuations are theoretically arbitrary, and they should have been
630 // spilled.
631 case coro::ABI::RetconOnce:
632 case coro::ABI::Retcon:
633 return;
634 }
635
636 for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
637 // The active suspend was handled earlier.
638 if (CS == ActiveSuspend)
639 continue;
640
641 auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[CS]);
642 MappedCS->replaceAllUsesWith(SuspendResult);
643 MappedCS->eraseFromParent();
644 }
645 }
646
replaceCoroEnds()647 void CoroCloner::replaceCoroEnds() {
648 for (AnyCoroEndInst *CE : Shape.CoroEnds) {
649 // We use a null call graph because there's no call graph node for
650 // the cloned function yet. We'll just be rebuilding that later.
651 auto *NewCE = cast<AnyCoroEndInst>(VMap[CE]);
652 replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr);
653 }
654 }
655
replaceSwiftErrorOps(Function & F,coro::Shape & Shape,ValueToValueMapTy * VMap)656 static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
657 ValueToValueMapTy *VMap) {
658 if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty())
659 return;
660 Value *CachedSlot = nullptr;
661 auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
662 if (CachedSlot)
663 return CachedSlot;
664
665 // Check if the function has a swifterror argument.
666 for (auto &Arg : F.args()) {
667 if (Arg.isSwiftError()) {
668 CachedSlot = &Arg;
669 return &Arg;
670 }
671 }
672
673 // Create a swifterror alloca.
674 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
675 auto Alloca = Builder.CreateAlloca(ValueTy);
676 Alloca->setSwiftError(true);
677
678 CachedSlot = Alloca;
679 return Alloca;
680 };
681
682 for (CallInst *Op : Shape.SwiftErrorOps) {
683 auto MappedOp = VMap ? cast<CallInst>((*VMap)[Op]) : Op;
684 IRBuilder<> Builder(MappedOp);
685
686 // If there are no arguments, this is a 'get' operation.
687 Value *MappedResult;
688 if (Op->arg_empty()) {
689 auto ValueTy = Op->getType();
690 auto Slot = getSwiftErrorSlot(ValueTy);
691 MappedResult = Builder.CreateLoad(ValueTy, Slot);
692 } else {
693 assert(Op->arg_size() == 1);
694 auto Value = MappedOp->getArgOperand(0);
695 auto ValueTy = Value->getType();
696 auto Slot = getSwiftErrorSlot(ValueTy);
697 Builder.CreateStore(Value, Slot);
698 MappedResult = Slot;
699 }
700
701 MappedOp->replaceAllUsesWith(MappedResult);
702 MappedOp->eraseFromParent();
703 }
704
705 // If we're updating the original function, we've invalidated SwiftErrorOps.
706 if (VMap == nullptr) {
707 Shape.SwiftErrorOps.clear();
708 }
709 }
710
711 /// Returns all DbgVariableIntrinsic in F.
712 static std::pair<SmallVector<DbgVariableIntrinsic *, 8>,
713 SmallVector<DbgVariableRecord *>>
collectDbgVariableIntrinsics(Function & F)714 collectDbgVariableIntrinsics(Function &F) {
715 SmallVector<DbgVariableIntrinsic *, 8> Intrinsics;
716 SmallVector<DbgVariableRecord *> DbgVariableRecords;
717 for (auto &I : instructions(F)) {
718 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
719 DbgVariableRecords.push_back(&DVR);
720 if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
721 Intrinsics.push_back(DVI);
722 }
723 return {Intrinsics, DbgVariableRecords};
724 }
725
replaceSwiftErrorOps()726 void CoroCloner::replaceSwiftErrorOps() {
727 ::replaceSwiftErrorOps(*NewF, Shape, &VMap);
728 }
729
salvageDebugInfo()730 void CoroCloner::salvageDebugInfo() {
731 auto [Worklist, DbgVariableRecords] = collectDbgVariableIntrinsics(*NewF);
732 SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
733
734 // Only 64-bit ABIs have a register we can refer to with the entry value.
735 bool UseEntryValue =
736 llvm::Triple(OrigF.getParent()->getTargetTriple()).isArch64Bit();
737 for (DbgVariableIntrinsic *DVI : Worklist)
738 coro::salvageDebugInfo(ArgToAllocaMap, *DVI, Shape.OptimizeFrame,
739 UseEntryValue);
740 for (DbgVariableRecord *DVR : DbgVariableRecords)
741 coro::salvageDebugInfo(ArgToAllocaMap, *DVR, Shape.OptimizeFrame,
742 UseEntryValue);
743
744 // Remove all salvaged dbg.declare intrinsics that became
745 // either unreachable or stale due to the CoroSplit transformation.
746 DominatorTree DomTree(*NewF);
747 auto IsUnreachableBlock = [&](BasicBlock *BB) {
748 return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr,
749 &DomTree);
750 };
751 auto RemoveOne = [&](auto *DVI) {
752 if (IsUnreachableBlock(DVI->getParent()))
753 DVI->eraseFromParent();
754 else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
755 // Count all non-debuginfo uses in reachable blocks.
756 unsigned Uses = 0;
757 for (auto *User : DVI->getVariableLocationOp(0)->users())
758 if (auto *I = dyn_cast<Instruction>(User))
759 if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent()))
760 ++Uses;
761 if (!Uses)
762 DVI->eraseFromParent();
763 }
764 };
765 for_each(Worklist, RemoveOne);
766 for_each(DbgVariableRecords, RemoveOne);
767 }
768
replaceEntryBlock()769 void CoroCloner::replaceEntryBlock() {
770 // In the original function, the AllocaSpillBlock is a block immediately
771 // following the allocation of the frame object which defines GEPs for
772 // all the allocas that have been moved into the frame, and it ends by
773 // branching to the original beginning of the coroutine. Make this
774 // the entry block of the cloned function.
775 auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
776 auto *OldEntry = &NewF->getEntryBlock();
777 Entry->setName("entry" + Suffix);
778 Entry->moveBefore(OldEntry);
779 Entry->getTerminator()->eraseFromParent();
780
781 // Clear all predecessors of the new entry block. There should be
782 // exactly one predecessor, which we created when splitting out
783 // AllocaSpillBlock to begin with.
784 assert(Entry->hasOneUse());
785 auto BranchToEntry = cast<BranchInst>(Entry->user_back());
786 assert(BranchToEntry->isUnconditional());
787 Builder.SetInsertPoint(BranchToEntry);
788 Builder.CreateUnreachable();
789 BranchToEntry->eraseFromParent();
790
791 // Branch from the entry to the appropriate place.
792 Builder.SetInsertPoint(Entry);
793 switch (Shape.ABI) {
794 case coro::ABI::Switch: {
795 // In switch-lowering, we built a resume-entry block in the original
796 // function. Make the entry block branch to this.
797 auto *SwitchBB =
798 cast<BasicBlock>(VMap[Shape.SwitchLowering.ResumeEntryBlock]);
799 Builder.CreateBr(SwitchBB);
800 break;
801 }
802 case coro::ABI::Async:
803 case coro::ABI::Retcon:
804 case coro::ABI::RetconOnce: {
805 // In continuation ABIs, we want to branch to immediately after the
806 // active suspend point. Earlier phases will have put the suspend in its
807 // own basic block, so just thread our jump directly to its successor.
808 assert((Shape.ABI == coro::ABI::Async &&
809 isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
810 ((Shape.ABI == coro::ABI::Retcon ||
811 Shape.ABI == coro::ABI::RetconOnce) &&
812 isa<CoroSuspendRetconInst>(ActiveSuspend)));
813 auto *MappedCS = cast<AnyCoroSuspendInst>(VMap[ActiveSuspend]);
814 auto Branch = cast<BranchInst>(MappedCS->getNextNode());
815 assert(Branch->isUnconditional());
816 Builder.CreateBr(Branch->getSuccessor(0));
817 break;
818 }
819 }
820
821 // Any static alloca that's still being used but not reachable from the new
822 // entry needs to be moved to the new entry.
823 Function *F = OldEntry->getParent();
824 DominatorTree DT{*F};
825 for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
826 auto *Alloca = dyn_cast<AllocaInst>(&I);
827 if (!Alloca || I.use_empty())
828 continue;
829 if (DT.isReachableFromEntry(I.getParent()) ||
830 !isa<ConstantInt>(Alloca->getArraySize()))
831 continue;
832 I.moveBefore(*Entry, Entry->getFirstInsertionPt());
833 }
834 }
835
836 /// Derive the value of the new frame pointer.
deriveNewFramePointer()837 Value *CoroCloner::deriveNewFramePointer() {
838 // Builder should be inserting to the front of the new entry block.
839
840 switch (Shape.ABI) {
841 // In switch-lowering, the argument is the frame pointer.
842 case coro::ABI::Switch:
843 return &*NewF->arg_begin();
844 // In async-lowering, one of the arguments is an async context as determined
845 // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context of
846 // the resume function from the async context projection function associated
847 // with the active suspend. The frame is located as a tail to the async
848 // context header.
849 case coro::ABI::Async: {
850 auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
851 auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
852 auto *CalleeContext = NewF->getArg(ContextIdx);
853 auto *ProjectionFunc =
854 ActiveAsyncSuspend->getAsyncContextProjectionFunction();
855 auto DbgLoc =
856 cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
857 // Calling i8* (i8*)
858 auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(),
859 ProjectionFunc, CalleeContext);
860 CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
861 CallerContext->setDebugLoc(DbgLoc);
862 // The frame is located after the async_context header.
863 auto &Context = Builder.getContext();
864 auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32(
865 Type::getInt8Ty(Context), CallerContext,
866 Shape.AsyncLowering.FrameOffset, "async.ctx.frameptr");
867 // Inline the projection function.
868 InlineFunctionInfo InlineInfo;
869 auto InlineRes = InlineFunction(*CallerContext, InlineInfo);
870 assert(InlineRes.isSuccess());
871 (void)InlineRes;
872 return FramePtrAddr;
873 }
874 // In continuation-lowering, the argument is the opaque storage.
875 case coro::ABI::Retcon:
876 case coro::ABI::RetconOnce: {
877 Argument *NewStorage = &*NewF->arg_begin();
878 auto FramePtrTy = PointerType::getUnqual(Shape.FrameTy->getContext());
879
880 // If the storage is inline, just bitcast to the storage to the frame type.
881 if (Shape.RetconLowering.IsFrameInlineInStorage)
882 return NewStorage;
883
884 // Otherwise, load the real frame from the opaque storage.
885 return Builder.CreateLoad(FramePtrTy, NewStorage);
886 }
887 }
888 llvm_unreachable("bad ABI");
889 }
890
addFramePointerAttrs(AttributeList & Attrs,LLVMContext & Context,unsigned ParamIndex,uint64_t Size,Align Alignment,bool NoAlias)891 static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
892 unsigned ParamIndex, uint64_t Size,
893 Align Alignment, bool NoAlias) {
894 AttrBuilder ParamAttrs(Context);
895 ParamAttrs.addAttribute(Attribute::NonNull);
896 ParamAttrs.addAttribute(Attribute::NoUndef);
897
898 if (NoAlias)
899 ParamAttrs.addAttribute(Attribute::NoAlias);
900
901 ParamAttrs.addAlignmentAttr(Alignment);
902 ParamAttrs.addDereferenceableAttr(Size);
903 Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
904 }
905
addAsyncContextAttrs(AttributeList & Attrs,LLVMContext & Context,unsigned ParamIndex)906 static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context,
907 unsigned ParamIndex) {
908 AttrBuilder ParamAttrs(Context);
909 ParamAttrs.addAttribute(Attribute::SwiftAsync);
910 Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
911 }
912
addSwiftSelfAttrs(AttributeList & Attrs,LLVMContext & Context,unsigned ParamIndex)913 static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
914 unsigned ParamIndex) {
915 AttrBuilder ParamAttrs(Context);
916 ParamAttrs.addAttribute(Attribute::SwiftSelf);
917 Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs);
918 }
919
920 /// Clone the body of the original function into a resume function of
921 /// some sort.
create()922 void CoroCloner::create() {
923 // Create the new function if we don't already have one.
924 if (!NewF) {
925 NewF = createCloneDeclaration(OrigF, Shape, Suffix,
926 OrigF.getParent()->end(), ActiveSuspend);
927 }
928
929 // Replace all args with dummy instructions. If an argument is the old frame
930 // pointer, the dummy will be replaced by the new frame pointer once it is
931 // computed below. Uses of all other arguments should have already been
932 // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine
933 // frame.
934 SmallVector<Instruction *> DummyArgs;
935 for (Argument &A : OrigF.args()) {
936 DummyArgs.push_back(new FreezeInst(PoisonValue::get(A.getType())));
937 VMap[&A] = DummyArgs.back();
938 }
939
940 SmallVector<ReturnInst *, 4> Returns;
941
942 // Ignore attempts to change certain attributes of the function.
943 // TODO: maybe there should be a way to suppress this during cloning?
944 auto savedVisibility = NewF->getVisibility();
945 auto savedUnnamedAddr = NewF->getUnnamedAddr();
946 auto savedDLLStorageClass = NewF->getDLLStorageClass();
947
948 // NewF's linkage (which CloneFunctionInto does *not* change) might not
949 // be compatible with the visibility of OrigF (which it *does* change),
950 // so protect against that.
951 auto savedLinkage = NewF->getLinkage();
952 NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
953
954 CloneFunctionInto(NewF, &OrigF, VMap,
955 CloneFunctionChangeType::LocalChangesOnly, Returns);
956
957 auto &Context = NewF->getContext();
958
959 // For async functions / continuations, adjust the scope line of the
960 // clone to the line number of the suspend point. However, only
961 // adjust the scope line when the files are the same. This ensures
962 // line number and file name belong together. The scope line is
963 // associated with all pre-prologue instructions. This avoids a jump
964 // in the linetable from the function declaration to the suspend point.
965 if (DISubprogram *SP = NewF->getSubprogram()) {
966 assert(SP != OrigF.getSubprogram() && SP->isDistinct());
967 if (ActiveSuspend)
968 if (auto DL = ActiveSuspend->getDebugLoc())
969 if (SP->getFile() == DL->getFile())
970 SP->setScopeLine(DL->getLine());
971 // Update the linkage name to reflect the modified symbol name. It
972 // is necessary to update the linkage name in Swift, since the
973 // mangling changes for resume functions. It might also be the
974 // right thing to do in C++, but due to a limitation in LLVM's
975 // AsmPrinter we can only do this if the function doesn't have an
976 // abstract specification, since the DWARF backend expects the
977 // abstract specification to contain the linkage name and asserts
978 // that they are identical.
979 if (SP->getUnit() &&
980 SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift) {
981 SP->replaceLinkageName(MDString::get(Context, NewF->getName()));
982 if (auto *Decl = SP->getDeclaration()) {
983 auto *NewDecl = DISubprogram::get(
984 Decl->getContext(), Decl->getScope(), Decl->getName(),
985 NewF->getName(), Decl->getFile(), Decl->getLine(), Decl->getType(),
986 Decl->getScopeLine(), Decl->getContainingType(),
987 Decl->getVirtualIndex(), Decl->getThisAdjustment(),
988 Decl->getFlags(), Decl->getSPFlags(), Decl->getUnit(),
989 Decl->getTemplateParams(), nullptr, Decl->getRetainedNodes(),
990 Decl->getThrownTypes(), Decl->getAnnotations(),
991 Decl->getTargetFuncName());
992 SP->replaceDeclaration(NewDecl);
993 }
994 }
995 }
996
997 NewF->setLinkage(savedLinkage);
998 NewF->setVisibility(savedVisibility);
999 NewF->setUnnamedAddr(savedUnnamedAddr);
1000 NewF->setDLLStorageClass(savedDLLStorageClass);
1001 // The function sanitizer metadata needs to match the signature of the
1002 // function it is being attached to. However this does not hold for split
1003 // functions here. Thus remove the metadata for split functions.
1004 if (Shape.ABI == coro::ABI::Switch &&
1005 NewF->hasMetadata(LLVMContext::MD_func_sanitize))
1006 NewF->eraseMetadata(LLVMContext::MD_func_sanitize);
1007
1008 // Replace the attributes of the new function:
1009 auto OrigAttrs = NewF->getAttributes();
1010 auto NewAttrs = AttributeList();
1011
1012 switch (Shape.ABI) {
1013 case coro::ABI::Switch:
1014 // Bootstrap attributes by copying function attributes from the
1015 // original function. This should include optimization settings and so on.
1016 NewAttrs = NewAttrs.addFnAttributes(
1017 Context, AttrBuilder(Context, OrigAttrs.getFnAttrs()));
1018
1019 addFramePointerAttrs(NewAttrs, Context, 0, Shape.FrameSize,
1020 Shape.FrameAlign, /*NoAlias=*/false);
1021 break;
1022 case coro::ABI::Async: {
1023 auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(ActiveSuspend);
1024 if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo,
1025 Attribute::SwiftAsync)) {
1026 uint32_t ArgAttributeIndices =
1027 ActiveAsyncSuspend->getStorageArgumentIndex();
1028 auto ContextArgIndex = ArgAttributeIndices & 0xff;
1029 addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex);
1030
1031 // `swiftasync` must preceed `swiftself` so 0 is not a valid index for
1032 // `swiftself`.
1033 auto SwiftSelfIndex = ArgAttributeIndices >> 8;
1034 if (SwiftSelfIndex)
1035 addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex);
1036 }
1037
1038 // Transfer the original function's attributes.
1039 auto FnAttrs = OrigF.getAttributes().getFnAttrs();
1040 NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, FnAttrs));
1041 break;
1042 }
1043 case coro::ABI::Retcon:
1044 case coro::ABI::RetconOnce:
1045 // If we have a continuation prototype, just use its attributes,
1046 // full-stop.
1047 NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
1048
1049 /// FIXME: Is it really good to add the NoAlias attribute?
1050 addFramePointerAttrs(NewAttrs, Context, 0,
1051 Shape.getRetconCoroId()->getStorageSize(),
1052 Shape.getRetconCoroId()->getStorageAlignment(),
1053 /*NoAlias=*/true);
1054
1055 break;
1056 }
1057
1058 switch (Shape.ABI) {
1059 // In these ABIs, the cloned functions always return 'void', and the
1060 // existing return sites are meaningless. Note that for unique
1061 // continuations, this includes the returns associated with suspends;
1062 // this is fine because we can't suspend twice.
1063 case coro::ABI::Switch:
1064 case coro::ABI::RetconOnce:
1065 // Remove old returns.
1066 for (ReturnInst *Return : Returns)
1067 changeToUnreachable(Return);
1068 break;
1069
1070 // With multi-suspend continuations, we'll already have eliminated the
1071 // original returns and inserted returns before all the suspend points,
1072 // so we want to leave any returns in place.
1073 case coro::ABI::Retcon:
1074 break;
1075 // Async lowering will insert musttail call functions at all suspend points
1076 // followed by a return.
1077 // Don't change returns to unreachable because that will trip up the verifier.
1078 // These returns should be unreachable from the clone.
1079 case coro::ABI::Async:
1080 break;
1081 }
1082
1083 NewF->setAttributes(NewAttrs);
1084 NewF->setCallingConv(Shape.getResumeFunctionCC());
1085
1086 // Set up the new entry block.
1087 replaceEntryBlock();
1088
1089 // Turn symmetric transfers into musttail calls.
1090 for (CallInst *ResumeCall : Shape.SymmetricTransfers) {
1091 ResumeCall = cast<CallInst>(VMap[ResumeCall]);
1092 if (TTI.supportsTailCallFor(ResumeCall)) {
1093 // FIXME: Could we support symmetric transfer effectively without
1094 // musttail?
1095 ResumeCall->setTailCallKind(CallInst::TCK_MustTail);
1096 }
1097
1098 // Put a 'ret void' after the call, and split any remaining instructions to
1099 // an unreachable block.
1100 BasicBlock *BB = ResumeCall->getParent();
1101 BB->splitBasicBlock(ResumeCall->getNextNode());
1102 Builder.SetInsertPoint(BB->getTerminator());
1103 Builder.CreateRetVoid();
1104 BB->getTerminator()->eraseFromParent();
1105 }
1106
1107 Builder.SetInsertPoint(&NewF->getEntryBlock().front());
1108 NewFramePtr = deriveNewFramePointer();
1109
1110 // Remap frame pointer.
1111 Value *OldFramePtr = VMap[Shape.FramePtr];
1112 NewFramePtr->takeName(OldFramePtr);
1113 OldFramePtr->replaceAllUsesWith(NewFramePtr);
1114
1115 // Remap vFrame pointer.
1116 auto *NewVFrame = Builder.CreateBitCast(
1117 NewFramePtr, PointerType::getUnqual(Builder.getContext()), "vFrame");
1118 Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
1119 if (OldVFrame != NewVFrame)
1120 OldVFrame->replaceAllUsesWith(NewVFrame);
1121
1122 // All uses of the arguments should have been resolved by this point,
1123 // so we can safely remove the dummy values.
1124 for (Instruction *DummyArg : DummyArgs) {
1125 DummyArg->replaceAllUsesWith(PoisonValue::get(DummyArg->getType()));
1126 DummyArg->deleteValue();
1127 }
1128
1129 switch (Shape.ABI) {
1130 case coro::ABI::Switch:
1131 // Rewrite final suspend handling as it is not done via switch (allows to
1132 // remove final case from the switch, since it is undefined behavior to
1133 // resume the coroutine suspended at the final suspend point.
1134 if (Shape.SwitchLowering.HasFinalSuspend)
1135 handleFinalSuspend();
1136 break;
1137 case coro::ABI::Async:
1138 case coro::ABI::Retcon:
1139 case coro::ABI::RetconOnce:
1140 // Replace uses of the active suspend with the corresponding
1141 // continuation-function arguments.
1142 assert(ActiveSuspend != nullptr &&
1143 "no active suspend when lowering a continuation-style coroutine");
1144 replaceRetconOrAsyncSuspendUses();
1145 break;
1146 }
1147
1148 // Handle suspends.
1149 replaceCoroSuspends();
1150
1151 // Handle swifterror.
1152 replaceSwiftErrorOps();
1153
1154 // Remove coro.end intrinsics.
1155 replaceCoroEnds();
1156
1157 // Salvage debug info that points into the coroutine frame.
1158 salvageDebugInfo();
1159
1160 // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
1161 // to suppress deallocation code.
1162 if (Shape.ABI == coro::ABI::Switch)
1163 coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
1164 /*Elide=*/FKind == CoroCloner::Kind::SwitchCleanup);
1165 }
1166
updateAsyncFuncPointerContextSize(coro::Shape & Shape)1167 static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
1168 assert(Shape.ABI == coro::ABI::Async);
1169
1170 auto *FuncPtrStruct = cast<ConstantStruct>(
1171 Shape.AsyncLowering.AsyncFuncPointer->getInitializer());
1172 auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(0);
1173 auto *OrigContextSize = FuncPtrStruct->getOperand(1);
1174 auto *NewContextSize = ConstantInt::get(OrigContextSize->getType(),
1175 Shape.AsyncLowering.ContextSize);
1176 auto *NewFuncPtrStruct = ConstantStruct::get(
1177 FuncPtrStruct->getType(), OrigRelativeFunOffset, NewContextSize);
1178
1179 Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
1180 }
1181
replaceFrameSizeAndAlignment(coro::Shape & Shape)1182 static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
1183 if (Shape.ABI == coro::ABI::Async)
1184 updateAsyncFuncPointerContextSize(Shape);
1185
1186 for (CoroAlignInst *CA : Shape.CoroAligns) {
1187 CA->replaceAllUsesWith(
1188 ConstantInt::get(CA->getType(), Shape.FrameAlign.value()));
1189 CA->eraseFromParent();
1190 }
1191
1192 if (Shape.CoroSizes.empty())
1193 return;
1194
1195 // In the same function all coro.sizes should have the same result type.
1196 auto *SizeIntrin = Shape.CoroSizes.back();
1197 Module *M = SizeIntrin->getModule();
1198 const DataLayout &DL = M->getDataLayout();
1199 auto Size = DL.getTypeAllocSize(Shape.FrameTy);
1200 auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
1201
1202 for (CoroSizeInst *CS : Shape.CoroSizes) {
1203 CS->replaceAllUsesWith(SizeConstant);
1204 CS->eraseFromParent();
1205 }
1206 }
1207
postSplitCleanup(Function & F)1208 static void postSplitCleanup(Function &F) {
1209 removeUnreachableBlocks(F);
1210
1211 #ifndef NDEBUG
1212 // For now, we do a mandatory verification step because we don't
1213 // entirely trust this pass. Note that we don't want to add a verifier
1214 // pass to FPM below because it will also verify all the global data.
1215 if (verifyFunction(F, &errs()))
1216 report_fatal_error("Broken function");
1217 #endif
1218 }
1219
1220 // Coroutine has no suspend points. Remove heap allocation for the coroutine
1221 // frame if possible.
handleNoSuspendCoroutine(coro::Shape & Shape)1222 static void handleNoSuspendCoroutine(coro::Shape &Shape) {
1223 auto *CoroBegin = Shape.CoroBegin;
1224 auto *CoroId = CoroBegin->getId();
1225 auto *AllocInst = CoroId->getCoroAlloc();
1226 switch (Shape.ABI) {
1227 case coro::ABI::Switch: {
1228 auto SwitchId = cast<CoroIdInst>(CoroId);
1229 coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
1230 if (AllocInst) {
1231 IRBuilder<> Builder(AllocInst);
1232 auto *Frame = Builder.CreateAlloca(Shape.FrameTy);
1233 Frame->setAlignment(Shape.FrameAlign);
1234 AllocInst->replaceAllUsesWith(Builder.getFalse());
1235 AllocInst->eraseFromParent();
1236 CoroBegin->replaceAllUsesWith(Frame);
1237 } else {
1238 CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
1239 }
1240
1241 break;
1242 }
1243 case coro::ABI::Async:
1244 case coro::ABI::Retcon:
1245 case coro::ABI::RetconOnce:
1246 CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType()));
1247 break;
1248 }
1249
1250 CoroBegin->eraseFromParent();
1251 Shape.CoroBegin = nullptr;
1252 }
1253
1254 // SimplifySuspendPoint needs to check that there is no calls between
1255 // coro_save and coro_suspend, since any of the calls may potentially resume
1256 // the coroutine and if that is the case we cannot eliminate the suspend point.
hasCallsInBlockBetween(Instruction * From,Instruction * To)1257 static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
1258 for (Instruction *I = From; I != To; I = I->getNextNode()) {
1259 // Assume that no intrinsic can resume the coroutine.
1260 if (isa<IntrinsicInst>(I))
1261 continue;
1262
1263 if (isa<CallBase>(I))
1264 return true;
1265 }
1266 return false;
1267 }
1268
hasCallsInBlocksBetween(BasicBlock * SaveBB,BasicBlock * ResDesBB)1269 static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
1270 SmallPtrSet<BasicBlock *, 8> Set;
1271 SmallVector<BasicBlock *, 8> Worklist;
1272
1273 Set.insert(SaveBB);
1274 Worklist.push_back(ResDesBB);
1275
1276 // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
1277 // returns a token consumed by suspend instruction, all blocks in between
1278 // will have to eventually hit SaveBB when going backwards from ResDesBB.
1279 while (!Worklist.empty()) {
1280 auto *BB = Worklist.pop_back_val();
1281 Set.insert(BB);
1282 for (auto *Pred : predecessors(BB))
1283 if (!Set.contains(Pred))
1284 Worklist.push_back(Pred);
1285 }
1286
1287 // SaveBB and ResDesBB are checked separately in hasCallsBetween.
1288 Set.erase(SaveBB);
1289 Set.erase(ResDesBB);
1290
1291 for (auto *BB : Set)
1292 if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
1293 return true;
1294
1295 return false;
1296 }
1297
hasCallsBetween(Instruction * Save,Instruction * ResumeOrDestroy)1298 static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
1299 auto *SaveBB = Save->getParent();
1300 auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
1301
1302 if (SaveBB == ResumeOrDestroyBB)
1303 return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);
1304
1305 // Any calls from Save to the end of the block?
1306 if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
1307 return true;
1308
1309 // Any calls from begging of the block up to ResumeOrDestroy?
1310 if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
1311 ResumeOrDestroy))
1312 return true;
1313
1314 // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
1315 if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
1316 return true;
1317
1318 return false;
1319 }
1320
1321 // If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
1322 // suspend point and replace it with nornal control flow.
simplifySuspendPoint(CoroSuspendInst * Suspend,CoroBeginInst * CoroBegin)1323 static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
1324 CoroBeginInst *CoroBegin) {
1325 Instruction *Prev = Suspend->getPrevNode();
1326 if (!Prev) {
1327 auto *Pred = Suspend->getParent()->getSinglePredecessor();
1328 if (!Pred)
1329 return false;
1330 Prev = Pred->getTerminator();
1331 }
1332
1333 CallBase *CB = dyn_cast<CallBase>(Prev);
1334 if (!CB)
1335 return false;
1336
1337 auto *Callee = CB->getCalledOperand()->stripPointerCasts();
1338
1339 // See if the callsite is for resumption or destruction of the coroutine.
1340 auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
1341 if (!SubFn)
1342 return false;
1343
1344 // Does not refer to the current coroutine, we cannot do anything with it.
1345 if (SubFn->getFrame() != CoroBegin)
1346 return false;
1347
1348 // See if the transformation is safe. Specifically, see if there are any
1349 // calls in between Save and CallInstr. They can potenitally resume the
1350 // coroutine rendering this optimization unsafe.
1351 auto *Save = Suspend->getCoroSave();
1352 if (hasCallsBetween(Save, CB))
1353 return false;
1354
1355 // Replace llvm.coro.suspend with the value that results in resumption over
1356 // the resume or cleanup path.
1357 Suspend->replaceAllUsesWith(SubFn->getRawIndex());
1358 Suspend->eraseFromParent();
1359 Save->eraseFromParent();
1360
1361 // No longer need a call to coro.resume or coro.destroy.
1362 if (auto *Invoke = dyn_cast<InvokeInst>(CB)) {
1363 BranchInst::Create(Invoke->getNormalDest(), Invoke->getIterator());
1364 }
1365
1366 // Grab the CalledValue from CB before erasing the CallInstr.
1367 auto *CalledValue = CB->getCalledOperand();
1368 CB->eraseFromParent();
1369
1370 // If no more users remove it. Usually it is a bitcast of SubFn.
1371 if (CalledValue != SubFn && CalledValue->user_empty())
1372 if (auto *I = dyn_cast<Instruction>(CalledValue))
1373 I->eraseFromParent();
1374
1375 // Now we are good to remove SubFn.
1376 if (SubFn->user_empty())
1377 SubFn->eraseFromParent();
1378
1379 return true;
1380 }
1381
1382 // Remove suspend points that are simplified.
simplifySuspendPoints(coro::Shape & Shape)1383 static void simplifySuspendPoints(coro::Shape &Shape) {
1384 // Currently, the only simplification we do is switch-lowering-specific.
1385 if (Shape.ABI != coro::ABI::Switch)
1386 return;
1387
1388 auto &S = Shape.CoroSuspends;
1389 size_t I = 0, N = S.size();
1390 if (N == 0)
1391 return;
1392
1393 size_t ChangedFinalIndex = std::numeric_limits<size_t>::max();
1394 while (true) {
1395 auto SI = cast<CoroSuspendInst>(S[I]);
1396 // Leave final.suspend to handleFinalSuspend since it is undefined behavior
1397 // to resume a coroutine suspended at the final suspend point.
1398 if (!SI->isFinal() && simplifySuspendPoint(SI, Shape.CoroBegin)) {
1399 if (--N == I)
1400 break;
1401
1402 std::swap(S[I], S[N]);
1403
1404 if (cast<CoroSuspendInst>(S[I])->isFinal()) {
1405 assert(Shape.SwitchLowering.HasFinalSuspend);
1406 ChangedFinalIndex = I;
1407 }
1408
1409 continue;
1410 }
1411 if (++I == N)
1412 break;
1413 }
1414 S.resize(N);
1415
1416 // Maintain final.suspend in case final suspend was swapped.
1417 // Due to we requrie the final suspend to be the last element of CoroSuspends.
1418 if (ChangedFinalIndex < N) {
1419 assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal());
1420 std::swap(S[ChangedFinalIndex], S.back());
1421 }
1422 }
1423
1424 namespace {
1425
1426 struct SwitchCoroutineSplitter {
split__anon98d7ec870511::SwitchCoroutineSplitter1427 static void split(Function &F, coro::Shape &Shape,
1428 SmallVectorImpl<Function *> &Clones,
1429 TargetTransformInfo &TTI) {
1430 assert(Shape.ABI == coro::ABI::Switch);
1431
1432 createResumeEntryBlock(F, Shape);
1433 auto *ResumeClone =
1434 createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI);
1435 auto *DestroyClone =
1436 createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI);
1437 auto *CleanupClone =
1438 createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI);
1439
1440 postSplitCleanup(*ResumeClone);
1441 postSplitCleanup(*DestroyClone);
1442 postSplitCleanup(*CleanupClone);
1443
1444     // Store the resume/destroy/cleanup function addresses in the coroutine frame.
1445 updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
1446
1447 assert(Clones.empty());
1448 Clones.push_back(ResumeClone);
1449 Clones.push_back(DestroyClone);
1450 Clones.push_back(CleanupClone);
1451
1452     // Create a constant array referring to the resume/destroy/cleanup functions
1453     // pointed to by the Info argument of coro.id, so that the CoroElide pass
1454     // can determine the correct function to call.
1455 setCoroInfo(F, Shape, Clones);
1456 }
1457
1458 private:
1459   // Create a resume clone by cloning the body of the original function, setting
1460   // a new entry block and replacing coro.suspend with an appropriate value to
1461   // force the resume or cleanup path at every suspend point.
1462   static Function *createClone(Function &F, const Twine &Suffix,
1463 coro::Shape &Shape, CoroCloner::Kind FKind,
1464 TargetTransformInfo &TTI) {
1465 CoroCloner Cloner(F, Suffix, Shape, FKind, TTI);
1466 Cloner.create();
1467 return Cloner.getFunction();
1468 }
1469
1470 // Create an entry block for a resume function with a switch that will jump to
1471 // suspend points.
1472   static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
1473 LLVMContext &C = F.getContext();
1474
1475 // resume.entry:
1476 // %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32
1477 // 0, i32 2 % index = load i32, i32* %index.addr switch i32 %index, label
1478 // %unreachable [
1479 // i32 0, label %resume.0
1480 // i32 1, label %resume.1
1481 // ...
1482 // ]
1483
1484 auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
1485 auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);
1486
1487 IRBuilder<> Builder(NewEntry);
1488 auto *FramePtr = Shape.FramePtr;
1489 auto *FrameTy = Shape.FrameTy;
1490 auto *GepIndex = Builder.CreateStructGEP(
1491 FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
1492 auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
1493 auto *Switch =
1494 Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
1495 Shape.SwitchLowering.ResumeSwitch = Switch;
1496
1497 size_t SuspendIndex = 0;
1498 for (auto *AnyS : Shape.CoroSuspends) {
1499 auto *S = cast<CoroSuspendInst>(AnyS);
1500 ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
1501
1502 // Replace CoroSave with a store to Index:
1503 // %index.addr = getelementptr %f.frame... (index field number)
1504 // store i32 %IndexVal, i32* %index.addr1
1505 auto *Save = S->getCoroSave();
1506 Builder.SetInsertPoint(Save);
1507 if (S->isFinal()) {
1508 // The coroutine should be marked done if it reaches the final suspend
1509 // point.
1510 markCoroutineAsDone(Builder, Shape, FramePtr);
1511 } else {
1512 auto *GepIndex = Builder.CreateStructGEP(
1513 FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
1514 Builder.CreateStore(IndexVal, GepIndex);
1515 }
1516
1517 Save->replaceAllUsesWith(ConstantTokenNone::get(C));
1518 Save->eraseFromParent();
1519
1520 // Split block before and after coro.suspend and add a jump from an entry
1521 // switch:
1522 //
1523 // whateverBB:
1524 // whatever
1525 // %0 = call i8 @llvm.coro.suspend(token none, i1 false)
1526 // switch i8 %0, label %suspend[i8 0, label %resume
1527 // i8 1, label %cleanup]
1528 // becomes:
1529 //
1530 // whateverBB:
1531 // whatever
1532 // br label %resume.0.landing
1533 //
1534 // resume.0: ; <--- jump from the switch in the resume.entry
1535 // %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
1536 // br label %resume.0.landing
1537 //
1538 // resume.0.landing:
1539 // %1 = phi i8[-1, %whateverBB], [%0, %resume.0]
1540 // switch i8 % 1, label %suspend [i8 0, label %resume
1541 // i8 1, label %cleanup]
1542
1543 auto *SuspendBB = S->getParent();
1544 auto *ResumeBB =
1545 SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex));
1546 auto *LandingBB = ResumeBB->splitBasicBlock(
1547 S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
1548 Switch->addCase(IndexVal, ResumeBB);
1549
1550 cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
1551 auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "");
1552 PN->insertBefore(LandingBB->begin());
1553 S->replaceAllUsesWith(PN);
1554 PN->addIncoming(Builder.getInt8(-1), SuspendBB);
1555 PN->addIncoming(S, ResumeBB);
1556
1557 ++SuspendIndex;
1558 }
1559
1560 Builder.SetInsertPoint(UnreachBB);
1561 Builder.CreateUnreachable();
1562
1563 Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
1564 }
1565
1566 // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
1567   static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
1568 Function *DestroyFn, Function *CleanupFn) {
1569 IRBuilder<> Builder(&*Shape.getInsertPtAfterFramePtr());
1570
1571 auto *ResumeAddr = Builder.CreateStructGEP(
1572 Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
1573 "resume.addr");
1574 Builder.CreateStore(ResumeFn, ResumeAddr);
1575
1576 Value *DestroyOrCleanupFn = DestroyFn;
1577
1578 CoroIdInst *CoroId = Shape.getSwitchCoroId();
1579 if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
1580       // If there is a CoroAlloc and it returns false (meaning we elided the
1581       // allocation), use CleanupFn instead of DestroyFn.
1582 DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
1583 }
1584
1585 auto *DestroyAddr = Builder.CreateStructGEP(
1586 Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy,
1587 "destroy.addr");
1588 Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
1589 }
1590
1591   // Create a global constant array containing pointers to the provided functions
1592   // and set the Info argument of coro.id to point at this constant. Example:
1593 //
1594 // @f.resumers = internal constant [2 x void(%f.frame*)*]
1595 // [void(%f.frame*)* @f.resume, void(%f.frame*)*
1596 // @f.destroy]
1597 // define void @f() {
1598 // ...
1599 // call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
1600 // i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to
1601 // i8*))
1602 //
1603 // Assumes that all the functions have the same signature.
1604   static void setCoroInfo(Function &F, coro::Shape &Shape,
1605 ArrayRef<Function *> Fns) {
1606 // This only works under the switch-lowering ABI because coro elision
1607 // only works on the switch-lowering ABI.
1608 SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
1609 assert(!Args.empty());
1610 Function *Part = *Fns.begin();
1611 Module *M = Part->getParent();
1612 auto *ArrTy = ArrayType::get(Part->getType(), Args.size());
1613
1614 auto *ConstVal = ConstantArray::get(ArrTy, Args);
1615 auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
1616 GlobalVariable::PrivateLinkage, ConstVal,
1617 F.getName() + Twine(".resumers"));
1618
1619 // Update coro.begin instruction to refer to this constant.
1620 LLVMContext &C = F.getContext();
1621 auto *BC = ConstantExpr::getPointerCast(GV, PointerType::getUnqual(C));
1622 Shape.getSwitchCoroId()->setInfo(BC);
1623 }
1624 };
1625
1626 } // namespace
1627
1628 static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
1629 Value *Continuation) {
1630 auto *ResumeIntrinsic = Suspend->getResumeFunction();
1631 auto &Context = Suspend->getParent()->getParent()->getContext();
1632 auto *Int8PtrTy = PointerType::getUnqual(Context);
1633
1634 IRBuilder<> Builder(ResumeIntrinsic);
1635 auto *Val = Builder.CreateBitOrPointerCast(Continuation, Int8PtrTy);
1636 ResumeIntrinsic->replaceAllUsesWith(Val);
1637 ResumeIntrinsic->eraseFromParent();
1638 Suspend->setOperand(CoroSuspendAsyncInst::ResumeFunctionArg,
1639 UndefValue::get(Int8PtrTy));
1640 }
1641
1642 /// Coerce the arguments in \p FnArgs according to \p FnTy, appending to \p CallArgs.
1643 static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
1644 ArrayRef<Value *> FnArgs,
1645 SmallVectorImpl<Value *> &CallArgs) {
1646 size_t ArgIdx = 0;
1647 for (auto *paramTy : FnTy->params()) {
1648 assert(ArgIdx < FnArgs.size());
1649 if (paramTy != FnArgs[ArgIdx]->getType())
1650 CallArgs.push_back(
1651 Builder.CreateBitOrPointerCast(FnArgs[ArgIdx], paramTy));
1652 else
1653 CallArgs.push_back(FnArgs[ArgIdx]);
1654 ++ArgIdx;
1655 }
1656 }
1657
1658 CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
1659 TargetTransformInfo &TTI,
1660 ArrayRef<Value *> Arguments,
1661 IRBuilder<> &Builder) {
1662 auto *FnTy = MustTailCallFn->getFunctionType();
1663   // Coerce the arguments: LLVM optimizations seem to ignore the types in
1664   // vararg functions and throw away casts in optimized mode.
1665 SmallVector<Value *, 8> CallArgs;
1666 coerceArguments(Builder, FnTy, Arguments, CallArgs);
1667
1668 auto *TailCall = Builder.CreateCall(FnTy, MustTailCallFn, CallArgs);
1669   // Only mark the call as musttail on targets that support tail calls for it.
1670 if (TTI.supportsTailCallFor(TailCall)) {
1671 TailCall->setTailCallKind(CallInst::TCK_MustTail);
1672 }
1673 TailCall->setDebugLoc(Loc);
1674 TailCall->setCallingConv(MustTailCallFn->getCallingConv());
1675 return TailCall;
1676 }
1677
1678 static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
1679 SmallVectorImpl<Function *> &Clones,
1680 TargetTransformInfo &TTI) {
1681 assert(Shape.ABI == coro::ABI::Async);
1682 assert(Clones.empty());
1683 // Reset various things that the optimizer might have decided it
1684 // "knows" about the coroutine function due to not seeing a return.
1685 F.removeFnAttr(Attribute::NoReturn);
1686 F.removeRetAttr(Attribute::NoAlias);
1687 F.removeRetAttr(Attribute::NonNull);
1688
1689 auto &Context = F.getContext();
1690 auto *Int8PtrTy = PointerType::getUnqual(Context);
1691
1692 auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId());
1693 IRBuilder<> Builder(Id);
1694
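  // In the async lowering the frame is carved out of the caller-provided async
  // context: take the storage pointer from llvm.coro.id.async and offset it by
  // the frame offset recorded in the async lowering info.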
1695 auto *FramePtr = Id->getStorage();
1696 FramePtr = Builder.CreateBitOrPointerCast(FramePtr, Int8PtrTy);
1697 FramePtr = Builder.CreateConstInBoundsGEP1_32(
1698 Type::getInt8Ty(Context), FramePtr, Shape.AsyncLowering.FrameOffset,
1699 "async.ctx.frameptr");
1700
1701 // Map all uses of llvm.coro.begin to the allocated frame pointer.
1702 {
1703 // Make sure we don't invalidate Shape.FramePtr.
1704 TrackingVH<Value> Handle(Shape.FramePtr);
1705 Shape.CoroBegin->replaceAllUsesWith(FramePtr);
1706 Shape.FramePtr = Handle.getValPtr();
1707 }
1708
1709 // Create all the functions in order after the main function.
1710 auto NextF = std::next(F.getIterator());
1711
1712 // Create a continuation function for each of the suspend points.
1713 Clones.reserve(Shape.CoroSuspends.size());
1714 for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
1715 auto *Suspend = cast<CoroSuspendAsyncInst>(Shape.CoroSuspends[Idx]);
1716
1717 // Create the clone declaration.
1718 auto ResumeNameSuffix = ".resume.";
1719 auto ProjectionFunctionName =
1720 Suspend->getAsyncContextProjectionFunction()->getName();
1721 bool UseSwiftMangling = false;
1722 if (ProjectionFunctionName == "__swift_async_resume_project_context") {
1723 ResumeNameSuffix = "TQ";
1724 UseSwiftMangling = true;
1725 } else if (ProjectionFunctionName == "__swift_async_resume_get_context") {
1726 ResumeNameSuffix = "TY";
1727 UseSwiftMangling = true;
1728 }
1729 auto *Continuation = createCloneDeclaration(
1730 F, Shape,
1731 UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
1732 : ResumeNameSuffix + Twine(Idx),
1733 NextF, Suspend);
1734 Clones.push_back(Continuation);
1735
1736 // Insert a branch to a new return block immediately before the suspend
1737 // point.
1738 auto *SuspendBB = Suspend->getParent();
1739 auto *NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
1740 auto *Branch = cast<BranchInst>(SuspendBB->getTerminator());
1741
1742 // Place it before the first suspend.
1743 auto *ReturnBB =
1744 BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
1745 Branch->setSuccessor(0, ReturnBB);
1746
1747 IRBuilder<> Builder(ReturnBB);
1748
1749 // Insert the call to the tail call function and inline it.
1750 auto *Fn = Suspend->getMustTailCallFunction();
1751 SmallVector<Value *, 8> Args(Suspend->args());
1752 auto FnArgs = ArrayRef<Value *>(Args).drop_front(
1753 CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
1754 auto *TailCall = coro::createMustTailCall(Suspend->getDebugLoc(), Fn, TTI,
1755 FnArgs, Builder);
1756 Builder.CreateRetVoid();
1757 InlineFunctionInfo FnInfo;
1758 (void)InlineFunction(*TailCall, FnInfo);
1759
1760     // Replace the llvm.coro.async.resume intrinsic call.
1761 replaceAsyncResumeFunction(Suspend, Continuation);
1762 }
1763
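  // All continuation declarations now exist, so clone the function body into
  // each of them, one clone per suspend point.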
1764 assert(Clones.size() == Shape.CoroSuspends.size());
1765 for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
1766 auto *Suspend = Shape.CoroSuspends[Idx];
1767 auto *Clone = Clones[Idx];
1768
1769 CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend, TTI).create();
1770 }
1771 }
1772
1773 static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
1774 SmallVectorImpl<Function *> &Clones,
1775 TargetTransformInfo &TTI) {
1776 assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
1777 assert(Clones.empty());
1778
1779 // Reset various things that the optimizer might have decided it
1780 // "knows" about the coroutine function due to not seeing a return.
1781 F.removeFnAttr(Attribute::NoReturn);
1782 F.removeRetAttr(Attribute::NoAlias);
1783 F.removeRetAttr(Attribute::NonNull);
1784
1785 // Allocate the frame.
1786 auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
1787 Value *RawFramePtr;
1788 if (Shape.RetconLowering.IsFrameInlineInStorage) {
1789 RawFramePtr = Id->getStorage();
1790 } else {
1791 IRBuilder<> Builder(Id);
1792
1793 // Determine the size of the frame.
1794 const DataLayout &DL = F.getDataLayout();
1795 auto Size = DL.getTypeAllocSize(Shape.FrameTy);
1796
1797 // Allocate. We don't need to update the call graph node because we're
1798 // going to recompute it from scratch after splitting.
1799 // FIXME: pass the required alignment
1800 RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr);
1801 RawFramePtr =
1802 Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType());
1803
1804 // Stash the allocated frame pointer in the continuation storage.
1805 Builder.CreateStore(RawFramePtr, Id->getStorage());
1806 }
1807
1808 // Map all uses of llvm.coro.begin to the allocated frame pointer.
1809 {
1810 // Make sure we don't invalidate Shape.FramePtr.
1811 TrackingVH<Value> Handle(Shape.FramePtr);
1812 Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
1813 Shape.FramePtr = Handle.getValPtr();
1814 }
1815
1816 // Create a unique return block.
1817 BasicBlock *ReturnBB = nullptr;
1818 SmallVector<PHINode *, 4> ReturnPHIs;
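  // Every suspend in the retcon lowering funnels into one shared return block:
  // ReturnPHIs[0] carries the continuation function pointer and the remaining
  // PHIs carry the directly-yielded values, which are packed into the
  // function's aggregate return value when the block is first created below.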
1819
1820 // Create all the functions in order after the main function.
1821 auto NextF = std::next(F.getIterator());
1822
1823 // Create a continuation function for each of the suspend points.
1824 Clones.reserve(Shape.CoroSuspends.size());
1825 for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
1826 auto Suspend = cast<CoroSuspendRetconInst>(Shape.CoroSuspends[i]);
1827
1828 // Create the clone declaration.
1829 auto Continuation =
1830 createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF, nullptr);
1831 Clones.push_back(Continuation);
1832
1833 // Insert a branch to the unified return block immediately before
1834 // the suspend point.
1835 auto SuspendBB = Suspend->getParent();
1836 auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend);
1837 auto Branch = cast<BranchInst>(SuspendBB->getTerminator());
1838
1839 // Create the unified return block.
1840 if (!ReturnBB) {
1841 // Place it before the first suspend.
1842 ReturnBB =
1843 BasicBlock::Create(F.getContext(), "coro.return", &F, NewSuspendBB);
1844 Shape.RetconLowering.ReturnBlock = ReturnBB;
1845
1846 IRBuilder<> Builder(ReturnBB);
1847
1848 // Create PHIs for all the return values.
1849 assert(ReturnPHIs.empty());
1850
1851 // First, the continuation.
1852 ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(),
1853 Shape.CoroSuspends.size()));
1854
1855 // Next, all the directly-yielded values.
1856 for (auto *ResultTy : Shape.getRetconResultTypes())
1857 ReturnPHIs.push_back(
1858 Builder.CreatePHI(ResultTy, Shape.CoroSuspends.size()));
1859
1860 // Build the return value.
1861 auto RetTy = F.getReturnType();
1862
1863 // Cast the continuation value if necessary.
1864 // We can't rely on the types matching up because that type would
1865 // have to be infinite.
1866 auto CastedContinuationTy =
1867 (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0));
1868 auto *CastedContinuation =
1869 Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy);
1870
1871 Value *RetV;
1872 if (ReturnPHIs.size() == 1) {
1873 RetV = CastedContinuation;
1874 } else {
1875 RetV = PoisonValue::get(RetTy);
1876 RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
1877 for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
1878 RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
1879 }
1880
1881 Builder.CreateRet(RetV);
1882 }
1883
1884 // Branch to the return block.
1885 Branch->setSuccessor(0, ReturnBB);
1886 ReturnPHIs[0]->addIncoming(Continuation, SuspendBB);
1887 size_t NextPHIIndex = 1;
1888 for (auto &VUse : Suspend->value_operands())
1889 ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB);
1890 assert(NextPHIIndex == ReturnPHIs.size());
1891 }
1892
1893 assert(Clones.size() == Shape.CoroSuspends.size());
1894 for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
1895 auto Suspend = Shape.CoroSuspends[i];
1896 auto Clone = Clones[i];
1897
1898 CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend, TTI).create();
1899 }
1900 }
1901
1902 namespace {
1903 class PrettyStackTraceFunction : public PrettyStackTraceEntry {
1904 Function &F;
1905
1906 public:
1907   PrettyStackTraceFunction(Function &F) : F(F) {}
1908   void print(raw_ostream &OS) const override {
1909 OS << "While splitting coroutine ";
1910 F.printAsOperand(OS, /*print type*/ false, F.getParent());
1911 OS << "\n";
1912 }
1913 };
1914 } // namespace
1915
1916 static coro::Shape
1917 splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
1918 TargetTransformInfo &TTI, bool OptimizeFrame,
1919 std::function<bool(Instruction &)> MaterializableCallback) {
1920 PrettyStackTraceFunction prettyStackTrace(F);
1921
1922   // The suspend-crossing algorithm in buildCoroutineFrame gets tripped
1923   // up by uses in unreachable blocks, so remove them as a first pass.
1924 removeUnreachableBlocks(F);
1925
1926 coro::Shape Shape(F, OptimizeFrame);
1927 if (!Shape.CoroBegin)
1928 return Shape;
1929
1930 lowerAwaitSuspends(F, Shape);
1931
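  // Fold away suspend points that are trivially followed by a resume or
  // destroy of the same coroutine, then lay out the coroutine frame and
  // rewrite values that live across suspends to be spilled into it.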
1932 simplifySuspendPoints(Shape);
1933 buildCoroutineFrame(F, Shape, TTI, MaterializableCallback);
1934 replaceFrameSizeAndAlignment(Shape);
1935
1936   // If there are no suspend points, no split is required; just remove
1937   // the allocation and deallocation blocks, which are not needed.
1938 if (Shape.CoroSuspends.empty()) {
1939 handleNoSuspendCoroutine(Shape);
1940 } else {
1941 switch (Shape.ABI) {
1942 case coro::ABI::Switch:
1943 SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
1944 break;
1945 case coro::ABI::Async:
1946 splitAsyncCoroutine(F, Shape, Clones, TTI);
1947 break;
1948 case coro::ABI::Retcon:
1949 case coro::ABI::RetconOnce:
1950 splitRetconCoroutine(F, Shape, Clones, TTI);
1951 break;
1952 }
1953 }
1954
1955 // Replace all the swifterror operations in the original function.
1956 // This invalidates SwiftErrorOps in the Shape.
1957 replaceSwiftErrorOps(F, Shape, nullptr);
1958
1959 // Salvage debug intrinsics that point into the coroutine frame in the
1960 // original function. The Cloner has already salvaged debug info in the new
1961 // coroutine funclets.
1962 SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
1963 auto [DbgInsts, DbgVariableRecords] = collectDbgVariableIntrinsics(F);
1964 for (auto *DDI : DbgInsts)
1965 coro::salvageDebugInfo(ArgToAllocaMap, *DDI, Shape.OptimizeFrame,
1966 false /*UseEntryValue*/);
1967 for (DbgVariableRecord *DVR : DbgVariableRecords)
1968 coro::salvageDebugInfo(ArgToAllocaMap, *DVR, Shape.OptimizeFrame,
1969 false /*UseEntryValue*/);
1970 return Shape;
1971 }
1972
1973 /// Remove calls to llvm.coro.end in the original function.
1974 static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) {
1975 if (Shape.ABI != coro::ABI::Switch) {
1976 for (auto *End : Shape.CoroEnds) {
1977 replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
1978 }
1979 } else {
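    // In the switch lowering, each llvm.coro.end in the ramp function simply
    // folds to false and the call itself is deleted.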
1980 for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
1981 auto &Context = End->getContext();
1982 End->replaceAllUsesWith(ConstantInt::getFalse(Context));
1983 End->eraseFromParent();
1984 }
1985 }
1986 }
1987
1988 static void updateCallGraphAfterCoroutineSplit(
1989 LazyCallGraph::Node &N, const coro::Shape &Shape,
1990 const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
1991 LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
1992 FunctionAnalysisManager &FAM) {
1993
1994 if (!Clones.empty()) {
1995 switch (Shape.ABI) {
1996 case coro::ABI::Switch:
1997 // Each clone in the Switch lowering is independent of the other clones.
1998 // Let the LazyCallGraph know about each one separately.
1999 for (Function *Clone : Clones)
2000 CG.addSplitFunction(N.getFunction(), *Clone);
2001 break;
2002 case coro::ABI::Async:
2003 case coro::ABI::Retcon:
2004 case coro::ABI::RetconOnce:
2005       // Each clone in the Async/Retcon lowering references the other clones.
2006 // Let the LazyCallGraph know about all of them at once.
2007 if (!Clones.empty())
2008 CG.addSplitRefRecursiveFunctions(N.getFunction(), Clones);
2009 break;
2010 }
2011
2012 // Let the CGSCC infra handle the changes to the original function.
2013 updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM);
2014 }
2015
2016 // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
2017 // to the split functions.
2018 postSplitCleanup(N.getFunction());
2019 updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM);
2020 }
2021
2022 /// Replace a call to llvm.coro.prepare.retcon.
2023 static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
2024 LazyCallGraph::SCC &C) {
2025 auto CastFn = Prepare->getArgOperand(0); // as an i8*
2026 auto Fn = CastFn->stripPointerCasts(); // as its original type
2027
2028 // Attempt to peephole this pattern:
2029 // %0 = bitcast [[TYPE]] @some_function to i8*
2030 // %1 = call @llvm.coro.prepare.retcon(i8* %0)
2031 // %2 = bitcast %1 to [[TYPE]]
2032 // ==>
2033 // %2 = @some_function
2034 for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
2035 // Look for bitcasts back to the original function type.
2036 auto *Cast = dyn_cast<BitCastInst>(U.getUser());
2037 if (!Cast || Cast->getType() != Fn->getType())
2038 continue;
2039
2040 // Replace and remove the cast.
2041 Cast->replaceAllUsesWith(Fn);
2042 Cast->eraseFromParent();
2043 }
2044
2045 // Replace any remaining uses with the function as an i8*.
2046 // This can never directly be a callee, so we don't need to update CG.
2047 Prepare->replaceAllUsesWith(CastFn);
2048 Prepare->eraseFromParent();
2049
2050 // Kill dead bitcasts.
2051 while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
2052 if (!Cast->use_empty())
2053 break;
2054 CastFn = Cast->getOperand(0);
2055 Cast->eraseFromParent();
2056 }
2057 }
2058
2059 static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
2060 LazyCallGraph::SCC &C) {
2061 bool Changed = false;
2062 for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
2063 // Intrinsics can only be used in calls.
2064 auto *Prepare = cast<CallInst>(P.getUser());
2065 replacePrepare(Prepare, CG, C);
2066 Changed = true;
2067 }
2068
2069 return Changed;
2070 }
2071
2072 static void addPrepareFunction(const Module &M,
2073 SmallVectorImpl<Function *> &Fns,
2074 StringRef Name) {
2075 auto *PrepareFn = M.getFunction(Name);
2076 if (PrepareFn && !PrepareFn->use_empty())
2077 Fns.push_back(PrepareFn);
2078 }
2079
2080 CoroSplitPass::CoroSplitPass(bool OptimizeFrame)
2081 : MaterializableCallback(coro::defaultMaterializable),
2082 OptimizeFrame(OptimizeFrame) {}
2083
2084 PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
2085 CGSCCAnalysisManager &AM,
2086 LazyCallGraph &CG, CGSCCUpdateResult &UR) {
2087 // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
2088 // non-zero number of nodes, so we assume that here and grab the first
2089 // node's function's module.
2090 Module &M = *C.begin()->getFunction().getParent();
2091 auto &FAM =
2092 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
2093
2094 // Check for uses of llvm.coro.prepare.retcon/async.
2095 SmallVector<Function *, 2> PrepareFns;
2096 addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
2097 addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");
2098
2099 // Find coroutines for processing.
2100 SmallVector<LazyCallGraph::Node *> Coroutines;
2101 for (LazyCallGraph::Node &N : C)
2102 if (N.getFunction().isPresplitCoroutine())
2103 Coroutines.push_back(&N);
2104
2105 if (Coroutines.empty() && PrepareFns.empty())
2106 return PreservedAnalyses::all();
2107
2108 // Split all the coroutines.
2109 for (LazyCallGraph::Node *N : Coroutines) {
2110 Function &F = N->getFunction();
2111 LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
2112                       << "'\n");
2113 F.setSplittedCoroutine();
2114
2115 SmallVector<Function *, 4> Clones;
2116 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2117 const coro::Shape Shape =
2118 splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F),
2119 OptimizeFrame, MaterializableCallback);
2120 removeCoroEndsFromRampFunction(Shape);
2121 updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
2122
2123 ORE.emit([&]() {
2124 return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F)
2125 << "Split '" << ore::NV("function", F.getName())
2126 << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize)
2127 << ", align=" << ore::NV("align", Shape.FrameAlign.value()) << ")";
2128 });
2129
2130 if (!Shape.CoroSuspends.empty()) {
2131 // Run the CGSCC pipeline on the original and newly split functions.
2132 UR.CWorklist.insert(&C);
2133 for (Function *Clone : Clones)
2134 UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
2135 }
2136 }
2137
2138 for (auto *PrepareFn : PrepareFns) {
2139 replaceAllPrepares(PrepareFn, CG, C);
2140 }
2141
2142 return PreservedAnalyses::none();
2143 }
2144