1 //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// This file contains classes used to discover if for a particular value
// there is a use-to-definition path that crosses a suspend block.
10 //
11 // Using the information discovered we form a Coroutine Frame structure to
12 // contain those values. All uses of those values are replaced with appropriate
13 // GEP + load from the coroutine frame. At the point of the definition we spill
14 // the value into the coroutine frame.
15 //===----------------------------------------------------------------------===//
16
17 #include "CoroInternal.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/PostOrderIterator.h"
20 #include "llvm/ADT/ScopeExit.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Analysis/CFG.h"
23 #include "llvm/Analysis/PtrUseVisitor.h"
24 #include "llvm/Analysis/StackLifetime.h"
25 #include "llvm/Config/llvm-config.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/DIBuilder.h"
28 #include "llvm/IR/DebugInfo.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InstIterator.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Support/OptimizedStructLayout.h"
36 #include "llvm/Support/circular_raw_ostream.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
39 #include "llvm/Transforms/Utils/Local.h"
40 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
41 #include <algorithm>
42 #include <deque>
43 #include <optional>
44
45 using namespace llvm;
46
47 extern cl::opt<bool> UseNewDbgInfoFormat;
48
49 // The "coro-suspend-crossing" flag is very noisy. There is another debug type,
50 // "coro-frame", which results in leaner debug spew.
51 #define DEBUG_TYPE "coro-suspend-crossing"
52
53 enum { SmallVectorThreshold = 32 };
54
55 // Provides two way mapping between the blocks and numbers.
56 namespace {
57 class BlockToIndexMapping {
58 SmallVector<BasicBlock *, SmallVectorThreshold> V;
59
60 public:
size() const61 size_t size() const { return V.size(); }
62
BlockToIndexMapping(Function & F)63 BlockToIndexMapping(Function &F) {
64 for (BasicBlock &BB : F)
65 V.push_back(&BB);
66 llvm::sort(V);
67 }
68
blockToIndex(BasicBlock const * BB) const69 size_t blockToIndex(BasicBlock const *BB) const {
70 auto *I = llvm::lower_bound(V, BB);
71 assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
72 return I - V.begin();
73 }
74
indexToBlock(unsigned Index) const75 BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
76 };
77 } // end anonymous namespace
78
79 // The SuspendCrossingInfo maintains data that allows to answer a question
80 // whether given two BasicBlocks A and B there is a path from A to B that
81 // passes through a suspend point.
82 //
83 // For every basic block 'i' it maintains a BlockData that consists of:
84 // Consumes: a bit vector which contains a set of indices of blocks that can
85 // reach block 'i'. A block can trivially reach itself.
86 // Kills: a bit vector which contains a set of indices of blocks that can
87 // reach block 'i' but there is a path crossing a suspend point
88 // not repeating 'i' (path to 'i' without cycles containing 'i').
89 // Suspend: a boolean indicating whether block 'i' contains a suspend point.
90 // End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
91 // KillLoop: There is a path from 'i' to 'i' not otherwise repeating 'i' that
92 // crosses a suspend point.
93 //
94 namespace {
class SuspendCrossingInfo {
  BlockToIndexMapping Mapping;

  // Per-block dataflow facts; see the comment block above for the precise
  // meaning of Consumes/Kills/Suspend/End/KillLoop.
  struct BlockData {
    BitVector Consumes;
    BitVector Kills;
    bool Suspend = false;
    bool End = false;
    bool KillLoop = false;
    // Whether the last computeBlockData() iteration changed this block's
    // bitsets; lets the fixpoint loop skip blocks with stable inputs.
    bool Changed = false;
  };
  SmallVector<BlockData, SmallVectorThreshold> Block;

  // Return the predecessors of the basic block corresponding to \p BD.
  // The block index is recovered from BD's position within the Block array.
  iterator_range<pred_iterator> predecessors(BlockData const &BD) const {
    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
    return llvm::predecessors(BB);
  }

  // Access the BlockData record for a basic block.
  BlockData &getBlockData(BasicBlock *BB) {
    return Block[Mapping.blockToIndex(BB)];
  }

  /// Compute the BlockData for the current function in one iteration.
  /// Initialize - Whether this is the first iteration; the initial pass can
  /// skip the "did any predecessor change" test because every block starts
  /// out marked Changed.
  /// Returns whether the BlockData changes in this iteration.
  template <bool Initialize = false>
  bool computeBlockData(const ReversePostOrderTraversal<Function *> &RPOT);

public:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void dump() const;
  void dump(StringRef Label, BitVector const &BV) const;
#endif

  SuspendCrossingInfo(Function &F, coro::Shape &Shape);

  /// Returns true if there is a path from \p From to \p To crossing a suspend
  /// point without crossing \p From a 2nd time.
  bool hasPathCrossingSuspendPoint(BasicBlock *From, BasicBlock *To) const {
    size_t const FromIndex = Mapping.blockToIndex(From);
    size_t const ToIndex = Mapping.blockToIndex(To);
    bool const Result = Block[ToIndex].Kills[FromIndex];
    LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
                      << " answer is " << Result << "\n");
    return Result;
  }

  /// Returns true if there is a path from \p From to \p To crossing a suspend
  /// point without crossing \p From a 2nd time. If \p From is the same as
  /// \p To this will also check if there is a looping path crossing a suspend
  /// point.
  bool hasPathOrLoopCrossingSuspendPoint(BasicBlock *From,
                                         BasicBlock *To) const {
    size_t const FromIndex = Mapping.blockToIndex(From);
    size_t const ToIndex = Mapping.blockToIndex(To);
    bool Result = Block[ToIndex].Kills[FromIndex] ||
                  (From == To && Block[ToIndex].KillLoop);
    LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
                      << " answer is " << Result << " (path or loop)\n");
    return Result;
  }

  /// Returns true if the value defined in \p DefBB reaches its user \p U
  /// across a suspend point (in which case it must be spilled to the frame).
  bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
    auto *I = cast<Instruction>(U);

    // We rewrote PHINodes, so that only the ones with exactly one incoming
    // value need to be analyzed.
    if (auto *PN = dyn_cast<PHINode>(I))
      if (PN->getNumIncomingValues() > 1)
        return false;

    BasicBlock *UseBB = I->getParent();

    // As a special case, treat uses by an llvm.coro.suspend.retcon or an
    // llvm.coro.suspend.async as if they were uses in the suspend's single
    // predecessor: the uses conceptually occur before the suspend.
    if (isa<CoroSuspendRetconInst>(I) || isa<CoroSuspendAsyncInst>(I)) {
      UseBB = UseBB->getSinglePredecessor();
      assert(UseBB && "should have split coro.suspend into its own block");
    }

    return hasPathCrossingSuspendPoint(DefBB, UseBB);
  }

  // Arguments are conceptually defined in the entry block.
  bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
    return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
  }

  bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
    auto *DefBB = I.getParent();

    // As a special case, treat values produced by an llvm.coro.suspend.*
    // as if they were defined in the single successor: the uses
    // conceptually occur after the suspend.
    if (isa<AnyCoroSuspendInst>(I)) {
      DefBB = DefBB->getSingleSuccessor();
      assert(DefBB && "should have split coro.suspend into its own block");
    }

    return isDefinitionAcrossSuspend(DefBB, U);
  }

  // Dispatch on the kind of definition; only arguments and instructions are
  // expected here.
  bool isDefinitionAcrossSuspend(Value &V, User *U) const {
    if (auto *Arg = dyn_cast<Argument>(&V))
      return isDefinitionAcrossSuspend(*Arg, U);
    if (auto *Inst = dyn_cast<Instruction>(&V))
      return isDefinitionAcrossSuspend(*Inst, U);

    llvm_unreachable(
        "Coroutine could only collect Argument and Instruction now.");
  }
};
207 } // end anonymous namespace
208
209 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump(StringRef Label,BitVector const & BV) const210 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
211 BitVector const &BV) const {
212 dbgs() << Label << ":";
213 for (size_t I = 0, N = BV.size(); I < N; ++I)
214 if (BV[I])
215 dbgs() << " " << Mapping.indexToBlock(I)->getName();
216 dbgs() << "\n";
217 }
218
dump() const219 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
220 for (size_t I = 0, N = Block.size(); I < N; ++I) {
221 BasicBlock *const B = Mapping.indexToBlock(I);
222 dbgs() << B->getName() << ":\n";
223 dump(" Consumes", Block[I].Consumes);
224 dump(" Kills", Block[I].Kills);
225 }
226 dbgs() << "\n";
227 }
228 #endif
229
template <bool Initialize>
bool SuspendCrossingInfo::computeBlockData(
    const ReversePostOrderTraversal<Function *> &RPOT) {
  bool Changed = false;

  for (const BasicBlock *BB : RPOT) {
    auto BBNo = Mapping.blockToIndex(BB);
    auto &B = Block[BBNo];

    // We don't need to check the predecessors on the initializing pass:
    // the constructor marks every block Changed, so nothing could be skipped.
    if constexpr (!Initialize)
      // If none of the predecessors of the current block changed in the
      // previous iteration, the BlockData for the current block cannot
      // change either.
      if (all_of(predecessors(B), [this](BasicBlock *BB) {
            return !Block[Mapping.blockToIndex(BB)].Changed;
          })) {
        B.Changed = false;
        continue;
      }

    // Saved Consumes and Kills bitsets so that it is easy to see
    // if anything changed after propagation.
    auto SavedConsumes = B.Consumes;
    auto SavedKills = B.Kills;

    for (BasicBlock *PI : predecessors(B)) {
      auto PrevNo = Mapping.blockToIndex(PI);
      auto &P = Block[PrevNo];

      // Propagate Kills and Consumes from predecessors into B.
      B.Consumes |= P.Consumes;
      B.Kills |= P.Kills;

      // If block P is a suspend block, it should propagate kills into block
      // B for every block P consumes.
      if (P.Suspend)
        B.Kills |= P.Consumes;
    }

    if (B.Suspend) {
      // If block B is a suspend block, it should kill all of the blocks it
      // consumes.
      B.Kills |= B.Consumes;
    } else if (B.End) {
      // If block B is an end block, it should not propagate kills as the
      // blocks following coro.end() are reached during initial invocation
      // of the coroutine while all the data are still available on the
      // stack or in the registers.
      B.Kills.reset();
    } else {
      // This is reached when block B is neither a suspend nor a coro.end
      // block; make sure B is not in its own kill set (recording in KillLoop
      // whether it was, i.e. a suspend-crossing cycle through B exists).
      B.KillLoop |= B.Kills[BBNo];
      B.Kills.reset(BBNo);
    }

    // On the initializing pass leave the Changed flags as the constructor
    // set them; afterwards record whether this iteration changed B.
    if constexpr (!Initialize) {
      B.Changed = (B.Kills != SavedKills) || (B.Consumes != SavedConsumes);
      Changed |= B.Changed;
    }
  }

  return Changed;
}
294
SuspendCrossingInfo(Function & F,coro::Shape & Shape)295 SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
296 : Mapping(F) {
297 const size_t N = Mapping.size();
298 Block.resize(N);
299
300 // Initialize every block so that it consumes itself
301 for (size_t I = 0; I < N; ++I) {
302 auto &B = Block[I];
303 B.Consumes.resize(N);
304 B.Kills.resize(N);
305 B.Consumes.set(I);
306 B.Changed = true;
307 }
308
309 // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
310 // the code beyond coro.end is reachable during initial invocation of the
311 // coroutine.
312 for (auto *CE : Shape.CoroEnds)
313 getBlockData(CE->getParent()).End = true;
314
315 // Mark all suspend blocks and indicate that they kill everything they
316 // consume. Note, that crossing coro.save also requires a spill, as any code
317 // between coro.save and coro.suspend may resume the coroutine and all of the
318 // state needs to be saved by that time.
319 auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
320 BasicBlock *SuspendBlock = BarrierInst->getParent();
321 auto &B = getBlockData(SuspendBlock);
322 B.Suspend = true;
323 B.Kills |= B.Consumes;
324 };
325 for (auto *CSI : Shape.CoroSuspends) {
326 markSuspendBlock(CSI);
327 if (auto *Save = CSI->getCoroSave())
328 markSuspendBlock(Save);
329 }
330
331 // It is considered to be faster to use RPO traversal for forward-edges
332 // dataflow analysis.
333 ReversePostOrderTraversal<Function *> RPOT(&F);
334 computeBlockData</*Initialize=*/true>(RPOT);
335 while (computeBlockData</*Initialize*/ false>(RPOT))
336 ;
337
338 LLVM_DEBUG(dump());
339 }
340
341 namespace {
342
343 // RematGraph is used to construct a DAG for rematerializable instructions
344 // When the constructor is invoked with a candidate instruction (which is
345 // materializable) it builds a DAG of materializable instructions from that
346 // point.
347 // Typically, for each instruction identified as re-materializable across a
348 // suspend point, a RematGraph will be created.
349 struct RematGraph {
350 // Each RematNode in the graph contains the edges to instructions providing
351 // operands in the current node.
352 struct RematNode {
353 Instruction *Node;
354 SmallVector<RematNode *> Operands;
355 RematNode() = default;
RematNode__anon408e6fe60611::RematGraph::RematNode356 RematNode(Instruction *V) : Node(V) {}
357 };
358
359 RematNode *EntryNode;
360 using RematNodeMap =
361 SmallMapVector<Instruction *, std::unique_ptr<RematNode>, 8>;
362 RematNodeMap Remats;
363 const std::function<bool(Instruction &)> &MaterializableCallback;
364 SuspendCrossingInfo &Checker;
365
RematGraph__anon408e6fe60611::RematGraph366 RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback,
367 Instruction *I, SuspendCrossingInfo &Checker)
368 : MaterializableCallback(MaterializableCallback), Checker(Checker) {
369 std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(I);
370 EntryNode = FirstNode.get();
371 std::deque<std::unique_ptr<RematNode>> WorkList;
372 addNode(std::move(FirstNode), WorkList, cast<User>(I));
373 while (WorkList.size()) {
374 std::unique_ptr<RematNode> N = std::move(WorkList.front());
375 WorkList.pop_front();
376 addNode(std::move(N), WorkList, cast<User>(I));
377 }
378 }
379
addNode__anon408e6fe60611::RematGraph380 void addNode(std::unique_ptr<RematNode> NUPtr,
381 std::deque<std::unique_ptr<RematNode>> &WorkList,
382 User *FirstUse) {
383 RematNode *N = NUPtr.get();
384 if (Remats.count(N->Node))
385 return;
386
387 // We haven't see this node yet - add to the list
388 Remats[N->Node] = std::move(NUPtr);
389 for (auto &Def : N->Node->operands()) {
390 Instruction *D = dyn_cast<Instruction>(Def.get());
391 if (!D || !MaterializableCallback(*D) ||
392 !Checker.isDefinitionAcrossSuspend(*D, FirstUse))
393 continue;
394
395 if (Remats.count(D)) {
396 // Already have this in the graph
397 N->Operands.push_back(Remats[D].get());
398 continue;
399 }
400
401 bool NoMatch = true;
402 for (auto &I : WorkList) {
403 if (I->Node == D) {
404 NoMatch = false;
405 N->Operands.push_back(I.get());
406 break;
407 }
408 }
409 if (NoMatch) {
410 // Create a new node
411 std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(D);
412 N->Operands.push_back(ChildNode.get());
413 WorkList.push_back(std::move(ChildNode));
414 }
415 }
416 }
417
418 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump__anon408e6fe60611::RematGraph419 void dump() const {
420 dbgs() << "Entry (";
421 if (EntryNode->Node->getParent()->hasName())
422 dbgs() << EntryNode->Node->getParent()->getName();
423 else
424 EntryNode->Node->getParent()->printAsOperand(dbgs(), false);
425 dbgs() << ") : " << *EntryNode->Node << "\n";
426 for (auto &E : Remats) {
427 dbgs() << *(E.first) << "\n";
428 for (RematNode *U : E.second->Operands)
429 dbgs() << " " << *U->Node << "\n";
430 }
431 }
432 #endif
433 };
434 } // end anonymous namespace
435
436 namespace llvm {
437
// Adapt RematGraph to LLVM's generic graph interface so the standard graph
// iterators (e.g. post-order traversal) can walk it. The entry node is the
// instruction being rematerialized; children are its operand nodes.
template <> struct GraphTraits<RematGraph *> {
  using NodeRef = RematGraph::RematNode *;
  using ChildIteratorType = RematGraph::RematNode **;

  static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; }
  static ChildIteratorType child_begin(NodeRef N) {
    return N->Operands.begin();
  }
  static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
};
448
449 } // end namespace llvm
450
451 #undef DEBUG_TYPE // "coro-suspend-crossing"
452 #define DEBUG_TYPE "coro-frame"
453
454 namespace {
455 class FrameTypeBuilder;
456 // Mapping from the to-be-spilled value to all the users that need reload.
457 using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>;
// Bookkeeping for one alloca that must live in the coroutine frame.
struct AllocaInfo {
  AllocaInst *Alloca;
  // Instructions that alias this alloca, each mapped to its offset from the
  // alloca base when that offset is a known constant (std::nullopt
  // otherwise -- TODO confirm against the code that populates this map).
  DenseMap<Instruction *, std::optional<APInt>> Aliases;
  // Presumably: whether a write through this alloca may occur before
  // coro.begin (assumption from the name -- verify at the use sites).
  bool MayWriteBeforeCoroBegin;
  AllocaInfo(AllocaInst *Alloca,
             DenseMap<Instruction *, std::optional<APInt>> Aliases,
             bool MayWriteBeforeCoroBegin)
      : Alloca(Alloca), Aliases(std::move(Aliases)),
        MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {}
};
468 struct FrameDataInfo {
469 // All the values (that are not allocas) that needs to be spilled to the
470 // frame.
471 SpillInfo Spills;
472 // Allocas contains all values defined as allocas that need to live in the
473 // frame.
474 SmallVector<AllocaInfo, 8> Allocas;
475
getAllDefs__anon408e6fe60711::FrameDataInfo476 SmallVector<Value *, 8> getAllDefs() const {
477 SmallVector<Value *, 8> Defs;
478 for (const auto &P : Spills)
479 Defs.push_back(P.first);
480 for (const auto &A : Allocas)
481 Defs.push_back(A.Alloca);
482 return Defs;
483 }
484
getFieldIndex__anon408e6fe60711::FrameDataInfo485 uint32_t getFieldIndex(Value *V) const {
486 auto Itr = FieldIndexMap.find(V);
487 assert(Itr != FieldIndexMap.end() &&
488 "Value does not have a frame field index");
489 return Itr->second;
490 }
491
setFieldIndex__anon408e6fe60711::FrameDataInfo492 void setFieldIndex(Value *V, uint32_t Index) {
493 assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
494 "Cannot set the index for the same field twice.");
495 FieldIndexMap[V] = Index;
496 }
497
getAlign__anon408e6fe60711::FrameDataInfo498 Align getAlign(Value *V) const {
499 auto Iter = FieldAlignMap.find(V);
500 assert(Iter != FieldAlignMap.end());
501 return Iter->second;
502 }
503
setAlign__anon408e6fe60711::FrameDataInfo504 void setAlign(Value *V, Align AL) {
505 assert(FieldAlignMap.count(V) == 0);
506 FieldAlignMap.insert({V, AL});
507 }
508
getDynamicAlign__anon408e6fe60711::FrameDataInfo509 uint64_t getDynamicAlign(Value *V) const {
510 auto Iter = FieldDynamicAlignMap.find(V);
511 assert(Iter != FieldDynamicAlignMap.end());
512 return Iter->second;
513 }
514
setDynamicAlign__anon408e6fe60711::FrameDataInfo515 void setDynamicAlign(Value *V, uint64_t Align) {
516 assert(FieldDynamicAlignMap.count(V) == 0);
517 FieldDynamicAlignMap.insert({V, Align});
518 }
519
getOffset__anon408e6fe60711::FrameDataInfo520 uint64_t getOffset(Value *V) const {
521 auto Iter = FieldOffsetMap.find(V);
522 assert(Iter != FieldOffsetMap.end());
523 return Iter->second;
524 }
525
setOffset__anon408e6fe60711::FrameDataInfo526 void setOffset(Value *V, uint64_t Offset) {
527 assert(FieldOffsetMap.count(V) == 0);
528 FieldOffsetMap.insert({V, Offset});
529 }
530
531 // Remap the index of every field in the frame, using the final layout index.
532 void updateLayoutIndex(FrameTypeBuilder &B);
533
534 private:
535 // LayoutIndexUpdateStarted is used to avoid updating the index of any field
536 // twice by mistake.
537 bool LayoutIndexUpdateStarted = false;
538 // Map from values to their slot indexes on the frame. They will be first set
539 // with their original insertion field index. After the frame is built, their
540 // indexes will be updated into the final layout index.
541 DenseMap<Value *, uint32_t> FieldIndexMap;
542 // Map from values to their alignment on the frame. They would be set after
543 // the frame is built.
544 DenseMap<Value *, Align> FieldAlignMap;
545 DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
546 // Map from values to their offset on the frame. They would be set after
547 // the frame is built.
548 DenseMap<Value *, uint64_t> FieldOffsetMap;
549 };
550 } // namespace
551
552 #ifndef NDEBUG
dumpSpills(StringRef Title,const SpillInfo & Spills)553 static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
554 dbgs() << "------------- " << Title << "--------------\n";
555 for (const auto &E : Spills) {
556 E.first->dump();
557 dbgs() << " user: ";
558 for (auto *I : E.second)
559 I->dump();
560 }
561 }
dumpRemats(StringRef Title,const SmallMapVector<Instruction *,std::unique_ptr<RematGraph>,8> & RM)562 static void dumpRemats(
563 StringRef Title,
564 const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) {
565 dbgs() << "------------- " << Title << "--------------\n";
566 for (const auto &E : RM) {
567 E.second->dump();
568 dbgs() << "--\n";
569 }
570 }
571
dumpAllocas(const SmallVectorImpl<AllocaInfo> & Allocas)572 static void dumpAllocas(const SmallVectorImpl<AllocaInfo> &Allocas) {
573 dbgs() << "------------- Allocas --------------\n";
574 for (const auto &A : Allocas) {
575 A.Alloca->dump();
576 }
577 }
578 #endif
579
580 namespace {
581 using FieldIDType = size_t;
582 // We cannot rely solely on natural alignment of a type when building a
583 // coroutine frame and if the alignment specified on the Alloca instruction
584 // differs from the natural alignment of the alloca type we will need to insert
585 // padding.
class FrameTypeBuilder {
private:
  // One field of the coroutine frame under construction.
  struct Field {
    // Allocated size in bytes (includes DynamicAlignBuffer when present).
    uint64_t Size;
    // Fixed byte offset for header fields; FlexibleOffset until finish()
    // assigns the optimized layout for everything else.
    uint64_t Offset;
    Type *Ty;
    // Index of this field in the final struct layout; set by finish().
    FieldIDType LayoutFieldIndex;
    // Alignment requested for the field.
    Align Alignment;
    // Type (ABI) alignment used when building the struct body.
    Align TyAlignment;
    // Extra bytes reserved so the pointer can be dynamically re-aligned when
    // the requested alignment exceeds MaxFrameAlignment.
    uint64_t DynamicAlignBuffer;
  };

  const DataLayout &DL;
  LLVMContext &Context;
  // Running size of the laid-out header portion; becomes the full frame size
  // once finish() runs.
  uint64_t StructSize = 0;
  Align StructAlign;
  // Set by finish(); guards the accessors below.
  bool IsFinished = false;

  // If set, the frame itself may not be assumed aligned beyond this; fields
  // needing more get a dynamic-alignment buffer instead (see addField).
  std::optional<Align> MaxFrameAlignment;

  SmallVector<Field, 8> Fields;
  // NOTE(review): appears unused within this part of the file -- confirm
  // before relying on it.
  DenseMap<Value*, unsigned> FieldIndexByKey;

public:
  FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
                   std::optional<Align> MaxFrameAlignment)
      : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}

  /// Add a field to this structure for the storage of an `alloca`
  /// instruction.
  [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
                                              bool IsHeader = false) {
    Type *Ty = AI->getAllocatedType();

    // Make an array type if this is a static array allocation.
    if (AI->isArrayAllocation()) {
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
        Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
      else
        report_fatal_error("Coroutines cannot handle non static allocas yet");
    }

    return addField(Ty, AI->getAlign(), IsHeader);
  }

  /// We want to put the allocas whose lifetime-ranges are not overlapped
  /// into one slot of coroutine frame.
  /// Consider the example at:https://bugs.llvm.org/show_bug.cgi?id=45566
  ///
  ///     cppcoro::task<void> alternative_paths(bool cond) {
  ///         if (cond) {
  ///             big_structure a;
  ///             process(a);
  ///             co_await something();
  ///         } else {
  ///             big_structure b;
  ///             process2(b);
  ///             co_await something();
  ///         }
  ///     }
  ///
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of coroutine frame.
  ///
  /// This function use StackLifetime algorithm to partition the AllocaInsts
  /// in Spills to non-overlapped sets in order to put Alloca in the same
  /// non-overlapped set into the same slot in the Coroutine Frame. Then add
  /// field for the allocas in the same non-overlapped set by using the
  /// largest type as the field type.
  ///
  /// Side Effects: Because We sort the allocas, the order of allocas in the
  /// frame may be different with the order in the source code.
  void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
                          coro::Shape &Shape);

  /// Add a field to this structure.
  [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
                                     bool IsHeader = false,
                                     bool IsSpillOfValue = false) {
    assert(!IsFinished && "adding fields to a finished builder");
    assert(Ty && "must provide a type for a field");

    // The field size is always the alloc size of the type.
    uint64_t FieldSize = DL.getTypeAllocSize(Ty);

    // For an alloca with size=0, we don't need to add a field and they
    // can just point to any index in the frame. Use index 0.
    if (FieldSize == 0) {
      return 0;
    }

    // The field alignment might not be the type alignment, but we need
    // to remember the type alignment anyway to build the type.
    // If we are spilling values we don't need to worry about ABI alignment
    // concerns.
    Align ABIAlign = DL.getABITypeAlign(Ty);
    Align TyAlignment = ABIAlign;
    if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
      TyAlignment = *MaxFrameAlignment;
    Align FieldAlignment = MaybeFieldAlignment.value_or(TyAlignment);

    // The field alignment could be bigger than the max frame alignment; in
    // that case we request additional storage to be able to dynamically
    // align the pointer.
    uint64_t DynamicAlignBuffer = 0;
    if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
      DynamicAlignBuffer =
          offsetToAlignment(MaxFrameAlignment->value(), FieldAlignment);
      FieldAlignment = *MaxFrameAlignment;
      FieldSize = FieldSize + DynamicAlignBuffer;
    }

    // Lay out header fields immediately.
    uint64_t Offset;
    if (IsHeader) {
      Offset = alignTo(StructSize, FieldAlignment);
      StructSize = Offset + FieldSize;

      // Everything else has a flexible offset.
    } else {
      Offset = OptimizedStructLayoutField::FlexibleOffset;
    }

    Fields.push_back({FieldSize, Offset, Ty, 0, FieldAlignment, TyAlignment,
                      DynamicAlignBuffer});
    return Fields.size() - 1;
  }

  /// Finish the layout and set the body on the given type.
  void finish(StructType *Ty);

  uint64_t getStructSize() const {
    assert(IsFinished && "not yet finished!");
    return StructSize;
  }

  Align getStructAlign() const {
    assert(IsFinished && "not yet finished!");
    return StructAlign;
  }

  FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id].LayoutFieldIndex;
  }

  Field getLayoutField(FieldIDType Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id];
  }
};
737 } // namespace
738
updateLayoutIndex(FrameTypeBuilder & B)739 void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
740 auto Updater = [&](Value *I) {
741 auto Field = B.getLayoutField(getFieldIndex(I));
742 setFieldIndex(I, Field.LayoutFieldIndex);
743 setAlign(I, Field.Alignment);
744 uint64_t dynamicAlign =
745 Field.DynamicAlignBuffer
746 ? Field.DynamicAlignBuffer + Field.Alignment.value()
747 : 0;
748 setDynamicAlign(I, dynamicAlign);
749 setOffset(I, Field.Offset);
750 };
751 LayoutIndexUpdateStarted = true;
752 for (auto &S : Spills)
753 Updater(S.first);
754 for (const auto &A : Allocas)
755 Updater(A.Alloca);
756 LayoutIndexUpdateStarted = false;
757 }
758
void FrameTypeBuilder::addFieldForAllocas(const Function &F,
                                          FrameDataInfo &FrameData,
                                          coro::Shape &Shape) {
  using AllocaSetType = SmallVector<AllocaInst *, 4>;
  SmallVector<AllocaSetType, 4> NonOverlapedAllocas;

  // We need to add a field for each set of allocas at the end of this
  // function, no matter which early return below is taken. Each set shares
  // one frame slot, sized by the set's first (largest) alloca.
  auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
    for (auto AllocaList : NonOverlapedAllocas) {
      auto *LargestAI = *AllocaList.begin();
      FieldIDType Id = addFieldForAlloca(LargestAI);
      for (auto *Alloca : AllocaList)
        FrameData.setFieldIndex(Alloca, Id);
    }
  });

  // Without frame optimization, give every alloca its own slot.
  if (!Shape.OptimizeFrame) {
    for (const auto &A : FrameData.Allocas) {
      AllocaInst *Alloca = A.Alloca;
      NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
    }
    return;
  }

  // Because there are paths from the lifetime.start to coro.end
  // for each alloca, the liferanges for every alloca is overlaped
  // in the blocks who contain coro.end and the successor blocks.
  // So we choose to skip these blocks when we calculate the liferange
  // for each alloca. It should be reasonable since there shouldn't be uses
  // in these blocks and the coroutine frame shouldn't be used outside the
  // coroutine body.
  //
  // Note that the user of coro.suspend may not be SwitchInst. However, this
  // case seems too complex to handle. And it is harmless to skip these
  // patterns since it just prevents putting the allocas to live in the same
  // slot.
  DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
  for (auto *CoroSuspendInst : Shape.CoroSuspends) {
    for (auto *U : CoroSuspendInst->users()) {
      if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
        auto *SWI = const_cast<SwitchInst *>(ConstSWI);
        // Temporarily retarget the default destination (the path through
        // coro.end) to the resume path so StackLifetime ignores the
        // coro.end blocks; restored below.
        DefaultSuspendDest[SWI] = SWI->getDefaultDest();
        SWI->setDefaultDest(SWI->getSuccessor(1));
      }
    }
  }

  auto ExtractAllocas = [&]() {
    AllocaSetType Allocas;
    Allocas.reserve(FrameData.Allocas.size());
    for (const auto &A : FrameData.Allocas)
      Allocas.push_back(A.Alloca);
    return Allocas;
  };
  StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
                                      StackLifetime::LivenessType::May);
  StackLifetimeAnalyzer.run();
  // Whether the live ranges of the two allocas interfere (overlap).
  auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
    return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
        StackLifetimeAnalyzer.getLiveRange(AI2));
  };
  auto GetAllocaSize = [&](const AllocaInfo &A) {
    std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
    assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
    assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
    return RetSize->getFixedValue();
  };
  // Put larger allocas in the front. So the larger allocas have higher
  // priority to merge, which can save more space potentially. Also each
  // AllocaSet would be ordered. So we can get the largest Alloca in one
  // AllocaSet easily.
  sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
    return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
  });
  for (const auto &A : FrameData.Allocas) {
    AllocaInst *Alloca = A.Alloca;
    bool Merged = false;
    // Try to find if the Alloca does not interfere with any existing
    // NonOverlappedAllocaSet. If it is true, insert the alloca into that
    // NonOverlappedAllocaSet.
    for (auto &AllocaSet : NonOverlapedAllocas) {
      assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
      bool NoInference = none_of(AllocaSet, [&](auto Iter) {
        return IsAllocaInferenre(Alloca, Iter);
      });
      // If the alignment of A is multiple of the alignment of B, the address
      // of A should satisfy the requirement for aligning for B.
      //
      // There may be other more fine-grained strategies to handle the
      // alignment information during the merging process. But it seems hard
      // to handle these strategies and benefit little.
      bool Alignable = [&]() -> bool {
        auto *LargestAlloca = *AllocaSet.begin();
        return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
               0;
      }();
      bool CouldMerge = NoInference && Alignable;
      if (!CouldMerge)
        continue;
      AllocaSet.push_back(Alloca);
      Merged = true;
      break;
    }
    if (!Merged) {
      NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
    }
  }
  // Recover the default target destination for each Switch statement
  // reserved.
  for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
    SwitchInst *SWI = SwitchAndDefaultDest.first;
    BasicBlock *DestBB = SwitchAndDefaultDest.second;
    SWI->setDefaultDest(DestBB);
  }
  // This Debug Info could tell us which allocas are merged into one slot.
  LLVM_DEBUG(for (auto &AllocaSet
                  : NonOverlapedAllocas) {
    if (AllocaSet.size() > 1) {
      dbgs() << "In Function:" << F.getName() << "\n";
      dbgs() << "Find Union Set "
             << "\n";
      dbgs() << "\tAllocas are \n";
      for (auto Alloca : AllocaSet)
        dbgs() << "\t\t" << *Alloca << "\n";
    }
  });
}
886
/// Lay out the collected fields into the final frame struct type \p Ty.
///
/// Runs the optimized struct layout over all registered fields, records the
/// resulting struct size/alignment, assigns each Field its final offset and
/// element index, and sets the body of \p Ty (inserting i8-array padding
/// where needed). Must be called exactly once per builder.
void FrameTypeBuilder::finish(StructType *Ty) {
  assert(!IsFinished && "already finished!");

  // Prepare the optimal-layout field array.
  // The Id in the layout field is a pointer to our Field for it.
  SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
  LayoutFields.reserve(Fields.size());
  for (auto &Field : Fields) {
    LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
                              Field.Offset);
  }

  // Perform layout.
  auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
  StructSize = SizeAndAlign.first;
  StructAlign = SizeAndAlign.second;

  // Map a layout entry back to the Field it was created from (via the
  // opaque Id pointer we stashed above).
  auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
    return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
  };

  // We need to produce a packed struct type if there's a field whose
  // assigned offset isn't a multiple of its natural type alignment.
  bool Packed = [&] {
    for (auto &LayoutField : LayoutFields) {
      auto &F = getField(LayoutField);
      if (!isAligned(F.TyAlignment, LayoutField.Offset))
        return true;
    }
    return false;
  }();

  // Build the struct body.
  SmallVector<Type*, 16> FieldTypes;
  FieldTypes.reserve(LayoutFields.size() * 3 / 2);
  uint64_t LastOffset = 0;
  for (auto &LayoutField : LayoutFields) {
    auto &F = getField(LayoutField);

    auto Offset = LayoutField.Offset;

    // Add a padding field if there's a padding gap and we're either
    // building a packed struct or the padding gap is more than we'd
    // get from aligning to the field type's natural alignment.
    assert(Offset >= LastOffset);
    if (Offset != LastOffset) {
      if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
        FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
                                            Offset - LastOffset));
    }

    // Record the final placement back on the Field so callers can query it.
    F.Offset = Offset;
    F.LayoutFieldIndex = FieldTypes.size();

    FieldTypes.push_back(F.Ty);
    // If the field requested extra space for dynamic re-alignment at
    // runtime, append an i8-array buffer of that size right after it.
    if (F.DynamicAlignBuffer) {
      FieldTypes.push_back(
          ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
    }
    LastOffset = Offset + F.Size;
  }

  Ty->setBody(FieldTypes, Packed);

#ifndef NDEBUG
  // Check that the IR layout matches the offsets we expect.
  auto Layout = DL.getStructLayout(Ty);
  for (auto &F : Fields) {
    assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
    assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
  }
#endif

  IsFinished = true;
}
962
cacheDIVar(FrameDataInfo & FrameData,DenseMap<Value *,DILocalVariable * > & DIVarCache)963 static void cacheDIVar(FrameDataInfo &FrameData,
964 DenseMap<Value *, DILocalVariable *> &DIVarCache) {
965 for (auto *V : FrameData.getAllDefs()) {
966 if (DIVarCache.contains(V))
967 continue;
968
969 auto CacheIt = [&DIVarCache, V](const auto &Container) {
970 auto *I = llvm::find_if(Container, [](auto *DDI) {
971 return DDI->getExpression()->getNumElements() == 0;
972 });
973 if (I != Container.end())
974 DIVarCache.insert({V, (*I)->getVariable()});
975 };
976 CacheIt(findDbgDeclares(V));
977 CacheIt(findDVRDeclares(V));
978 }
979 }
980
981 /// Create name for Type. It uses MDString to store new created string to
982 /// avoid memory leak.
solveTypeName(Type * Ty)983 static StringRef solveTypeName(Type *Ty) {
984 if (Ty->isIntegerTy()) {
985 // The longest name in common may be '__int_128', which has 9 bits.
986 SmallString<16> Buffer;
987 raw_svector_ostream OS(Buffer);
988 OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
989 auto *MDName = MDString::get(Ty->getContext(), OS.str());
990 return MDName->getString();
991 }
992
993 if (Ty->isFloatingPointTy()) {
994 if (Ty->isFloatTy())
995 return "__float_";
996 if (Ty->isDoubleTy())
997 return "__double_";
998 return "__floating_type_";
999 }
1000
1001 if (Ty->isPointerTy())
1002 return "PointerType";
1003
1004 if (Ty->isStructTy()) {
1005 if (!cast<StructType>(Ty)->hasName())
1006 return "__LiteralStructType_";
1007
1008 auto Name = Ty->getStructName();
1009
1010 SmallString<16> Buffer(Name);
1011 for (auto &Iter : Buffer)
1012 if (Iter == '.' || Iter == ':')
1013 Iter = '_';
1014 auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
1015 return MDName->getString();
1016 }
1017
1018 return "UnknownType";
1019 }
1020
/// Build a DIType describing \p Ty, memoizing results in \p DITypeCache.
///
/// Integers and floats become basic types, pointers become void*-like
/// pointer types (deliberately not chasing the pointee), structs are built
/// member-by-member recursively, and anything else is described as a byte
/// (or byte-array) blob of the right size.
static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
                           const DataLayout &Layout, DIScope *Scope,
                           unsigned LineNum,
                           DenseMap<Type *, DIType *> &DITypeCache) {
  if (DIType *DT = DITypeCache.lookup(Ty))
    return DT;

  StringRef Name = solveTypeName(Ty);

  DIType *RetType = nullptr;

  if (Ty->isIntegerTy()) {
    auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
    RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
                                      llvm::DINode::FlagArtificial);
  } else if (Ty->isFloatingPointTy()) {
    RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
                                      dwarf::DW_ATE_float,
                                      llvm::DINode::FlagArtificial);
  } else if (Ty->isPointerTy()) {
    // Construct PointerType points to null (aka void *) instead of exploring
    // pointee type to avoid infinite search problem. For example, we would be
    // in trouble if we traverse recursively:
    //
    //  struct Node {
    //      Node* ptr;
    //  };
    RetType =
        Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty),
                                  Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
                                  /*DWARFAddressSpace=*/std::nullopt, Name);
  } else if (Ty->isStructTy()) {
    auto *DIStruct = Builder.createStructType(
        Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
        Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
        llvm::DINode::FlagArtificial, nullptr, llvm::DINodeArray());

    // Recursively describe each element and attach it as a member at the
    // offset given by the struct layout.
    auto *StructTy = cast<StructType>(Ty);
    SmallVector<Metadata *, 16> Elements;
    for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
      DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
                                 Scope, LineNum, DITypeCache);
      assert(DITy);
      Elements.push_back(Builder.createMemberType(
          Scope, DITy->getName(), Scope->getFile(), LineNum,
          DITy->getSizeInBits(), DITy->getAlignInBits(),
          Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
          llvm::DINode::FlagArtificial, DITy));
    }

    Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));

    RetType = DIStruct;
  } else {
    // Fallback: model the type as raw bytes so the debugger can at least
    // show its storage.
    LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
    TypeSize Size = Layout.getTypeSizeInBits(Ty);
    auto *CharSizeType = Builder.createBasicType(
        Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial);

    if (Size <= 8)
      RetType = CharSizeType;
    else {
      // Round the bit size up to a whole number of bytes.
      if (Size % 8 != 0)
        Size = TypeSize::getFixed(Size + 8 - (Size % 8));

      RetType = Builder.createArrayType(
          Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
          Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8)));
    }
  }

  DITypeCache.insert({Ty, RetType});
  return RetType;
}
1095
/// Build artificial debug info for C++ coroutine frames to allow users to
/// inspect the contents of the frame directly.
///
/// Create debug information for the coroutine frame with debug name
/// "__coro_frame". The debug information for the fields of the coroutine
/// frame is constructed in the following way:
/// 1. For each value in the frame, we search its dbg.declare uses to find the
///    corresponding debug variable for the value. If we can find the debug
///    variable, we can get full and accurate debug information.
/// 2. If we can't get debug information in step 1, we try to build a DIType
///    from the value's Type instead. We do this in solveDIType. We only
///    handle integer, floating-point, pointer and struct types for now.
static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
                                FrameDataInfo &FrameData) {
  DISubprogram *DIS = F.getSubprogram();
  // If there is no DISubprogram for F, it implies the Function are not compiled
  // with debug info. So we also don't need to generate debug info for the frame
  // neither.
  if (!DIS || !DIS->getUnit() ||
      !dwarf::isCPlusPlus(
          (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()))
    return;

  assert(Shape.ABI == coro::ABI::Switch &&
         "We could only build debug infomation for C++ coroutine now.\n");

  DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
  assert(PromiseAlloca &&
         "Coroutine with switch ABI should own Promise alloca");

  // Anchor the frame variable on the promise's declare: reuse its scope,
  // file, line and debug location. Bail out if the promise has no declare
  // in either the intrinsic or the record form.
  TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(PromiseAlloca);
  TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(PromiseAlloca);

  DILocalVariable *PromiseDIVariable = nullptr;
  DILocation *DILoc = nullptr;
  if (!DIs.empty()) {
    DbgDeclareInst *PromiseDDI = DIs.front();
    PromiseDIVariable = PromiseDDI->getVariable();
    DILoc = PromiseDDI->getDebugLoc().get();
  } else if (!DVRs.empty()) {
    DbgVariableRecord *PromiseDVR = DVRs.front();
    PromiseDIVariable = PromiseDVR->getVariable();
    DILoc = PromiseDVR->getDebugLoc().get();
  } else {
    return;
  }

  DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();
  DIFile *DFile = PromiseDIScope->getFile();
  unsigned LineNum = PromiseDIVariable->getLine();

  // Create the composite type for the whole frame; members are filled in
  // below and attached via replaceArrays.
  DICompositeType *FrameDITy = DBuilder.createStructType(
      DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(),
      DFile, LineNum, Shape.FrameSize * 8,
      Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
      llvm::DINodeArray());
  StructType *FrameTy = Shape.FrameTy;
  SmallVector<Metadata *, 16> Elements;
  DataLayout Layout = F.getDataLayout();

  DenseMap<Value *, DILocalVariable *> DIVarCache;
  cacheDIVar(FrameData, DIVarCache);

  unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
  unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
  unsigned IndexIndex = Shape.SwitchLowering.IndexField;

  // Names and debug types for the three well-known header fields.
  DenseMap<unsigned, StringRef> NameCache;
  NameCache.insert({ResumeIndex, "__resume_fn"});
  NameCache.insert({DestroyIndex, "__destroy_fn"});
  NameCache.insert({IndexIndex, "__coro_index"});

  Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
       *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
       *IndexTy = FrameTy->getElementType(IndexIndex);

  DenseMap<unsigned, DIType *> TyCache;
  TyCache.insert(
      {ResumeIndex, DBuilder.createPointerType(
                        nullptr, Layout.getTypeSizeInBits(ResumeFnTy))});
  TyCache.insert(
      {DestroyIndex, DBuilder.createPointerType(
                         nullptr, Layout.getTypeSizeInBits(DestroyFnTy))});

  /// FIXME: If we fill the field `SizeInBits` with the actual size of
  /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
  TyCache.insert({IndexIndex, DBuilder.createBasicType(
                                  "__coro_index",
                                  (Layout.getTypeSizeInBits(IndexTy) < 8)
                                      ? 8
                                      : Layout.getTypeSizeInBits(IndexTy),
                                  dwarf::DW_ATE_unsigned_char)});

  // For spilled values with a cached dbg.declare, reuse the user-visible
  // variable name and type for the corresponding frame field.
  for (auto *V : FrameData.getAllDefs()) {
    if (!DIVarCache.contains(V))
      continue;

    auto Index = FrameData.getFieldIndex(V);

    NameCache.insert({Index, DIVarCache[V]->getName()});
    TyCache.insert({Index, DIVarCache[V]->getType()});
  }

  // Cache from index to (Align, Offset) pair.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
  // The Align and Offset of Resume function and Destroy function are fixed.
  OffsetCache.insert({ResumeIndex, {8, 0}});
  OffsetCache.insert({DestroyIndex, {8, 8}});
  OffsetCache.insert(
      {IndexIndex,
       {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});

  for (auto *V : FrameData.getAllDefs()) {
    auto Index = FrameData.getFieldIndex(V);

    OffsetCache.insert(
        {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
  }

  DenseMap<Type *, DIType *> DITypeCache;
  // This counter is used to avoid same type names. e.g., there would be
  // many i32 and i64 types in one coroutine. And we would use i32_0 and
  // i32_1 to avoid the same type. Since it makes no sense the name of the
  // fields conflicts with each other.
  unsigned UnknownTypeNum = 0;
  for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
    // Skip fields we know nothing about (e.g. padding arrays).
    if (!OffsetCache.contains(Index))
      continue;

    std::string Name;
    uint64_t SizeInBits;
    uint32_t AlignInBits;
    uint64_t OffsetInBits;
    DIType *DITy = nullptr;

    Type *Ty = FrameTy->getElementType(Index);
    assert(Ty->isSized() && "We can't handle type which is not sized.\n");
    SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedValue();
    AlignInBits = OffsetCache[Index].first * 8;
    OffsetInBits = OffsetCache[Index].second * 8;

    if (NameCache.contains(Index)) {
      Name = NameCache[Index].str();
      DITy = TyCache[Index];
    } else {
      // No real debug variable: synthesize a type and a unique name.
      DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
      assert(DITy && "SolveDIType shouldn't return nullptr.\n");
      Name = DITy->getName().str();
      Name += "_" + std::to_string(UnknownTypeNum);
      UnknownTypeNum++;
    }

    Elements.push_back(DBuilder.createMemberType(
        FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
        llvm::DINode::FlagArtificial, DITy));
  }

  DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));

  auto *FrameDIVar = DBuilder.createAutoVariable(PromiseDIScope, "__coro_frame",
                                                 DFile, LineNum, FrameDITy,
                                                 true, DINode::FlagArtificial);
  assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));

  // Subprogram would have RetainedNodes field which records the debug
  // variables it contained. So we need to add __coro_frame to the
  // RetainedNodes of it.
  //
  // If we don't add __coro_frame to the RetainedNodes, user may get
  // `no symbol __coro_frame in context` rather than `__coro_frame`
  // is optimized out, which is more precise.
  if (auto *SubProgram = dyn_cast<DISubprogram>(PromiseDIScope)) {
    auto RetainedNodes = SubProgram->getRetainedNodes();
    SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
                                                 RetainedNodes.end());
    RetainedNodesVec.push_back(FrameDIVar);
    // Operand 7 of DISubprogram is the retainedNodes list.
    SubProgram->replaceOperandWith(
        7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
  }

  // Finally, declare __coro_frame at the frame pointer, using whichever
  // debug-info representation (record or intrinsic) is in effect.
  if (UseNewDbgInfoFormat) {
    DbgVariableRecord *NewDVR =
        new DbgVariableRecord(ValueAsMetadata::get(Shape.FramePtr), FrameDIVar,
                              DBuilder.createExpression(), DILoc,
                              DbgVariableRecord::LocationType::Declare);
    BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
    It->getParent()->insertDbgRecordBefore(NewDVR, It);
  } else {
    DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
                           DBuilder.createExpression(), DILoc,
                           &*Shape.getInsertPtAfterFramePtr());
  }
}
1291
1292 // Build a struct that will keep state for an active coroutine.
1293 // struct f.frame {
1294 // ResumeFnTy ResumeFnAddr;
1295 // ResumeFnTy DestroyFnAddr;
1296 // ... promise (if present) ...
1297 // int ResumeIndex;
1298 // ... spills ...
1299 // };
static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                  FrameDataInfo &FrameData) {
  LLVMContext &C = F.getContext();
  const DataLayout &DL = F.getDataLayout();
  // Name the frame type "<function>.Frame".
  StructType *FrameTy = [&] {
    SmallString<32> Name(F.getName());
    Name.append(".Frame");
    return StructType::create(C, Name);
  }();

  // We will use this value to cap the alignment of spilled values.
  std::optional<Align> MaxFrameAlignment;
  if (Shape.ABI == coro::ABI::Async)
    MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
  FrameTypeBuilder B(C, DL, MaxFrameAlignment);

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
  std::optional<FieldIDType> SwitchIndexFieldId;

  if (Shape.ABI == coro::ABI::Switch) {
    auto *FnPtrTy = PointerType::getUnqual(C);

    // Add header fields for the resume and destroy functions.
    // We can rely on these being perfectly packed.
    (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
    (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);

    // PromiseAlloca field needs to be explicitly added here because it's
    // a header field with a fixed offset based on its alignment. Hence it
    // needs special handling and cannot be added to FrameData.Allocas.
    if (PromiseAlloca)
      FrameData.setFieldIndex(
          PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true));

    // Add a field to store the suspend index. This doesn't need to
    // be in the header.
    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
    Type *IndexType = Type::getIntNTy(C, IndexBits);

    SwitchIndexFieldId = B.addField(IndexType, std::nullopt);
  } else {
    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
  }

  // Because multiple allocas may own the same field slot,
  // we add allocas to field here.
  B.addFieldForAllocas(F, FrameData, Shape);
  // Add PromiseAlloca to Allocas list so that
  // 1. updateLayoutIndex could update its index after
  // `performOptimizedStructLayout`
  // 2. it is processed in insertSpills.
  if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
    // We assume that the promise alloca won't be modified before
    // CoroBegin and no alias will be created before CoroBegin.
    FrameData.Allocas.emplace_back(
        PromiseAlloca, DenseMap<Instruction *, std::optional<APInt>>{}, false);
  // Create an entry for every spilled value.
  for (auto &S : FrameData.Spills) {
    Type *FieldType = S.first->getType();
    // For byval arguments, we need to store the pointed value in the frame,
    // instead of the pointer itself.
    if (const Argument *A = dyn_cast<Argument>(S.first))
      if (A->hasByValAttr())
        FieldType = A->getParamByValType();
    FieldIDType Id = B.addField(FieldType, std::nullopt, false /*header*/,
                                true /*IsSpillOfValue*/);
    FrameData.setFieldIndex(S.first, Id);
  }

  // Run the layout and publish the resulting size/alignment on the Shape.
  B.finish(FrameTy);
  FrameData.updateLayoutIndex(B);
  Shape.FrameAlign = B.getStructAlign();
  Shape.FrameSize = B.getStructSize();

  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    // In the switch ABI, remember the switch-index field.
    auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
    Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
    Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
    Shape.SwitchLowering.IndexOffset = IndexField.Offset;

    // Also round the frame size up to a multiple of its alignment, as is
    // generally expected in C/C++.
    Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
    break;
  }

  // In the retcon ABI, remember whether the frame is inline in the storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    auto Id = Shape.getRetconCoroId();
    Shape.RetconLowering.IsFrameInlineInStorage
      = (B.getStructSize() <= Id->getStorageSize() &&
         B.getStructAlign() <= Id->getStorageAlignment());
    break;
  }
  case coro::ABI::Async: {
    // The frame is placed after the async context header.
    Shape.AsyncLowering.FrameOffset =
        alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
    // Also make the final context size a multiple of the context alignment to
    // make allocation easier for allocators.
    Shape.AsyncLowering.ContextSize =
        alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
                Shape.AsyncLowering.getContextAlignment());
    if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
      report_fatal_error(
          "The alignment requirment of frame variables cannot be higher than "
          "the alignment of the async function context");
    }
    break;
  }
  }

  return FrameTy;
}
1416
// We use a pointer use visitor to track how an alloca is being used.
// The goal is to be able to answer the following three questions:
// 1. Should this alloca be allocated on the frame instead?
// 2. Could the content of the alloca be modified prior to CoroBegin, which
//    would require copying the data from the alloca to the frame after
//    CoroBegin?
// 3. Are there any aliases created for this alloca prior to CoroBegin, but
//    used after CoroBegin? In that case, we will need to recreate the alias
//    after CoroBegin based off the frame.
//
// To answer question 1, we track two things:
// a. The list of all basic blocks that use this alloca or any of its aliases.
//    In the end, we check if there exist any two basic blocks that cross
//    suspension points. If so, this alloca must be put on the frame.
// b. Whether the alloca or any alias of the alloca is escaped at some point,
//    either by storing the address somewhere, or by the address being used in
//    a function call that might capture. If it's ever escaped, this alloca
//    must be put on the frame conservatively.
//
// To answer question 2, we track through the variable MayWriteBeforeCoroBegin.
// Whenever a potential write happens, either through a store instruction, a
// function call or any of the memory intrinsics, we check whether this
// instruction is prior to CoroBegin.
//
// To answer question 3, we track the offsets of all aliases created for the
// alloca prior to CoroBegin but used after CoroBegin. std::optional is used to
// be able to represent the case when the offset is unknown (e.g. when you have
// a PHINode that takes in different offset values). We cannot handle unknown
// offsets and will assert. This is the potential issue left out. An ideal
// solution would likely require a significant redesign.
1442 namespace {
1443 struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
1444 using Base = PtrUseVisitor<AllocaUseVisitor>;
  // Pre-collect the blocks containing suspend points so later queries about
  // suspend-crossing are cheap set lookups.
  AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
                   const coro::Shape &CoroShape,
                   const SuspendCrossingInfo &Checker,
                   bool ShouldUseLifetimeStartInfo)
      : PtrUseVisitor(DL), DT(DT), CoroShape(CoroShape), Checker(Checker),
        ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {
    for (AnyCoroSuspendInst *SuspendInst : CoroShape.CoroSuspends)
      CoroSuspendBBs.insert(SuspendInst->getParent());
  }
1454
  // Record every visited user, then delegate to the base visitor. Escapes
  // that are not dominated by CoroBegin are treated as potential writes
  // before CoroBegin.
  void visit(Instruction &I) {
    Users.insert(&I);
    Base::visit(I);
    // If the pointer is escaped prior to CoroBegin, we have to assume it would
    // be written into before CoroBegin as well.
    if (PI.isEscaped() &&
        !DT.dominates(CoroShape.CoroBegin, PI.getEscapingInst())) {
      MayWriteBeforeCoroBegin = true;
    }
  }
  // We need to provide this overload as PtrUseVisitor uses a pointer based
  // visiting function.
  void visit(Instruction *I) { return visit(*I); }
1468
  // A PHI carrying the alloca's address is another alias of the alloca;
  // record it and follow its users.
  void visitPHINode(PHINode &I) {
    enqueueUsers(I);
    handleAlias(I);
  }

  // Same for a select that may produce the alloca's address.
  void visitSelectInst(SelectInst &I) {
    enqueueUsers(I);
    handleAlias(I);
  }
1478
  void visitStoreInst(StoreInst &SI) {
    // Regardless of whether the alias of the alloca is the value operand or
    // the pointer operand, we need to assume the alloca has been written to.
    handleMayWrite(SI);

    if (SI.getValueOperand() != U->get())
      return;

    // We are storing the pointer into a memory location, potentially escaping.
    // As an optimization, we try to detect simple cases where it doesn't
    // actually escape, for example:
    //    %ptr = alloca ..
    //    %addr = alloca ..
    //    store %ptr, %addr
    //    %x = load %addr
    //    ..
    // If %addr is only used by loading from it, we could simply treat %x as
    // another alias of %ptr, and not consider %ptr being escaped.
    auto IsSimpleStoreThenLoad = [&]() {
      auto *AI = dyn_cast<AllocaInst>(SI.getPointerOperand());
      // If the memory location we are storing to is not an alloca, it
      // could be an alias of some other memory locations, which is difficult
      // to analyze.
      if (!AI)
        return false;
      // StoreAliases contains aliases of the memory location stored into.
      SmallVector<Instruction *, 4> StoreAliases = {AI};
      while (!StoreAliases.empty()) {
        Instruction *I = StoreAliases.pop_back_val();
        for (User *U : I->users()) {
          // If we are loading from the memory location, we are creating an
          // alias of the original pointer.
          if (auto *LI = dyn_cast<LoadInst>(U)) {
            enqueueUsers(*LI);
            handleAlias(*LI);
            continue;
          }
          // If we are overriding the memory location, the pointer certainly
          // won't escape.
          if (auto *S = dyn_cast<StoreInst>(U))
            if (S->getPointerOperand() == I)
              continue;
          // Lifetime markers neither read nor leak the stored pointer.
          if (auto *II = dyn_cast<IntrinsicInst>(U))
            if (II->isLifetimeStartOrEnd())
              continue;
          // BitCastInst creates aliases of the memory location being stored
          // into.
          if (auto *BI = dyn_cast<BitCastInst>(U)) {
            StoreAliases.push_back(BI);
            continue;
          }
          // Any other use of the store destination is too complex to reason
          // about; treat the pointer as escaped.
          return false;
        }
      }

      return true;
    };

    if (!IsSimpleStoreThenLoad())
      PI.setEscaped(&SI);
  }
1540
1541 // All mem intrinsics modify the data.
  // All mem intrinsics modify the data.
  void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(MI); }

  // Casts and GEPs produce aliases of the alloca; the base visitor handles
  // use enqueueing (and, for GEPs, offset tracking), and we record the alias.
  void visitBitCastInst(BitCastInst &BC) {
    Base::visitBitCastInst(BC);
    handleAlias(BC);
  }

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    Base::visitAddrSpaceCastInst(ASC);
    handleAlias(ASC);
  }

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    // The base visitor will adjust Offset accordingly.
    Base::visitGetElementPtrInst(GEPI);
    handleAlias(GEPI);
  }
1559
  // Collect lifetime markers that cover the whole alloca; they feed the
  // lifetime-based liveness check in computeShouldLiveOnFrame.
  void visitIntrinsicInst(IntrinsicInst &II) {
    // If the lifetime markers refer only to a subrange of the original
    // alloca, ignore them to avoid misleading the analysis.
    if (!IsOffsetKnown || !Offset.isZero())
      return Base::visitIntrinsicInst(II);
    switch (II.getIntrinsicID()) {
    default:
      return Base::visitIntrinsicInst(II);
    case Intrinsic::lifetime_start:
      LifetimeStarts.insert(&II);
      LifetimeStartBBs.push_back(II.getParent());
      break;
    case Intrinsic::lifetime_end:
      LifetimeEndBBs.insert(II.getParent());
      break;
    }
  }
1578
  // Passing the pointer to a call argument that is not nocapture counts as
  // an escape; any call may also write through the pointer.
  void visitCallBase(CallBase &CB) {
    for (unsigned Op = 0, OpCount = CB.arg_size(); Op < OpCount; ++Op)
      if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
        PI.setEscaped(&CB);
    handleMayWrite(CB);
  }
1585
  // Lazily compute (and cache) whether this alloca must live on the
  // coroutine frame rather than the stack.
  bool getShouldLiveOnFrame() const {
    if (!ShouldLiveOnFrame)
      ShouldLiveOnFrame = computeShouldLiveOnFrame();
    return *ShouldLiveOnFrame;
  }

  bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; }

  // Return the alias-to-offset map collected during visitation. Only valid
  // for allocas that must live on the frame; aborts if any alias has an
  // unknown offset, since such aliases cannot be recreated off the frame.
  DenseMap<Instruction *, std::optional<APInt>> getAliasesCopy() const {
    assert(getShouldLiveOnFrame() && "This method should only be called if the "
                                     "alloca needs to live on the frame.");
    for (const auto &P : AliasOffetMap)
      if (!P.second)
        report_fatal_error("Unable to handle an alias with unknown offset "
                           "created before CoroBegin.");
    return AliasOffetMap;
  }
1603
1604 private:
1605 const DominatorTree &DT;
1606 const coro::Shape &CoroShape;
1607 const SuspendCrossingInfo &Checker;
1608 // All alias to the original AllocaInst, created before CoroBegin and used
1609 // after CoroBegin. Each entry contains the instruction and the offset in the
1610 // original Alloca. They need to be recreated after CoroBegin off the frame.
1611 DenseMap<Instruction *, std::optional<APInt>> AliasOffetMap{};
1612 SmallPtrSet<Instruction *, 4> Users{};
1613 SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
1614 SmallVector<BasicBlock *> LifetimeStartBBs{};
1615 SmallPtrSet<BasicBlock *, 2> LifetimeEndBBs{};
1616 SmallPtrSet<const BasicBlock *, 2> CoroSuspendBBs{};
1617 bool MayWriteBeforeCoroBegin{false};
1618 bool ShouldUseLifetimeStartInfo{true};
1619
1620 mutable std::optional<bool> ShouldLiveOnFrame{};
1621
  // Decide whether the alloca's lifetime (or that of any alias) can cross a
  // suspension point, in which case it must be placed on the frame.
  bool computeShouldLiveOnFrame() const {
    // If lifetime information is available, we check it first since it's
    // more precise. We look at every pair of lifetime.start intrinsic and
    // every basic block that uses the pointer to see if they cross suspension
    // points. The uses cover both direct uses as well as indirect uses.
    if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) {
      // If there is no explicit lifetime.end, then assume the address can
      // cross suspension points.
      if (LifetimeEndBBs.empty())
        return true;

      // If there is a path from a lifetime.start to a suspend without a
      // corresponding lifetime.end, then the alloca's lifetime persists
      // beyond that suspension point and the alloca must go on the frame.
      llvm::SmallVector<BasicBlock *> Worklist(LifetimeStartBBs);
      if (isManyPotentiallyReachableFromMany(Worklist, CoroSuspendBBs,
                                             &LifetimeEndBBs, &DT))
        return true;

      // Addresses are guaranteed to be identical after every lifetime.start so
      // we cannot use the local stack if the address escaped and there is a
      // suspend point between lifetime markers. This should also cover the
      // case of a single lifetime.start intrinsic in a loop with suspend point.
      if (PI.isEscaped()) {
        for (auto *A : LifetimeStarts) {
          for (auto *B : LifetimeStarts) {
            if (Checker.hasPathOrLoopCrossingSuspendPoint(A->getParent(),
                                                          B->getParent()))
              return true;
          }
        }
      }
      return false;
    }
    // FIXME: Ideally the isEscaped check should come at the beginning.
    // However there are a few loose ends that need to be fixed first before
    // we can do that. We need to make sure we are not over-conservative, so
    // that the data accessed in-between await_suspend and symmetric transfer
    // is always put on the stack, and also data accessed after coro.end is
    // always put on the stack (esp the return object). To fix that, we need
    // to:
    // 1) Potentially treat sret as nocapture in calls
    // 2) Special handle the return object and put it on the stack
    // 3) Utilize lifetime.end intrinsic
    if (PI.isEscaped())
      return true;

    // Fall back to pairwise use checking: if any use's definition crosses a
    // suspend relative to another use, the alloca must live on the frame.
    for (auto *U1 : Users)
      for (auto *U2 : Users)
        if (Checker.isDefinitionAcrossSuspend(*U1, U2))
          return true;

    return false;
  }
1676
  // Records a potential write to the alloca. Only writes not dominated by
  // CoroBegin matter: they may happen before the frame exists, so the
  // alloca's contents must later be copied into the frame.
  void handleMayWrite(const Instruction &I) {
    if (!DT.dominates(CoroShape.CoroBegin, &I))
      MayWriteBeforeCoroBegin = true;
  }
1681
usedAfterCoroBegin__anon408e6fe61611::AllocaUseVisitor1682 bool usedAfterCoroBegin(Instruction &I) {
1683 for (auto &U : I.uses())
1684 if (DT.dominates(CoroShape.CoroBegin, U))
1685 return true;
1686 return false;
1687 }
1688
  // Tracks aliases of the alloca that are created before CoroBegin but used
  // after it. These aliases may need to be recreated after CoroBegin if the
  // alloca needs to live on the frame.
  void handleAlias(Instruction &I) {
    // We track all aliases created prior to CoroBegin but used after.
    // Aliases created after CoroBegin, or never used after it, need no
    // recreation.
    if (DT.dominates(CoroShape.CoroBegin, &I) || !usedAfterCoroBegin(I))
      return;

    if (!IsOffsetKnown) {
      // Unknown offset: record the alias with an empty optional; if the
      // alloca ends up on the frame, getAliasesCopy() will report a fatal
      // error for this entry.
      AliasOffetMap[&I].reset();
    } else {
      auto Itr = AliasOffetMap.find(&I);
      if (Itr == AliasOffetMap.end()) {
        AliasOffetMap[&I] = Offset;
      } else if (Itr->second && *Itr->second != Offset) {
        // If we have seen two different possible values for this alias, we set
        // it to empty.
        AliasOffetMap[&I].reset();
      }
    }
  }
1709 };
1710 } // namespace
1711
1712 // We need to make room to insert a spill after initial PHIs, but before
1713 // catchswitch instruction. Placing it before violates the requirement that
1714 // catchswitch, like all other EHPads must be the first nonPHI in a block.
1715 //
1716 // Split away catchswitch into a separate block and insert in its place:
1717 //
1718 // cleanuppad <InsertPt> cleanupret.
1719 //
1720 // cleanupret instruction will act as an insert point for the spill.
splitBeforeCatchSwitch(CatchSwitchInst * CatchSwitch)1721 static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
1722 BasicBlock *CurrentBlock = CatchSwitch->getParent();
1723 BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
1724 CurrentBlock->getTerminator()->eraseFromParent();
1725
1726 auto *CleanupPad =
1727 CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
1728 auto *CleanupRet =
1729 CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
1730 return CleanupRet;
1731 }
1732
1733 // Replace all alloca and SSA values that are accessed across suspend points
1734 // with GetElementPointer from coroutine frame + loads and stores. Create an
1735 // AllocaSpillBB that will become the new entry block for the resume parts of
1736 // the coroutine:
1737 //
1738 // %hdl = coro.begin(...)
1739 // whatever
1740 //
1741 // becomes:
1742 //
1743 // %hdl = coro.begin(...)
1744 // br label %AllocaSpillBB
1745 //
1746 // AllocaSpillBB:
1747 // ; geps corresponding to allocas that were moved to coroutine frame
1748 // br label PostSpill
1749 //
1750 // PostSpill:
1751 // whatever
1752 //
1753 //
static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
  auto *CB = Shape.CoroBegin;
  LLVMContext &C = CB->getContext();
  Function *F = CB->getFunction();
  IRBuilder<> Builder(C);
  StructType *FrameTy = Shape.FrameTy;
  Value *FramePtr = Shape.FramePtr;
  DominatorTree DT(*F);
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;

  // Create a GEP with the given index into the coroutine frame for the original
  // value Orig. Appends an extra 0 index for array-allocas, preserving the
  // original type.
  auto GetFramePointer = [&](Value *Orig) -> Value * {
    FieldIDType Index = FrameData.getFieldIndex(Orig);
    SmallVector<Value *, 3> Indices = {
        ConstantInt::get(Type::getInt32Ty(C), 0),
        ConstantInt::get(Type::getInt32Ty(C), Index),
    };

    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
        auto Count = CI->getValue().getZExtValue();
        if (Count > 1) {
          // Array alloca: index to the first element so the GEP has the
          // element type rather than the array type.
          Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
        }
      } else {
        report_fatal_error("Coroutines cannot handle non static allocas yet");
      }
    }

    auto GEP = cast<GetElementPtrInst>(
        Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
      if (FrameData.getDynamicAlign(Orig) != 0) {
        // Over-aligned field: round the address up to the alloca's alignment
        // with ptrtoint/add/and-mask/inttoptr arithmetic.
        assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value());
        auto *M = AI->getModule();
        auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
        auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
        auto *AlignMask =
            ConstantInt::get(IntPtrTy, AI->getAlign().value() - 1);
        PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
        PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
        return Builder.CreateIntToPtr(PtrValue, AI->getType());
      }
      // If the type of GEP is not equal to the type of AllocaInst, it implies
      // that the AllocaInst may be reused in the Frame slot of other
      // AllocaInst. So We cast GEP to the AllocaInst here to re-use
      // the Frame storage.
      //
      // Note: If we change the strategy dealing with alignment, we need to refine
      // this casting.
      if (GEP->getType() != Orig->getType())
        return Builder.CreateAddrSpaceCast(GEP, Orig->getType(),
                                           Orig->getName() + Twine(".cast"));
    }
    return GEP;
  };

  for (auto const &E : FrameData.Spills) {
    Value *Def = E.first;
    auto SpillAlignment = Align(FrameData.getAlign(Def));
    // Create a store instruction storing the value into the
    // coroutine frame.
    BasicBlock::iterator InsertPt;
    Type *ByValTy = nullptr;
    if (auto *Arg = dyn_cast<Argument>(Def)) {
      // For arguments, we will place the store instruction right after
      // the coroutine frame pointer instruction, i.e. coro.begin.
      InsertPt = Shape.getInsertPtAfterFramePtr();

      // If we're spilling an Argument, make sure we clear 'nocapture'
      // from the coroutine function.
      Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);

      if (Arg->hasByValAttr())
        ByValTy = Arg->getParamByValType();
    } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
      // Don't spill immediately after a suspend; splitting assumes
      // that the suspend will be followed by a branch.
      InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHIIt();
    } else {
      auto *I = cast<Instruction>(Def);
      if (!DT.dominates(CB, I)) {
        // If it is not dominated by CoroBegin, then spill should be
        // inserted immediately after CoroFrame is computed.
        InsertPt = Shape.getInsertPtAfterFramePtr();
      } else if (auto *II = dyn_cast<InvokeInst>(I)) {
        // If we are spilling the result of the invoke instruction, split
        // the normal edge and insert the spill in the new block.
        auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
        InsertPt = NewBB->getTerminator()->getIterator();
      } else if (isa<PHINode>(I)) {
        // Skip the PHINodes and EH pads instructions.
        BasicBlock *DefBlock = I->getParent();
        if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
          InsertPt = splitBeforeCatchSwitch(CSI)->getIterator();
        else
          InsertPt = DefBlock->getFirstInsertionPt();
      } else {
        assert(!I->isTerminator() && "unexpected terminator");
        // For all other values, the spill is placed immediately after
        // the definition.
        InsertPt = I->getNextNode()->getIterator();
      }
    }

    auto Index = FrameData.getFieldIndex(Def);
    Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
    auto *G = Builder.CreateConstInBoundsGEP2_32(
        FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
    if (ByValTy) {
      // For byval arguments, we need to store the pointed value in the frame,
      // instead of the pointer itself.
      auto *Value = Builder.CreateLoad(ByValTy, Def);
      Builder.CreateAlignedStore(Value, G, SpillAlignment);
    } else {
      Builder.CreateAlignedStore(Def, G, SpillAlignment);
    }

    BasicBlock *CurrentBlock = nullptr;
    Value *CurrentReload = nullptr;
    for (auto *U : E.second) {
      // If we have not seen the use block, create a load instruction to reload
      // the spilled value from the coroutine frame. Populates the Value pointer
      // reference provided with the frame GEP. One reload per use block is
      // shared by all uses in that block.
      if (CurrentBlock != U->getParent()) {
        CurrentBlock = U->getParent();
        Builder.SetInsertPoint(CurrentBlock,
                               CurrentBlock->getFirstInsertionPt());

        auto *GEP = GetFramePointer(E.first);
        GEP->setName(E.first->getName() + Twine(".reload.addr"));
        if (ByValTy)
          CurrentReload = GEP;
        else
          CurrentReload = Builder.CreateAlignedLoad(
              FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
              SpillAlignment, E.first->getName() + Twine(".reload"));

        TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(Def);
        TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(Def);
        // Try best to find dbg.declare. If the spill is a temp, there may not
        // be a direct dbg.declare. Walk up the load chain to find one from an
        // alias.
        if (F->getSubprogram()) {
          auto *CurDef = Def;
          while (DIs.empty() && DVRs.empty() && isa<LoadInst>(CurDef)) {
            auto *LdInst = cast<LoadInst>(CurDef);
            // Only consider ptr to ptr same type load.
            if (LdInst->getPointerOperandType() != LdInst->getType())
              break;
            CurDef = LdInst->getPointerOperand();
            if (!isa<AllocaInst, LoadInst>(CurDef))
              break;
            DIs = findDbgDeclares(CurDef);
            DVRs = findDVRDeclares(CurDef);
          }
        }

        auto SalvageOne = [&](auto *DDI) {
          bool AllowUnresolved = false;
          // This dbg.declare is preserved for all coro-split function
          // fragments. It will be unreachable in the main function, and
          // processed by coro::salvageDebugInfo() by CoroCloner.
          if (UseNewDbgInfoFormat) {
            DbgVariableRecord *NewDVR = new DbgVariableRecord(
                ValueAsMetadata::get(CurrentReload), DDI->getVariable(),
                DDI->getExpression(), DDI->getDebugLoc(),
                DbgVariableRecord::LocationType::Declare);
            Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
                NewDVR, Builder.GetInsertPoint());
          } else {
            DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
                .insertDeclare(CurrentReload, DDI->getVariable(),
                               DDI->getExpression(), DDI->getDebugLoc(),
                               &*Builder.GetInsertPoint());
          }
          // This dbg.declare is for the main function entry point. It
          // will be deleted in all coro-split functions.
          coro::salvageDebugInfo(ArgToAllocaMap, *DDI, Shape.OptimizeFrame,
                                 false /*UseEntryValue*/);
        };
        for_each(DIs, SalvageOne);
        for_each(DVRs, SalvageOne);
      }

      // If we have a single edge PHINode, remove it and replace it with a
      // reload from the coroutine frame. (We already took care of multi edge
      // PHINodes by rewriting them in the rewritePHIs function).
      if (auto *PN = dyn_cast<PHINode>(U)) {
        assert(PN->getNumIncomingValues() == 1 &&
               "unexpected number of incoming "
               "values in the PHINode");
        PN->replaceAllUsesWith(CurrentReload);
        PN->eraseFromParent();
        continue;
      }

      // Replace all uses of CurrentValue in the current instruction with
      // reload.
      U->replaceUsesOfWith(Def, CurrentReload);
      // Instructions are added to Def's user list if the attached
      // debug records use Def. Update those now.
      for (DbgVariableRecord &DVR : filterDbgVars(U->getDbgRecordRange()))
        DVR.replaceVariableLocationOp(Def, CurrentReload, true);
    }
  }

  BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();

  // Create the AllocaSpillBB / PostSpill structure documented above.
  auto SpillBlock = FramePtrBB->splitBasicBlock(
      Shape.getInsertPtAfterFramePtr(), "AllocaSpillBB");
  SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
  Shape.AllocaSpillBlock = SpillBlock;

  // retcon and retcon.once lowering assumes all uses have been sunk.
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
      Shape.ABI == coro::ABI::Async) {
    // If we found any allocas, replace all of their remaining uses with Geps.
    Builder.SetInsertPoint(SpillBlock, SpillBlock->begin());
    for (const auto &P : FrameData.Allocas) {
      AllocaInst *Alloca = P.Alloca;
      auto *G = GetFramePointer(Alloca);

      // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
      // here, as we are changing location of the instruction.
      G->takeName(Alloca);
      Alloca->replaceAllUsesWith(G);
      Alloca->eraseFromParent();
    }
    return;
  }

  // If we found any alloca, replace all of their remaining uses with GEP
  // instructions. To retain debuggability, we replace the uses of allocas for
  // dbg.declares and dbg.values with the reload from the frame.
  // Note: We cannot replace the alloca with GEP instructions indiscriminately,
  // as some of the uses may not be dominated by CoroBegin.
  Builder.SetInsertPoint(Shape.AllocaSpillBlock,
                         Shape.AllocaSpillBlock->begin());
  SmallVector<Instruction *, 4> UsersToUpdate;
  for (const auto &A : FrameData.Allocas) {
    AllocaInst *Alloca = A.Alloca;
    UsersToUpdate.clear();
    // Only users dominated by CoroBegin may be redirected to the frame.
    for (User *U : Alloca->users()) {
      auto *I = cast<Instruction>(U);
      if (DT.dominates(CB, I))
        UsersToUpdate.push_back(I);
    }
    if (UsersToUpdate.empty())
      continue;
    auto *G = GetFramePointer(Alloca);
    G->setName(Alloca->getName() + Twine(".reload.addr"));

    SmallVector<DbgVariableIntrinsic *, 4> DIs;
    SmallVector<DbgVariableRecord *> DbgVariableRecords;
    findDbgUsers(DIs, Alloca, &DbgVariableRecords);
    for (auto *DVI : DIs)
      DVI->replaceUsesOfWith(Alloca, G);
    for (auto *DVR : DbgVariableRecords)
      DVR->replaceVariableLocationOp(Alloca, G);

    for (Instruction *I : UsersToUpdate) {
      // It is meaningless to retain the lifetime intrinsics refer for the
      // member of coroutine frames and the meaningless lifetime intrinsics
      // are possible to block further optimizations.
      if (I->isLifetimeStartOrEnd()) {
        I->eraseFromParent();
        continue;
      }

      I->replaceUsesOfWith(Alloca, G);
    }
  }
  Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
  for (const auto &A : FrameData.Allocas) {
    AllocaInst *Alloca = A.Alloca;
    if (A.MayWriteBeforeCoroBegin) {
      // isEscaped really means potentially modified before CoroBegin.
      // Copy the pre-CoroBegin contents of the alloca into its frame slot.
      if (Alloca->isArrayAllocation())
        report_fatal_error(
            "Coroutines cannot handle copying of array allocas yet");

      auto *G = GetFramePointer(Alloca);
      auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca);
      Builder.CreateStore(Value, G);
    }
    // For each alias to Alloca created before CoroBegin but used after
    // CoroBegin, we recreate them after CoroBegin by applying the offset
    // to the pointer in the frame.
    for (const auto &Alias : A.Aliases) {
      auto *FramePtr = GetFramePointer(Alloca);
      auto &Value = *Alias.second;
      auto ITy = IntegerType::get(C, Value.getBitWidth());
      auto *AliasPtr =
          Builder.CreatePtrAdd(FramePtr, ConstantInt::get(ITy, Value));
      Alias.first->replaceUsesWithIf(
          AliasPtr, [&](Use &U) { return DT.dominates(CB, U); });
    }
  }

  // PromiseAlloca is not collected in FrameData.Allocas. So we don't handle
  // the case that the PromiseAlloca may have writes before CoroBegin in the
  // above codes. And it may be problematic in edge cases. See
  // https://github.com/llvm/llvm-project/issues/57861 for an example.
  if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) {
    AllocaInst *PA = Shape.SwitchLowering.PromiseAlloca;
    // If there is memory accessing to promise alloca before CoroBegin;
    bool HasAccessingPromiseBeforeCB = llvm::any_of(PA->uses(), [&](Use &U) {
      auto *Inst = dyn_cast<Instruction>(U.getUser());
      if (!Inst || DT.dominates(CB, Inst))
        return false;

      if (auto *CI = dyn_cast<CallInst>(Inst)) {
        // It is fine if the call wouldn't write to the Promise.
        // This is possible for @llvm.coro.id intrinsics, which
        // would take the promise as the second argument as a
        // marker.
        if (CI->onlyReadsMemory() ||
            CI->onlyReadsMemory(CI->getArgOperandNo(&U)))
          return false;
        return true;
      }

      return isa<StoreInst>(Inst) ||
             // It may take too much time to track the uses.
             // Be conservative about the case the use may escape.
             isa<GetElementPtrInst>(Inst) ||
             // There would always be a bitcast for the promise alloca
             // before we enabled Opaque pointers. And now given
             // opaque pointers are enabled by default. This should be
             // fine.
             isa<BitCastInst>(Inst);
    });
    if (HasAccessingPromiseBeforeCB) {
      // Copy the promise's pre-CoroBegin contents into its frame slot.
      Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
      auto *G = GetFramePointer(PA);
      auto *Value = Builder.CreateLoad(PA->getAllocatedType(), PA);
      Builder.CreateStore(Value, G);
    }
  }
}
2097
// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
// PHI in InsertedBB.
static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
                                         BasicBlock *InsertedBB,
                                         BasicBlock *PredBB,
                                         PHINode *UntilPHI = nullptr) {
  // SuccBB starts with at least one PHI; InsertedBB was split off the
  // PredBB->SuccBB edge, so each PHI in SuccBB currently lists InsertedBB as
  // a predecessor.
  auto *PN = cast<PHINode>(&SuccBB->front());
  do {
    int Index = PN->getBasicBlockIndex(InsertedBB);
    Value *V = PN->getIncomingValue(Index);
    // Build a single-operand PHI in InsertedBB carrying V from PredBB, and
    // reroute the original PHI through it.
    PHINode *InputV = PHINode::Create(
        V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName());
    InputV->insertBefore(InsertedBB->begin());
    InputV->addIncoming(V, PredBB);
    PN->setIncomingValue(Index, InputV);
    // Advance; dyn_cast yields nullptr past the last PHI, which matches the
    // default UntilPHI of nullptr. A non-null UntilPHI stops earlier
    // (exclusive).
    PN = dyn_cast<PHINode>(PN->getNextNode());
  } while (PN != UntilPHI);
}
2116
// Rewrites the PHI Nodes in a cleanuppad.
static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
                                     CleanupPadInst *CleanupPad) {
  // For every incoming edge to a CleanupPad we will create a new block holding
  // all incoming values in single-value PHI nodes. We will then create another
  // block to act as a dispatcher (as all unwind edges for related EH blocks
  // must be the same).
  //
  // cleanuppad:
  //    %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
  //    %3 = cleanuppad within none []
  //
  // It will create:
  //
  // cleanuppad.corodispatch
  //    %2 = phi i8[0, %catchswitch], [1, %catch.1]
  //    %3 = cleanuppad within none []
  //    switch i8 % 2, label %unreachable
  //            [i8 0, label %cleanuppad.from.catchswitch
  //             i8 1, label %cleanuppad.from.catch.1]
  // cleanuppad.from.catchswitch:
  //    %4 = phi i32 [%0, %catchswitch]
  //    br %label cleanuppad
  // cleanuppad.from.catch.1:
  //    %6 = phi i32 [%1, %catch.1]
  //    br %label cleanuppad
  // cleanuppad:
  //    %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
  //                 [%6, %cleanuppad.from.catch.1]

  // Unreachable BB, in case switching on an invalid value in the dispatcher.
  auto *UnreachBB = BasicBlock::Create(
      CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
  IRBuilder<> Builder(UnreachBB);
  Builder.CreateUnreachable();

  // Create a new cleanuppad which will be the dispatcher.
  auto *NewCleanupPadBB =
      BasicBlock::Create(CleanupPadBB->getContext(),
                         CleanupPadBB->getName() + Twine(".corodispatch"),
                         CleanupPadBB->getParent(), CleanupPadBB);
  Builder.SetInsertPoint(NewCleanupPadBB);
  auto *SwitchType = Builder.getInt8Ty();
  auto *SetDispatchValuePN =
      Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
  // The original cleanuppad moves into the dispatcher block; the switch on
  // the i8 predecessor tag follows it.
  CleanupPad->removeFromParent();
  CleanupPad->insertAfter(SetDispatchValuePN);
  auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
                                                pred_size(CleanupPadBB));

  int SwitchIndex = 0;
  SmallVector<BasicBlock *, 8> Preds(predecessors(CleanupPadBB));
  for (BasicBlock *Pred : Preds) {
    // Create a new cleanuppad and move the PHI values to there.
    auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
                                      CleanupPadBB->getName() +
                                          Twine(".from.") + Pred->getName(),
                                      CleanupPadBB->getParent(), CleanupPadBB);
    updatePhiNodes(CleanupPadBB, Pred, CaseBB);
    CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
                    Pred->getName());
    Builder.SetInsertPoint(CaseBB);
    Builder.CreateBr(CleanupPadBB);
    movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);

    // Update this Pred to the new unwind point.
    setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);

    // Setup the switch in the dispatcher.
    auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
    SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
    SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
    SwitchIndex++;
  }
}
2192
cleanupSinglePredPHIs(Function & F)2193 static void cleanupSinglePredPHIs(Function &F) {
2194 SmallVector<PHINode *, 32> Worklist;
2195 for (auto &BB : F) {
2196 for (auto &Phi : BB.phis()) {
2197 if (Phi.getNumIncomingValues() == 1) {
2198 Worklist.push_back(&Phi);
2199 } else
2200 break;
2201 }
2202 }
2203 while (!Worklist.empty()) {
2204 auto *Phi = Worklist.pop_back_val();
2205 auto *OriginalValue = Phi->getIncomingValue(0);
2206 Phi->replaceAllUsesWith(OriginalValue);
2207 }
2208 }
2209
static void rewritePHIs(BasicBlock &BB) {
  // For every incoming edge we will create a block holding all
  // incoming values in a single PHI nodes.
  //
  // loop:
  //    %n.val = phi i32[%n, %entry], [%inc, %loop]
  //
  // It will create:
  //
  // loop.from.entry:
  //    %n.loop.pre = phi i32 [%n, %entry]
  //    br %label loop
  // loop.from.loop:
  //    %inc.loop.pre = phi i32 [%inc, %loop]
  //    br %label loop
  //
  // After this rewrite, further analysis will ignore any phi nodes with more
  // than one incoming edge.

  // TODO: Simplify PHINodes in the basic block to remove duplicate
  // predecessors.

  // Special case for CleanupPad: all EH blocks must have the same unwind edge
  // so we need to create an additional "dispatcher" block.
  if (auto *CleanupPad =
          dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
    SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
    for (BasicBlock *Pred : Preds) {
      if (CatchSwitchInst *CS =
              dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
        // CleanupPad with a CatchSwitch predecessor: therefore this is an
        // unwind destination that needs to be handle specially.
        assert(CS->getUnwindDest() == &BB);
        (void)CS;
        rewritePHIsForCleanupPad(&BB, CleanupPad);
        return;
      }
    }
  }

  LandingPadInst *LandingPad = nullptr;
  PHINode *ReplPHI = nullptr;
  if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
    // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
    // We replace the original landing pad with a PHINode that will collect the
    // results from all of them.
    ReplPHI = PHINode::Create(LandingPad->getType(), 1, "");
    ReplPHI->insertBefore(LandingPad->getIterator());
    ReplPHI->takeName(LandingPad);
    LandingPad->replaceAllUsesWith(ReplPHI);
    // We will erase the original landing pad at the end of this function after
    // ehAwareSplitEdge cloned it in the transition blocks.
  }

  SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
  for (BasicBlock *Pred : Preds) {
    auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
    IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());

    // Stop the moving of values at ReplPHI, as this is either null or the PHI
    // that replaced the landing pad.
    movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
  }

  if (LandingPad) {
    // Calls to ehAwareSplitEdge function cloned the original landing pad.
    // No longer need it.
    LandingPad->eraseFromParent();
  }
}
2280
rewritePHIs(Function & F)2281 static void rewritePHIs(Function &F) {
2282 SmallVector<BasicBlock *, 8> WorkList;
2283
2284 for (BasicBlock &BB : F)
2285 if (auto *PN = dyn_cast<PHINode>(&BB.front()))
2286 if (PN->getNumIncomingValues() > 1)
2287 WorkList.push_back(&BB);
2288
2289 for (BasicBlock *BB : WorkList)
2290 rewritePHIs(*BB);
2291 }
2292
2293 /// Default materializable callback
2294 // Check for instructions that we can recreate on resume as opposed to spill
2295 // the result into a coroutine frame.
defaultMaterializable(Instruction & V)2296 bool coro::defaultMaterializable(Instruction &V) {
2297 return (isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
2298 isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V));
2299 }
2300
2301 // Check for structural coroutine intrinsics that should not be spilled into
2302 // the coroutine frame.
isCoroutineStructureIntrinsic(Instruction & I)2303 static bool isCoroutineStructureIntrinsic(Instruction &I) {
2304 return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
2305 isa<CoroSuspendInst>(&I);
2306 }
2307
// For each instruction identified as materializable across the suspend point,
// and its associated DAG of other rematerializable instructions,
// recreate the DAG of instructions after the suspend point.
static void rewriteMaterializableInstructions(
    const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8>
        &AllRemats) {
  // This has to be done in 2 phases
  // Do the remats and record the required defs to be replaced in the
  // original use instructions
  // Once all the remats are complete, replace the uses in the final
  // instructions with the new defs
  typedef struct {
    Instruction *Use;   // The original use instruction (remat target).
    Instruction *Def;   // The original def the use refers to.
    Instruction *Remat; // The cloned (rematerialized) replacement for Def.
  } ProcessNode;

  SmallVector<ProcessNode> FinalInstructionsToProcess;

  for (const auto &E : AllRemats) {
    Instruction *Use = E.first;
    Instruction *CurrentMaterialization = nullptr;
    RematGraph *RG = E.second.get();
    // RPO over the remat graph guarantees each clone's operands are cloned
    // before it is.
    ReversePostOrderTraversal<RematGraph *> RPOT(RG);
    SmallVector<Instruction *> InstructionsToProcess;

    // If the target use is actually a suspend instruction then we have to
    // insert the remats into the end of the predecessor (there should only be
    // one). This is so that suspend blocks always have the suspend instruction
    // as the first instruction.
    auto InsertPoint = &*Use->getParent()->getFirstInsertionPt();
    if (isa<AnyCoroSuspendInst>(Use)) {
      BasicBlock *SuspendPredecessorBlock =
          Use->getParent()->getSinglePredecessor();
      assert(SuspendPredecessorBlock && "malformed coro suspend instruction");
      InsertPoint = SuspendPredecessorBlock->getTerminator();
    }

    // Note: skip the first instruction as this is the actual use that we're
    // rematerializing everything for.
    auto I = RPOT.begin();
    ++I;
    for (; I != RPOT.end(); ++I) {
      Instruction *D = (*I)->Node;
      CurrentMaterialization = D->clone();
      CurrentMaterialization->setName(D->getName());
      CurrentMaterialization->insertBefore(InsertPoint);
      // Subsequent clones go before this one, preserving RPO order in the IR.
      InsertPoint = CurrentMaterialization;

      // Replace all uses of Def in the instructions being added as part of this
      // rematerialization group
      for (auto &I : InstructionsToProcess)
        I->replaceUsesOfWith(D, CurrentMaterialization);

      // Don't replace the final use at this point as this can cause problems
      // for other materializations. Instead, for any final use that uses a
      // define that's being rematerialized, record the replace values
      for (unsigned i = 0, E = Use->getNumOperands(); i != E; ++i)
        if (Use->getOperand(i) == D) // Is this operand pointing to oldval?
          FinalInstructionsToProcess.push_back(
              {Use, D, CurrentMaterialization});

      InstructionsToProcess.push_back(CurrentMaterialization);
    }
  }

  // Finally, replace the uses with the defines that we've just rematerialized
  for (auto &R : FinalInstructionsToProcess) {
    if (auto *PN = dyn_cast<PHINode>(R.Use)) {
      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
                                                "values in the PHINode");
      PN->replaceAllUsesWith(R.Remat);
      PN->eraseFromParent();
      continue;
    }
    R.Use->replaceUsesOfWith(R.Def, R.Remat);
  }
}
2386
2387 // Splits the block at a particular instruction unless it is the first
2388 // instruction in the block with a single predecessor.
splitBlockIfNotFirst(Instruction * I,const Twine & Name)2389 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
2390 auto *BB = I->getParent();
2391 if (&BB->front() == I) {
2392 if (BB->getSinglePredecessor()) {
2393 BB->setName(Name);
2394 return BB;
2395 }
2396 }
2397 return BB->splitBasicBlock(I, Name);
2398 }
2399
// Split above and below a particular instruction so that it
// will be all alone by itself in a block.
static void splitAround(Instruction *I, const Twine &Name) {
  // First split makes I the head of a block named Name; second split moves
  // everything after I into a block named "After" + Name.
  splitBlockIfNotFirst(I, Name);
  splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
}
2406
// Returns true if BB begins with a coroutine suspend instruction. Suspends
// are assumed to have already been split into their own blocks.
static bool isSuspendBlock(BasicBlock *BB) {
  return isa<AnyCoroSuspendInst>(BB->front());
}
2410
2411 typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
2412
2413 /// Does control flow starting at the given block ever reach a suspend
2414 /// instruction before reaching a block in VisitedOrFreeBBs?
isSuspendReachableFrom(BasicBlock * From,VisitedBlocksSet & VisitedOrFreeBBs)2415 static bool isSuspendReachableFrom(BasicBlock *From,
2416 VisitedBlocksSet &VisitedOrFreeBBs) {
2417 // Eagerly try to add this block to the visited set. If it's already
2418 // there, stop recursing; this path doesn't reach a suspend before
2419 // either looping or reaching a freeing block.
2420 if (!VisitedOrFreeBBs.insert(From).second)
2421 return false;
2422
2423 // We assume that we'll already have split suspends into their own blocks.
2424 if (isSuspendBlock(From))
2425 return true;
2426
2427 // Recurse on the successors.
2428 for (auto *Succ : successors(From)) {
2429 if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
2430 return true;
2431 }
2432
2433 return false;
2434 }
2435
2436 /// Is the given alloca "local", i.e. bounded in lifetime to not cross a
2437 /// suspend point?
isLocalAlloca(CoroAllocaAllocInst * AI)2438 static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
2439 // Seed the visited set with all the basic blocks containing a free
2440 // so that we won't pass them up.
2441 VisitedBlocksSet VisitedOrFreeBBs;
2442 for (auto *User : AI->users()) {
2443 if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
2444 VisitedOrFreeBBs.insert(FI->getParent());
2445 }
2446
2447 return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
2448 }
2449
2450 /// After we split the coroutine, will the given basic block be along
2451 /// an obvious exit path for the resumption function?
willLeaveFunctionImmediatelyAfter(BasicBlock * BB,unsigned depth=3)2452 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
2453 unsigned depth = 3) {
2454 // If we've bottomed out our depth count, stop searching and assume
2455 // that the path might loop back.
2456 if (depth == 0) return false;
2457
2458 // If this is a suspend block, we're about to exit the resumption function.
2459 if (isSuspendBlock(BB)) return true;
2460
2461 // Recurse into the successors.
2462 for (auto *Succ : successors(BB)) {
2463 if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
2464 return false;
2465 }
2466
2467 // If none of the successors leads back in a loop, we're on an exit/abort.
2468 return true;
2469 }
2470
localAllocaNeedsStackSave(CoroAllocaAllocInst * AI)2471 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
2472 // Look for a free that isn't sufficiently obviously followed by
2473 // either a suspend or a termination, i.e. something that will leave
2474 // the coro resumption frame.
2475 for (auto *U : AI->users()) {
2476 auto FI = dyn_cast<CoroAllocaFreeInst>(U);
2477 if (!FI) continue;
2478
2479 if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
2480 return true;
2481 }
2482
2483 // If we never found one, we don't need a stack save.
2484 return false;
2485 }
2486
/// Turn each of the given local allocas into a normal (dynamic) alloca
/// instruction.
///
/// Each coro.alloca.alloc is replaced with a plain i8 alloca of the same
/// size and alignment; coro.alloca.get uses are redirected to that alloca,
/// and coro.alloca.free uses become stack restores (when a stack save was
/// emitted). The replaced intrinsics are queued on DeadInsts for the
/// caller to erase.
static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
                              SmallVectorImpl<Instruction*> &DeadInsts) {
  for (auto *AI : LocalAllocas) {
    IRBuilder<> Builder(AI);

    // Save the stack depth. Try to avoid doing this if the stackrestore
    // is going to immediately precede a return or something.
    Value *StackSave = nullptr;
    if (localAllocaNeedsStackSave(AI))
      StackSave = Builder.CreateStackSave();

    // Allocate memory.
    auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
    Alloca->setAlignment(AI->getAlignment());

    for (auto *U : AI->users()) {
      // Replace gets with the allocation.
      if (isa<CoroAllocaGetInst>(U)) {
        U->replaceAllUsesWith(Alloca);

        // Replace frees with stackrestores. This is safe because
        // alloca.alloc is required to obey a stack discipline, although we
        // don't enforce that structurally.
      } else {
        auto FI = cast<CoroAllocaFreeInst>(U);
        if (StackSave) {
          Builder.SetInsertPoint(FI);
          Builder.CreateStackRestore(StackSave);
        }
      }
      // The rewritten get/free is now dead; defer the erase so we don't
      // invalidate the users() iteration we are standing in.
      DeadInsts.push_back(cast<Instruction>(U));
    }

    DeadInsts.push_back(AI);
  }
}
2525
/// Turn the given coro.alloca.alloc call into a dynamic allocation.
/// This happens during the all-instructions iteration, so it must not
/// delete the call.
static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
                                        coro::Shape &Shape,
                                        SmallVectorImpl<Instruction*> &DeadInsts) {
  IRBuilder<> Builder(AI);
  // Allocate via the coroutine's ABI-specific allocator.
  auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);

  // Rewrite the alloca intrinsics: gets become the allocation itself, and
  // frees become ABI-specific deallocations at the free's position.
  for (User *U : AI->users()) {
    if (isa<CoroAllocaGetInst>(U)) {
      U->replaceAllUsesWith(Alloc);
    } else {
      auto FI = cast<CoroAllocaFreeInst>(U);
      Builder.SetInsertPoint(FI);
      Shape.emitDealloc(Builder, Alloc, nullptr);
    }
    // Defer the erase; see function comment.
    DeadInsts.push_back(cast<Instruction>(U));
  }

  // Push this on last so that it gets deleted after all the others.
  DeadInsts.push_back(AI);

  // Return the new allocation value so that we can check for needed spills.
  return cast<Instruction>(Alloc);
}
2552
2553 /// Get the current swifterror value.
emitGetSwiftErrorValue(IRBuilder<> & Builder,Type * ValueTy,coro::Shape & Shape)2554 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
2555 coro::Shape &Shape) {
2556 // Make a fake function pointer as a sort of intrinsic.
2557 auto FnTy = FunctionType::get(ValueTy, {}, false);
2558 auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
2559
2560 auto Call = Builder.CreateCall(FnTy, Fn, {});
2561 Shape.SwiftErrorOps.push_back(Call);
2562
2563 return Call;
2564 }
2565
2566 /// Set the given value as the current swifterror value.
2567 ///
2568 /// Returns a slot that can be used as a swifterror slot.
emitSetSwiftErrorValue(IRBuilder<> & Builder,Value * V,coro::Shape & Shape)2569 static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
2570 coro::Shape &Shape) {
2571 // Make a fake function pointer as a sort of intrinsic.
2572 auto FnTy = FunctionType::get(Builder.getPtrTy(),
2573 {V->getType()}, false);
2574 auto Fn = ConstantPointerNull::get(Builder.getPtrTy());
2575
2576 auto Call = Builder.CreateCall(FnTy, Fn, { V });
2577 Shape.SwiftErrorOps.push_back(Call);
2578
2579 return Call;
2580 }
2581
/// Set the swifterror value from the given alloca before a call,
/// then put in back in the alloca afterwards.
///
/// Returns an address that will stand in for the swifterror slot
/// until splitting.
static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
                                                 AllocaInst *Alloca,
                                                 coro::Shape &Shape) {
  auto ValueTy = Alloca->getAllocatedType();
  IRBuilder<> Builder(Call);

  // Load the current value from the alloca and set it as the
  // swifterror value.
  auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
  auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);

  // Move to after the call. Since swifterror only has a guaranteed
  // value on normal exits, we can ignore implicit and explicit unwind
  // edges.
  if (isa<CallInst>(Call)) {
    Builder.SetInsertPoint(Call->getNextNode());
  } else {
    // For an invoke, the "after" position is the start of the normal
    // destination (past any PHIs/debug intrinsics).
    auto Invoke = cast<InvokeInst>(Call);
    Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
  }

  // Get the current swifterror value and store it to the alloca.
  auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
  Builder.CreateStore(ValueAfterCall, Alloca);

  return Addr;
}
2614
/// Eliminate a formerly-swifterror alloca by inserting the get/set
/// intrinsics and attempting to MemToReg the alloca away.
static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
                                      coro::Shape &Shape) {
  // Early-increment iteration: Use.set below replaces the use we are
  // currently visiting.
  for (Use &Use : llvm::make_early_inc_range(Alloca->uses())) {
    // swifterror values can only be used in very specific ways.
    // We take advantage of that here.
    auto User = Use.getUser();
    if (isa<LoadInst>(User) || isa<StoreInst>(User))
      continue;

    assert(isa<CallInst>(User) || isa<InvokeInst>(User));
    auto Call = cast<Instruction>(User);

    auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);

    // Use the returned slot address as the call argument.
    Use.set(Addr);
  }

  // All the uses should be loads and stores now.
  assert(isAllocaPromotable(Alloca));
}
2638
2639 /// "Eliminate" a swifterror argument by reducing it to the alloca case
2640 /// and then loading and storing in the prologue and epilog.
2641 ///
2642 /// The argument keeps the swifterror flag.
static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
                                        coro::Shape &Shape,
                                        SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
  IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());

  auto ArgTy = cast<PointerType>(Arg.getType());
  auto ValueTy = PointerType::getUnqual(F.getContext());

  // Reduce to the alloca case:

  // Create an alloca and replace all uses of the arg with it.
  auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
  Arg.replaceAllUsesWith(Alloca);

  // Set an initial value in the alloca. swifterror is always null on entry.
  auto InitialValue = Constant::getNullValue(ValueTy);
  Builder.CreateStore(InitialValue, Alloca);

  // Find all the suspends in the function and save and restore around them.
  for (auto *Suspend : Shape.CoroSuspends) {
    (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
  }

  // Find all the coro.ends in the function and restore the error value.
  // (The caller's swifterror register must hold the final value on exit.)
  for (auto *End : Shape.CoroEnds) {
    Builder.SetInsertPoint(End);
    auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
    (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
  }

  // Now we can use the alloca logic.
  AllocasToPromote.push_back(Alloca);
  eliminateSwiftErrorAlloca(F, Alloca, Shape);
}
2677
2678 /// Eliminate all problematic uses of swifterror arguments and allocas
2679 /// from the function. We'll fix them up later when splitting the function.
eliminateSwiftError(Function & F,coro::Shape & Shape)2680 static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
2681 SmallVector<AllocaInst*, 4> AllocasToPromote;
2682
2683 // Look for a swifterror argument.
2684 for (auto &Arg : F.args()) {
2685 if (!Arg.hasSwiftErrorAttr()) continue;
2686
2687 eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
2688 break;
2689 }
2690
2691 // Look for swifterror allocas.
2692 for (auto &Inst : F.getEntryBlock()) {
2693 auto Alloca = dyn_cast<AllocaInst>(&Inst);
2694 if (!Alloca || !Alloca->isSwiftError()) continue;
2695
2696 // Clear the swifterror flag.
2697 Alloca->setSwiftError(false);
2698
2699 AllocasToPromote.push_back(Alloca);
2700 eliminateSwiftErrorAlloca(F, Alloca, Shape);
2701 }
2702
2703 // If we have any allocas to promote, compute a dominator tree and
2704 // promote them en masse.
2705 if (!AllocasToPromote.empty()) {
2706 DominatorTree DT(F);
2707 PromoteMemToReg(AllocasToPromote, DT);
2708 }
2709 }
2710
/// retcon and retcon.once conventions assume that all spill uses can be sunk
/// after the coro.begin intrinsic.
static void sinkSpillUsesAfterCoroBegin(Function &F,
                                        const FrameDataInfo &FrameData,
                                        CoroBeginInst *CoroBegin) {
  DominatorTree Dom(F);

  SmallSetVector<Instruction *, 32> ToMove;
  SmallVector<Instruction *, 32> Worklist;

  // Collect all users that precede coro.begin.
  // The kept instructions are the direct users of frame defs that sit in
  // coro.begin's own block and are not dominated by it, i.e. precede it.
  for (auto *Def : FrameData.getAllDefs()) {
    for (User *U : Def->users()) {
      auto Inst = cast<Instruction>(U);
      if (Inst->getParent() != CoroBegin->getParent() ||
          Dom.dominates(CoroBegin, Inst))
        continue;
      if (ToMove.insert(Inst))
        Worklist.push_back(Inst);
    }
  }
  // Recursively collect users before coro.begin.
  while (!Worklist.empty()) {
    auto *Def = Worklist.pop_back_val();
    for (User *U : Def->users()) {
      auto Inst = cast<Instruction>(U);
      if (Dom.dominates(CoroBegin, Inst))
        continue;
      if (ToMove.insert(Inst))
        Worklist.push_back(Inst);
    }
  }

  // Sort by dominance.
  // NOTE(review): dominance is only a partial order in general; this
  // presumably relies on the collected instructions all preceding
  // coro.begin so they are totally ordered -- confirm.
  SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
  llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool {
    // If a dominates b it should preceed (<) b.
    return Dom.dominates(A, B);
  });

  // Re-insert everything, in dominance order, right after coro.begin.
  Instruction *InsertPt = CoroBegin->getNextNode();
  for (Instruction *Inst : InsertionList)
    Inst->moveBefore(InsertPt);
}
2755
/// For each local variable that all of its user are only used inside one of
/// suspended region, we sink their lifetime.start markers to the place where
/// after the suspend block. Doing so minimizes the lifetime of each variable,
/// hence minimizing the amount of data we end up putting on the frame.
static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
                                     SuspendCrossingInfo &Checker,
                                     const DominatorTree &DT) {
  // Skip at -O0 / optnone: this is purely a frame-size optimization.
  if (F.hasOptNone())
    return;

  // Collect all possible basic blocks which may dominate all uses of allocas.
  // Candidates are the entry block and the single successor of each suspend
  // block (the resume point).
  SmallPtrSet<BasicBlock *, 4> DomSet;
  DomSet.insert(&F.getEntryBlock());
  for (auto *CSI : Shape.CoroSuspends) {
    BasicBlock *SuspendBlock = CSI->getParent();
    assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
           "should have split coro.suspend into its own block");
    DomSet.insert(SuspendBlock->getSingleSuccessor());
  }

  for (Instruction &I : instructions(F)) {
    AllocaInst* AI = dyn_cast<AllocaInst>(&I);
    if (!AI)
      continue;

    // Try each candidate dominating block in turn; stop at the first one
    // that works (break at the bottom).
    for (BasicBlock *DomBB : DomSet) {
      bool Valid = true;
      // lifetime.start markers found outside the candidate region; these
      // are the ones we would sink.
      SmallVector<Instruction *, 1> Lifetimes;

      auto isLifetimeStart = [](Instruction* I) {
        if (auto* II = dyn_cast<IntrinsicInst>(I))
          return II->getIntrinsicID() == Intrinsic::lifetime_start;
        return false;
      };

      // Accept U if it is a lifetime.start itself, or a single-use
      // GEP/bitcast of AI whose only user is a lifetime.start; record the
      // marker in Lifetimes either way.
      auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
        if (isLifetimeStart(U)) {
          Lifetimes.push_back(U);
          return true;
        }
        if (!U->hasOneUse() || U->stripPointerCasts() != AI)
          return false;
        if (isLifetimeStart(U->user_back())) {
          Lifetimes.push_back(U->user_back());
          return true;
        }
        return false;
      };

      for (User *U : AI->users()) {
        Instruction *UI = cast<Instruction>(U);
        // For all users except lifetime.start markers, if they are all
        // dominated by one of the basic blocks and do not cross
        // suspend points as well, then there is no need to spill the
        // instruction.
        if (!DT.dominates(DomBB, UI->getParent()) ||
            Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
          // Skip lifetime.start, GEP and bitcast used by lifetime.start
          // markers.
          if (collectLifetimeStart(UI, AI))
            continue;
          Valid = false;
          break;
        }
      }
      // Sink lifetime.start markers to dominate block when they are
      // only used outside the region.
      if (Valid && Lifetimes.size() != 0) {
        // Clone one marker at the end of the dominating block, pointing
        // directly at the alloca (bypassing any casts).
        auto *NewLifetime = Lifetimes[0]->clone();
        NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), AI);
        NewLifetime->insertBefore(DomBB->getTerminator());

        // All the outside lifetime.start markers are no longer necessary.
        for (Instruction *S : Lifetimes)
          S->eraseFromParent();

        break;
      }
    }
  }
}
2837
collectFrameAlloca(AllocaInst * AI,coro::Shape & Shape,const SuspendCrossingInfo & Checker,SmallVectorImpl<AllocaInfo> & Allocas,const DominatorTree & DT)2838 static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
2839 const SuspendCrossingInfo &Checker,
2840 SmallVectorImpl<AllocaInfo> &Allocas,
2841 const DominatorTree &DT) {
2842 if (Shape.CoroSuspends.empty())
2843 return;
2844
2845 // The PromiseAlloca will be specially handled since it needs to be in a
2846 // fixed position in the frame.
2847 if (AI == Shape.SwitchLowering.PromiseAlloca)
2848 return;
2849
2850 // The __coro_gro alloca should outlive the promise, make sure we
2851 // keep it outside the frame.
2852 if (AI->hasMetadata(LLVMContext::MD_coro_outside_frame))
2853 return;
2854
2855 // The code that uses lifetime.start intrinsic does not work for functions
2856 // with loops without exit. Disable it on ABIs we know to generate such
2857 // code.
2858 bool ShouldUseLifetimeStartInfo =
2859 (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
2860 Shape.ABI != coro::ABI::RetconOnce);
2861 AllocaUseVisitor Visitor{AI->getDataLayout(), DT, Shape, Checker,
2862 ShouldUseLifetimeStartInfo};
2863 Visitor.visitPtr(*AI);
2864 if (!Visitor.getShouldLiveOnFrame())
2865 return;
2866 Allocas.emplace_back(AI, Visitor.getAliasesCopy(),
2867 Visitor.getMayWriteBeforeCoroBegin());
2868 }
2869
/// Walk backwards from \p Storage through loads, stores, and salvageable
/// instructions to find the underlying storage (ideally the incoming
/// coroutine-frame argument), building an equivalent DIExpression along the
/// way. Returns the final storage value and expression, or std::nullopt if
/// the chain could not be followed.
static std::optional<std::pair<Value &, DIExpression &>>
salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
                     bool OptimizeFrame, bool UseEntryValue, Function *F,
                     Value *Storage, DIExpression *Expr,
                     bool SkipOutermostLoad) {
  // Position the builder at the top of the entry block, past any leading
  // intrinsics, in case we need to materialize an alloca below.
  IRBuilder<> Builder(F->getContext());
  auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
  while (isa<IntrinsicInst>(InsertPt))
    ++InsertPt;
  Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);

  while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
    if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
      Storage = LdInst->getPointerOperand();
      // FIXME: This is a heuristic that works around the fact that
      // LLVM IR debug intrinsics cannot yet distinguish between
      // memory and value locations: Because a dbg.declare(alloca) is
      // implicitly a memory location no DW_OP_deref operation for the
      // last direct load from an alloca is necessary. This condition
      // effectively drops the *last* DW_OP_deref in the expression.
      if (!SkipOutermostLoad)
        Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
    } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
      // Follow the stored value instead of the store itself.
      Storage = StInst->getValueOperand();
    } else {
      SmallVector<uint64_t, 16> Ops;
      SmallVector<Value *, 0> AdditionalValues;
      Value *Op = llvm::salvageDebugInfoImpl(
          *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
          AdditionalValues);
      if (!Op || !AdditionalValues.empty()) {
        // If salvaging failed or salvaging produced more than one location
        // operand, give up.
        break;
      }
      Storage = Op;
      Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
    }
    // The skip-the-outermost-load heuristic applies only to the first step.
    SkipOutermostLoad = false;
  }
  if (!Storage)
    return std::nullopt;

  auto *StorageAsArg = dyn_cast<Argument>(Storage);
  const bool IsSwiftAsyncArg =
      StorageAsArg && StorageAsArg->hasAttribute(Attribute::SwiftAsync);

  // Swift async arguments are described by an entry value of the ABI-defined
  // register containing the coroutine context.
  // Entry values in variadic expressions are not supported.
  if (IsSwiftAsyncArg && UseEntryValue && !Expr->isEntryValue() &&
      Expr->isSingleLocationExpression())
    Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);

  // If the coroutine frame is an Argument, store it in an alloca to improve
  // its availability (e.g. registers may be clobbered).
  // Avoid this if optimizations are enabled (they would remove the alloca) or
  // if the value is guaranteed to be available through other means (e.g. swift
  // ABI guarantees).
  if (StorageAsArg && !OptimizeFrame && !IsSwiftAsyncArg) {
    // One shared alloca per argument, cached across calls.
    auto &Cached = ArgToAllocaMap[StorageAsArg];
    if (!Cached) {
      Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
                                    Storage->getName() + ".debug");
      Builder.CreateStore(Storage, Cached);
    }
    Storage = Cached;
    // FIXME: LLVM lacks nuanced semantics to differentiate between
    // memory and direct locations at the IR level. The backend will
    // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
    // location. Thus, if there are deref and offset operations in the
    // expression, we need to add a DW_OP_deref at the *start* of the
    // expression to first load the contents of the alloca before
    // adjusting it with the expression.
    Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
  }

  return {{*Storage, *Expr}};
}
2949
/// Salvage the location of a debug intrinsic that refers into the coroutine
/// frame: rewrite its location operand/expression via salvageDebugInfoImpl
/// and, for dbg.declare only, hoist the intrinsic next to the definition of
/// the salvaged storage.
void coro::salvageDebugInfo(
    SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
    DbgVariableIntrinsic &DVI, bool OptimizeFrame, bool UseEntryValue) {

  Function *F = DVI.getFunction();
  // Follow the pointer arithmetic all the way to the incoming
  // function argument and convert into a DIExpression.
  bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
  Value *OriginalStorage = DVI.getVariableLocationOp(0);

  auto SalvagedInfo = ::salvageDebugInfoImpl(
      ArgToAllocaMap, OptimizeFrame, UseEntryValue, F, OriginalStorage,
      DVI.getExpression(), SkipOutermostLoad);
  if (!SalvagedInfo)
    return;

  Value *Storage = &SalvagedInfo->first;
  DIExpression *Expr = &SalvagedInfo->second;

  DVI.replaceVariableLocationOp(OriginalStorage, Storage);
  DVI.setExpression(Expr);
  // We only hoist dbg.declare today since it doesn't make sense to hoist
  // dbg.value since it does not have the same function wide guarantees that
  // dbg.declare does.
  if (isa<DbgDeclareInst>(DVI)) {
    std::optional<BasicBlock::iterator> InsertPt;
    if (auto *I = dyn_cast<Instruction>(Storage)) {
      InsertPt = I->getInsertionPointAfterDef();
      // Update DILocation only if variable was not inlined.
      DebugLoc ILoc = I->getDebugLoc();
      DebugLoc DVILoc = DVI.getDebugLoc();
      if (ILoc && DVILoc &&
          DVILoc->getScope()->getSubprogram() ==
              ILoc->getScope()->getSubprogram())
        DVI.setDebugLoc(I->getDebugLoc());
    } else if (isa<Argument>(Storage))
      // Arguments are visible from the top of the function.
      InsertPt = F->getEntryBlock().begin();
    if (InsertPt)
      DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
  }
}
2991
/// DbgVariableRecord (non-intrinsic debug info) overload of the salvage
/// routine above: rewrite the record's location operand/expression and, for
/// declare records only, hoist the record next to the definition of the
/// salvaged storage.
void coro::salvageDebugInfo(
    SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
    DbgVariableRecord &DVR, bool OptimizeFrame, bool UseEntryValue) {

  Function *F = DVR.getFunction();
  // Follow the pointer arithmetic all the way to the incoming
  // function argument and convert into a DIExpression.
  bool SkipOutermostLoad = DVR.isDbgDeclare();
  Value *OriginalStorage = DVR.getVariableLocationOp(0);

  auto SalvagedInfo = ::salvageDebugInfoImpl(
      ArgToAllocaMap, OptimizeFrame, UseEntryValue, F, OriginalStorage,
      DVR.getExpression(), SkipOutermostLoad);
  if (!SalvagedInfo)
    return;

  Value *Storage = &SalvagedInfo->first;
  DIExpression *Expr = &SalvagedInfo->second;

  DVR.replaceVariableLocationOp(OriginalStorage, Storage);
  DVR.setExpression(Expr);
  // We only hoist dbg.declare today since it doesn't make sense to hoist
  // dbg.value since it does not have the same function wide guarantees that
  // dbg.declare does.
  if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
    std::optional<BasicBlock::iterator> InsertPt;
    if (auto *I = dyn_cast<Instruction>(Storage)) {
      InsertPt = I->getInsertionPointAfterDef();
      // Update DILocation only if variable was not inlined.
      DebugLoc ILoc = I->getDebugLoc();
      DebugLoc DVRLoc = DVR.getDebugLoc();
      if (ILoc && DVRLoc &&
          DVRLoc->getScope()->getSubprogram() ==
              ILoc->getScope()->getSubprogram())
        DVR.setDebugLoc(ILoc);
    } else if (isa<Argument>(Storage))
      // Arguments are visible from the top of the function.
      InsertPt = F->getEntryBlock().begin();
    if (InsertPt) {
      // Debug records are moved via remove + insert, not moveBefore.
      DVR.removeFromParent();
      (*InsertPt)->getParent()->insertDbgRecordBefore(&DVR, *InsertPt);
    }
  }
}
3035
/// Find instructions that are cheap to recompute (per MaterializableCallback)
/// and are used across a suspend point, and rematerialize them at their use
/// points instead of spilling them to the coroutine frame.
static void doRematerializations(
    Function &F, SuspendCrossingInfo &Checker,
    const std::function<bool(Instruction &)> &MaterializableCallback) {
  // Skip at -O0 / optnone: rematerialization is purely an optimization.
  if (F.hasOptNone())
    return;

  SpillInfo Spills;

  // See if there are materializable instructions across suspend points
  // We record these as the starting point to also identify materializable
  // defs of uses in these operations
  for (Instruction &I : instructions(F)) {
    if (!MaterializableCallback(I))
      continue;
    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U))
        Spills[&I].push_back(cast<Instruction>(U));
  }

  // Process each of the identified rematerializable instructions
  // and add predecessor instructions that can also be rematerialized.
  // This is actually a graph of instructions since we could potentially
  // have multiple uses of a def in the set of predecessor instructions.
  // The approach here is to maintain a graph of instructions for each bottom
  // level instruction - where we have a unique set of instructions (nodes)
  // and edges between them. We then walk the graph in reverse post-dominator
  // order to insert them past the suspend point, but ensure that ordering is
  // correct. We also rely on CSE removing duplicate defs for remats of
  // different instructions with a def in common (rather than maintaining more
  // complex graphs for each suspend point)

  // We can do this by adding new nodes to the list for each suspend
  // point. Then using standard GraphTraits to give a reverse post-order
  // traversal when we insert the nodes after the suspend
  SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> AllRemats;
  for (auto &E : Spills) {
    for (Instruction *U : E.second) {
      // Don't process a user twice (this can happen if the instruction uses
      // more than one rematerializable def)
      if (AllRemats.count(U))
        continue;

      // Constructor creates the whole RematGraph for the given Use
      auto RematUPtr =
          std::make_unique<RematGraph>(MaterializableCallback, U, Checker);

      LLVM_DEBUG(dbgs() << "***** Next remat group *****\n";
                 ReversePostOrderTraversal<RematGraph *> RPOT(RematUPtr.get());
                 for (auto I = RPOT.begin(); I != RPOT.end();
                      ++I) { (*I)->Node->dump(); } dbgs()
                 << "\n";);

      AllRemats[U] = std::move(RematUPtr);
    }
  }

  // Rewrite materializable instructions to be materialized at the use
  // point.
  LLVM_DEBUG(dumpRemats("Materializations", AllRemats));
  rewriteMaterializableInstructions(AllRemats);
}
3097
buildCoroutineFrame(Function & F,Shape & Shape,TargetTransformInfo & TTI,const std::function<bool (Instruction &)> & MaterializableCallback)3098 void coro::buildCoroutineFrame(
3099 Function &F, Shape &Shape, TargetTransformInfo &TTI,
3100 const std::function<bool(Instruction &)> &MaterializableCallback) {
3101 // Don't eliminate swifterror in async functions that won't be split.
3102 if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
3103 eliminateSwiftError(F, Shape);
3104
3105 if (Shape.ABI == coro::ABI::Switch &&
3106 Shape.SwitchLowering.PromiseAlloca) {
3107 Shape.getSwitchCoroId()->clearPromise();
3108 }
3109
3110 // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
3111 // intrinsics are in their own blocks to simplify the logic of building up
3112 // SuspendCrossing data.
3113 for (auto *CSI : Shape.CoroSuspends) {
3114 if (auto *Save = CSI->getCoroSave())
3115 splitAround(Save, "CoroSave");
3116 splitAround(CSI, "CoroSuspend");
3117 }
3118
3119 // Put CoroEnds into their own blocks.
3120 for (AnyCoroEndInst *CE : Shape.CoroEnds) {
3121 splitAround(CE, "CoroEnd");
3122
3123 // Emit the musttail call function in a new block before the CoroEnd.
3124 // We do this here so that the right suspend crossing info is computed for
3125 // the uses of the musttail call function call. (Arguments to the coro.end
3126 // instructions would be ignored)
3127 if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
3128 auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
3129 if (!MustTailCallFn)
3130 continue;
3131 IRBuilder<> Builder(AsyncEnd);
3132 SmallVector<Value *, 8> Args(AsyncEnd->args());
3133 auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
3134 auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn,
3135 TTI, Arguments, Builder);
3136 splitAround(Call, "MustTailCall.Before.CoroEnd");
3137 }
3138 }
3139
3140 // Later code makes structural assumptions about single predecessors phis e.g
3141 // that they are not live across a suspend point.
3142 cleanupSinglePredPHIs(F);
3143
3144 // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
3145 // never has its definition separated from the PHI by the suspend point.
3146 rewritePHIs(F);
3147
3148 // Build suspend crossing info.
3149 SuspendCrossingInfo Checker(F, Shape);
3150
3151 doRematerializations(F, Checker, MaterializableCallback);
3152
3153 const DominatorTree DT(F);
3154 FrameDataInfo FrameData;
3155 SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
3156 SmallVector<Instruction*, 4> DeadInstructions;
3157 if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
3158 Shape.ABI != coro::ABI::RetconOnce)
3159 sinkLifetimeStartMarkers(F, Shape, Checker, DT);
3160
3161 // Collect the spills for arguments and other not-materializable values.
3162 for (Argument &A : F.args())
3163 for (User *U : A.users())
3164 if (Checker.isDefinitionAcrossSuspend(A, U))
3165 FrameData.Spills[&A].push_back(cast<Instruction>(U));
3166
3167 for (Instruction &I : instructions(F)) {
3168 // Values returned from coroutine structure intrinsics should not be part
3169 // of the Coroutine Frame.
3170 if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
3171 continue;
3172
3173 // Handle alloca.alloc specially here.
3174 if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
3175 // Check whether the alloca's lifetime is bounded by suspend points.
3176 if (isLocalAlloca(AI)) {
3177 LocalAllocas.push_back(AI);
3178 continue;
3179 }
3180
3181 // If not, do a quick rewrite of the alloca and then add spills of
3182 // the rewritten value. The rewrite doesn't invalidate anything in
3183 // Spills because the other alloca intrinsics have no other operands
3184 // besides AI, and it doesn't invalidate the iteration because we delay
3185 // erasing AI.
3186 auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);
3187
3188 for (User *U : Alloc->users()) {
3189 if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
3190 FrameData.Spills[Alloc].push_back(cast<Instruction>(U));
3191 }
3192 continue;
3193 }
3194
3195 // Ignore alloca.get; we process this as part of coro.alloca.alloc.
3196 if (isa<CoroAllocaGetInst>(I))
3197 continue;
3198
3199 if (auto *AI = dyn_cast<AllocaInst>(&I)) {
3200 collectFrameAlloca(AI, Shape, Checker, FrameData.Allocas, DT);
3201 continue;
3202 }
3203
3204 for (User *U : I.users())
3205 if (Checker.isDefinitionAcrossSuspend(I, U)) {
3206 // We cannot spill a token.
3207 if (I.getType()->isTokenTy())
3208 report_fatal_error(
3209 "token definition is separated from the use by a suspend point");
3210 FrameData.Spills[&I].push_back(cast<Instruction>(U));
3211 }
3212 }
3213
3214 LLVM_DEBUG(dumpAllocas(FrameData.Allocas));
3215
  // We don't want the layout of the coroutine frame to be affected by debug
  // information, so we only choose to salvage dbg.value instructions whose
  // values are already in the frame.
  // The dbg.values for allocas are handled specially below.
3220 for (auto &Iter : FrameData.Spills) {
3221 auto *V = Iter.first;
3222 SmallVector<DbgValueInst *, 16> DVIs;
3223 SmallVector<DbgVariableRecord *, 16> DVRs;
3224 findDbgValues(DVIs, V, &DVRs);
3225 for (DbgValueInst *DVI : DVIs)
3226 if (Checker.isDefinitionAcrossSuspend(*V, DVI))
3227 FrameData.Spills[V].push_back(DVI);
3228 // Add the instructions which carry debug info that is in the frame.
3229 for (DbgVariableRecord *DVR : DVRs)
3230 if (Checker.isDefinitionAcrossSuspend(*V, DVR->Marker->MarkedInstr))
3231 FrameData.Spills[V].push_back(DVR->Marker->MarkedInstr);
3232 }
3233
3234 LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
3235 if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
3236 Shape.ABI == coro::ABI::Async)
3237 sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
3238 Shape.FrameTy = buildFrameType(F, Shape, FrameData);
3239 Shape.FramePtr = Shape.CoroBegin;
3240 // For now, this works for C++ programs only.
3241 buildFrameDebugInfo(F, Shape, FrameData);
3242 insertSpills(FrameData, Shape);
3243 lowerLocalAllocas(LocalAllocas, DeadInstructions);
3244
3245 for (auto *I : DeadInstructions)
3246 I->eraseFromParent();
3247 }
3248