xref: /freebsd/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (revision b4af4f93c682e445bf159f0d1ec90b636296c946)
1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the OpenMPIRBuilder class, which is used as a
11 /// convenient way to create LLVM instructions for OpenMP directives.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
16 
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/IR/CFG.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/MDBuilder.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
26 #include "llvm/Transforms/Utils/CodeExtractor.h"
27 
28 #include <sstream>
29 
30 #define DEBUG_TYPE "openmp-ir-builder"
31 
32 using namespace llvm;
33 using namespace omp;
34 using namespace types;
35 
/// Hidden command line switch `-openmp-ir-builder-optimistic-attributes`.
/// It defaults to false. It is not read anywhere in the visible part of this
/// file.
/// NOTE(review): presumably consulted by the attribute set definitions that
/// addAttributes() pulls in from OMPKinds.def -- confirm there.
static cl::opt<bool>
    OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
                         cl::desc("Use optimistic attributes describing "
                                  "'as-if' properties of runtime calls."),
                         cl::init(false));
41 
/// Attach the attribute list registered for the runtime function \p FnID to
/// the declaration \p Fn.
///
/// Both the attribute sets and the per-function attribute lists are produced
/// by expanding macros over OMPKinds.def. A \p FnID without an OMP_RTL_ATTRS
/// entry hits the default case and \p Fn is left untouched.
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
  LLVMContext &Ctx = Fn.getContext();

  // Each OMP_ATTRS_SET entry in OMPKinds.def becomes a local AttributeSet
  // variable in this function.
  // NOTE(review): the expansions presumably refer to the local `Ctx` and to
  // these variables by name -- check OMPKinds.def before renaming anything.
#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  // Add attributes to the new declaration. Every OMP_RTL_ATTRS entry turns
  // into one `case` that replaces the attribute list of Fn wholesale.
  switch (FnID) {
#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)                \
  case Enum:                                                                   \
    Fn.setAttributes(                                                          \
        AttributeList::get(Ctx, FnAttrSet, RetAttrSet, ArgAttrSets));          \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    // Attributes are optional.
    break;
  }
}
61 
62 Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) {
63   Function *Fn = nullptr;
64 
65   // Try to find the declation in the module first.
66   switch (FnID) {
67 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
68   case Enum:                                                                   \
69     Fn = M.getFunction(Str);                                                   \
70     break;
71 #include "llvm/Frontend/OpenMP/OMPKinds.def"
72   }
73 
74   if (!Fn) {
75     // Create a new declaration if we need one.
76     switch (FnID) {
77 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
78   case Enum:                                                                   \
79     Fn = Function::Create(FunctionType::get(ReturnType,                        \
80                                             ArrayRef<Type *>{__VA_ARGS__},     \
81                                             IsVarArg),                         \
82                           GlobalValue::ExternalLinkage, Str, M);               \
83     break;
84 #include "llvm/Frontend/OpenMP/OMPKinds.def"
85     }
86 
87     addAttributes(FnID, *Fn);
88   }
89 
90   assert(Fn && "Failed to create OpenMP runtime function");
91   return Fn;
92 }
93 
94 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
95 
96 Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
97                                          IdentFlag LocFlags) {
98   // Enable "C-mode".
99   LocFlags |= OMP_IDENT_FLAG_KMPC;
100 
101   GlobalVariable *&DefaultIdent = IdentMap[{SrcLocStr, uint64_t(LocFlags)}];
102   if (!DefaultIdent) {
103     Constant *I32Null = ConstantInt::getNullValue(Int32);
104     Constant *IdentData[] = {I32Null,
105                              ConstantInt::get(Int32, uint64_t(LocFlags)),
106                              I32Null, I32Null, SrcLocStr};
107     Constant *Initializer = ConstantStruct::get(
108         cast<StructType>(IdentPtr->getPointerElementType()), IdentData);
109 
110     // Look for existing encoding of the location + flags, not needed but
111     // minimizes the difference to the existing solution while we transition.
112     for (GlobalVariable &GV : M.getGlobalList())
113       if (GV.getType() == IdentPtr && GV.hasInitializer())
114         if (GV.getInitializer() == Initializer)
115           return DefaultIdent = &GV;
116 
117     DefaultIdent = new GlobalVariable(M, IdentPtr->getPointerElementType(),
118                                       /* isConstant = */ false,
119                                       GlobalValue::PrivateLinkage, Initializer);
120     DefaultIdent->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
121     DefaultIdent->setAlignment(Align(8));
122   }
123   return DefaultIdent;
124 }
125 
126 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
127   Constant *&SrcLocStr = SrcLocStrMap[LocStr];
128   if (!SrcLocStr) {
129     Constant *Initializer =
130         ConstantDataArray::getString(M.getContext(), LocStr);
131 
132     // Look for existing encoding of the location, not needed but minimizes the
133     // difference to the existing solution while we transition.
134     for (GlobalVariable &GV : M.getGlobalList())
135       if (GV.isConstant() && GV.hasInitializer() &&
136           GV.getInitializer() == Initializer)
137         return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
138 
139     SrcLocStr = Builder.CreateGlobalStringPtr(LocStr);
140   }
141   return SrcLocStr;
142 }
143 
144 Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
145   return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
146 }
147 
148 Constant *
149 OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
150   DILocation *DIL = Loc.DL.get();
151   if (!DIL)
152     return getOrCreateDefaultSrcLocStr();
153   StringRef Filename =
154       !DIL->getFilename().empty() ? DIL->getFilename() : M.getName();
155   StringRef Function = DIL->getScope()->getSubprogram()->getName();
156   Function =
157       !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
158   std::string LineStr = std::to_string(DIL->getLine());
159   std::string ColumnStr = std::to_string(DIL->getColumn());
160   std::stringstream SrcLocStr;
161   SrcLocStr << ";" << Filename.data() << ";" << Function.data() << ";"
162             << LineStr << ";" << ColumnStr << ";;";
163   return getOrCreateSrcLocStr(SrcLocStr.str());
164 }
165 
166 Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
167   return Builder.CreateCall(
168       getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num), Ident,
169       "omp_global_thread_num");
170 }
171 
172 OpenMPIRBuilder::InsertPointTy
173 OpenMPIRBuilder::CreateBarrier(const LocationDescription &Loc, Directive DK,
174                                bool ForceSimpleCall, bool CheckCancelFlag) {
175   if (!updateToLocation(Loc))
176     return Loc.IP;
177   return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
178 }
179 
180 OpenMPIRBuilder::InsertPointTy
181 OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
182                                  bool ForceSimpleCall, bool CheckCancelFlag) {
183   // Build call __kmpc_cancel_barrier(loc, thread_id) or
184   //            __kmpc_barrier(loc, thread_id);
185 
186   IdentFlag BarrierLocFlags;
187   switch (Kind) {
188   case OMPD_for:
189     BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
190     break;
191   case OMPD_sections:
192     BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
193     break;
194   case OMPD_single:
195     BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
196     break;
197   case OMPD_barrier:
198     BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
199     break;
200   default:
201     BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
202     break;
203   }
204 
205   Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
206   Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
207                    getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
208 
209   // If we are in a cancellable parallel region, barriers are cancellation
210   // points.
211   // TODO: Check why we would force simple calls or to ignore the cancel flag.
212   bool UseCancelBarrier =
213       !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
214 
215   Value *Result = Builder.CreateCall(
216       getOrCreateRuntimeFunction(UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
217                                                   : OMPRTL___kmpc_barrier),
218       Args);
219 
220   if (UseCancelBarrier && CheckCancelFlag)
221     emitCancelationCheckImpl(Result, OMPD_parallel);
222 
223   return Builder.saveIP();
224 }
225 
226 OpenMPIRBuilder::InsertPointTy
227 OpenMPIRBuilder::CreateCancel(const LocationDescription &Loc,
228                               Value *IfCondition,
229                               omp::Directive CanceledDirective) {
230   if (!updateToLocation(Loc))
231     return Loc.IP;
232 
233   // LLVM utilities like blocks with terminators.
234   auto *UI = Builder.CreateUnreachable();
235 
236   Instruction *ThenTI = UI, *ElseTI = nullptr;
237   if (IfCondition)
238     SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
239   Builder.SetInsertPoint(ThenTI);
240 
241   Value *CancelKind = nullptr;
242   switch (CanceledDirective) {
243 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
244   case DirectiveEnum:                                                          \
245     CancelKind = Builder.getInt32(Value);                                      \
246     break;
247 #include "llvm/Frontend/OpenMP/OMPKinds.def"
248   default:
249     llvm_unreachable("Unknown cancel kind!");
250   }
251 
252   Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
253   Value *Ident = getOrCreateIdent(SrcLocStr);
254   Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
255   Value *Result = Builder.CreateCall(
256       getOrCreateRuntimeFunction(OMPRTL___kmpc_cancel), Args);
257 
258   // The actual cancel logic is shared with others, e.g., cancel_barriers.
259   emitCancelationCheckImpl(Result, CanceledDirective);
260 
261   // Update the insertion point and remove the terminator we introduced.
262   Builder.SetInsertPoint(UI->getParent());
263   UI->eraseFromParent();
264 
265   return Builder.saveIP();
266 }
267 
268 void OpenMPIRBuilder::emitCancelationCheckImpl(
269     Value *CancelFlag, omp::Directive CanceledDirective) {
270   assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
271          "Unexpected cancellation!");
272 
273   // For a cancel barrier we create two new blocks.
274   BasicBlock *BB = Builder.GetInsertBlock();
275   BasicBlock *NonCancellationBlock;
276   if (Builder.GetInsertPoint() == BB->end()) {
277     // TODO: This branch will not be needed once we moved to the
278     // OpenMPIRBuilder codegen completely.
279     NonCancellationBlock = BasicBlock::Create(
280         BB->getContext(), BB->getName() + ".cont", BB->getParent());
281   } else {
282     NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
283     BB->getTerminator()->eraseFromParent();
284     Builder.SetInsertPoint(BB);
285   }
286   BasicBlock *CancellationBlock = BasicBlock::Create(
287       BB->getContext(), BB->getName() + ".cncl", BB->getParent());
288 
289   // Jump to them based on the return value.
290   Value *Cmp = Builder.CreateIsNull(CancelFlag);
291   Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
292                        /* TODO weight */ nullptr, nullptr);
293 
294   // From the cancellation block we finalize all variables and go to the
295   // post finalization block that is known to the FiniCB callback.
296   Builder.SetInsertPoint(CancellationBlock);
297   auto &FI = FinalizationStack.back();
298   FI.FiniCB(Builder.saveIP());
299 
300   // The continuation block is where code generation continues.
301   Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
302 }
303 
304 IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
305     const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
306     PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
307     Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
308   if (!updateToLocation(Loc))
309     return Loc.IP;
310 
311   Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
312   Value *Ident = getOrCreateIdent(SrcLocStr);
313   Value *ThreadID = getOrCreateThreadID(Ident);
314 
315   if (NumThreads) {
316     // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
317     Value *Args[] = {
318         Ident, ThreadID,
319         Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
320     Builder.CreateCall(
321         getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args);
322   }
323 
324   if (ProcBind != OMP_PROC_BIND_default) {
325     // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
326     Value *Args[] = {
327         Ident, ThreadID,
328         ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
329     Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind),
330                        Args);
331   }
332 
333   BasicBlock *InsertBB = Builder.GetInsertBlock();
334   Function *OuterFn = InsertBB->getParent();
335 
336   // Vector to remember instructions we used only during the modeling but which
337   // we want to delete at the end.
338   SmallVector<Instruction *, 4> ToBeDeleted;
339 
340   Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
341   AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
342   AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
343 
344   // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
345   // program, otherwise we only need them for modeling purposes to get the
346   // associated arguments in the outlined function. In the former case,
347   // initialize the allocas properly, in the latter case, delete them later.
348   if (IfCondition) {
349     Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
350     Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
351   } else {
352     ToBeDeleted.push_back(TIDAddr);
353     ToBeDeleted.push_back(ZeroAddr);
354   }
355 
356   // Create an artificial insertion point that will also ensure the blocks we
357   // are about to split are not degenerated.
358   auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
359 
360   Instruction *ThenTI = UI, *ElseTI = nullptr;
361   if (IfCondition)
362     SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
363 
364   BasicBlock *ThenBB = ThenTI->getParent();
365   BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
366   BasicBlock *PRegBodyBB =
367       PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
368   BasicBlock *PRegPreFiniBB =
369       PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
370   BasicBlock *PRegExitBB =
371       PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
372 
373   auto FiniCBWrapper = [&](InsertPointTy IP) {
374     // Hide "open-ended" blocks from the given FiniCB by setting the right jump
375     // target to the region exit block.
376     if (IP.getBlock()->end() == IP.getPoint()) {
377       IRBuilder<>::InsertPointGuard IPG(Builder);
378       Builder.restoreIP(IP);
379       Instruction *I = Builder.CreateBr(PRegExitBB);
380       IP = InsertPointTy(I->getParent(), I->getIterator());
381     }
382     assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
383            IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
384            "Unexpected insertion point for finalization call!");
385     return FiniCB(IP);
386   };
387 
388   FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
389 
390   // Generate the privatization allocas in the block that will become the entry
391   // of the outlined function.
392   InsertPointTy AllocaIP(PRegEntryBB,
393                          PRegEntryBB->getTerminator()->getIterator());
394   Builder.restoreIP(AllocaIP);
395   AllocaInst *PrivTIDAddr =
396       Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
397   Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
398 
399   // Add some fake uses for OpenMP provided arguments.
400   ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
401   ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use"));
402 
403   // ThenBB
404   //   |
405   //   V
406   // PRegionEntryBB         <- Privatization allocas are placed here.
407   //   |
408   //   V
409   // PRegionBodyBB          <- BodeGen is invoked here.
410   //   |
411   //   V
412   // PRegPreFiniBB          <- The block we will start finalization from.
413   //   |
414   //   V
415   // PRegionExitBB          <- A common exit to simplify block collection.
416   //
417 
418   LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");
419 
420   // Let the caller create the body.
421   assert(BodyGenCB && "Expected body generation callback!");
422   InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
423   BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
424 
425   LLVM_DEBUG(dbgs() << "After  body codegen: " << *UI->getFunction() << "\n");
426 
427   SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
428   SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
429   ParallelRegionBlockSet.insert(PRegEntryBB);
430   ParallelRegionBlockSet.insert(PRegExitBB);
431 
432   // Collect all blocks in-between PRegEntryBB and PRegExitBB.
433   Worklist.push_back(PRegEntryBB);
434   while (!Worklist.empty()) {
435     BasicBlock *BB = Worklist.pop_back_val();
436     ParallelRegionBlocks.push_back(BB);
437     for (BasicBlock *SuccBB : successors(BB))
438       if (ParallelRegionBlockSet.insert(SuccBB).second)
439         Worklist.push_back(SuccBB);
440   }
441 
442   CodeExtractorAnalysisCache CEAC(*OuterFn);
443   CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
444                           /* AggregateArgs */ false,
445                           /* BlockFrequencyInfo */ nullptr,
446                           /* BranchProbabilityInfo */ nullptr,
447                           /* AssumptionCache */ nullptr,
448                           /* AllowVarArgs */ true,
449                           /* AllowAlloca */ true,
450                           /* Suffix */ ".omp_par");
451 
452   // Find inputs to, outputs from the code region.
453   BasicBlock *CommonExit = nullptr;
454   SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
455   Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
456   Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
457 
458   LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");
459 
460   FunctionCallee TIDRTLFn =
461       getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);
462 
463   auto PrivHelper = [&](Value &V) {
464     if (&V == TIDAddr || &V == ZeroAddr)
465       return;
466 
467     SmallVector<Use *, 8> Uses;
468     for (Use &U : V.uses())
469       if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
470         if (ParallelRegionBlockSet.count(UserI->getParent()))
471           Uses.push_back(&U);
472 
473     Value *ReplacementValue = nullptr;
474     CallInst *CI = dyn_cast<CallInst>(&V);
475     if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
476       ReplacementValue = PrivTID;
477     } else {
478       Builder.restoreIP(
479           PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
480       assert(ReplacementValue &&
481              "Expected copy/create callback to set replacement value!");
482       if (ReplacementValue == &V)
483         return;
484     }
485 
486     for (Use *UPtr : Uses)
487       UPtr->set(ReplacementValue);
488   };
489 
490   for (Value *Input : Inputs) {
491     LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
492     PrivHelper(*Input);
493   }
494   for (Value *Output : Outputs) {
495     LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
496     PrivHelper(*Output);
497   }
498 
499   LLVM_DEBUG(dbgs() << "After  privatization: " << *UI->getFunction() << "\n");
500   LLVM_DEBUG({
501     for (auto *BB : ParallelRegionBlocks)
502       dbgs() << " PBR: " << BB->getName() << "\n";
503   });
504 
505   // Add some known attributes to the outlined function.
506   Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
507   OutlinedFn->addParamAttr(0, Attribute::NoAlias);
508   OutlinedFn->addParamAttr(1, Attribute::NoAlias);
509   OutlinedFn->addFnAttr(Attribute::NoUnwind);
510   OutlinedFn->addFnAttr(Attribute::NoRecurse);
511 
512   LLVM_DEBUG(dbgs() << "After      outlining: " << *UI->getFunction() << "\n");
513   LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n");
514 
515   // For compability with the clang CG we move the outlined function after the
516   // one with the parallel region.
517   OutlinedFn->removeFromParent();
518   M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
519 
520   // Remove the artificial entry introduced by the extractor right away, we
521   // made our own entry block after all.
522   {
523     BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
524     assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
525     assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
526     PRegEntryBB->moveBefore(&ArtificialEntry);
527     ArtificialEntry.eraseFromParent();
528   }
529   LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
530   assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);
531 
532   assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
533   assert(OutlinedFn->arg_size() >= 2 &&
534          "Expected at least tid and bounded tid as arguments");
535   unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;
536 
537   CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
538   CI->getParent()->setName("omp_parallel");
539   Builder.SetInsertPoint(CI);
540 
541   // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
542   Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
543                            Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};
544 
545   SmallVector<Value *, 16> RealArgs;
546   RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
547   RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
548 
549   FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
550   if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
551     if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
552       llvm::LLVMContext &Ctx = F->getContext();
553       MDBuilder MDB(Ctx);
554       // Annotate the callback behavior of the __kmpc_fork_call:
555       //  - The callback callee is argument number 2 (microtask).
556       //  - The first two arguments of the callback callee are unknown (-1).
557       //  - All variadic arguments to the __kmpc_fork_call are passed to the
558       //    callback callee.
559       F->addMetadata(
560           llvm::LLVMContext::MD_callback,
561           *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
562                                       2, {-1, -1},
563                                       /* VarArgsArePassed */ true)}));
564     }
565   }
566 
567   Builder.CreateCall(RTLFn, RealArgs);
568 
569   LLVM_DEBUG(dbgs() << "With fork_call placed: "
570                     << *Builder.GetInsertBlock()->getParent() << "\n");
571 
572   InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
573   InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
574   UI->eraseFromParent();
575 
576   // Initialize the local TID stack location with the argument value.
577   Builder.SetInsertPoint(PrivTID);
578   Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
579   Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
580 
581   // If no "if" clause was present we do not need the call created during
582   // outlining, otherwise we reuse it in the serialized parallel region.
583   if (!ElseTI) {
584     CI->eraseFromParent();
585   } else {
586 
587     // If an "if" clause was present we are now generating the serialized
588     // version into the "else" branch.
589     Builder.SetInsertPoint(ElseTI);
590 
591     // Build calls __kmpc_serialized_parallel(&Ident, GTid);
592     Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
593     Builder.CreateCall(
594         getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
595         SerializedParallelCallArgs);
596 
597     // OutlinedFn(&GTid, &zero, CapturedStruct);
598     CI->removeFromParent();
599     Builder.Insert(CI);
600 
601     // __kmpc_end_serialized_parallel(&Ident, GTid);
602     Value *EndArgs[] = {Ident, ThreadID};
603     Builder.CreateCall(
604         getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
605         EndArgs);
606 
607     LLVM_DEBUG(dbgs() << "With serialized parallel region: "
608                       << *Builder.GetInsertBlock()->getParent() << "\n");
609   }
610 
611   // Adjust the finalization stack, verify the adjustment, and call the
612   // finalize function a last time to finalize values between the pre-fini block
613   // and the exit block if we left the parallel "the normal way".
614   auto FiniInfo = FinalizationStack.pop_back_val();
615   (void)FiniInfo;
616   assert(FiniInfo.DK == OMPD_parallel &&
617          "Unexpected finalization stack state!");
618 
619   Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
620   assert(PreFiniTI->getNumSuccessors() == 1 &&
621          PreFiniTI->getSuccessor(0)->size() == 1 &&
622          isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
623          "Unexpected CFG structure!");
624 
625   InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
626   FiniCB(PreFiniIP);
627 
628   for (Instruction *I : ToBeDeleted)
629     I->eraseFromParent();
630 
631   return AfterIP;
632 }
633