xref: /freebsd/contrib/llvm-project/clang/lib/AST/Interp/ByteCodeEmitter.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ByteCodeEmitter.h"
10 #include "Context.h"
11 #include "Floating.h"
12 #include "IntegralAP.h"
13 #include "Opcode.h"
14 #include "Program.h"
15 #include "clang/AST/ASTLambda.h"
16 #include "clang/AST/Attr.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/Basic/Builtins.h"
19 #include <type_traits>
20 
21 using namespace clang;
22 using namespace clang::interp;
23 
24 /// Unevaluated builtins don't get their arguments put on the stack
25 /// automatically. They instead operate on the AST of their Call
26 /// Expression.
27 /// Similar information is available via ASTContext::BuiltinInfo,
28 /// but that is not correct for our use cases.
isUnevaluatedBuiltin(unsigned BuiltinID)29 static bool isUnevaluatedBuiltin(unsigned BuiltinID) {
30   return BuiltinID == Builtin::BI__builtin_classify_type ||
31          BuiltinID == Builtin::BI__builtin_os_log_format_buffer_size;
32 }
33 
/// Compiles \p FuncDecl into an interp::Function: lays out the parameter
/// area (RVO pointer, 'this' pointer, declared parameters), records lambda
/// capture offsets, and, if the declaration is eligible and defined, emits
/// its bytecode body.
Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {

  // Manually created functions that haven't been assigned proper
  // parameters yet.
  if (!FuncDecl->param_empty() && !FuncDecl->param_begin())
    return nullptr;

  bool IsLambdaStaticInvoker = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
      MD && MD->isLambdaStaticInvoker()) {
    // For a lambda static invoker, we might have to pick a specialized
    // version if the lambda is generic. In that case, the picked function
    // will *NOT* be a static invoker anymore. However, it will still
    // be a non-static member function, this (usually) requiring an
    // instance pointer. We suppress that later in this function.
    IsLambdaStaticInvoker = true;

    const CXXRecordDecl *ClosureClass = MD->getParent();
    // Static invokers only exist for capture-less lambdas.
    assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
    if (ClosureClass->isGenericLambda()) {
      const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator();
      assert(MD->isFunctionTemplateSpecialization() &&
             "A generic lambda's static-invoker function must be a "
             "template specialization");
      // Find the call-operator specialization matching the invoker's
      // template arguments and compile that instead.
      const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
      FunctionTemplateDecl *CallOpTemplate =
          LambdaCallOp->getDescribedFunctionTemplate();
      void *InsertPos = nullptr;
      const FunctionDecl *CorrespondingCallOpSpecialization =
          CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
      assert(CorrespondingCallOpSpecialization);
      FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
    }
  }

  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // If the return is not a primitive, a pointer to the storage where the
  // value is initialized in is passed as the first argument. See 'RVO'
  // elsewhere in the code.
  QualType Ty = FuncDecl->getReturnType();
  bool HasRVO = false;
  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
    HasRVO = true;
    ParamTypes.push_back(PT_Ptr);
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT_Ptr));
  }

  // If the function decl is a member decl, the next parameter is
  // the 'this' pointer. This parameter is pop()ed from the
  // InterpStack when calling the function.
  bool HasThisPointer = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
    if (!IsLambdaStaticInvoker) {
      HasThisPointer = MD->isInstance();
      if (MD->isImplicitObjectMemberFunction()) {
        ParamTypes.push_back(PT_Ptr);
        ParamOffsets.push_back(ParamOffset);
        ParamOffset += align(primSize(PT_Ptr));
      }
    }

    // Set up lambda capture to closure record field mapping.
    if (isLambdaCallOperator(MD)) {
      // The parent record needs to be complete, we need to know about all
      // the lambda captures.
      if (!MD->getParent()->isCompleteDefinition())
        return nullptr;

      const Record *R = P.getOrCreateRecord(MD->getParent());
      llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
      FieldDecl *LTC;

      MD->getParent()->getCaptureFields(LC, LTC);

      for (auto Cap : LC) {
        // Static lambdas cannot have any captures. If this one does,
        // it has already been diagnosed and we can only ignore it.
        if (MD->isStatic())
          return nullptr;

        unsigned Offset = R->getField(Cap.second)->Offset;
        this->LambdaCaptures[Cap.first] = {
            Offset, Cap.second->getType()->isReferenceType()};
      }
      // LTC is the field holding the captured 'this', if any.
      if (LTC) {
        QualType CaptureType = R->getField(LTC)->Decl->getType();
        this->LambdaThisCapture = {R->getField(LTC)->Offset,
                                   CaptureType->isReferenceType() ||
                                       CaptureType->isPointerType()};
      }
    }
  }

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : FuncDecl->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    // The bool records whether the parameter is primitive (classified).
    Params.insert({PD, {ParamOffset, T != std::nullopt}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  // Create a handle over the emitted code.
  Function *Func = P.getFunction(FuncDecl);
  if (!Func) {
    bool IsUnevaluatedBuiltin = false;
    if (unsigned BI = FuncDecl->getBuiltinID())
      IsUnevaluatedBuiltin = isUnevaluatedBuiltin(BI);

    Func =
        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
                         std::move(ParamDescriptors), std::move(ParamOffsets),
                         HasThisPointer, HasRVO, IsUnevaluatedBuiltin);
  }

  assert(Func);
  // For not-yet-defined functions, we only create a Function instance and
  // compile their body later.
  if (!FuncDecl->isDefined() ||
      (FuncDecl->willHaveBody() && !FuncDecl->hasBody())) {
    Func->setDefined(false);
    return Func;
  }

  Func->setDefined(true);

  // Lambda static invokers are a special case that we emit custom code for.
  bool IsEligibleForCompilation = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
    IsEligibleForCompilation = MD->isLambdaStaticInvoker();
  if (!IsEligibleForCompilation)
    IsEligibleForCompilation =
        FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

  // Compile the function body. On failure we still return the (fully
  // compiled but code-less) Function so callers can diagnose at call time.
  if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
    Func->setIsFullyCompiled(true);
    return Func;
  }

  // Create scopes from descriptors.
  llvm::SmallVector<Scope, 2> Scopes;
  for (auto &DS : Descriptors) {
    Scopes.emplace_back(std::move(DS));
  }

  // Set the function's code.
  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
                std::move(Scopes), FuncDecl->hasBody());
  Func->setIsFullyCompiled(true);
  return Func;
}
196 
createLocal(Descriptor * D)197 Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
198   NextLocalOffset += sizeof(Block);
199   unsigned Location = NextLocalOffset;
200   NextLocalOffset += align(D->getAllocSize());
201   return {Location, D};
202 }
203 
/// Binds \p Label to the current end of the bytecode stream and back-patches
/// the operands of all previously-emitted jumps that target it.
void ByteCodeEmitter::emitLabel(LabelTy Label) {
  const size_t Target = Code.size();
  LabelOffsets.insert({Label, Target});

  if (auto It = LabelRelocs.find(Label);
      It != LabelRelocs.end()) {
    for (unsigned Reloc : It->second) {
      using namespace llvm::support;

      // Rewrite the operand of all jumps to this label. A relocation
      // records the position just past the 32-bit operand (see getOffset),
      // so step back by the operand's aligned size to find the write spot.
      void *Location = Code.data() + Reloc - align(sizeof(int32_t));
      assert(aligned(Location));
      // Jump offsets are relative to the position recorded in the reloc.
      const int32_t Offset = Target - static_cast<int64_t>(Reloc);
      endian::write<int32_t, llvm::endianness::native>(Location, Offset);
    }
    // All pending references to this label are now resolved.
    LabelRelocs.erase(It);
  }
}
222 
getOffset(LabelTy Label)223 int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
224   // Compute the PC offset which the jump is relative to.
225   const int64_t Position =
226       Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
227   assert(aligned(Position));
228 
229   // If target is known, compute jump offset.
230   if (auto It = LabelOffsets.find(Label);
231       It != LabelOffsets.end())
232     return It->second - Position;
233 
234   // Otherwise, record relocation and return dummy offset.
235   LabelRelocs[Label].push_back(Position);
236   return 0ull;
237 }
238 
/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
/// Pointers will be automatically marshalled as 32-bit IDs.
template <typename T>
static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
                 bool &Success) {
  size_t Size;

  // Pointers are not written verbatim; they are replaced with a 32-bit ID
  // obtained from the Program's native-pointer table (see below).
  if constexpr (std::is_pointer_v<T>)
    Size = sizeof(uint32_t);
  else
    Size = sizeof(T);

  // Code offsets elsewhere are 32-bit; refuse to grow past that.
  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  // Construct the value directly inside the code stream.
  if constexpr (!std::is_pointer_v<T>) {
    new (Code.data() + ValPos) T(Val);
  } else {
    uint32_t ID = P.getOrCreateNativePointer(Val);
    new (Code.data() + ValPos) uint32_t(ID);
  }
}
269 
/// Emits a serializable value. These usually (potentially) contain
/// heap-allocated memory and aren't trivially copyable, so the value
/// decides its own on-disk size and writes itself into the stream.
template <typename T>
static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
                           bool &Success) {
  size_t Size = Val.bytesToSerialize();

  // Same 32-bit code-size limit as the trivially-copyable path in emit().
  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  // Let the value serialize itself into the reserved region.
  Val.serialize(Code.data() + ValPos);
}
290 
// Floating values may hold heap-allocated APFloat state, so they take the
// serialization path rather than the trivially-copyable one.
template <>
void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}
296 
// Unsigned arbitrary-precision integers are serialized, not memcpy'd.
template <>
void emit(Program &P, std::vector<std::byte> &Code,
          const IntegralAP<false> &Val, bool &Success) {
  emitSerialized(Code, Val, Success);
}
302 
// Signed arbitrary-precision integers are serialized, not memcpy'd.
template <>
void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}
308 
/// Emits one instruction: the opcode followed by its operands. Returns false
/// if any write failed (e.g. the 32-bit code-size limit was exceeded).
template <typename... Tys>
bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &... Args, const SourceInfo &SI) {
  bool Success = true;

  // The opcode is followed by arguments. The source info is
  // attached to the address after the opcode.
  emit(P, Code, Op, Success);
  if (SI)
    SrcMap.emplace_back(Code.size(), SI);

  // Left fold over the comma operator: operands are emitted strictly
  // left-to-right, matching the order the interpreter reads them.
  (..., emit(P, Code, Args, Success));
  return Success;
}
322 
jumpTrue(const LabelTy & Label)323 bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
324   return emitJt(getOffset(Label), SourceInfo{});
325 }
326 
jumpFalse(const LabelTy & Label)327 bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
328   return emitJf(getOffset(Label), SourceInfo{});
329 }
330 
jump(const LabelTy & Label)331 bool ByteCodeEmitter::jump(const LabelTy &Label) {
332   return emitJmp(getOffset(Label), SourceInfo{});
333 }
334 
/// Binds \p Label to the current position so execution falls through into
/// it; always succeeds.
bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
  emitLabel(Label);
  return true;
}
339 
340 //===----------------------------------------------------------------------===//
341 // Opcode emitters
342 //===----------------------------------------------------------------------===//
343 
344 #define GET_LINK_IMPL
345 #include "Opcodes.inc"
346 #undef GET_LINK_IMPL
347