//===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ByteCodeEmitter.h"
#include "Context.h"
#include "Floating.h"
#include "IntegralAP.h"
#include "Opcode.h"
#include "Program.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/Basic/Builtins.h"
#include <type_traits>

using namespace clang;
using namespace clang::interp;

/// Unevaluated builtins don't get their arguments put on the stack
/// automatically. They instead operate on the AST of their Call
/// Expression.
/// Similar information is available via ASTContext::BuiltinInfo,
/// but that is not correct for our use cases.
static bool isUnevaluatedBuiltin(unsigned BuiltinID) {
  return BuiltinID == Builtin::BI__builtin_classify_type ||
         BuiltinID == Builtin::BI__builtin_os_log_format_buffer_size;
}

Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {

  // Manually created functions that haven't been assigned proper
  // parameters yet.
  if (!FuncDecl->param_empty() && !FuncDecl->param_begin())
    return nullptr;

  bool IsLambdaStaticInvoker = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
      MD && MD->isLambdaStaticInvoker()) {
    // For a lambda static invoker, we might have to pick a specialized
    // version if the lambda is generic. In that case, the picked function
    // will *NOT* be a static invoker anymore. However, it will still
    // be a non-static member function, thus (usually) requiring an
    // instance pointer. We suppress that later in this function.
    IsLambdaStaticInvoker = true;

    const CXXRecordDecl *ClosureClass = MD->getParent();
    assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
    if (ClosureClass->isGenericLambda()) {
      const CXXMethodDecl *LambdaCallOp =
          ClosureClass->getLambdaCallOperator();
      assert(MD->isFunctionTemplateSpecialization() &&
             "A generic lambda's static-invoker function must be a "
             "template specialization");
      const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
      FunctionTemplateDecl *CallOpTemplate =
          LambdaCallOp->getDescribedFunctionTemplate();
      void *InsertPos = nullptr;
      const FunctionDecl *CorrespondingCallOpSpecialization =
          CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
      assert(CorrespondingCallOpSpecialization);
      FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
    }
  }

  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // If the return value is not a primitive, a pointer to the storage in which
  // it is initialized is passed as the first argument. See 'RVO' elsewhere in
  // the code.
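  // Illustrative sketch (hypothetical declaration, not actual emitted code):
  // for a function like
  //   constexpr SomeStruct makeValue(int N);
  // the compiled Function conceptually behaves as
  //   void makeValue(Pointer ReturnStorage, int N);
  // where the caller provides ReturnStorage and the result is constructed
  // directly into it.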
  QualType Ty = FuncDecl->getReturnType();
  bool HasRVO = false;
  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
    HasRVO = true;
    ParamTypes.push_back(PT_Ptr);
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT_Ptr));
  }

  // If the function decl is a member decl, the next parameter is
  // the 'this' pointer. This parameter is pop()ed from the
  // InterpStack when calling the function.
  bool HasThisPointer = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
    if (!IsLambdaStaticInvoker) {
      HasThisPointer = MD->isInstance();
      if (MD->isImplicitObjectMemberFunction()) {
        ParamTypes.push_back(PT_Ptr);
        ParamOffsets.push_back(ParamOffset);
        ParamOffset += align(primSize(PT_Ptr));
      }
    }

    // Set up lambda capture to closure record field mapping.
    if (isLambdaCallOperator(MD)) {
      // The parent record needs to be complete; we need to know about all
      // the lambda captures.
      if (!MD->getParent()->isCompleteDefinition())
        return nullptr;

      const Record *R = P.getOrCreateRecord(MD->getParent());
      llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
      FieldDecl *LTC;

      MD->getParent()->getCaptureFields(LC, LTC);

      for (auto Cap : LC) {
        // Static lambdas cannot have any captures. If this one does,
        // it has already been diagnosed and we can only ignore it.
        if (MD->isStatic())
          return nullptr;

        unsigned Offset = R->getField(Cap.second)->Offset;
        this->LambdaCaptures[Cap.first] = {
            Offset, Cap.second->getType()->isReferenceType()};
      }
      if (LTC) {
        QualType CaptureType = R->getField(LTC)->Decl->getType();
        this->LambdaThisCapture = {R->getField(LTC)->Offset,
                                   CaptureType->isReferenceType() ||
                                       CaptureType->isPointerType()};
      }
    }
  }

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : FuncDecl->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T != std::nullopt}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  // Create a handle over the emitted code.
  Function *Func = P.getFunction(FuncDecl);
  if (!Func) {
    bool IsUnevaluatedBuiltin = false;
    if (unsigned BI = FuncDecl->getBuiltinID())
      IsUnevaluatedBuiltin = isUnevaluatedBuiltin(BI);

    Func =
        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
                         std::move(ParamDescriptors), std::move(ParamOffsets),
                         HasThisPointer, HasRVO, IsUnevaluatedBuiltin);
  }

  assert(Func);
  // For not-yet-defined functions, we only create a Function instance and
  // compile their body later.
  if (!FuncDecl->isDefined() ||
      (FuncDecl->willHaveBody() && !FuncDecl->hasBody())) {
    Func->setDefined(false);
    return Func;
  }

  Func->setDefined(true);

  // Lambda static invokers are a special case that we emit custom code for.
  bool IsEligibleForCompilation = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
    IsEligibleForCompilation = MD->isLambdaStaticInvoker();
  if (!IsEligibleForCompilation)
    IsEligibleForCompilation =
        FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

  // Compile the function body.
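  // visitFunc() is provided by the deriving code generator and appends the
  // body's opcodes to Code. If the function is not eligible (e.g. it is not
  // constexpr) or emission fails, the Function is kept without any code
  // attached.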
  if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
    Func->setIsFullyCompiled(true);
    return Func;
  }

  // Create scopes from descriptors.
  llvm::SmallVector<Scope, 2> Scopes;
  for (auto &DS : Descriptors) {
    Scopes.emplace_back(std::move(DS));
  }

  // Set the function's code.
  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
                std::move(Scopes), FuncDecl->hasBody());
  Func->setIsFullyCompiled(true);
  return Func;
}

Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
  NextLocalOffset += sizeof(Block);
  unsigned Location = NextLocalOffset;
  NextLocalOffset += align(D->getAllocSize());
  return {Location, D};
}

void ByteCodeEmitter::emitLabel(LabelTy Label) {
  const size_t Target = Code.size();
  LabelOffsets.insert({Label, Target});

  if (auto It = LabelRelocs.find(Label); It != LabelRelocs.end()) {
    for (unsigned Reloc : It->second) {
      using namespace llvm::support;

      // Rewrite the operand of all jumps to this label.
      void *Location = Code.data() + Reloc - align(sizeof(int32_t));
      assert(aligned(Location));
      const int32_t Offset = Target - static_cast<int64_t>(Reloc);
      endian::write<int32_t, llvm::endianness::native>(Location, Offset);
    }
    LabelRelocs.erase(It);
  }
}

int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
  // Compute the PC offset which the jump is relative to.
  const int64_t Position =
      Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
  assert(aligned(Position));

  // If target is known, compute jump offset.
  if (auto It = LabelOffsets.find(Label); It != LabelOffsets.end())
    return It->second - Position;

  // Otherwise, record relocation and return dummy offset.
  LabelRelocs[Label].push_back(Position);
  return 0ull;
}

/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
/// Pointers will be automatically marshalled as 32-bit IDs.
template <typename T>
static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
                 bool &Success) {
  size_t Size;

  if constexpr (std::is_pointer_v<T>)
    Size = sizeof(uint32_t);
  else
    Size = sizeof(T);

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  if constexpr (!std::is_pointer_v<T>) {
    new (Code.data() + ValPos) T(Val);
  } else {
    uint32_t ID = P.getOrCreateNativePointer(Val);
    new (Code.data() + ValPos) uint32_t(ID);
  }
}

/// Emits a serializable value. These may contain heap-allocated memory and
/// aren't trivially copyable.
template <typename T>
static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
                           bool &Success) {
  size_t Size = Val.bytesToSerialize();

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  Val.serialize(Code.data() + ValPos);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code,
          const IntegralAP<false> &Val, bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <typename... Tys>
bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &... Args,
                             const SourceInfo &SI) {
  bool Success = true;

  // The opcode is followed by arguments. The source info is
  // attached to the address after the opcode.
  emit(P, Code, Op, Success);
  if (SI)
    SrcMap.emplace_back(Code.size(), SI);

  (..., emit(P, Code, Args, Success));
  return Success;
}

bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
  return emitJt(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
  return emitJf(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jump(const LabelTy &Label) {
  return emitJmp(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
  emitLabel(Label);
  return true;
}

//===----------------------------------------------------------------------===//
// Opcode emitters
//===----------------------------------------------------------------------===//

#define GET_LINK_IMPL
#include "Opcodes.inc"
#undef GET_LINK_IMPL