xref: /freebsd/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- Wasm.h - Wasm object file format -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines manifest constants for the wasm object file format.
10 // See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_BINARYFORMAT_WASM_H
15 #define LLVM_BINARYFORMAT_WASM_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/Compiler.h"
21 #include <optional>
#include <utility>
22 
23 namespace llvm {
24 namespace wasm {
25 
// Object file magic string. The array holds exactly four bytes and is NOT
// NUL-terminated, so always pair it with sizeof(WasmMagic).
inline constexpr char WasmMagic[] = {'\0', 'a', 's', 'm'};
// Wasm binary format version
inline constexpr uint32_t WasmVersion = 0x1;
// Wasm linking metadata version
inline constexpr uint32_t WasmMetadataVersion = 0x2;
// Wasm uses a 64k page size by default (but the custom-page-sizes proposal
// allows changing it)
inline constexpr uint32_t WasmDefaultPageSize = 65536;
35 
// Section IDs. The enum is unnamed and unscoped, so each enumerator is used
// directly as llvm::wasm::WASM_SEC_*.
36 enum : unsigned {
37   WASM_SEC_CUSTOM = 0,     // Custom / User-defined section
38   WASM_SEC_TYPE = 1,       // Function signature declarations
39   WASM_SEC_IMPORT = 2,     // Import declarations
40   WASM_SEC_FUNCTION = 3,   // Function declarations
41   WASM_SEC_TABLE = 4,      // Indirect function table and other tables
42   WASM_SEC_MEMORY = 5,     // Memory attributes
43   WASM_SEC_GLOBAL = 6,     // Global declarations
44   WASM_SEC_EXPORT = 7,     // Exports
45   WASM_SEC_START = 8,      // Start function declaration
46   WASM_SEC_ELEM = 9,       // Elements section
47   WASM_SEC_CODE = 10,      // Function bodies (code)
48   WASM_SEC_DATA = 11,      // Data segments
49   WASM_SEC_DATACOUNT = 12, // Data segment count
50   WASM_SEC_TAG = 13,       // Tag declarations
51   WASM_SEC_LAST_KNOWN = WASM_SEC_TAG, // Alias of the highest ID listed above
52 };
53 
// Type immediate encodings used in various contexts.
// Enumerators are listed in descending order of their encoded byte.
enum : unsigned {
  // Numeric and vector types.
  WASM_TYPE_I32 = 0x7F,
  WASM_TYPE_I64 = 0x7E,
  WASM_TYPE_F32 = 0x7D,
  WASM_TYPE_F64 = 0x7C,
  WASM_TYPE_V128 = 0x7B,

  // Reference type encodings.
  WASM_TYPE_NULLEXNREF = 0x74,
  WASM_TYPE_NULLFUNCREF = 0x73,
  WASM_TYPE_NULLEXTERNREF = 0x72,
  WASM_TYPE_NULLREF = 0x71,
  WASM_TYPE_FUNCREF = 0x70,
  WASM_TYPE_EXTERNREF = 0x6F,
  WASM_TYPE_ANYREF = 0x6E,
  WASM_TYPE_EQREF = 0x6D,
  WASM_TYPE_I31REF = 0x6C,
  WASM_TYPE_STRUCTREF = 0x6B,
  WASM_TYPE_ARRAYREF = 0x6A,
  WASM_TYPE_EXNREF = 0x69,

  // Nullability markers.
  WASM_TYPE_NONNULLABLE = 0x64,
  WASM_TYPE_NULLABLE = 0x63,

  // Type-definition forms.
  WASM_TYPE_FUNC = 0x60,
  WASM_TYPE_STRUCT = 0x5F,
  WASM_TYPE_ARRAY = 0x5E,
  WASM_TYPE_SUB = 0x50,
  WASM_TYPE_SUB_FINAL = 0x4F,
  WASM_TYPE_REC = 0x4E,

  // Block type for blocks with no result values.
  WASM_TYPE_NORESULT = 0x40,
};
83 
84 // Kinds of externals (for imports and exports).
// NOTE(review): WasmImport::Kind and WasmExport::Kind are uint8_t fields that
// presumably hold one of these values -- confirm against the object reader.
85 enum : unsigned {
86   WASM_EXTERNAL_FUNCTION = 0x0,
87   WASM_EXTERNAL_TABLE = 0x1,
88   WASM_EXTERNAL_MEMORY = 0x2,
89   WASM_EXTERNAL_GLOBAL = 0x3,
90   WASM_EXTERNAL_TAG = 0x4,
91 };
92 
93 // Opcodes used in initializer expressions.
// Enumerators are listed in ascending order of opcode value.
94 enum : unsigned {
95   WASM_OPCODE_END = 0x0b,
96   WASM_OPCODE_CALL = 0x10,
97   WASM_OPCODE_LOCAL_GET = 0x20,
98   WASM_OPCODE_LOCAL_SET = 0x21,
99   WASM_OPCODE_LOCAL_TEE = 0x22,
100   WASM_OPCODE_GLOBAL_GET = 0x23,
101   WASM_OPCODE_GLOBAL_SET = 0x24,
102   WASM_OPCODE_I32_STORE = 0x36,
103   WASM_OPCODE_I64_STORE = 0x37,
104   WASM_OPCODE_I32_CONST = 0x41,
105   WASM_OPCODE_I64_CONST = 0x42,
106   WASM_OPCODE_F32_CONST = 0x43,
107   WASM_OPCODE_F64_CONST = 0x44,
108   WASM_OPCODE_I32_ADD = 0x6a,
109   WASM_OPCODE_I32_SUB = 0x6b,
110   WASM_OPCODE_I32_MUL = 0x6c,
111   WASM_OPCODE_I64_ADD = 0x7c,
112   WASM_OPCODE_I64_SUB = 0x7d,
113   WASM_OPCODE_I64_MUL = 0x7e,
114   WASM_OPCODE_REF_NULL = 0xd0,
115   WASM_OPCODE_REF_FUNC = 0xd2,
116   WASM_OPCODE_GC_PREFIX = 0xfb, // Prefix byte of the GC opcode space
117 };
118 
119 // Opcodes in the GC-prefixed space (0xfb)
// These values reuse numbers from the unprefixed opcode enums; they live in a
// separate unnamed enum, so the duplicate values do not conflict.
120 enum : unsigned {
121   WASM_OPCODE_STRUCT_NEW = 0x00,
122   WASM_OPCODE_STRUCT_NEW_DEFAULT = 0x01,
123   WASM_OPCODE_ARRAY_NEW = 0x06,
124   WASM_OPCODE_ARRAY_NEW_DEFAULT = 0x07,
125   WASM_OPCODE_ARRAY_NEW_FIXED = 0x08,
126   WASM_OPCODE_REF_I31 = 0x1c,
127   // any.convert_extern and extern.convert_any don't seem to be supported by
128   // Binaryen.
129 };
130 
// Opcodes used in synthetic functions.
// Grouped by opcode space; each group is listed in ascending value order.
enum : unsigned {
  // Unprefixed control / parametric opcodes.
  WASM_OPCODE_BLOCK = 0x02,
  WASM_OPCODE_BR = 0x0c,
  WASM_OPCODE_BR_TABLE = 0x0e,
  WASM_OPCODE_RETURN = 0x0f,
  WASM_OPCODE_DROP = 0x1a,

  // Misc prefix byte, followed by the bulk-memory opcodes.
  WASM_OPCODE_MISC_PREFIX = 0xfc,
  WASM_OPCODE_MEMORY_INIT = 0x08,
  WASM_OPCODE_DATA_DROP = 0x09,
  WASM_OPCODE_MEMORY_FILL = 0x0b,

  // Atomics prefix byte, followed by the atomic opcodes.
  WASM_OPCODE_ATOMICS_PREFIX = 0xfe,
  WASM_OPCODE_ATOMIC_NOTIFY = 0x00,
  WASM_OPCODE_I32_ATOMIC_WAIT = 0x01,
  WASM_OPCODE_I32_ATOMIC_STORE = 0x17,
  WASM_OPCODE_I32_RMW_CMPXCHG = 0x48,
};
148 
149 // Sub-opcodes for catch clauses in a try_table instruction
// The four values enumerate the combinations of {tagged, catch-all} and
// {plain, _REF} clause names below.
150 enum : unsigned {
151   WASM_OPCODE_CATCH = 0x00,
152   WASM_OPCODE_CATCH_REF = 0x01,
153   WASM_OPCODE_CATCH_ALL = 0x02,
154   WASM_OPCODE_CATCH_ALL_REF = 0x03,
155 };
156 
// Bit flags stored in WasmLimits::Flags. Each non-zero flag occupies its own
// bit, so flags can be OR-ed together and tested with '&'.
enum : unsigned {
  WASM_LIMITS_FLAG_NONE = 0,
  WASM_LIMITS_FLAG_HAS_MAX = 1u << 0,
  WASM_LIMITS_FLAG_IS_SHARED = 1u << 1,
  WASM_LIMITS_FLAG_IS_64 = 1u << 2,
  WASM_LIMITS_FLAG_HAS_PAGE_SIZE = 1u << 3,
};
164 
// Bit flags tested against WasmDataSegment::InitFlags.
enum : unsigned {
  WASM_DATA_SEGMENT_IS_PASSIVE = 1u << 0,
  WASM_DATA_SEGMENT_HAS_MEMINDEX = 1u << 1,
};
169 
// Bit flags of an element segment. Bit 1 is overloaded: its meaning depends
// on whether the passive bit (bit 0) is set.
enum : unsigned {
  WASM_ELEM_SEGMENT_IS_PASSIVE = 1u << 0,
  WASM_ELEM_SEGMENT_IS_DECLARATIVE = 1u << 1,   // if passive == 1
  WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 1u << 1, // if passive == 0
  WASM_ELEM_SEGMENT_HAS_INIT_EXPRS = 1u << 2,
};
// Mask covering bits 0 and 1 of the flags above.
// NOTE(review): by its name, a non-zero (Flags & mask) presumably means the
// segment carries an element kind/type descriptor -- confirm in the reader.
// Declared inline constexpr so the header contributes a single definition
// instead of one internal-linkage copy per translation unit.
inline constexpr unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC = 0x3;
177 
178 // Feature policy prefixes used in the custom "target_features" section
// Each prefix is a single ASCII character; WasmFeatureEntry::Prefix is a
// uint8_t that presumably stores one of them -- confirm against the reader.
179 enum : uint8_t {
180   WASM_FEATURE_PREFIX_USED = '+',
181   WASM_FEATURE_PREFIX_DISALLOWED = '-',
182 };
183 
184 // Kind codes used in the custom "name" section
// Note that the values are not contiguous: only the kinds listed here are
// given names.
185 enum : unsigned {
186   WASM_NAMES_MODULE = 0,
187   WASM_NAMES_FUNCTION = 1,
188   WASM_NAMES_LOCAL = 2,
189   WASM_NAMES_GLOBAL = 7,
190   WASM_NAMES_DATA_SEGMENT = 9,
191 };
192 
193 // Kind codes used in the custom "linking" section
// Numbering starts at 0x5; no lower codes are defined in this header.
194 enum : unsigned {
195   WASM_SEGMENT_INFO = 0x5,
196   WASM_INIT_FUNCS = 0x6,
197   WASM_COMDAT_INFO = 0x7,
198   WASM_SYMBOL_TABLE = 0x8,
199 };
200 
201 // Kind codes used in the custom "dylink" section
// NOTE(review): these presumably correspond to the members of WasmDylinkInfo
// (memory/table info, Needed, ExportInfo, ImportInfo, RuntimePath) -- confirm.
202 enum : unsigned {
203   WASM_DYLINK_MEM_INFO = 0x1,
204   WASM_DYLINK_NEEDED = 0x2,
205   WASM_DYLINK_EXPORT_INFO = 0x3,
206   WASM_DYLINK_IMPORT_INFO = 0x4,
207   WASM_DYLINK_RUNTIME_PATH = 0x5,
208 };
209 
210 // Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO
// subsection. Values 0x2-0x4 are intentionally left unnamed (see below).
211 enum : unsigned {
212   WASM_COMDAT_DATA = 0x0,
213   WASM_COMDAT_FUNCTION = 0x1,
214   // GLOBAL, TAG, and TABLE are in here but LLVM doesn't use them yet.
215   WASM_COMDAT_SECTION = 0x5,
216 };
217 
218 // Kind codes used in the custom "linking" section in the WASM_SYMBOL_TABLE
// subsection. A printable name for each kind is available from
// toString(WasmSymbolType), declared at the end of this header.
219 enum WasmSymbolType : unsigned {
220   WASM_SYMBOL_TYPE_FUNCTION = 0x0,
221   WASM_SYMBOL_TYPE_DATA = 0x1,
222   WASM_SYMBOL_TYPE_GLOBAL = 0x2,
223   WASM_SYMBOL_TYPE_SECTION = 0x3,
224   WASM_SYMBOL_TYPE_TAG = 0x4,
225   WASM_SYMBOL_TYPE_TABLE = 0x5,
226 };
227 
// Per-segment bit flags; each flag occupies its own bit so they can be
// combined with '|'.
// NOTE(review): presumably the contents of WasmDataSegment::LinkingFlags --
// confirm against the reader/writer.
enum WasmSegmentFlag : unsigned {
  WASM_SEG_FLAG_STRINGS = 1u << 0,
  WASM_SEG_FLAG_TLS = 1u << 1,
  WASM_SEG_FLAG_RETAIN = 1u << 2,
};
233 
234 // Kinds of tag attributes.
// Only one attribute is defined here; the enum has a one-byte underlying type.
235 enum WasmTagAttribute : uint8_t {
236   WASM_TAG_ATTRIBUTE_EXCEPTION = 0x0,
237 };
238 
// Symbol flag constants. They are declared inline constexpr so that the header
// contributes a single definition rather than one internal-linkage copy per
// translation unit.

// Masks selecting the binding (bits 0-1) and visibility (bits 2-3) fields.
inline constexpr unsigned WASM_SYMBOL_BINDING_MASK = 0x3;
inline constexpr unsigned WASM_SYMBOL_VISIBILITY_MASK = 0xc;

// Values of the binding field (compare against Flags & BINDING_MASK).
inline constexpr unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0;
inline constexpr unsigned WASM_SYMBOL_BINDING_WEAK = 0x1;
inline constexpr unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2;
// Values of the visibility field (compare against Flags & VISIBILITY_MASK).
inline constexpr unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0;
inline constexpr unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
// Independent single-bit flags.
inline constexpr unsigned WASM_SYMBOL_UNDEFINED = 0x10;
inline constexpr unsigned WASM_SYMBOL_EXPORTED = 0x20;
inline constexpr unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40;
inline constexpr unsigned WASM_SYMBOL_NO_STRIP = 0x80;
inline constexpr unsigned WASM_SYMBOL_TLS = 0x100;
inline constexpr unsigned WASM_SYMBOL_ABSOLUTE = 0x200;
253 
// X-macro: while WASM_RELOC is defined as below, each WASM_RELOC(name, value)
// entry expands to the enumerator `name = value,`.
// NOTE(review): assumes WasmRelocs.def consists solely of such entries.
254 #define WASM_RELOC(name, value) name = value,
255 
// Relocation types; the enumerators come from the included .def file.
256 enum WasmRelocType : unsigned {
257 #include "WasmRelocs.def"
258 };
259 
// The helper macro is only needed to build the enum above.
260 #undef WASM_RELOC
261 
// Magic string and version number of an object file header.
// NOTE(review): presumably checked against WasmMagic / WasmVersion by the
// reader -- confirm.
262 struct WasmObjectHeader {
263   StringRef Magic;  // Non-owning view of the magic bytes
264   uint32_t Version;
265 };
266 
267 // Subset of types that a value can have
// Every enumerator except OTHERREF takes its value from the matching
// WASM_TYPE_* encoding constant.
268 enum class ValType {
269   I32 = WASM_TYPE_I32,
270   I64 = WASM_TYPE_I64,
271   F32 = WASM_TYPE_F32,
272   F64 = WASM_TYPE_F64,
273   V128 = WASM_TYPE_V128,
274   FUNCREF = WASM_TYPE_FUNCREF,
275   EXTERNREF = WASM_TYPE_EXTERNREF,
276   EXNREF = WASM_TYPE_EXNREF,
277   // Unmodeled value types include ref types with heap types other than
278   // func, extern or exn, and type-specialized funcrefs
279   OTHERREF = 0xff, // Catch-all; 0xff is not one of the WASM_TYPE_* values
280 };
281 
// One import entry recorded in WasmDylinkInfo::ImportInfo. The strings are
// non-owning StringRefs.
282 struct WasmDylinkImportInfo {
283   StringRef Module;
284   StringRef Field;
285   uint32_t Flags; // NOTE(review): presumably WASM_SYMBOL_* bits -- confirm
286 };
287 
// One export entry recorded in WasmDylinkInfo::ExportInfo. Name is a
// non-owning StringRef.
288 struct WasmDylinkExportInfo {
289   StringRef Name;
290   uint32_t Flags; // NOTE(review): presumably WASM_SYMBOL_* bits -- confirm
291 };
292 
// Aggregated dynamic-linking information: memory/table requirements plus the
// lists of dependencies, imports, exports and runtime paths.
// NOTE(review): presumably populated from the custom "dylink" section.
293 struct WasmDylinkInfo {
294   uint32_t MemorySize; // Memory size in bytes
295   uint32_t MemoryAlignment;  // P2 alignment of memory
296   uint32_t TableSize;  // Table size in elements
297   uint32_t TableAlignment;  // P2 alignment of table
298   std::vector<StringRef> Needed; // Shared library dependencies
299   std::vector<WasmDylinkImportInfo> ImportInfo; // Per-import entries
300   std::vector<WasmDylinkExportInfo> ExportInfo; // Per-export entries
301   std::vector<StringRef> RuntimePath;           // Runtime path entries
302 };
303 
// Producer metadata as three lists of string pairs. The strings are owned
// (std::string), unlike the StringRef fields used elsewhere in this header.
// NOTE(review): each pair is presumably (name, version) -- confirm.
304 struct WasmProducerInfo {
305   std::vector<std::pair<std::string, std::string>> Languages;
306   std::vector<std::pair<std::string, std::string>> Tools;
307   std::vector<std::pair<std::string, std::string>> SDKs;
308 };
309 
// A single feature entry: a one-byte policy prefix plus the feature name.
310 struct WasmFeatureEntry {
311   uint8_t Prefix; // NOTE(review): presumably a WASM_FEATURE_PREFIX_* value
312   std::string Name; // Owned copy of the feature name
313 };
314 
// A single export: its name, kind and the index of the exported entity.
315 struct WasmExport {
316   StringRef Name;
317   uint8_t Kind;   // NOTE(review): presumably a WASM_EXTERNAL_* value
318   uint32_t Index; // Index of the exported entity; the space depends on Kind
319 };
320 
// Size limits. operator== in this header compares Maximum only when
// WASM_LIMITS_FLAG_HAS_MAX is set and PageSize only when
// WASM_LIMITS_FLAG_HAS_PAGE_SIZE is set, so those fields are meaningful only
// under the corresponding flag.
// This type is a member of the anonymous union in WasmImport, so it must stay
// trivially constructible (no default member initializers).
321 struct WasmLimits {
322   uint8_t Flags;    // Bitwise OR of WASM_LIMITS_FLAG_*
323   uint64_t Minimum;
324   uint64_t Maximum;
325   uint32_t PageSize;
326 };
327 
// Type of a table: the element value type and the table's size limits.
// Also a member of the anonymous union in WasmImport.
328 struct WasmTableType {
329   ValType ElemType;
330   WasmLimits Limits;
331 };
332 
// A table: its index, its type, and the symbol name associated with it.
333 struct WasmTable {
334   uint32_t Index;
335   WasmTableType Type;
336   StringRef SymbolName; // from the "linking" section
337 };
338 
// A single-instruction initializer expression: one opcode and its immediate.
// NOTE(review): which union member is valid is presumably determined by
// Opcode (e.g. WASM_OPCODE_I32_CONST -> Int32) -- confirm against the reader.
339 struct WasmInitExprMVP {
340   uint8_t Opcode;
341   union {
342     int32_t Int32;
343     int64_t Int64;
344     uint32_t Float32; // Stored as an unsigned integer, not as float
345     uint64_t Float64; // Stored as an unsigned integer, not as double
346     uint32_t Global;
347   } Value;
348 };
349 
350 // Extended-const init exprs and exprs with GC types are not explicitly
351 // modeled, but the raw body of the expr is attached.
352 struct WasmInitExpr {
353   uint8_t Extended; // Set to non-zero if extended const is used (i.e. more than
354                     // one instruction)
355   WasmInitExprMVP Inst;   // The modeled single instruction
356   ArrayRef<uint8_t> Body; // Non-owning view of the raw expression bytes
357 };
358 
// Type of a global: its value type byte and whether it is mutable.
// Also a member of the anonymous union in WasmImport.
359 struct WasmGlobalType {
360   uint8_t Type; // TODO: make this a ValType?
361   bool Mutable;
362 };
363 
// A global definition: index, type, initializer, and its location in the
// binary's Global section.
364 struct WasmGlobal {
365   uint32_t Index;
366   WasmGlobalType Type;
367   WasmInitExpr InitExpr;
368   StringRef SymbolName; // from the "linking" section
369   uint32_t Offset; // Offset of the definition in the binary's Global section
370   uint32_t Size;   // Size of the definition in the binary's Global section
371 };
372 
// A tag: its index, the index of its signature, and its symbol name.
373 struct WasmTag {
374   uint32_t Index;
375   uint32_t SigIndex;
376   StringRef SymbolName; // from the "linking" section
377 };
378 
// A single import, identified by module and field name.
// NOTE(review): the active member of the anonymous union is presumably
// selected by Kind (a WASM_EXTERNAL_* value) -- confirm against the reader.
// Because the union is anonymous, every member type must stay trivially
// constructible.
379 struct WasmImport {
380   StringRef Module;
381   StringRef Field;
382   uint8_t Kind;
383   union {
384     uint32_t SigIndex;
385     WasmGlobalType Global;
386     WasmTableType Table;
387     WasmLimits Memory;
388   };
389 };
390 
// A run of local declarations: Count locals that all share the same Type.
391 struct WasmLocalDecl {
392   uint8_t Type;
393   uint32_t Count;
394 };
395 
// A function definition: its indices, local declarations, a view of its body
// bytes, its position in the code section, and names from other sections.
396 struct WasmFunction {
397   uint32_t Index;
398   uint32_t SigIndex;
399   std::vector<WasmLocalDecl> Locals;
400   ArrayRef<uint8_t> Body; // Non-owning view of the function body bytes
401   uint32_t CodeSectionOffset;
402   uint32_t Size;
403   uint32_t CodeOffset;  // start of Locals and Body
404   std::optional<StringRef> ExportName; // from the "export" section
405   StringRef SymbolName; // from the "linking" section
406   StringRef DebugName;  // from the "name" section
407   uint32_t Comdat;      // from the "comdat info" section
408 };
409 
// A data segment: initialization flags, optional memory index and offset
// expression, a view of the contents, and linking metadata.
410 struct WasmDataSegment {
411   uint32_t InitFlags; // Bitwise OR of WASM_DATA_SEGMENT_*
412   // Present if InitFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX.
413   uint32_t MemoryIndex;
414   // Present if InitFlags & WASM_DATA_SEGMENT_IS_PASSIVE == 0.
415   WasmInitExpr Offset;
416 
417   ArrayRef<uint8_t> Content; // Non-owning view of the segment bytes
418   StringRef Name; // from the "segment info" section
419   uint32_t Alignment;
420   uint32_t LinkingFlags; // NOTE(review): presumably WASM_SEG_FLAG_* bits
421   uint32_t Comdat; // from the "comdat info" section
422 };
423 
424 // Three different element segment modes are encodable. This enum is
425 // currently only used during decoding (see WasmElemSegment below).
426 enum class ElemSegmentMode { Active, Passive, Declarative };
427 
428 // Represents a Wasm element segment, with some limitations compared to the
// spec:
429 // 1) Does not model passive or declarative segments (Segment will end up with
430 // an Offset field of i32.const 0)
431 // 2) Does not model init exprs (Segment will get an empty Functions list)
432 // 3) Does not model types other than basic funcref/externref/exnref (see
433 // ValType)
434 struct WasmElemSegment {
435   uint32_t Flags; // Bitwise OR of WASM_ELEM_SEGMENT_*
436   uint32_t TableNumber;
437   ValType ElemKind;
438   WasmInitExpr Offset;
439   std::vector<uint32_t> Functions; // Function indices listed by the segment
440 };
441 
442 // Represents the location of a Wasm data symbol within a WasmDataSegment, as
443 // the index of the segment, and the offset and size within the segment.
// This type is a member of the anonymous union in WasmSymbolInfo, so it must
// stay trivially constructible.
444 struct WasmDataReference {
445   uint32_t Segment;
446   uint64_t Offset;
447   uint64_t Size;
448 };
449 
// A single relocation record. The type is stored as a raw byte; use getType()
// to read it as a WasmRelocType.
450 struct WasmRelocation {
451   uint8_t Type;    // The type of the relocation.
452   uint32_t Index;  // Index into either symbol or type index space.
453   uint64_t Offset; // Offset from the start of the section.
454   int64_t Addend;  // A value to add to the symbol.
455 
// Returns Type converted to WasmRelocType. The cast is unchecked: no
// validation against the known relocation types is performed here.
getTypeWasmRelocation456   WasmRelocType getType() const { return static_cast<WasmRelocType>(Type); }
457 };
458 
// An init-function entry: a priority and a symbol index.
// NOTE(review): Symbol is presumably an index into the symbol table, and the
// entry presumably comes from the WASM_INIT_FUNCS subsection -- confirm.
459 struct WasmInitFunc {
460   uint32_t Priority;
461   uint32_t Symbol;
462 };
463 
// Description of one symbol: name, kind, flags, optional import/export names,
// and a kind-dependent reference to the entity it denotes.
464 struct WasmSymbolInfo {
465   StringRef Name;
466   uint8_t Kind;   // NOTE(review): presumably a WasmSymbolType value
467   uint32_t Flags; // NOTE(review): presumably WASM_SYMBOL_* bits
468   // For undefined symbols the module of the import
469   std::optional<StringRef> ImportModule;
470   // For undefined symbols the name of the import
471   std::optional<StringRef> ImportName;
472   // For symbols to be exported from the final module
473   std::optional<StringRef> ExportName;
474   union {
475     // For function, table, or global symbols, the index in function, table, or
476     // global index space.
477     uint32_t ElementIndex;
478     // For data symbols, the address of the data relative to segment.
479     WasmDataReference DataRef;
480   };
481 };
482 
// Category of a WasmDebugName entry. Enumerators use the default values
// 0, 1, 2, which differ from the WASM_NAMES_* kind codes.
483 enum class NameType {
484   FUNCTION,
485   GLOBAL,
486   DATA_SEGMENT,
487 };
488 
// A debug name: the category of the named entity, its index, and the name.
489 struct WasmDebugName {
490   NameType Type;
491   uint32_t Index; // Index within the index space selected by Type
492   StringRef Name;
493 };
494 
495 // Info from the linking metadata section of a wasm object file.
496 struct WasmLinkingData {
497   uint32_t Version; // NOTE(review): presumably checked against
                      // WasmMetadataVersion -- confirm
498   std::vector<WasmInitFunc> InitFunctions;
499   std::vector<StringRef> Comdats;
500   // The linking section also contains a symbol table. This info (represented
501   // in a WasmSymbolInfo struct) is stored inside the WasmSymbol object instead
502   // of in this structure; this allows vectors of WasmSymbols and
503   // WasmLinkingDatas to be reallocated.
504 };
505 
506 struct WasmSignature {
507   SmallVector<ValType, 1> Returns;
508   SmallVector<ValType, 4> Params;
509   // LLVM can parse types other than functions encoded in the type section,
510   // but does not actually model them. Instead a placeholder signature is
511   // created in the Object's signature list.
512   enum { Function, Tag, Placeholder } Kind = Function;
513   // Support empty and tombstone instances, needed by DenseMap.
514   enum { Plain, Empty, Tombstone } State = Plain;
515 
WasmSignatureWasmSignature516   WasmSignature(SmallVector<ValType, 1> &&InReturns,
517                 SmallVector<ValType, 4> &&InParams)
518       : Returns(InReturns), Params(InParams) {}
519   WasmSignature() = default;
520 };
521 
522 // Useful comparison operators
523 inline bool operator==(const WasmSignature &LHS, const WasmSignature &RHS) {
524   return LHS.State == RHS.State && LHS.Returns == RHS.Returns &&
525          LHS.Params == RHS.Params;
526 }
527 
528 inline bool operator!=(const WasmSignature &LHS, const WasmSignature &RHS) {
529   return !(LHS == RHS);
530 }
531 
532 inline bool operator==(const WasmGlobalType &LHS, const WasmGlobalType &RHS) {
533   return LHS.Type == RHS.Type && LHS.Mutable == RHS.Mutable;
534 }
535 
536 inline bool operator!=(const WasmGlobalType &LHS, const WasmGlobalType &RHS) {
537   return !(LHS == RHS);
538 }
539 
540 inline bool operator==(const WasmLimits &LHS, const WasmLimits &RHS) {
541   return LHS.Flags == RHS.Flags && LHS.Minimum == RHS.Minimum &&
542          (LHS.Flags & WASM_LIMITS_FLAG_HAS_MAX ? LHS.Maximum == RHS.Maximum
543                                                : true) &&
544          (LHS.Flags & WASM_LIMITS_FLAG_HAS_PAGE_SIZE
545               ? LHS.PageSize == RHS.PageSize
546               : true);
547 }
548 
549 inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) {
550   return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits;
551 }
552 
// Helpers declared here and defined out of line. The three string helpers
// return a StringRef for the given symbol type, relocation type or section
// type; the reloc/section variants take the raw uint32_t value rather than an
// enum.
553 LLVM_ABI llvm::StringRef toString(WasmSymbolType type);
554 LLVM_ABI llvm::StringRef relocTypetoString(uint32_t type);
555 LLVM_ABI llvm::StringRef sectionTypeToString(uint32_t type);
// NOTE(review): by its name, reports whether relocations of the given type
// carry an addend (see WasmRelocation::Addend) -- confirm in the definition.
556 LLVM_ABI bool relocTypeHasAddend(uint32_t type);
557 
558 } // end namespace wasm
559 } // end namespace llvm
560 
561 #endif
562