xref: /freebsd/contrib/llvm-project/llvm/include/llvm/BinaryFormat/Wasm.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===- Wasm.h - Wasm object file format -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines manifest constants for the wasm object file format.
// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md
//
//===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_BINARYFORMAT_WASM_H
15 #define LLVM_BINARYFORMAT_WASM_H
16 
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <optional>
#include <string>
#include <utility>
#include <vector>
21 
22 namespace llvm {
23 namespace wasm {
24 
// Object file magic string: the four bytes '\0', 'a', 's', 'm'. The array has
// no trailing NUL, so sizeof(WasmMagic) is exactly 4.
constexpr char WasmMagic[] = {'\0', 'a', 's', 'm'};
// Wasm binary format version.
constexpr uint32_t WasmVersion = 0x1;
// Wasm linking metadata version.
constexpr uint32_t WasmMetadataVersion = 0x2;
// Wasm uses a 64k page size.
constexpr uint32_t WasmPageSize = 65536;
33 
// Section identifiers of a wasm module.
enum : unsigned {
  WASM_SEC_CUSTOM = 0,     // Custom / User-defined section
  WASM_SEC_TYPE = 1,       // Function signature declarations
  WASM_SEC_IMPORT = 2,     // Import declarations
  WASM_SEC_FUNCTION = 3,   // Function declarations
  WASM_SEC_TABLE = 4,      // Indirect function table and other tables
  WASM_SEC_MEMORY = 5,     // Memory attributes
  WASM_SEC_GLOBAL = 6,     // Global declarations
  WASM_SEC_EXPORT = 7,     // Exports
  WASM_SEC_START = 8,      // Start function declaration
  WASM_SEC_ELEM = 9,       // Elements section
  WASM_SEC_CODE = 10,      // Function bodies (code)
  WASM_SEC_DATA = 11,      // Data segments
  WASM_SEC_DATACOUNT = 12, // Data segment count
  WASM_SEC_TAG = 13,       // Tag declarations
  // Alias for the highest section id listed above; keep it in sync when a new
  // section id is added.
  WASM_SEC_LAST_KNOWN = WASM_SEC_TAG,
};
51 
// Type immediate encodings used in various contexts.
enum : unsigned {
  // Numeric and vector value types.
  WASM_TYPE_I32 = 0x7F,
  WASM_TYPE_I64 = 0x7E,
  WASM_TYPE_F32 = 0x7D,
  WASM_TYPE_F64 = 0x7C,
  WASM_TYPE_V128 = 0x7B,
  // Reference / heap type encodings.
  WASM_TYPE_NULLFUNCREF = 0x73,
  WASM_TYPE_NULLEXTERNREF = 0x72,
  WASM_TYPE_NULLEXNREF = 0x74,
  WASM_TYPE_NULLREF = 0x71,
  WASM_TYPE_FUNCREF = 0x70,
  WASM_TYPE_EXTERNREF = 0x6F,
  WASM_TYPE_EXNREF = 0x69,
  WASM_TYPE_ANYREF = 0x6E,
  WASM_TYPE_EQREF = 0x6D,
  WASM_TYPE_I31REF = 0x6C,
  WASM_TYPE_STRUCTREF = 0x6B,
  WASM_TYPE_ARRAYREF = 0x6A,
  WASM_TYPE_NONNULLABLE = 0x64,
  WASM_TYPE_NULLABLE = 0x63,
  // Type-section entry forms.
  WASM_TYPE_FUNC = 0x60,
  WASM_TYPE_ARRAY = 0x5E,
  WASM_TYPE_STRUCT = 0x5F,
  WASM_TYPE_SUB = 0x50,
  WASM_TYPE_SUB_FINAL = 0x4F,
  WASM_TYPE_REC = 0x4E,
  WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
};
81 
// Kinds of externals (for imports and exports).
enum : unsigned {
  WASM_EXTERNAL_FUNCTION = 0x0,
  WASM_EXTERNAL_TABLE = 0x1,
  WASM_EXTERNAL_MEMORY = 0x2,
  WASM_EXTERNAL_GLOBAL = 0x3,
  WASM_EXTERNAL_TAG = 0x4,
};
90 
// Opcodes used in initializer expressions.
enum : unsigned {
  WASM_OPCODE_END = 0x0b,
  WASM_OPCODE_CALL = 0x10,
  WASM_OPCODE_LOCAL_GET = 0x20,
  WASM_OPCODE_LOCAL_SET = 0x21,
  WASM_OPCODE_LOCAL_TEE = 0x22,
  WASM_OPCODE_GLOBAL_GET = 0x23,
  WASM_OPCODE_GLOBAL_SET = 0x24,
  WASM_OPCODE_I32_STORE = 0x36,
  WASM_OPCODE_I64_STORE = 0x37,
  WASM_OPCODE_I32_CONST = 0x41,
  WASM_OPCODE_I64_CONST = 0x42,
  WASM_OPCODE_F32_CONST = 0x43,
  WASM_OPCODE_F64_CONST = 0x44,
  WASM_OPCODE_I32_ADD = 0x6a,
  WASM_OPCODE_I32_SUB = 0x6b,
  WASM_OPCODE_I32_MUL = 0x6c,
  WASM_OPCODE_I64_ADD = 0x7c,
  WASM_OPCODE_I64_SUB = 0x7d,
  WASM_OPCODE_I64_MUL = 0x7e,
  WASM_OPCODE_REF_NULL = 0xd0,
  WASM_OPCODE_REF_FUNC = 0xd2,
  // Prefix byte introducing the GC opcode space (sub-opcodes are listed in a
  // separate enum).
  WASM_OPCODE_GC_PREFIX = 0xfb,
};
116 
// Opcodes in the GC-prefixed space (0xfb). These values are sub-opcodes and
// therefore overlap numerically with opcodes from other spaces.
enum : unsigned {
  WASM_OPCODE_STRUCT_NEW = 0x00,
  WASM_OPCODE_STRUCT_NEW_DEFAULT = 0x01,
  WASM_OPCODE_ARRAY_NEW = 0x06,
  WASM_OPCODE_ARRAY_NEW_DEFAULT = 0x07,
  WASM_OPCODE_ARRAY_NEW_FIXED = 0x08,
  WASM_OPCODE_REF_I31 = 0x1c,
  // any.convert_extern and extern.convert_any don't seem to be supported by
  // Binaryen.
};
128 
// Opcodes used in synthetic functions.
enum : unsigned {
  // Single-byte opcodes.
  WASM_OPCODE_BLOCK = 0x02,
  WASM_OPCODE_BR = 0x0c,
  WASM_OPCODE_BR_TABLE = 0x0e,
  WASM_OPCODE_RETURN = 0x0f,
  WASM_OPCODE_DROP = 0x1a,
  // Prefix byte for the "misc" opcode space, followed by sub-opcodes that
  // are encoded after that prefix.
  WASM_OPCODE_MISC_PREFIX = 0xfc,
  WASM_OPCODE_MEMORY_INIT = 0x08,
  WASM_OPCODE_MEMORY_FILL = 0x0b,
  WASM_OPCODE_DATA_DROP = 0x09,
  // Prefix byte for the atomics opcode space, followed by sub-opcodes that
  // are encoded after that prefix.
  WASM_OPCODE_ATOMICS_PREFIX = 0xfe,
  WASM_OPCODE_ATOMIC_NOTIFY = 0x00,
  WASM_OPCODE_I32_ATOMIC_WAIT = 0x01,
  WASM_OPCODE_I32_ATOMIC_STORE = 0x17,
  WASM_OPCODE_I32_RMW_CMPXCHG = 0x48,
};
146 
// Bit flags describing limits (stored in WasmLimits::Flags). Each flag is a
// distinct bit so they can be OR-ed together.
enum : unsigned {
  WASM_LIMITS_FLAG_NONE = 0x0,
  WASM_LIMITS_FLAG_HAS_MAX = 0x1,
  WASM_LIMITS_FLAG_IS_SHARED = 0x2,
  WASM_LIMITS_FLAG_IS_64 = 0x4,
};
153 
// Bit flags for data segments (see WasmDataSegment::InitFlags).
enum : unsigned {
  WASM_DATA_SEGMENT_IS_PASSIVE = 0x01,
  WASM_DATA_SEGMENT_HAS_MEMINDEX = 0x02,
};
158 
// Bit flags for element segments. Bit 0x02 is overloaded: its meaning depends
// on whether the passive bit is set.
enum : unsigned {
  WASM_ELEM_SEGMENT_IS_PASSIVE = 0x01,
  WASM_ELEM_SEGMENT_IS_DECLARATIVE = 0x02,   // if passive == 1
  WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02, // if passive == 0
  WASM_ELEM_SEGMENT_HAS_INIT_EXPRS = 0x04,
};
// Mask covering the two low flag bits (0x01 | 0x02).
// NOTE(review): presumably an elem-kind field is present when any masked bit
// is set -- confirm against the element section parser.
constexpr unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND = 0x3;
166 
// Feature policy prefixes used in the custom "target_features" section. Each
// prefix is a single ASCII character.
enum : uint8_t {
  WASM_FEATURE_PREFIX_USED = '+',
  WASM_FEATURE_PREFIX_REQUIRED = '=',
  WASM_FEATURE_PREFIX_DISALLOWED = '-',
};
173 
// Kind codes used in the custom "name" section. The values are not
// contiguous; only the subsection kinds listed here are named.
enum : unsigned {
  WASM_NAMES_MODULE = 0,
  WASM_NAMES_FUNCTION = 1,
  WASM_NAMES_LOCAL = 2,
  WASM_NAMES_GLOBAL = 7,
  WASM_NAMES_DATA_SEGMENT = 9,
};
182 
// Kind codes used in the custom "linking" section.
enum : unsigned {
  WASM_SEGMENT_INFO = 0x5,
  WASM_INIT_FUNCS = 0x6,
  WASM_COMDAT_INFO = 0x7,
  WASM_SYMBOL_TABLE = 0x8,
};
190 
// Kind codes used in the custom "dylink" section.
enum : unsigned {
  WASM_DYLINK_MEM_INFO = 0x1,
  WASM_DYLINK_NEEDED = 0x2,
  WASM_DYLINK_EXPORT_INFO = 0x3,
  WASM_DYLINK_IMPORT_INFO = 0x4,
};
198 
// Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO
// subsection.
enum : unsigned {
  WASM_COMDAT_DATA = 0x0,
  WASM_COMDAT_FUNCTION = 0x1,
  // GLOBAL, TAG, and TABLE are in here but LLVM doesn't use them yet.
  WASM_COMDAT_SECTION = 0x5,
};
206 
// Kind codes used in the custom "linking" section in the WASM_SYMBOL_TABLE
// subsection.
enum WasmSymbolType : unsigned {
  WASM_SYMBOL_TYPE_FUNCTION = 0x0,
  WASM_SYMBOL_TYPE_DATA = 0x1,
  WASM_SYMBOL_TYPE_GLOBAL = 0x2,
  WASM_SYMBOL_TYPE_SECTION = 0x3,
  WASM_SYMBOL_TYPE_TAG = 0x4,
  WASM_SYMBOL_TYPE_TABLE = 0x5,
};
216 
// Bit flags for segments; each enumerator is a distinct bit so flags can be
// OR-ed together.
enum WasmSegmentFlag : unsigned {
  WASM_SEG_FLAG_STRINGS = 0x1,
  WASM_SEG_FLAG_TLS = 0x2,
  WASM_SEG_FLAG_RETAIN = 0x4,
};
222 
// Kinds of tag attributes. Only the exception attribute is defined here.
enum WasmTagAttribute : uint8_t {
  WASM_TAG_ATTRIBUTE_EXCEPTION = 0x0,
};
227 
// Symbol flag layout: the low two bits (BINDING_MASK) hold the binding, the
// next two bits (VISIBILITY_MASK) hold the visibility, and the remaining
// constants are independent single-bit flags.
constexpr unsigned WASM_SYMBOL_BINDING_MASK = 0x3;
constexpr unsigned WASM_SYMBOL_VISIBILITY_MASK = 0xc;

// Values of the binding field.
constexpr unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0;
constexpr unsigned WASM_SYMBOL_BINDING_WEAK = 0x1;
constexpr unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2;
// Values of the visibility field.
constexpr unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0;
constexpr unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
// Independent flag bits.
constexpr unsigned WASM_SYMBOL_UNDEFINED = 0x10;
constexpr unsigned WASM_SYMBOL_EXPORTED = 0x20;
constexpr unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40;
constexpr unsigned WASM_SYMBOL_NO_STRIP = 0x80;
constexpr unsigned WASM_SYMBOL_TLS = 0x100;
constexpr unsigned WASM_SYMBOL_ABSOLUTE = 0x200;
242 
243 #define WASM_RELOC(name, value) name = value,
244 
245 enum : unsigned {
246 #include "WasmRelocs.def"
247 };
248 
249 #undef WASM_RELOC
250 
251 struct WasmObjectHeader {
252   StringRef Magic;
253   uint32_t Version;
254 };
255 
256 // Subset of types that a value can have
257 enum class ValType {
258   I32 = WASM_TYPE_I32,
259   I64 = WASM_TYPE_I64,
260   F32 = WASM_TYPE_F32,
261   F64 = WASM_TYPE_F64,
262   V128 = WASM_TYPE_V128,
263   FUNCREF = WASM_TYPE_FUNCREF,
264   EXTERNREF = WASM_TYPE_EXTERNREF,
265   EXNREF = WASM_TYPE_EXNREF,
266   // Unmodeled value types include ref types with heap types other than
267   // func, extern or exn, and type-specialized funcrefs
268   OTHERREF = 0xff,
269 };
270 
271 struct WasmDylinkImportInfo {
272   StringRef Module;
273   StringRef Field;
274   uint32_t Flags;
275 };
276 
277 struct WasmDylinkExportInfo {
278   StringRef Name;
279   uint32_t Flags;
280 };
281 
282 struct WasmDylinkInfo {
283   uint32_t MemorySize; // Memory size in bytes
284   uint32_t MemoryAlignment;  // P2 alignment of memory
285   uint32_t TableSize;  // Table size in elements
286   uint32_t TableAlignment;  // P2 alignment of table
287   std::vector<StringRef> Needed; // Shared library dependencies
288   std::vector<WasmDylinkImportInfo> ImportInfo;
289   std::vector<WasmDylinkExportInfo> ExportInfo;
290 };
291 
// Producer information grouped into three lists of string pairs.
// NOTE(review): each pair is presumably (name, version) -- confirm against the
// "producers" section reader/writer.
struct WasmProducerInfo {
  std::vector<std::pair<std::string, std::string>> Languages;
  std::vector<std::pair<std::string, std::string>> Tools;
  std::vector<std::pair<std::string, std::string>> SDKs;
};
297 
// One feature entry: a one-byte policy prefix followed by the feature name.
// NOTE(review): Prefix is presumably one of the WASM_FEATURE_PREFIX_* values.
struct WasmFeatureEntry {
  uint8_t Prefix;
  std::string Name;
};
302 
303 struct WasmExport {
304   StringRef Name;
305   uint8_t Kind;
306   uint32_t Index;
307 };
308 
// Limits with a flags byte (WASM_LIMITS_FLAG_* bits), a minimum and a maximum.
// The equality operator for this type only compares Maximum when
// WASM_LIMITS_FLAG_HAS_MAX is set in Flags.
struct WasmLimits {
  uint8_t Flags;
  uint64_t Minimum;
  uint64_t Maximum;
};
314 
315 struct WasmTableType {
316   ValType ElemType;
317   WasmLimits Limits;
318 };
319 
320 struct WasmTable {
321   uint32_t Index;
322   WasmTableType Type;
323   StringRef SymbolName; // from the "linking" section
324 };
325 
// A single-instruction initializer expression: one opcode plus its immediate.
// Value is a union, so only the member matching Opcode is meaningful. Float
// immediates are stored in unsigned integer fields of the same width.
struct WasmInitExprMVP {
  uint8_t Opcode;
  union {
    int32_t Int32;
    int64_t Int64;
    uint32_t Float32;
    uint64_t Float64;
    uint32_t Global;
  } Value;
};
336 
337 // Extended-const init exprs and exprs with GC types are not explicitly
338 // modeled, but the raw body of the expr is attached.
339 struct WasmInitExpr {
340   uint8_t Extended; // Set to non-zero if extended const is used (i.e. more than
341                     // one instruction)
342   WasmInitExprMVP Inst;
343   ArrayRef<uint8_t> Body;
344 };
345 
// Type of a global: a value type byte and a mutability flag.
struct WasmGlobalType {
  uint8_t Type; // TODO: make this a ValType?
  bool Mutable;
};
350 
351 struct WasmGlobal {
352   uint32_t Index;
353   WasmGlobalType Type;
354   WasmInitExpr InitExpr;
355   StringRef SymbolName; // from the "linking" section
356   uint32_t Offset; // Offset of the definition in the binary's Global section
357   uint32_t Size;   // Size of the definition in the binary's Global section
358 };
359 
360 struct WasmTag {
361   uint32_t Index;
362   uint32_t SigIndex;
363   StringRef SymbolName; // from the "linking" section
364 };
365 
366 struct WasmImport {
367   StringRef Module;
368   StringRef Field;
369   uint8_t Kind;
370   union {
371     uint32_t SigIndex;
372     WasmGlobalType Global;
373     WasmTableType Table;
374     WasmLimits Memory;
375   };
376 };
377 
// A run of local declarations in a function body: Count locals of type Type.
struct WasmLocalDecl {
  uint8_t Type;
  uint32_t Count;
};
382 
383 struct WasmFunction {
384   uint32_t Index;
385   uint32_t SigIndex;
386   std::vector<WasmLocalDecl> Locals;
387   ArrayRef<uint8_t> Body;
388   uint32_t CodeSectionOffset;
389   uint32_t Size;
390   uint32_t CodeOffset;  // start of Locals and Body
391   std::optional<StringRef> ExportName; // from the "export" section
392   StringRef SymbolName; // from the "linking" section
393   StringRef DebugName;  // from the "name" section
394   uint32_t Comdat;      // from the "comdat info" section
395 };
396 
397 struct WasmDataSegment {
398   uint32_t InitFlags;
399   // Present if InitFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX.
400   uint32_t MemoryIndex;
401   // Present if InitFlags & WASM_DATA_SEGMENT_IS_PASSIVE == 0.
402   WasmInitExpr Offset;
403 
404   ArrayRef<uint8_t> Content;
405   StringRef Name; // from the "segment info" section
406   uint32_t Alignment;
407   uint32_t LinkingFlags;
408   uint32_t Comdat; // from the "comdat info" section
409 };
410 
411 // Represents a Wasm element segment, with some limitations compared the spec:
412 // 1) Does not model passive or declarative segments (Segment will end up with
413 // an Offset field of i32.const 0)
414 // 2) Does not model init exprs (Segment will get an empty Functions list)
415 // 3) Does not model types other than basic funcref/externref/exnref (see
416 // ValType)
417 struct WasmElemSegment {
418   uint32_t Flags;
419   uint32_t TableNumber;
420   ValType ElemKind;
421   WasmInitExpr Offset;
422   std::vector<uint32_t> Functions;
423 };
424 
// Represents the location of a Wasm data symbol within a WasmDataSegment, as
// the index of the segment, and the offset and size within the segment.
struct WasmDataReference {
  uint32_t Segment;
  uint64_t Offset;
  uint64_t Size;
};
432 
// One relocation entry.
struct WasmRelocation {
  uint8_t Type;    // The type of the relocation.
  uint32_t Index;  // Index into either symbol or type index space.
  uint64_t Offset; // Offset from the start of the section.
  int64_t Addend;  // A value to add to the symbol.
};
439 
// One init-function entry: a priority and a symbol.
// NOTE(review): Symbol is presumably an index into the symbol table --
// confirm against the linking section reader.
struct WasmInitFunc {
  uint32_t Priority;
  uint32_t Symbol;
};
444 
445 struct WasmSymbolInfo {
446   StringRef Name;
447   uint8_t Kind;
448   uint32_t Flags;
449   // For undefined symbols the module of the import
450   std::optional<StringRef> ImportModule;
451   // For undefined symbols the name of the import
452   std::optional<StringRef> ImportName;
453   // For symbols to be exported from the final module
454   std::optional<StringRef> ExportName;
455   union {
456     // For function, table, or global symbols, the index in function, table, or
457     // global index space.
458     uint32_t ElementIndex;
459     // For a data symbols, the address of the data relative to segment.
460     WasmDataReference DataRef;
461   };
462 };
463 
// Kind of entity a WasmDebugName refers to. Enumerators use the default
// numbering (0, 1, 2).
enum class NameType {
  FUNCTION,
  GLOBAL,
  DATA_SEGMENT,
};
469 
470 struct WasmDebugName {
471   NameType Type;
472   uint32_t Index;
473   StringRef Name;
474 };
475 
476 // Info from the linking metadata section of a wasm object file.
477 struct WasmLinkingData {
478   uint32_t Version;
479   std::vector<WasmInitFunc> InitFunctions;
480   std::vector<StringRef> Comdats;
481   // The linking section also contains a symbol table. This info (represented
482   // in a WasmSymbolInfo struct) is stored inside the WasmSymbol object instead
483   // of in this structure; this allows vectors of WasmSymbols and
484   // WasmLinkingDatas to be reallocated.
485 };
486 
487 struct WasmSignature {
488   SmallVector<ValType, 1> Returns;
489   SmallVector<ValType, 4> Params;
490   // LLVM can parse types other than functions encoded in the type section,
491   // but does not actually model them. Instead a placeholder signature is
492   // created in the Object's signature list.
493   enum { Function, Tag, Placeholder } Kind = Function;
494   // Support empty and tombstone instances, needed by DenseMap.
495   enum { Plain, Empty, Tombstone } State = Plain;
496 
WasmSignatureWasmSignature497   WasmSignature(SmallVector<ValType, 1> &&InReturns,
498                 SmallVector<ValType, 4> &&InParams)
499       : Returns(InReturns), Params(InParams) {}
500   WasmSignature() = default;
501 };
502 
503 // Useful comparison operators
504 inline bool operator==(const WasmSignature &LHS, const WasmSignature &RHS) {
505   return LHS.State == RHS.State && LHS.Returns == RHS.Returns &&
506          LHS.Params == RHS.Params;
507 }
508 
509 inline bool operator!=(const WasmSignature &LHS, const WasmSignature &RHS) {
510   return !(LHS == RHS);
511 }
512 
513 inline bool operator==(const WasmGlobalType &LHS, const WasmGlobalType &RHS) {
514   return LHS.Type == RHS.Type && LHS.Mutable == RHS.Mutable;
515 }
516 
517 inline bool operator!=(const WasmGlobalType &LHS, const WasmGlobalType &RHS) {
518   return !(LHS == RHS);
519 }
520 
521 inline bool operator==(const WasmLimits &LHS, const WasmLimits &RHS) {
522   return LHS.Flags == RHS.Flags && LHS.Minimum == RHS.Minimum &&
523          (LHS.Flags & WASM_LIMITS_FLAG_HAS_MAX ? LHS.Maximum == RHS.Maximum
524                                                : true);
525 }
526 
527 inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) {
528   return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits;
529 }
530 
531 llvm::StringRef toString(WasmSymbolType type);
532 llvm::StringRef relocTypetoString(uint32_t type);
533 llvm::StringRef sectionTypeToString(uint32_t type);
534 bool relocTypeHasAddend(uint32_t type);
535 
536 } // end namespace wasm
537 } // end namespace llvm
538 
539 #endif
540