1 //===- Wasm.h - Wasm object file format -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines manifest constants for the wasm object file format. 10 // See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_BINARYFORMAT_WASM_H 15 #define LLVM_BINARYFORMAT_WASM_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Support/Compiler.h" 21 #include <optional> 22 23 namespace llvm { 24 namespace wasm { 25 26 // Object file magic string. 27 const char WasmMagic[] = {'\0', 'a', 's', 'm'}; 28 // Wasm binary format version 29 const uint32_t WasmVersion = 0x1; 30 // Wasm linking metadata version 31 const uint32_t WasmMetadataVersion = 0x2; 32 // Wasm uses a 64k page size by default (but the custom-page-sizes proposal 33 // allows changing it) 34 const uint32_t WasmDefaultPageSize = 65536; 35 36 enum : unsigned { 37 WASM_SEC_CUSTOM = 0, // Custom / User-defined section 38 WASM_SEC_TYPE = 1, // Function signature declarations 39 WASM_SEC_IMPORT = 2, // Import declarations 40 WASM_SEC_FUNCTION = 3, // Function declarations 41 WASM_SEC_TABLE = 4, // Indirect function table and other tables 42 WASM_SEC_MEMORY = 5, // Memory attributes 43 WASM_SEC_GLOBAL = 6, // Global declarations 44 WASM_SEC_EXPORT = 7, // Exports 45 WASM_SEC_START = 8, // Start function declaration 46 WASM_SEC_ELEM = 9, // Elements section 47 WASM_SEC_CODE = 10, // Function bodies (code) 48 WASM_SEC_DATA = 11, // Data segments 49 WASM_SEC_DATACOUNT = 12, // Data segment count 50 WASM_SEC_TAG = 13, // Tag 
declarations 51 WASM_SEC_LAST_KNOWN = WASM_SEC_TAG, 52 }; 53 54 // Type immediate encodings used in various contexts. 55 enum : unsigned { 56 WASM_TYPE_I32 = 0x7F, 57 WASM_TYPE_I64 = 0x7E, 58 WASM_TYPE_F32 = 0x7D, 59 WASM_TYPE_F64 = 0x7C, 60 WASM_TYPE_V128 = 0x7B, 61 WASM_TYPE_NULLFUNCREF = 0x73, 62 WASM_TYPE_NULLEXTERNREF = 0x72, 63 WASM_TYPE_NULLEXNREF = 0x74, 64 WASM_TYPE_NULLREF = 0x71, 65 WASM_TYPE_FUNCREF = 0x70, 66 WASM_TYPE_EXTERNREF = 0x6F, 67 WASM_TYPE_EXNREF = 0x69, 68 WASM_TYPE_ANYREF = 0x6E, 69 WASM_TYPE_EQREF = 0x6D, 70 WASM_TYPE_I31REF = 0x6C, 71 WASM_TYPE_STRUCTREF = 0x6B, 72 WASM_TYPE_ARRAYREF = 0x6A, 73 WASM_TYPE_NONNULLABLE = 0x64, 74 WASM_TYPE_NULLABLE = 0x63, 75 WASM_TYPE_FUNC = 0x60, 76 WASM_TYPE_ARRAY = 0x5E, 77 WASM_TYPE_STRUCT = 0x5F, 78 WASM_TYPE_SUB = 0x50, 79 WASM_TYPE_SUB_FINAL = 0x4F, 80 WASM_TYPE_REC = 0x4E, 81 WASM_TYPE_NORESULT = 0x40, // for blocks with no result values 82 }; 83 84 // Kinds of externals (for imports and exports). 85 enum : unsigned { 86 WASM_EXTERNAL_FUNCTION = 0x0, 87 WASM_EXTERNAL_TABLE = 0x1, 88 WASM_EXTERNAL_MEMORY = 0x2, 89 WASM_EXTERNAL_GLOBAL = 0x3, 90 WASM_EXTERNAL_TAG = 0x4, 91 }; 92 93 // Opcodes used in initializer expressions. 
enum : unsigned {
  WASM_OPCODE_END = 0x0b,
  WASM_OPCODE_CALL = 0x10,
  WASM_OPCODE_LOCAL_GET = 0x20,
  WASM_OPCODE_LOCAL_SET = 0x21,
  WASM_OPCODE_LOCAL_TEE = 0x22,
  WASM_OPCODE_GLOBAL_GET = 0x23,
  WASM_OPCODE_GLOBAL_SET = 0x24,
  WASM_OPCODE_I32_STORE = 0x36,
  WASM_OPCODE_I64_STORE = 0x37,
  WASM_OPCODE_I32_CONST = 0x41,
  WASM_OPCODE_I64_CONST = 0x42,
  WASM_OPCODE_F32_CONST = 0x43,
  WASM_OPCODE_F64_CONST = 0x44,
  WASM_OPCODE_I32_ADD = 0x6a,
  WASM_OPCODE_I32_SUB = 0x6b,
  WASM_OPCODE_I32_MUL = 0x6c,
  WASM_OPCODE_I64_ADD = 0x7c,
  WASM_OPCODE_I64_SUB = 0x7d,
  WASM_OPCODE_I64_MUL = 0x7e,
  WASM_OPCODE_REF_NULL = 0xd0,
  WASM_OPCODE_REF_FUNC = 0xd2,
  // Prefix byte; the opcodes in the next enum live in the space it selects.
  WASM_OPCODE_GC_PREFIX = 0xfb,
};

// Opcodes in the GC-prefixed space (0xfb)
enum : unsigned {
  WASM_OPCODE_STRUCT_NEW = 0x00,
  WASM_OPCODE_STRUCT_NEW_DEFAULT = 0x01,
  WASM_OPCODE_ARRAY_NEW = 0x06,
  WASM_OPCODE_ARRAY_NEW_DEFAULT = 0x07,
  WASM_OPCODE_ARRAY_NEW_FIXED = 0x08,
  WASM_OPCODE_REF_I31 = 0x1c,
  // any.convert_extern and extern.convert_any don't seem to be supported by
  // Binaryen.
};

// Opcodes used in synthetic functions.
enum : unsigned {
  WASM_OPCODE_BLOCK = 0x02,
  WASM_OPCODE_BR = 0x0c,
  WASM_OPCODE_BR_TABLE = 0x0e,
  WASM_OPCODE_RETURN = 0x0f,
  WASM_OPCODE_DROP = 0x1a,
  WASM_OPCODE_MISC_PREFIX = 0xfc,
  WASM_OPCODE_MEMORY_INIT = 0x08,
  WASM_OPCODE_MEMORY_FILL = 0x0b,
  WASM_OPCODE_DATA_DROP = 0x09,
  WASM_OPCODE_ATOMICS_PREFIX = 0xfe,
  WASM_OPCODE_ATOMIC_NOTIFY = 0x00,
  WASM_OPCODE_I32_ATOMIC_WAIT = 0x01,
  WASM_OPCODE_I32_ATOMIC_STORE = 0x17,
  WASM_OPCODE_I32_RMW_CMPXCHG = 0x48,
};

// Sub-opcodes for catch clauses in a try_table instruction
enum : unsigned {
  WASM_OPCODE_CATCH = 0x00,
  WASM_OPCODE_CATCH_REF = 0x01,
  WASM_OPCODE_CATCH_ALL = 0x02,
  WASM_OPCODE_CATCH_ALL_REF = 0x03,
};

// Flag bits for limits (see WasmLimits::Flags).
enum : unsigned {
  WASM_LIMITS_FLAG_NONE = 0x0,
  WASM_LIMITS_FLAG_HAS_MAX = 0x1,
  WASM_LIMITS_FLAG_IS_SHARED = 0x2,
  WASM_LIMITS_FLAG_IS_64 = 0x4,
  WASM_LIMITS_FLAG_HAS_PAGE_SIZE = 0x8,
};

// Flag bits for data segments (see WasmDataSegment::InitFlags).
enum : unsigned {
  WASM_DATA_SEGMENT_IS_PASSIVE = 0x01,
  WASM_DATA_SEGMENT_HAS_MEMINDEX = 0x02,
};

// Flag bits for element segments. Bit 0x02 is overloaded: its meaning
// depends on whether the passive bit is set.
enum : unsigned {
  WASM_ELEM_SEGMENT_IS_PASSIVE = 0x01,
  WASM_ELEM_SEGMENT_IS_DECLARATIVE = 0x02,   // if passive == 1
  WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02, // if passive == 0
  WASM_ELEM_SEGMENT_HAS_INIT_EXPRS = 0x04,
};
const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC = 0x3;

// Feature policy prefixes used in the custom "target_features" section
enum : uint8_t {
  WASM_FEATURE_PREFIX_USED = '+',
  WASM_FEATURE_PREFIX_DISALLOWED = '-',
};

// Kind codes used in the custom "name" section
enum : unsigned {
  WASM_NAMES_MODULE = 0,
  WASM_NAMES_FUNCTION = 1,
  WASM_NAMES_LOCAL = 2,
  WASM_NAMES_GLOBAL = 7,
  WASM_NAMES_DATA_SEGMENT = 9,
};

// Kind codes used in the custom "linking" section
enum : unsigned {
  WASM_SEGMENT_INFO = 0x5,
  WASM_INIT_FUNCS = 0x6,
  WASM_COMDAT_INFO = 0x7,
  WASM_SYMBOL_TABLE = 0x8,
};

// Kind codes used in the custom "dylink" section
enum : unsigned {
  WASM_DYLINK_MEM_INFO = 0x1,
  WASM_DYLINK_NEEDED = 0x2,
  WASM_DYLINK_EXPORT_INFO = 0x3,
  WASM_DYLINK_IMPORT_INFO = 0x4,
  WASM_DYLINK_RUNTIME_PATH = 0x5,
};

// Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO
enum : unsigned {
  WASM_COMDAT_DATA = 0x0,
  WASM_COMDAT_FUNCTION = 0x1,
  // GLOBAL, TAG, and TABLE are in here but LLVM doesn't use them yet.
  WASM_COMDAT_SECTION = 0x5,
};

// Kind codes used in the custom "linking" section in the WASM_SYMBOL_TABLE
enum WasmSymbolType : unsigned {
  WASM_SYMBOL_TYPE_FUNCTION = 0x0,
  WASM_SYMBOL_TYPE_DATA = 0x1,
  WASM_SYMBOL_TYPE_GLOBAL = 0x2,
  WASM_SYMBOL_TYPE_SECTION = 0x3,
  WASM_SYMBOL_TYPE_TAG = 0x4,
  WASM_SYMBOL_TYPE_TABLE = 0x5,
};

// Flag bits for segments.
enum WasmSegmentFlag : unsigned {
  WASM_SEG_FLAG_STRINGS = 0x1,
  WASM_SEG_FLAG_TLS = 0x2,
  WASM_SEG_FLAG_RETAIN = 0x4,
};

// Kinds of tag attributes.
235 enum WasmTagAttribute : uint8_t { 236 WASM_TAG_ATTRIBUTE_EXCEPTION = 0x0, 237 }; 238 239 const unsigned WASM_SYMBOL_BINDING_MASK = 0x3; 240 const unsigned WASM_SYMBOL_VISIBILITY_MASK = 0xc; 241 242 const unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0; 243 const unsigned WASM_SYMBOL_BINDING_WEAK = 0x1; 244 const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2; 245 const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0; 246 const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4; 247 const unsigned WASM_SYMBOL_UNDEFINED = 0x10; 248 const unsigned WASM_SYMBOL_EXPORTED = 0x20; 249 const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40; 250 const unsigned WASM_SYMBOL_NO_STRIP = 0x80; 251 const unsigned WASM_SYMBOL_TLS = 0x100; 252 const unsigned WASM_SYMBOL_ABSOLUTE = 0x200; 253 254 #define WASM_RELOC(name, value) name = value, 255 256 enum WasmRelocType : unsigned { 257 #include "WasmRelocs.def" 258 }; 259 260 #undef WASM_RELOC 261 262 struct WasmObjectHeader { 263 StringRef Magic; 264 uint32_t Version; 265 }; 266 267 // Subset of types that a value can have 268 enum class ValType { 269 I32 = WASM_TYPE_I32, 270 I64 = WASM_TYPE_I64, 271 F32 = WASM_TYPE_F32, 272 F64 = WASM_TYPE_F64, 273 V128 = WASM_TYPE_V128, 274 FUNCREF = WASM_TYPE_FUNCREF, 275 EXTERNREF = WASM_TYPE_EXTERNREF, 276 EXNREF = WASM_TYPE_EXNREF, 277 // Unmodeled value types include ref types with heap types other than 278 // func, extern or exn, and type-specialized funcrefs 279 OTHERREF = 0xff, 280 }; 281 282 struct WasmDylinkImportInfo { 283 StringRef Module; 284 StringRef Field; 285 uint32_t Flags; 286 }; 287 288 struct WasmDylinkExportInfo { 289 StringRef Name; 290 uint32_t Flags; 291 }; 292 293 struct WasmDylinkInfo { 294 uint32_t MemorySize; // Memory size in bytes 295 uint32_t MemoryAlignment; // P2 alignment of memory 296 uint32_t TableSize; // Table size in elements 297 uint32_t TableAlignment; // P2 alignment of table 298 std::vector<StringRef> Needed; // Shared library dependencies 299 std::vector<WasmDylinkImportInfo> 
ImportInfo; 300 std::vector<WasmDylinkExportInfo> ExportInfo; 301 std::vector<StringRef> RuntimePath; 302 }; 303 304 struct WasmProducerInfo { 305 std::vector<std::pair<std::string, std::string>> Languages; 306 std::vector<std::pair<std::string, std::string>> Tools; 307 std::vector<std::pair<std::string, std::string>> SDKs; 308 }; 309 310 struct WasmFeatureEntry { 311 uint8_t Prefix; 312 std::string Name; 313 }; 314 315 struct WasmExport { 316 StringRef Name; 317 uint8_t Kind; 318 uint32_t Index; 319 }; 320 321 struct WasmLimits { 322 uint8_t Flags; 323 uint64_t Minimum; 324 uint64_t Maximum; 325 uint32_t PageSize; 326 }; 327 328 struct WasmTableType { 329 ValType ElemType; 330 WasmLimits Limits; 331 }; 332 333 struct WasmTable { 334 uint32_t Index; 335 WasmTableType Type; 336 StringRef SymbolName; // from the "linking" section 337 }; 338 339 struct WasmInitExprMVP { 340 uint8_t Opcode; 341 union { 342 int32_t Int32; 343 int64_t Int64; 344 uint32_t Float32; 345 uint64_t Float64; 346 uint32_t Global; 347 } Value; 348 }; 349 350 // Extended-const init exprs and exprs with GC types are not explicitly 351 // modeled, but the raw body of the expr is attached. 352 struct WasmInitExpr { 353 uint8_t Extended; // Set to non-zero if extended const is used (i.e. more than 354 // one instruction) 355 WasmInitExprMVP Inst; 356 ArrayRef<uint8_t> Body; 357 }; 358 359 struct WasmGlobalType { 360 uint8_t Type; // TODO: make this a ValType? 
361 bool Mutable; 362 }; 363 364 struct WasmGlobal { 365 uint32_t Index; 366 WasmGlobalType Type; 367 WasmInitExpr InitExpr; 368 StringRef SymbolName; // from the "linking" section 369 uint32_t Offset; // Offset of the definition in the binary's Global section 370 uint32_t Size; // Size of the definition in the binary's Global section 371 }; 372 373 struct WasmTag { 374 uint32_t Index; 375 uint32_t SigIndex; 376 StringRef SymbolName; // from the "linking" section 377 }; 378 379 struct WasmImport { 380 StringRef Module; 381 StringRef Field; 382 uint8_t Kind; 383 union { 384 uint32_t SigIndex; 385 WasmGlobalType Global; 386 WasmTableType Table; 387 WasmLimits Memory; 388 }; 389 }; 390 391 struct WasmLocalDecl { 392 uint8_t Type; 393 uint32_t Count; 394 }; 395 396 struct WasmFunction { 397 uint32_t Index; 398 uint32_t SigIndex; 399 std::vector<WasmLocalDecl> Locals; 400 ArrayRef<uint8_t> Body; 401 uint32_t CodeSectionOffset; 402 uint32_t Size; 403 uint32_t CodeOffset; // start of Locals and Body 404 std::optional<StringRef> ExportName; // from the "export" section 405 StringRef SymbolName; // from the "linking" section 406 StringRef DebugName; // from the "name" section 407 uint32_t Comdat; // from the "comdat info" section 408 }; 409 410 struct WasmDataSegment { 411 uint32_t InitFlags; 412 // Present if InitFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX. 413 uint32_t MemoryIndex; 414 // Present if InitFlags & WASM_DATA_SEGMENT_IS_PASSIVE == 0. 415 WasmInitExpr Offset; 416 417 ArrayRef<uint8_t> Content; 418 StringRef Name; // from the "segment info" section 419 uint32_t Alignment; 420 uint32_t LinkingFlags; 421 uint32_t Comdat; // from the "comdat info" section 422 }; 423 424 // 3 different element segment modes are encodable. This class is currently 425 // only used during decoding (see WasmElemSegment below). 
426 enum class ElemSegmentMode { Active, Passive, Declarative }; 427 428 // Represents a Wasm element segment, with some limitations compared the spec: 429 // 1) Does not model passive or declarative segments (Segment will end up with 430 // an Offset field of i32.const 0) 431 // 2) Does not model init exprs (Segment will get an empty Functions list) 432 // 3) Does not model types other than basic funcref/externref/exnref (see 433 // ValType) 434 struct WasmElemSegment { 435 uint32_t Flags; 436 uint32_t TableNumber; 437 ValType ElemKind; 438 WasmInitExpr Offset; 439 std::vector<uint32_t> Functions; 440 }; 441 442 // Represents the location of a Wasm data symbol within a WasmDataSegment, as 443 // the index of the segment, and the offset and size within the segment. 444 struct WasmDataReference { 445 uint32_t Segment; 446 uint64_t Offset; 447 uint64_t Size; 448 }; 449 450 struct WasmRelocation { 451 uint8_t Type; // The type of the relocation. 452 uint32_t Index; // Index into either symbol or type index space. 453 uint64_t Offset; // Offset from the start of the section. 454 int64_t Addend; // A value to add to the symbol. 455 getTypeWasmRelocation456 WasmRelocType getType() const { return static_cast<WasmRelocType>(Type); } 457 }; 458 459 struct WasmInitFunc { 460 uint32_t Priority; 461 uint32_t Symbol; 462 }; 463 464 struct WasmSymbolInfo { 465 StringRef Name; 466 uint8_t Kind; 467 uint32_t Flags; 468 // For undefined symbols the module of the import 469 std::optional<StringRef> ImportModule; 470 // For undefined symbols the name of the import 471 std::optional<StringRef> ImportName; 472 // For symbols to be exported from the final module 473 std::optional<StringRef> ExportName; 474 union { 475 // For function, table, or global symbols, the index in function, table, or 476 // global index space. 477 uint32_t ElementIndex; 478 // For a data symbols, the address of the data relative to segment. 
479 WasmDataReference DataRef; 480 }; 481 }; 482 483 enum class NameType { 484 FUNCTION, 485 GLOBAL, 486 DATA_SEGMENT, 487 }; 488 489 struct WasmDebugName { 490 NameType Type; 491 uint32_t Index; 492 StringRef Name; 493 }; 494 495 // Info from the linking metadata section of a wasm object file. 496 struct WasmLinkingData { 497 uint32_t Version; 498 std::vector<WasmInitFunc> InitFunctions; 499 std::vector<StringRef> Comdats; 500 // The linking section also contains a symbol table. This info (represented 501 // in a WasmSymbolInfo struct) is stored inside the WasmSymbol object instead 502 // of in this structure; this allows vectors of WasmSymbols and 503 // WasmLinkingDatas to be reallocated. 504 }; 505 506 struct WasmSignature { 507 SmallVector<ValType, 1> Returns; 508 SmallVector<ValType, 4> Params; 509 // LLVM can parse types other than functions encoded in the type section, 510 // but does not actually model them. Instead a placeholder signature is 511 // created in the Object's signature list. 512 enum { Function, Tag, Placeholder } Kind = Function; 513 // Support empty and tombstone instances, needed by DenseMap. 
514 enum { Plain, Empty, Tombstone } State = Plain; 515 WasmSignatureWasmSignature516 WasmSignature(SmallVector<ValType, 1> &&InReturns, 517 SmallVector<ValType, 4> &&InParams) 518 : Returns(InReturns), Params(InParams) {} 519 WasmSignature() = default; 520 }; 521 522 // Useful comparison operators 523 inline bool operator==(const WasmSignature &LHS, const WasmSignature &RHS) { 524 return LHS.State == RHS.State && LHS.Returns == RHS.Returns && 525 LHS.Params == RHS.Params; 526 } 527 528 inline bool operator!=(const WasmSignature &LHS, const WasmSignature &RHS) { 529 return !(LHS == RHS); 530 } 531 532 inline bool operator==(const WasmGlobalType &LHS, const WasmGlobalType &RHS) { 533 return LHS.Type == RHS.Type && LHS.Mutable == RHS.Mutable; 534 } 535 536 inline bool operator!=(const WasmGlobalType &LHS, const WasmGlobalType &RHS) { 537 return !(LHS == RHS); 538 } 539 540 inline bool operator==(const WasmLimits &LHS, const WasmLimits &RHS) { 541 return LHS.Flags == RHS.Flags && LHS.Minimum == RHS.Minimum && 542 (LHS.Flags & WASM_LIMITS_FLAG_HAS_MAX ? LHS.Maximum == RHS.Maximum 543 : true) && 544 (LHS.Flags & WASM_LIMITS_FLAG_HAS_PAGE_SIZE 545 ? LHS.PageSize == RHS.PageSize 546 : true); 547 } 548 549 inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) { 550 return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits; 551 } 552 553 LLVM_ABI llvm::StringRef toString(WasmSymbolType type); 554 LLVM_ABI llvm::StringRef relocTypetoString(uint32_t type); 555 LLVM_ABI llvm::StringRef sectionTypeToString(uint32_t type); 556 LLVM_ABI bool relocTypeHasAddend(uint32_t type); 557 558 } // end namespace wasm 559 } // end namespace llvm 560 561 #endif 562