1 //===- Wasm.h - Wasm object file format -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines manifest constants for the wasm object file format. 10 // See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_BINARYFORMAT_WASM_H 15 #define LLVM_BINARYFORMAT_WASM_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include <optional> 21 22 namespace llvm { 23 namespace wasm { 24 25 // Object file magic string. 26 const char WasmMagic[] = {'\0', 'a', 's', 'm'}; 27 // Wasm binary format version 28 const uint32_t WasmVersion = 0x1; 29 // Wasm linking metadata version 30 const uint32_t WasmMetadataVersion = 0x2; 31 // Wasm uses a 64k page size 32 const uint32_t WasmPageSize = 65536; 33 34 enum : unsigned { 35 WASM_SEC_CUSTOM = 0, // Custom / User-defined section 36 WASM_SEC_TYPE = 1, // Function signature declarations 37 WASM_SEC_IMPORT = 2, // Import declarations 38 WASM_SEC_FUNCTION = 3, // Function declarations 39 WASM_SEC_TABLE = 4, // Indirect function table and other tables 40 WASM_SEC_MEMORY = 5, // Memory attributes 41 WASM_SEC_GLOBAL = 6, // Global declarations 42 WASM_SEC_EXPORT = 7, // Exports 43 WASM_SEC_START = 8, // Start function declaration 44 WASM_SEC_ELEM = 9, // Elements section 45 WASM_SEC_CODE = 10, // Function bodies (code) 46 WASM_SEC_DATA = 11, // Data segments 47 WASM_SEC_DATACOUNT = 12, // Data segment count 48 WASM_SEC_TAG = 13, // Tag declarations 49 WASM_SEC_LAST_KNOWN = WASM_SEC_TAG, 50 }; 51 52 // Type immediate encodings used in various contexts. 
// Covers value types, reference/heap types, and the composite-type markers
// (func/array/struct/sub/rec) in one flat, unscoped enum.
enum : unsigned {
  WASM_TYPE_I32 = 0x7F,
  WASM_TYPE_I64 = 0x7E,
  WASM_TYPE_F32 = 0x7D,
  WASM_TYPE_F64 = 0x7C,
  WASM_TYPE_V128 = 0x7B,
  WASM_TYPE_NULLFUNCREF = 0x73,
  WASM_TYPE_NULLEXTERNREF = 0x72,
  WASM_TYPE_NULLEXNREF = 0x74,
  WASM_TYPE_NULLREF = 0x71,
  WASM_TYPE_FUNCREF = 0x70,
  WASM_TYPE_EXTERNREF = 0x6F,
  WASM_TYPE_EXNREF = 0x69,
  WASM_TYPE_ANYREF = 0x6E,
  WASM_TYPE_EQREF = 0x6D,
  WASM_TYPE_I31REF = 0x6C,
  WASM_TYPE_STRUCTREF = 0x6B,
  WASM_TYPE_ARRAYREF = 0x6A,
  WASM_TYPE_NONNULLABLE = 0x64,
  WASM_TYPE_NULLABLE = 0x63,
  WASM_TYPE_FUNC = 0x60,
  WASM_TYPE_ARRAY = 0x5E,
  WASM_TYPE_STRUCT = 0x5F,
  WASM_TYPE_SUB = 0x50,
  WASM_TYPE_SUB_FINAL = 0x4F,
  WASM_TYPE_REC = 0x4E,
  WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
};

// Kinds of externals (for imports and exports).
enum : unsigned {
  WASM_EXTERNAL_FUNCTION = 0x0,
  WASM_EXTERNAL_TABLE = 0x1,
  WASM_EXTERNAL_MEMORY = 0x2,
  WASM_EXTERNAL_GLOBAL = 0x3,
  WASM_EXTERNAL_TAG = 0x4,
};

// Opcodes used in initializer expressions.
// WASM_OPCODE_GC_PREFIX is the lead byte for the GC-prefixed opcodes declared
// in the enum that follows this one.
enum : unsigned {
  WASM_OPCODE_END = 0x0b,
  WASM_OPCODE_CALL = 0x10,
  WASM_OPCODE_LOCAL_GET = 0x20,
  WASM_OPCODE_LOCAL_SET = 0x21,
  WASM_OPCODE_LOCAL_TEE = 0x22,
  WASM_OPCODE_GLOBAL_GET = 0x23,
  WASM_OPCODE_GLOBAL_SET = 0x24,
  WASM_OPCODE_I32_STORE = 0x36,
  WASM_OPCODE_I64_STORE = 0x37,
  WASM_OPCODE_I32_CONST = 0x41,
  WASM_OPCODE_I64_CONST = 0x42,
  WASM_OPCODE_F32_CONST = 0x43,
  WASM_OPCODE_F64_CONST = 0x44,
  WASM_OPCODE_I32_ADD = 0x6a,
  WASM_OPCODE_I32_SUB = 0x6b,
  WASM_OPCODE_I32_MUL = 0x6c,
  WASM_OPCODE_I64_ADD = 0x7c,
  WASM_OPCODE_I64_SUB = 0x7d,
  WASM_OPCODE_I64_MUL = 0x7e,
  WASM_OPCODE_REF_NULL = 0xd0,
  WASM_OPCODE_REF_FUNC = 0xd2,
  WASM_OPCODE_GC_PREFIX = 0xfb,
};

// Opcodes in the GC-prefixed space (0xfb)
enum : unsigned {
  WASM_OPCODE_STRUCT_NEW = 0x00,
  WASM_OPCODE_STRUCT_NEW_DEFAULT = 0x01,
  WASM_OPCODE_ARRAY_NEW = 0x06,
  WASM_OPCODE_ARRAY_NEW_DEFAULT = 0x07,
  WASM_OPCODE_ARRAY_NEW_FIXED = 0x08,
  WASM_OPCODE_REF_I31 = 0x1c,
  // any.convert_extern and extern.convert_any don't seem to be supported by
  // Binaryen.
};

// Opcodes used in synthetic functions.
// NOTE(review): the small values listed after WASM_OPCODE_MISC_PREFIX and
// WASM_OPCODE_ATOMICS_PREFIX look like sub-opcodes that follow the respective
// prefix byte (they overlap numerically with plain opcodes) -- confirm against
// the spec before using them unprefixed.
enum : unsigned {
  WASM_OPCODE_BLOCK = 0x02,
  WASM_OPCODE_BR = 0x0c,
  WASM_OPCODE_BR_TABLE = 0x0e,
  WASM_OPCODE_RETURN = 0x0f,
  WASM_OPCODE_DROP = 0x1a,
  WASM_OPCODE_MISC_PREFIX = 0xfc,
  WASM_OPCODE_MEMORY_INIT = 0x08,
  WASM_OPCODE_MEMORY_FILL = 0x0b,
  WASM_OPCODE_DATA_DROP = 0x09,
  WASM_OPCODE_ATOMICS_PREFIX = 0xfe,
  WASM_OPCODE_ATOMIC_NOTIFY = 0x00,
  WASM_OPCODE_I32_ATOMIC_WAIT = 0x01,
  WASM_OPCODE_I32_ATOMIC_STORE = 0x17,
  WASM_OPCODE_I32_RMW_CMPXCHG = 0x48,
};

// Bit flags stored in WasmLimits::Flags.
enum : unsigned {
  WASM_LIMITS_FLAG_NONE = 0x0,
  WASM_LIMITS_FLAG_HAS_MAX = 0x1,
  WASM_LIMITS_FLAG_IS_SHARED = 0x2,
  WASM_LIMITS_FLAG_IS_64 = 0x4,
};

// Bit flags for data segments (see WasmDataSegment::InitFlags).
enum : unsigned {
  WASM_DATA_SEGMENT_IS_PASSIVE = 0x01,
  WASM_DATA_SEGMENT_HAS_MEMINDEX = 0x02,
};

// Bit flags for element segments. Bit 0x02 is overloaded: its meaning depends
// on whether the passive bit is set.
enum : unsigned {
  WASM_ELEM_SEGMENT_IS_PASSIVE = 0x01,
  WASM_ELEM_SEGMENT_IS_DECLARATIVE = 0x02,   // if passive == 1
  WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02, // if passive == 0
  WASM_ELEM_SEGMENT_HAS_INIT_EXPRS = 0x04,
};
const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND = 0x3;

// Feature policy prefixes used in the custom "target_features" section
enum : uint8_t {
  WASM_FEATURE_PREFIX_USED = '+',
  WASM_FEATURE_PREFIX_REQUIRED = '=',
  WASM_FEATURE_PREFIX_DISALLOWED = '-',
};

// Kind codes used in the custom "name" section
enum : unsigned {
  WASM_NAMES_MODULE = 0,
  WASM_NAMES_FUNCTION = 1,
  WASM_NAMES_LOCAL = 2,
  WASM_NAMES_GLOBAL = 7,
  WASM_NAMES_DATA_SEGMENT = 9,
};

// Kind codes used in the custom "linking" section
enum : unsigned {
  WASM_SEGMENT_INFO = 0x5,
  WASM_INIT_FUNCS = 0x6,
  WASM_COMDAT_INFO = 0x7,
  WASM_SYMBOL_TABLE = 0x8,
};

// Kind codes used in the custom "dylink" section
enum : unsigned {
  WASM_DYLINK_MEM_INFO = 0x1,
  WASM_DYLINK_NEEDED = 0x2,
  WASM_DYLINK_EXPORT_INFO = 0x3,
  WASM_DYLINK_IMPORT_INFO = 0x4,
};
199 // Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO 200 enum : unsigned { 201 WASM_COMDAT_DATA = 0x0, 202 WASM_COMDAT_FUNCTION = 0x1, 203 // GLOBAL, TAG, and TABLE are in here but LLVM doesn't use them yet. 204 WASM_COMDAT_SECTION = 0x5, 205 }; 206 207 // Kind codes used in the custom "linking" section in the WASM_SYMBOL_TABLE 208 enum WasmSymbolType : unsigned { 209 WASM_SYMBOL_TYPE_FUNCTION = 0x0, 210 WASM_SYMBOL_TYPE_DATA = 0x1, 211 WASM_SYMBOL_TYPE_GLOBAL = 0x2, 212 WASM_SYMBOL_TYPE_SECTION = 0x3, 213 WASM_SYMBOL_TYPE_TAG = 0x4, 214 WASM_SYMBOL_TYPE_TABLE = 0x5, 215 }; 216 217 enum WasmSegmentFlag : unsigned { 218 WASM_SEG_FLAG_STRINGS = 0x1, 219 WASM_SEG_FLAG_TLS = 0x2, 220 WASM_SEG_FLAG_RETAIN = 0x4, 221 }; 222 223 // Kinds of tag attributes. 224 enum WasmTagAttribute : uint8_t { 225 WASM_TAG_ATTRIBUTE_EXCEPTION = 0x0, 226 }; 227 228 const unsigned WASM_SYMBOL_BINDING_MASK = 0x3; 229 const unsigned WASM_SYMBOL_VISIBILITY_MASK = 0xc; 230 231 const unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0; 232 const unsigned WASM_SYMBOL_BINDING_WEAK = 0x1; 233 const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2; 234 const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0; 235 const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4; 236 const unsigned WASM_SYMBOL_UNDEFINED = 0x10; 237 const unsigned WASM_SYMBOL_EXPORTED = 0x20; 238 const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40; 239 const unsigned WASM_SYMBOL_NO_STRIP = 0x80; 240 const unsigned WASM_SYMBOL_TLS = 0x100; 241 const unsigned WASM_SYMBOL_ABSOLUTE = 0x200; 242 243 #define WASM_RELOC(name, value) name = value, 244 245 enum : unsigned { 246 #include "WasmRelocs.def" 247 }; 248 249 #undef WASM_RELOC 250 251 struct WasmObjectHeader { 252 StringRef Magic; 253 uint32_t Version; 254 }; 255 256 // Subset of types that a value can have 257 enum class ValType { 258 I32 = WASM_TYPE_I32, 259 I64 = WASM_TYPE_I64, 260 F32 = WASM_TYPE_F32, 261 F64 = WASM_TYPE_F64, 262 V128 = WASM_TYPE_V128, 263 FUNCREF = 
WASM_TYPE_FUNCREF, 264 EXTERNREF = WASM_TYPE_EXTERNREF, 265 EXNREF = WASM_TYPE_EXNREF, 266 // Unmodeled value types include ref types with heap types other than 267 // func, extern or exn, and type-specialized funcrefs 268 OTHERREF = 0xff, 269 }; 270 271 struct WasmDylinkImportInfo { 272 StringRef Module; 273 StringRef Field; 274 uint32_t Flags; 275 }; 276 277 struct WasmDylinkExportInfo { 278 StringRef Name; 279 uint32_t Flags; 280 }; 281 282 struct WasmDylinkInfo { 283 uint32_t MemorySize; // Memory size in bytes 284 uint32_t MemoryAlignment; // P2 alignment of memory 285 uint32_t TableSize; // Table size in elements 286 uint32_t TableAlignment; // P2 alignment of table 287 std::vector<StringRef> Needed; // Shared library dependencies 288 std::vector<WasmDylinkImportInfo> ImportInfo; 289 std::vector<WasmDylinkExportInfo> ExportInfo; 290 }; 291 292 struct WasmProducerInfo { 293 std::vector<std::pair<std::string, std::string>> Languages; 294 std::vector<std::pair<std::string, std::string>> Tools; 295 std::vector<std::pair<std::string, std::string>> SDKs; 296 }; 297 298 struct WasmFeatureEntry { 299 uint8_t Prefix; 300 std::string Name; 301 }; 302 303 struct WasmExport { 304 StringRef Name; 305 uint8_t Kind; 306 uint32_t Index; 307 }; 308 309 struct WasmLimits { 310 uint8_t Flags; 311 uint64_t Minimum; 312 uint64_t Maximum; 313 }; 314 315 struct WasmTableType { 316 ValType ElemType; 317 WasmLimits Limits; 318 }; 319 320 struct WasmTable { 321 uint32_t Index; 322 WasmTableType Type; 323 StringRef SymbolName; // from the "linking" section 324 }; 325 326 struct WasmInitExprMVP { 327 uint8_t Opcode; 328 union { 329 int32_t Int32; 330 int64_t Int64; 331 uint32_t Float32; 332 uint64_t Float64; 333 uint32_t Global; 334 } Value; 335 }; 336 337 // Extended-const init exprs and exprs with GC types are not explicitly 338 // modeled, but the raw body of the expr is attached. 339 struct WasmInitExpr { 340 uint8_t Extended; // Set to non-zero if extended const is used (i.e. 
more than 341 // one instruction) 342 WasmInitExprMVP Inst; 343 ArrayRef<uint8_t> Body; 344 }; 345 346 struct WasmGlobalType { 347 uint8_t Type; // TODO: make this a ValType? 348 bool Mutable; 349 }; 350 351 struct WasmGlobal { 352 uint32_t Index; 353 WasmGlobalType Type; 354 WasmInitExpr InitExpr; 355 StringRef SymbolName; // from the "linking" section 356 uint32_t Offset; // Offset of the definition in the binary's Global section 357 uint32_t Size; // Size of the definition in the binary's Global section 358 }; 359 360 struct WasmTag { 361 uint32_t Index; 362 uint32_t SigIndex; 363 StringRef SymbolName; // from the "linking" section 364 }; 365 366 struct WasmImport { 367 StringRef Module; 368 StringRef Field; 369 uint8_t Kind; 370 union { 371 uint32_t SigIndex; 372 WasmGlobalType Global; 373 WasmTableType Table; 374 WasmLimits Memory; 375 }; 376 }; 377 378 struct WasmLocalDecl { 379 uint8_t Type; 380 uint32_t Count; 381 }; 382 383 struct WasmFunction { 384 uint32_t Index; 385 uint32_t SigIndex; 386 std::vector<WasmLocalDecl> Locals; 387 ArrayRef<uint8_t> Body; 388 uint32_t CodeSectionOffset; 389 uint32_t Size; 390 uint32_t CodeOffset; // start of Locals and Body 391 std::optional<StringRef> ExportName; // from the "export" section 392 StringRef SymbolName; // from the "linking" section 393 StringRef DebugName; // from the "name" section 394 uint32_t Comdat; // from the "comdat info" section 395 }; 396 397 struct WasmDataSegment { 398 uint32_t InitFlags; 399 // Present if InitFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX. 400 uint32_t MemoryIndex; 401 // Present if InitFlags & WASM_DATA_SEGMENT_IS_PASSIVE == 0. 
402 WasmInitExpr Offset; 403 404 ArrayRef<uint8_t> Content; 405 StringRef Name; // from the "segment info" section 406 uint32_t Alignment; 407 uint32_t LinkingFlags; 408 uint32_t Comdat; // from the "comdat info" section 409 }; 410 411 // Represents a Wasm element segment, with some limitations compared the spec: 412 // 1) Does not model passive or declarative segments (Segment will end up with 413 // an Offset field of i32.const 0) 414 // 2) Does not model init exprs (Segment will get an empty Functions list) 415 // 3) Does not model types other than basic funcref/externref/exnref (see 416 // ValType) 417 struct WasmElemSegment { 418 uint32_t Flags; 419 uint32_t TableNumber; 420 ValType ElemKind; 421 WasmInitExpr Offset; 422 std::vector<uint32_t> Functions; 423 }; 424 425 // Represents the location of a Wasm data symbol within a WasmDataSegment, as 426 // the index of the segment, and the offset and size within the segment. 427 struct WasmDataReference { 428 uint32_t Segment; 429 uint64_t Offset; 430 uint64_t Size; 431 }; 432 433 struct WasmRelocation { 434 uint8_t Type; // The type of the relocation. 435 uint32_t Index; // Index into either symbol or type index space. 436 uint64_t Offset; // Offset from the start of the section. 437 int64_t Addend; // A value to add to the symbol. 438 }; 439 440 struct WasmInitFunc { 441 uint32_t Priority; 442 uint32_t Symbol; 443 }; 444 445 struct WasmSymbolInfo { 446 StringRef Name; 447 uint8_t Kind; 448 uint32_t Flags; 449 // For undefined symbols the module of the import 450 std::optional<StringRef> ImportModule; 451 // For undefined symbols the name of the import 452 std::optional<StringRef> ImportName; 453 // For symbols to be exported from the final module 454 std::optional<StringRef> ExportName; 455 union { 456 // For function, table, or global symbols, the index in function, table, or 457 // global index space. 458 uint32_t ElementIndex; 459 // For a data symbols, the address of the data relative to segment. 
460 WasmDataReference DataRef; 461 }; 462 }; 463 464 enum class NameType { 465 FUNCTION, 466 GLOBAL, 467 DATA_SEGMENT, 468 }; 469 470 struct WasmDebugName { 471 NameType Type; 472 uint32_t Index; 473 StringRef Name; 474 }; 475 476 // Info from the linking metadata section of a wasm object file. 477 struct WasmLinkingData { 478 uint32_t Version; 479 std::vector<WasmInitFunc> InitFunctions; 480 std::vector<StringRef> Comdats; 481 // The linking section also contains a symbol table. This info (represented 482 // in a WasmSymbolInfo struct) is stored inside the WasmSymbol object instead 483 // of in this structure; this allows vectors of WasmSymbols and 484 // WasmLinkingDatas to be reallocated. 485 }; 486 487 struct WasmSignature { 488 SmallVector<ValType, 1> Returns; 489 SmallVector<ValType, 4> Params; 490 // LLVM can parse types other than functions encoded in the type section, 491 // but does not actually model them. Instead a placeholder signature is 492 // created in the Object's signature list. 493 enum { Function, Tag, Placeholder } Kind = Function; 494 // Support empty and tombstone instances, needed by DenseMap. 
495 enum { Plain, Empty, Tombstone } State = Plain; 496 WasmSignatureWasmSignature497 WasmSignature(SmallVector<ValType, 1> &&InReturns, 498 SmallVector<ValType, 4> &&InParams) 499 : Returns(InReturns), Params(InParams) {} 500 WasmSignature() = default; 501 }; 502 503 // Useful comparison operators 504 inline bool operator==(const WasmSignature &LHS, const WasmSignature &RHS) { 505 return LHS.State == RHS.State && LHS.Returns == RHS.Returns && 506 LHS.Params == RHS.Params; 507 } 508 509 inline bool operator!=(const WasmSignature &LHS, const WasmSignature &RHS) { 510 return !(LHS == RHS); 511 } 512 513 inline bool operator==(const WasmGlobalType &LHS, const WasmGlobalType &RHS) { 514 return LHS.Type == RHS.Type && LHS.Mutable == RHS.Mutable; 515 } 516 517 inline bool operator!=(const WasmGlobalType &LHS, const WasmGlobalType &RHS) { 518 return !(LHS == RHS); 519 } 520 521 inline bool operator==(const WasmLimits &LHS, const WasmLimits &RHS) { 522 return LHS.Flags == RHS.Flags && LHS.Minimum == RHS.Minimum && 523 (LHS.Flags & WASM_LIMITS_FLAG_HAS_MAX ? LHS.Maximum == RHS.Maximum 524 : true); 525 } 526 527 inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) { 528 return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits; 529 } 530 531 llvm::StringRef toString(WasmSymbolType type); 532 llvm::StringRef relocTypetoString(uint32_t type); 533 llvm::StringRef sectionTypeToString(uint32_t type); 534 bool relocTypeHasAddend(uint32_t type); 535 536 } // end namespace wasm 537 } // end namespace llvm 538 539 #endif 540