1 //===- TypeIndexDiscovery.cpp -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" 10 #include "llvm/DebugInfo/CodeView/TypeRecord.h" 11 #include "llvm/ADT/ArrayRef.h" 12 #include "llvm/Support/Endian.h" 13 14 using namespace llvm; 15 using namespace llvm::codeview; 16 17 static inline MethodKind getMethodKind(uint16_t Attrs) { 18 Attrs &= uint16_t(MethodOptions::MethodKindMask); 19 Attrs >>= 2; 20 return MethodKind(Attrs); 21 } 22 23 static inline bool isIntroVirtual(uint16_t Attrs) { 24 MethodKind MK = getMethodKind(Attrs); 25 return MK == MethodKind::IntroducingVirtual || 26 MK == MethodKind::PureIntroducingVirtual; 27 } 28 29 static inline PointerMode getPointerMode(uint32_t Attrs) { 30 return static_cast<PointerMode>((Attrs >> PointerRecord::PointerModeShift) & 31 PointerRecord::PointerModeMask); 32 } 33 34 static inline bool isMemberPointer(uint32_t Attrs) { 35 PointerMode Mode = getPointerMode(Attrs); 36 return Mode == PointerMode::PointerToDataMember || 37 Mode == PointerMode::PointerToMemberFunction; 38 } 39 40 static inline uint32_t getEncodedIntegerLength(ArrayRef<uint8_t> Data) { 41 uint16_t N = support::endian::read16le(Data.data()); 42 if (N < LF_NUMERIC) 43 return 2; 44 45 assert(N <= LF_UQUADWORD); 46 47 constexpr uint32_t Sizes[] = { 48 1, // LF_CHAR 49 2, // LF_SHORT 50 2, // LF_USHORT 51 4, // LF_LONG 52 4, // LF_ULONG 53 4, // LF_REAL32 54 8, // LF_REAL64 55 10, // LF_REAL80 56 16, // LF_REAL128 57 8, // LF_QUADWORD 58 8, // LF_UQUADWORD 59 }; 60 61 return 2 + Sizes[N - LF_NUMERIC]; 62 } 63 64 static inline uint32_t getCStringLength(ArrayRef<uint8_t> Data) { 65 const char *S = reinterpret_cast<const char *>(Data.data()); 66 return strlen(S) + 1; 67 } 68 69 static void handleMethodOverloadList(ArrayRef<uint8_t> Content, 70 SmallVectorImpl<TiReference> &Refs) { 71 uint32_t Offset = 0; 72 73 while (!Content.empty()) { 74 // Array of: 75 // 0: Attrs 76 // 2: Padding 77 // 4: TypeIndex 78 // if (isIntroVirtual()) 79 // 8: VFTableOffset 80 81 // At least 8 bytes are guaranteed. 4 extra bytes come iff function is an 82 // intro virtual. 83 uint32_t Len = 8; 84 85 uint16_t Attrs = support::endian::read16le(Content.data()); 86 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 87 88 if (LLVM_UNLIKELY(isIntroVirtual(Attrs))) 89 Len += 4; 90 Offset += Len; 91 Content = Content.drop_front(Len); 92 } 93 } 94 95 static uint32_t handleBaseClass(ArrayRef<uint8_t> Data, uint32_t Offset, 96 SmallVectorImpl<TiReference> &Refs) { 97 // 0: Kind 98 // 2: Padding 99 // 4: TypeIndex 100 // 8: Encoded Integer 101 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 102 return 8 + getEncodedIntegerLength(Data.drop_front(8)); 103 } 104 105 static uint32_t handleEnumerator(ArrayRef<uint8_t> Data, uint32_t Offset, 106 SmallVectorImpl<TiReference> &Refs) { 107 // 0: Kind 108 // 2: Padding 109 // 4: Encoded Integer 110 // <next>: Name 111 uint32_t Size = 4 + getEncodedIntegerLength(Data.drop_front(4)); 112 return Size + getCStringLength(Data.drop_front(Size)); 113 } 114 115 static uint32_t handleDataMember(ArrayRef<uint8_t> Data, uint32_t Offset, 116 SmallVectorImpl<TiReference> &Refs) { 117 // 0: Kind 118 // 2: Padding 119 // 4: TypeIndex 120 // 8: Encoded Integer 121 // <next>: Name 122 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 123 uint32_t Size = 8 + getEncodedIntegerLength(Data.drop_front(8)); 124 return Size + getCStringLength(Data.drop_front(Size)); 125 } 126 127 static uint32_t handleOverloadedMethod(ArrayRef<uint8_t> Data, uint32_t Offset, 128 SmallVectorImpl<TiReference> &Refs) { 129 // 0: Kind 130 // 2: Padding 131 // 4: TypeIndex 132 // 8: Name 133 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 134 return 8 + getCStringLength(Data.drop_front(8)); 135 } 136 137 static uint32_t handleOneMethod(ArrayRef<uint8_t> Data, uint32_t Offset, 138 SmallVectorImpl<TiReference> &Refs) { 139 // 0: Kind 140 // 2: Attributes 141 // 4: Type 142 // if (isIntroVirtual) 143 // 8: VFTableOffset 144 // <next>: Name 145 uint32_t Size = 8; 146 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 147 148 uint16_t Attrs = support::endian::read16le(Data.drop_front(2).data()); 149 if (LLVM_UNLIKELY(isIntroVirtual(Attrs))) 150 Size += 4; 151 152 return Size + getCStringLength(Data.drop_front(Size)); 153 } 154 155 static uint32_t handleNestedType(ArrayRef<uint8_t> Data, uint32_t Offset, 156 SmallVectorImpl<TiReference> &Refs) { 157 // 0: Kind 158 // 2: Padding 159 // 4: TypeIndex 160 // 8: Name 161 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 162 return 8 + getCStringLength(Data.drop_front(8)); 163 } 164 165 static uint32_t handleStaticDataMember(ArrayRef<uint8_t> Data, uint32_t Offset, 166 SmallVectorImpl<TiReference> &Refs) { 167 // 0: Kind 168 // 2: Padding 169 // 4: TypeIndex 170 // 8: Name 171 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 172 return 8 + getCStringLength(Data.drop_front(8)); 173 } 174 175 static uint32_t handleVirtualBaseClass(ArrayRef<uint8_t> Data, uint32_t Offset, 176 bool IsIndirect, 177 SmallVectorImpl<TiReference> &Refs) { 178 // 0: Kind 179 // 2: Attrs 180 // 4: TypeIndex 181 // 8: TypeIndex 182 // 12: Encoded Integer 183 // <next>: Encoded Integer 184 uint32_t Size = 12; 185 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 2}); 186 Size += getEncodedIntegerLength(Data.drop_front(Size)); 187 Size += getEncodedIntegerLength(Data.drop_front(Size)); 188 return Size; 189 } 190 191 static uint32_t handleVFPtr(ArrayRef<uint8_t> Data, uint32_t Offset, 192 SmallVectorImpl<TiReference> &Refs) { 193 // 0: Kind 194 // 2: Padding 195 // 4: TypeIndex 196 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 197 return 8; 198 } 199 200 static uint32_t handleListContinuation(ArrayRef<uint8_t> Data, uint32_t Offset, 201 SmallVectorImpl<TiReference> &Refs) { 202 // 0: Kind 203 // 2: Padding 204 // 4: TypeIndex 205 Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); 206 return 8; 207 } 208 209 static void handleFieldList(ArrayRef<uint8_t> Content, 210 SmallVectorImpl<TiReference> &Refs) { 211 uint32_t Offset = 0; 212 uint32_t ThisLen = 0; 213 while (!Content.empty()) { 214 TypeLeafKind Kind = 215 static_cast<TypeLeafKind>(support::endian::read16le(Content.data())); 216 switch (Kind) { 217 case LF_BCLASS: 218 ThisLen = handleBaseClass(Content, Offset, Refs); 219 break; 220 case LF_ENUMERATE: 221 ThisLen = handleEnumerator(Content, Offset, Refs); 222 break; 223 case LF_MEMBER: 224 ThisLen = handleDataMember(Content, Offset, Refs); 225 break; 226 case LF_METHOD: 227 ThisLen = handleOverloadedMethod(Content, Offset, Refs); 228 break; 229 case LF_ONEMETHOD: 230 ThisLen = handleOneMethod(Content, Offset, Refs); 231 break; 232 case LF_NESTTYPE: 233 ThisLen = handleNestedType(Content, Offset, Refs); 234 break; 235 case LF_STMEMBER: 236 ThisLen = handleStaticDataMember(Content, Offset, Refs); 237 break; 238 case LF_VBCLASS: 239 case LF_IVBCLASS: 240 ThisLen = 241 handleVirtualBaseClass(Content, Offset, Kind == LF_VBCLASS, Refs); 242 break; 243 case LF_VFUNCTAB: 244 ThisLen = handleVFPtr(Content, Offset, Refs); 245 break; 246 case LF_INDEX: 247 ThisLen = handleListContinuation(Content, Offset, Refs); 248 break; 249 default: 250 return; 251 } 252 Content = Content.drop_front(ThisLen); 253 Offset += ThisLen; 254 if (!Content.empty()) { 255 uint8_t Pad = Content.front(); 256 if (Pad >= LF_PAD0) { 257 uint32_t Skip = Pad & 0x0F; 258 Content = Content.drop_front(Skip); 259 Offset += Skip; 260 } 261 } 262 } 263 } 264 265 static void handlePointer(ArrayRef<uint8_t> Content, 266 SmallVectorImpl<TiReference> &Refs) { 267 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 268 269 uint32_t Attrs = support::endian::read32le(Content.drop_front(4).data()); 270 if (isMemberPointer(Attrs)) 271 Refs.push_back({TiRefKind::TypeRef, 8, 1}); 272 } 273 274 static void discoverTypeIndices(ArrayRef<uint8_t> Content, TypeLeafKind Kind, 275 SmallVectorImpl<TiReference> &Refs) { 276 uint32_t Count; 277 // FIXME: In the future it would be nice if we could avoid hardcoding these 278 // values. One idea is to define some structures representing these types 279 // that would allow the use of offsetof(). 280 switch (Kind) { 281 case TypeLeafKind::LF_FUNC_ID: 282 Refs.push_back({TiRefKind::IndexRef, 0, 1}); 283 Refs.push_back({TiRefKind::TypeRef, 4, 1}); 284 break; 285 case TypeLeafKind::LF_MFUNC_ID: 286 Refs.push_back({TiRefKind::TypeRef, 0, 2}); 287 break; 288 case TypeLeafKind::LF_STRING_ID: 289 Refs.push_back({TiRefKind::IndexRef, 0, 1}); 290 break; 291 case TypeLeafKind::LF_SUBSTR_LIST: 292 Count = support::endian::read32le(Content.data()); 293 if (Count > 0) 294 Refs.push_back({TiRefKind::IndexRef, 4, Count}); 295 break; 296 case TypeLeafKind::LF_BUILDINFO: 297 Count = support::endian::read16le(Content.data()); 298 if (Count > 0) 299 Refs.push_back({TiRefKind::IndexRef, 2, Count}); 300 break; 301 case TypeLeafKind::LF_UDT_SRC_LINE: 302 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 303 Refs.push_back({TiRefKind::IndexRef, 4, 1}); 304 break; 305 case TypeLeafKind::LF_UDT_MOD_SRC_LINE: 306 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 307 break; 308 case TypeLeafKind::LF_MODIFIER: 309 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 310 break; 311 case TypeLeafKind::LF_PROCEDURE: 312 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 313 Refs.push_back({TiRefKind::TypeRef, 8, 1}); 314 break; 315 case TypeLeafKind::LF_MFUNCTION: 316 Refs.push_back({TiRefKind::TypeRef, 0, 3}); 317 Refs.push_back({TiRefKind::TypeRef, 16, 1}); 318 break; 319 case TypeLeafKind::LF_ARGLIST: 320 Count = support::endian::read32le(Content.data()); 321 if (Count > 0) 322 Refs.push_back({TiRefKind::TypeRef, 4, Count}); 323 break; 324 case TypeLeafKind::LF_ARRAY: 325 Refs.push_back({TiRefKind::TypeRef, 0, 2}); 326 break; 327 case TypeLeafKind::LF_CLASS: 328 case TypeLeafKind::LF_STRUCTURE: 329 case TypeLeafKind::LF_INTERFACE: 330 Refs.push_back({TiRefKind::TypeRef, 4, 3}); 331 break; 332 case TypeLeafKind::LF_UNION: 333 Refs.push_back({TiRefKind::TypeRef, 4, 1}); 334 break; 335 case TypeLeafKind::LF_ENUM: 336 Refs.push_back({TiRefKind::TypeRef, 4, 2}); 337 break; 338 case TypeLeafKind::LF_BITFIELD: 339 Refs.push_back({TiRefKind::TypeRef, 0, 1}); 340 break; 341 case TypeLeafKind::LF_VFTABLE: 342 Refs.push_back({TiRefKind::TypeRef, 0, 2}); 343 break; 344 case TypeLeafKind::LF_VTSHAPE: 345 break; 346 case TypeLeafKind::LF_METHODLIST: 347 handleMethodOverloadList(Content, Refs); 348 break; 349 case TypeLeafKind::LF_FIELDLIST: 350 handleFieldList(Content, Refs); 351 break; 352 case TypeLeafKind::LF_POINTER: 353 handlePointer(Content, Refs); 354 break; 355 default: 356 break; 357 } 358 } 359 360 static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind, 361 SmallVectorImpl<TiReference> &Refs) { 362 uint32_t Count; 363 // FIXME: In the future it would be nice if we could avoid hardcoding these 364 // values. One idea is to define some structures representing these types 365 // that would allow the use of offsetof(). 366 switch (Kind) { 367 case SymbolKind::S_GPROC32_ID: 368 case SymbolKind::S_LPROC32_ID: 369 case SymbolKind::S_LPROC32_DPC: 370 case SymbolKind::S_LPROC32_DPC_ID: 371 Refs.push_back({TiRefKind::IndexRef, 24, 1}); // LF_FUNC_ID 372 break; 373 case SymbolKind::S_GPROC32: 374 case SymbolKind::S_LPROC32: 375 Refs.push_back({TiRefKind::TypeRef, 24, 1}); // Type 376 break; 377 case SymbolKind::S_UDT: 378 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // UDT 379 break; 380 case SymbolKind::S_GDATA32: 381 case SymbolKind::S_LDATA32: 382 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 383 break; 384 case SymbolKind::S_BUILDINFO: 385 Refs.push_back({TiRefKind::IndexRef, 0, 1}); // Compile flags 386 break; 387 case SymbolKind::S_LTHREAD32: 388 case SymbolKind::S_GTHREAD32: 389 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 390 break; 391 case SymbolKind::S_FILESTATIC: 392 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 393 break; 394 case SymbolKind::S_LOCAL: 395 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 396 break; 397 case SymbolKind::S_REGISTER: 398 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 399 break; 400 case SymbolKind::S_CONSTANT: 401 Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type 402 break; 403 case SymbolKind::S_BPREL32: 404 case SymbolKind::S_REGREL32: 405 Refs.push_back({TiRefKind::TypeRef, 4, 1}); // Type 406 break; 407 case SymbolKind::S_CALLSITEINFO: 408 Refs.push_back({TiRefKind::TypeRef, 8, 1}); // Call signature 409 break; 410 case SymbolKind::S_CALLERS: 411 case SymbolKind::S_CALLEES: 412 case SymbolKind::S_INLINEES: 413 // The record is a count followed by an array of type indices. 414 Count = *reinterpret_cast<const ulittle32_t *>(Content.data()); 415 Refs.push_back({TiRefKind::IndexRef, 4, Count}); // Callees 416 break; 417 case SymbolKind::S_INLINESITE: 418 Refs.push_back({TiRefKind::IndexRef, 8, 1}); // ID of inlinee 419 break; 420 case SymbolKind::S_HEAPALLOCSITE: 421 Refs.push_back({TiRefKind::TypeRef, 8, 1}); // UDT allocated 422 break; 423 424 // Defranges don't have types, just registers and code offsets. 425 case SymbolKind::S_DEFRANGE_REGISTER: 426 case SymbolKind::S_DEFRANGE_REGISTER_REL: 427 case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL: 428 case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE: 429 case SymbolKind::S_DEFRANGE_SUBFIELD_REGISTER: 430 case SymbolKind::S_DEFRANGE_SUBFIELD: 431 break; 432 433 // No type references. 434 case SymbolKind::S_LABEL32: 435 case SymbolKind::S_OBJNAME: 436 case SymbolKind::S_COMPILE: 437 case SymbolKind::S_COMPILE2: 438 case SymbolKind::S_COMPILE3: 439 case SymbolKind::S_ENVBLOCK: 440 case SymbolKind::S_BLOCK32: 441 case SymbolKind::S_FRAMEPROC: 442 case SymbolKind::S_THUNK32: 443 case SymbolKind::S_FRAMECOOKIE: 444 case SymbolKind::S_UNAMESPACE: 445 case SymbolKind::S_ARMSWITCHTABLE: 446 break; 447 // Scope ending symbols. 448 case SymbolKind::S_END: 449 case SymbolKind::S_INLINESITE_END: 450 case SymbolKind::S_PROC_ID_END: 451 break; 452 default: 453 return false; // Unknown symbol. 454 } 455 return true; 456 } 457 458 void llvm::codeview::discoverTypeIndices(const CVType &Type, 459 SmallVectorImpl<TiReference> &Refs) { 460 ::discoverTypeIndices(Type.content(), Type.kind(), Refs); 461 } 462 463 static void resolveTypeIndexReferences(ArrayRef<uint8_t> RecordData, 464 ArrayRef<TiReference> Refs, 465 SmallVectorImpl<TypeIndex> &Indices) { 466 Indices.clear(); 467 468 if (Refs.empty()) 469 return; 470 471 RecordData = RecordData.drop_front(sizeof(RecordPrefix)); 472 473 BinaryStreamReader Reader(RecordData, llvm::endianness::little); 474 for (const auto &Ref : Refs) { 475 Reader.setOffset(Ref.Offset); 476 FixedStreamArray<TypeIndex> Run; 477 cantFail(Reader.readArray(Run, Ref.Count)); 478 Indices.append(Run.begin(), Run.end()); 479 } 480 } 481 482 void llvm::codeview::discoverTypeIndices(const CVType &Type, 483 SmallVectorImpl<TypeIndex> &Indices) { 484 return discoverTypeIndices(Type.RecordData, Indices); 485 } 486 487 void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData, 488 SmallVectorImpl<TypeIndex> &Indices) { 489 SmallVector<TiReference, 4> Refs; 490 discoverTypeIndices(RecordData, Refs); 491 resolveTypeIndexReferences(RecordData, Refs, Indices); 492 } 493 494 void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData, 495 SmallVectorImpl<TiReference> &Refs) { 496 const RecordPrefix *P = 497 reinterpret_cast<const RecordPrefix *>(RecordData.data()); 498 TypeLeafKind K = static_cast<TypeLeafKind>(uint16_t(P->RecordKind)); 499 ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, Refs); 500 } 501 502 bool llvm::codeview::discoverTypeIndicesInSymbol( 503 const CVSymbol &Sym, SmallVectorImpl<TiReference> &Refs) { 504 SymbolKind K = Sym.kind(); 505 return ::discoverTypeIndices(Sym.content(), K, Refs); 506 } 507 508 bool llvm::codeview::discoverTypeIndicesInSymbol( 509 ArrayRef<uint8_t> RecordData, SmallVectorImpl<TiReference> &Refs) { 510 const RecordPrefix *P = 511 reinterpret_cast<const RecordPrefix *>(RecordData.data()); 512 SymbolKind K = static_cast<SymbolKind>(uint16_t(P->RecordKind)); 513 return ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, 514 Refs); 515 } 516 517 bool llvm::codeview::discoverTypeIndicesInSymbol( 518 ArrayRef<uint8_t> RecordData, SmallVectorImpl<TypeIndex> &Indices) { 519 SmallVector<TiReference, 2> Refs; 520 if (!discoverTypeIndicesInSymbol(RecordData, Refs)) 521 return false; 522 resolveTypeIndexReferences(RecordData, Refs, Indices); 523 return true; 524 } 525