#include "llvm/ProfileData/MemProf.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"

namespace llvm {
namespace memprof {
MemProfSchema getFullSchema() {
  MemProfSchema List;
#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  return List;
}

MemProfSchema getHotColdSchema() {
  return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime,
          Meta::TotalLifetimeAccessDensity};
}

static size_t serializedSizeV2(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The CallStackId.
  Size += sizeof(CallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

static size_t serializedSizeV3(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The linear call stack ID.
  Size += sizeof(LinearCallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
                                             IndexedVersion Version) const {
  switch (Version) {
  case Version2:
    return serializedSizeV2(*this, Schema);
  // Combine V3 and V4 as the size calculation is the same.
  case Version3:
  case Version4:
    return serializedSizeV3(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}

static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version2);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The CallStackId.
  Result += Record.CallSites.size() * sizeof(CallStackId);
  return Result;
}

static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version3);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The linear call stack ID.
  // Note: V3 only stores the LinearCallStackId per call site.
  Result += Record.CallSites.size() * sizeof(LinearCallStackId);
  return Result;
}

static size_t serializedSizeV4(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version4);

  // The number of callsites we have information for.
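  // Unlike V2 and V3, each V4 call site entry also carries its callee GUIDs,
  // so entry sizes vary and must be summed per call site rather than
  // multiplied by a fixed per-entry size.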
  Result += sizeof(uint64_t);
  for (const auto &CS : Record.CallSites)
    Result += sizeof(LinearCallStackId) + sizeof(uint64_t) +
              CS.CalleeGuids.size() * sizeof(GlobalValue::GUID);
  return Result;
}

size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
                                            IndexedVersion Version) const {
  switch (Version) {
  case Version2:
    return serializedSizeV2(*this, Schema);
  case Version3:
    return serializedSizeV3(*this, Schema);
  case Version4:
    return serializedSizeV4(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}

static void serializeV2(const IndexedMemProfRecord &Record,
                        const MemProfSchema &Schema, raw_ostream &OS) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    LE.write<CallStackId>(N.CSId);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSites.size());
  for (const auto &CS : Record.CallSites)
    LE.write<CallStackId>(CS.CSId);
}

static void serializeV3(
    const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
    raw_ostream &OS,
    llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    assert(MemProfCallStackIndexes.contains(N.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSites.size());
  for (const auto &CS : Record.CallSites) {
    assert(MemProfCallStackIndexes.contains(CS.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[CS.CSId]);
  }
}

static void serializeV4(
    const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
    raw_ostream &OS,
    llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    assert(MemProfCallStackIndexes.contains(N.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
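  // Each V4 call site entry is laid out as:
  //   <LinearCallStackId> <NumCalleeGuids : uint64_t> <Guid 0> ... <Guid N-1>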
  LE.write<uint64_t>(Record.CallSites.size());
  for (const auto &CS : Record.CallSites) {
    assert(MemProfCallStackIndexes.contains(CS.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[CS.CSId]);
    LE.write<uint64_t>(CS.CalleeGuids.size());
    for (const auto &Guid : CS.CalleeGuids)
      LE.write<GlobalValue::GUID>(Guid);
  }
}

void IndexedMemProfRecord::serialize(
    const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
    llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
    const {
  switch (Version) {
  case Version2:
    serializeV2(*this, Schema, OS);
    return;
  case Version3:
    serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
    return;
  case Version4:
    serializeV4(*this, Schema, OS, *MemProfCallStackIndexes);
    return;
  }
  llvm_unreachable("unsupported MemProf version");
}

static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId = endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSites.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    CallStackId CSId =
        endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Record.CallSites.emplace_back(CSId);
  }

  return Record;
}

static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  const size_t SerializedSize = PortableMemInfoBlock::serializedSize(Schema);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += SerializedSize;
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSites.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    // We are storing LinearCallStackId in CallSiteIds, which is a vector of
    // CallStackId. Assert that CallStackId is no smaller than
    // LinearCallStackId.
    static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
    LinearCallStackId CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Record.CallSites.emplace_back(CSId);
  }

  return Record;
}

static IndexedMemProfRecord deserializeV4(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
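  // The alloc site layout is identical to V3: a LinearCallStackId followed by
  // the schema-sized payload.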
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  const size_t SerializedSize = PortableMemInfoBlock::serializedSize(Schema);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += SerializedSize;
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSites.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
    LinearCallStackId CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    const uint64_t NumGuids =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    SmallVector<GlobalValue::GUID, 1> Guids;
    Guids.reserve(NumGuids);
    for (uint64_t K = 0; K < NumGuids; ++K)
      Guids.push_back(
          endian::readNext<GlobalValue::GUID, llvm::endianness::little>(Ptr));
    Record.CallSites.emplace_back(CSId, std::move(Guids));
  }

  return Record;
}

IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
                                  const unsigned char *Ptr,
                                  IndexedVersion Version) {
  switch (Version) {
  case Version2:
    return deserializeV2(Schema, Ptr);
  case Version3:
    return deserializeV3(Schema, Ptr);
  case Version4:
    return deserializeV4(Schema, Ptr);
  }
  llvm_unreachable("unsupported MemProf version");
}

MemProfRecord IndexedMemProfRecord::toMemProfRecord(
    llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const {
  MemProfRecord Record;

  Record.AllocSites.reserve(AllocSites.size());
  for (const IndexedAllocationInfo &IndexedAI : AllocSites) {
    AllocationInfo AI;
    AI.Info = IndexedAI.Info;
    AI.CallStack = Callback(IndexedAI.CSId);
    Record.AllocSites.push_back(std::move(AI));
  }

  Record.CallSites.reserve(CallSites.size());
  for (const IndexedCallSiteInfo &CS : CallSites) {
    std::vector<Frame> Frames = Callback(CS.CSId);
    Record.CallSites.emplace_back(std::move(Frames), CS.CalleeGuids);
  }

  return Record;
}

GlobalValue::GUID getGUID(const StringRef FunctionName) {
  // Canonicalize the function name to drop suffixes such as ".llvm.". Note
  // that we do not drop any ".__uniq." suffixes, as getCanonicalFnName does
  // not drop those by default. This is by design to differentiate internal
  // linkage functions during matching. By dropping the other suffixes we can
  // match functions in the profile use phase prior to their addition. Note
  // that this applies to both instrumented and sampled function names.
  StringRef CanonicalName =
      sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);

  // We use the function GUID, which we expect to be a uint64_t. At this time,
  // it is the lower 64 bits of the MD5 of the canonical function name.
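  // For example, a hypothetical internal-linkage symbol "_Z3foov.llvm.123456"
  // would canonicalize to "_Z3foov" before hashing.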
  return Function::getGUIDAssumingExternalLinkage(CanonicalName);
}

Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
  using namespace support;

  const unsigned char *Ptr = Buffer;
  const uint64_t NumSchemaIds =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) {
    return make_error<InstrProfError>(instrprof_error::malformed,
                                      "memprof schema invalid");
  }

  MemProfSchema Result;
  for (size_t I = 0; I < NumSchemaIds; I++) {
    const uint64_t Tag =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    if (Tag >= static_cast<uint64_t>(Meta::Size)) {
      return make_error<InstrProfError>(instrprof_error::malformed,
                                        "memprof schema invalid");
    }
    Result.push_back(static_cast<Meta>(Tag));
  }
  // Advance the buffer to one past the schema if we succeeded.
  Buffer = Ptr;
  return Result;
}
} // namespace memprof
} // namespace llvm