1 //===-- memprof_allocator.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of MemProfiler, a memory profiler. 10 // 11 // Implementation of MemProf's memory allocator, which uses the allocator 12 // from sanitizer_common. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "memprof_allocator.h" 17 #include "memprof_mapping.h" 18 #include "memprof_stack.h" 19 #include "memprof_thread.h" 20 #include "sanitizer_common/sanitizer_allocator_checks.h" 21 #include "sanitizer_common/sanitizer_allocator_interface.h" 22 #include "sanitizer_common/sanitizer_allocator_report.h" 23 #include "sanitizer_common/sanitizer_errno.h" 24 #include "sanitizer_common/sanitizer_file.h" 25 #include "sanitizer_common/sanitizer_flags.h" 26 #include "sanitizer_common/sanitizer_internal_defs.h" 27 #include "sanitizer_common/sanitizer_list.h" 28 #include "sanitizer_common/sanitizer_stackdepot.h" 29 30 #include <sched.h> 31 #include <stdlib.h> 32 #include <time.h> 33 34 namespace __memprof { 35 36 static int GetCpuId(void) { 37 // _memprof_preinit is called via the preinit_array, which subsequently calls 38 // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu 39 // will seg fault as the address of __vdso_getcpu will be null. 40 if (!memprof_init_done) 41 return -1; 42 return sched_getcpu(); 43 } 44 45 // Compute the timestamp in ms. 46 static int GetTimestamp(void) { 47 // timespec_get will segfault if called from dl_init 48 if (!memprof_timestamp_inited) { 49 // By returning 0, this will be effectively treated as being 50 // timestamped at memprof init time (when memprof_init_timestamp_s 51 // is initialized). 52 return 0; 53 } 54 timespec ts; 55 clock_gettime(CLOCK_REALTIME, &ts); 56 return (ts.tv_sec - memprof_init_timestamp_s) * 1000 + ts.tv_nsec / 1000000; 57 } 58 59 static MemprofAllocator &get_allocator(); 60 61 // The memory chunk allocated from the underlying allocator looks like this: 62 // H H U U U U U U 63 // H -- ChunkHeader (32 bytes) 64 // U -- user memory. 65 66 // If there is left padding before the ChunkHeader (due to use of memalign), 67 // we store a magic value in the first uptr word of the memory block and 68 // store the address of ChunkHeader in the next uptr. 69 // M B L L L L L L L L L H H U U U U U U 70 // | ^ 71 // ---------------------| 72 // M -- magic value kAllocBegMagic 73 // B -- address of ChunkHeader pointing to the first 'H' 74 75 constexpr uptr kMaxAllowedMallocBits = 40; 76 77 // Should be no more than 32-bytes 78 struct ChunkHeader { 79 // 1-st 4 bytes. 80 u32 alloc_context_id; 81 // 2-nd 4 bytes 82 u32 cpu_id; 83 // 3-rd 4 bytes 84 u32 timestamp_ms; 85 // 4-th 4 bytes 86 // Note only 1 bit is needed for this flag if we need space in the future for 87 // more fields. 88 u32 from_memalign; 89 // 5-th and 6-th 4 bytes 90 // The max size of an allocation is 2^40 (kMaxAllowedMallocSize), so this 91 // could be shrunk to kMaxAllowedMallocBits if we need space in the future for 92 // more fields. 93 atomic_uint64_t user_requested_size; 94 // 23 bits available 95 // 7-th and 8-th 4 bytes 96 u64 data_type_id; // TODO: hash of type name 97 }; 98 99 static const uptr kChunkHeaderSize = sizeof(ChunkHeader); 100 COMPILER_CHECK(kChunkHeaderSize == 32); 101 102 struct MemprofChunk : ChunkHeader { 103 uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; } 104 uptr UsedSize() { 105 return atomic_load(&user_requested_size, memory_order_relaxed); 106 } 107 void *AllocBeg() { 108 if (from_memalign) 109 return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this)); 110 return reinterpret_cast<void *>(this); 111 } 112 }; 113 114 class LargeChunkHeader { 115 static constexpr uptr kAllocBegMagic = 116 FIRST_32_SECOND_64(0xCC6E96B9, 0xCC6E96B9CC6E96B9ULL); 117 atomic_uintptr_t magic; 118 MemprofChunk *chunk_header; 119 120 public: 121 MemprofChunk *Get() const { 122 return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic 123 ? chunk_header 124 : nullptr; 125 } 126 127 void Set(MemprofChunk *p) { 128 if (p) { 129 chunk_header = p; 130 atomic_store(&magic, kAllocBegMagic, memory_order_release); 131 return; 132 } 133 134 uptr old = kAllocBegMagic; 135 if (!atomic_compare_exchange_strong(&magic, &old, 0, 136 memory_order_release)) { 137 CHECK_EQ(old, kAllocBegMagic); 138 } 139 } 140 }; 141 142 void FlushUnneededMemProfShadowMemory(uptr p, uptr size) { 143 // Since memprof's mapping is compacting, the shadow chunk may be 144 // not page-aligned, so we only flush the page-aligned portion. 145 ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size)); 146 } 147 148 void MemprofMapUnmapCallback::OnMap(uptr p, uptr size) const { 149 // Statistics. 150 MemprofStats &thread_stats = GetCurrentThreadStats(); 151 thread_stats.mmaps++; 152 thread_stats.mmaped += size; 153 } 154 void MemprofMapUnmapCallback::OnUnmap(uptr p, uptr size) const { 155 // We are about to unmap a chunk of user memory. 156 // Mark the corresponding shadow memory as not needed. 157 FlushUnneededMemProfShadowMemory(p, size); 158 // Statistics. 159 MemprofStats &thread_stats = GetCurrentThreadStats(); 160 thread_stats.munmaps++; 161 thread_stats.munmaped += size; 162 } 163 164 AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) { 165 CHECK(ms); 166 return &ms->allocator_cache; 167 } 168 169 struct MemInfoBlock { 170 u32 alloc_count; 171 u64 total_access_count, min_access_count, max_access_count; 172 u64 total_size; 173 u32 min_size, max_size; 174 u32 alloc_timestamp, dealloc_timestamp; 175 u64 total_lifetime; 176 u32 min_lifetime, max_lifetime; 177 u32 alloc_cpu_id, dealloc_cpu_id; 178 u32 num_migrated_cpu; 179 180 // Only compared to prior deallocated object currently. 181 u32 num_lifetime_overlaps; 182 u32 num_same_alloc_cpu; 183 u32 num_same_dealloc_cpu; 184 185 u64 data_type_id; // TODO: hash of type name 186 187 MemInfoBlock() : alloc_count(0) {} 188 189 MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp, 190 u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu) 191 : alloc_count(1), total_access_count(access_count), 192 min_access_count(access_count), max_access_count(access_count), 193 total_size(size), min_size(size), max_size(size), 194 alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp), 195 total_lifetime(dealloc_timestamp - alloc_timestamp), 196 min_lifetime(total_lifetime), max_lifetime(total_lifetime), 197 alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu), 198 num_lifetime_overlaps(0), num_same_alloc_cpu(0), 199 num_same_dealloc_cpu(0) { 200 num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id; 201 } 202 203 void Print(u64 id) { 204 u64 p; 205 if (flags()->print_terse) { 206 p = total_size * 100 / alloc_count; 207 Printf("MIB:%llu/%u/%d.%02d/%u/%u/", id, alloc_count, p / 100, p % 100, 208 min_size, max_size); 209 p = total_access_count * 100 / alloc_count; 210 Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_access_count, 211 max_access_count); 212 p = total_lifetime * 100 / alloc_count; 213 Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_lifetime, max_lifetime); 214 Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps, 215 num_same_alloc_cpu, num_same_dealloc_cpu); 216 } else { 217 p = total_size * 100 / alloc_count; 218 Printf("Memory allocation stack id = %llu\n", id); 219 Printf("\talloc_count %u, size (ave/min/max) %d.%02d / %u / %u\n", 220 alloc_count, p / 100, p % 100, min_size, max_size); 221 p = total_access_count * 100 / alloc_count; 222 Printf("\taccess_count (ave/min/max): %d.%02d / %u / %u\n", p / 100, 223 p % 100, min_access_count, max_access_count); 224 p = total_lifetime * 100 / alloc_count; 225 Printf("\tlifetime (ave/min/max): %d.%02d / %u / %u\n", p / 100, p % 100, 226 min_lifetime, max_lifetime); 227 Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc " 228 "cpu: %u, num same dealloc_cpu: %u\n", 229 num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu, 230 num_same_dealloc_cpu); 231 } 232 } 233 234 static void printHeader() { 235 CHECK(flags()->print_terse); 236 Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/" 237 "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/" 238 "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/" 239 "NumSameDeallocCpu\n"); 240 } 241 242 void Merge(MemInfoBlock &newMIB) { 243 alloc_count += newMIB.alloc_count; 244 245 total_access_count += newMIB.total_access_count; 246 min_access_count = Min(min_access_count, newMIB.min_access_count); 247 max_access_count = Max(max_access_count, newMIB.max_access_count); 248 249 total_size += newMIB.total_size; 250 min_size = Min(min_size, newMIB.min_size); 251 max_size = Max(max_size, newMIB.max_size); 252 253 total_lifetime += newMIB.total_lifetime; 254 min_lifetime = Min(min_lifetime, newMIB.min_lifetime); 255 max_lifetime = Max(max_lifetime, newMIB.max_lifetime); 256 257 // We know newMIB was deallocated later, so just need to check if it was 258 // allocated before last one deallocated. 259 num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp; 260 alloc_timestamp = newMIB.alloc_timestamp; 261 dealloc_timestamp = newMIB.dealloc_timestamp; 262 263 num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id; 264 num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id; 265 alloc_cpu_id = newMIB.alloc_cpu_id; 266 dealloc_cpu_id = newMIB.dealloc_cpu_id; 267 } 268 }; 269 270 static u32 AccessCount = 0; 271 static u32 MissCount = 0; 272 273 struct SetEntry { 274 SetEntry() : id(0), MIB() {} 275 bool Empty() { return id == 0; } 276 void Print() { 277 CHECK(!Empty()); 278 MIB.Print(id); 279 } 280 // The stack id 281 u64 id; 282 MemInfoBlock MIB; 283 }; 284 285 struct CacheSet { 286 enum { kSetSize = 4 }; 287 288 void PrintAll() { 289 for (int i = 0; i < kSetSize; i++) { 290 if (Entries[i].Empty()) 291 continue; 292 Entries[i].Print(); 293 } 294 } 295 void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { 296 AccessCount++; 297 SetAccessCount++; 298 299 for (int i = 0; i < kSetSize; i++) { 300 auto id = Entries[i].id; 301 // Check if this is a hit or an empty entry. Since we always move any 302 // filled locations to the front of the array (see below), we don't need 303 // to look after finding the first empty entry. 304 if (id == new_id || !id) { 305 if (id == 0) { 306 Entries[i].id = new_id; 307 Entries[i].MIB = newMIB; 308 } else { 309 Entries[i].MIB.Merge(newMIB); 310 } 311 // Assuming some id locality, we try to swap the matching entry 312 // into the first set position. 313 if (i != 0) { 314 auto tmp = Entries[0]; 315 Entries[0] = Entries[i]; 316 Entries[i] = tmp; 317 } 318 return; 319 } 320 } 321 322 // Miss 323 MissCount++; 324 SetMissCount++; 325 326 // We try to find the entries with the lowest alloc count to be evicted: 327 int min_idx = 0; 328 u64 min_count = Entries[0].MIB.alloc_count; 329 for (int i = 1; i < kSetSize; i++) { 330 CHECK(!Entries[i].Empty()); 331 if (Entries[i].MIB.alloc_count < min_count) { 332 min_idx = i; 333 min_count = Entries[i].MIB.alloc_count; 334 } 335 } 336 337 // Print the evicted entry profile information 338 if (!flags()->print_terse) 339 Printf("Evicted:\n"); 340 Entries[min_idx].Print(); 341 342 // Similar to the hit case, put new MIB in first set position. 343 if (min_idx != 0) 344 Entries[min_idx] = Entries[0]; 345 Entries[0].id = new_id; 346 Entries[0].MIB = newMIB; 347 } 348 349 void PrintMissRate(int i) { 350 u64 p = SetAccessCount ? SetMissCount * 10000ULL / SetAccessCount : 0; 351 Printf("Set %d miss rate: %d / %d = %5d.%02d%%\n", i, SetMissCount, 352 SetAccessCount, p / 100, p % 100); 353 } 354 355 SetEntry Entries[kSetSize]; 356 u32 SetAccessCount = 0; 357 u32 SetMissCount = 0; 358 }; 359 360 struct MemInfoBlockCache { 361 MemInfoBlockCache() { 362 if (common_flags()->print_module_map) 363 DumpProcessMap(); 364 if (flags()->print_terse) 365 MemInfoBlock::printHeader(); 366 Sets = 367 (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries); 368 Constructed = true; 369 } 370 371 ~MemInfoBlockCache() { free(Sets); } 372 373 void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) { 374 u64 hv = new_id; 375 376 // Use mod method where number of entries should be a prime close to power 377 // of 2. 378 hv %= flags()->mem_info_cache_entries; 379 380 return Sets[hv].insertOrMerge(new_id, newMIB); 381 } 382 383 void PrintAll() { 384 for (int i = 0; i < flags()->mem_info_cache_entries; i++) { 385 Sets[i].PrintAll(); 386 } 387 } 388 389 void PrintMissRate() { 390 if (!flags()->print_mem_info_cache_miss_rate) 391 return; 392 u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0; 393 Printf("Overall miss rate: %d / %d = %5d.%02d%%\n", MissCount, AccessCount, 394 p / 100, p % 100); 395 if (flags()->print_mem_info_cache_miss_rate_details) 396 for (int i = 0; i < flags()->mem_info_cache_entries; i++) 397 Sets[i].PrintMissRate(i); 398 } 399 400 CacheSet *Sets; 401 // Flag when the Sets have been allocated, in case a deallocation is called 402 // very early before the static init of the Allocator and therefore this table 403 // have completed. 404 bool Constructed = false; 405 }; 406 407 // Accumulates the access count from the shadow for the given pointer and size. 408 u64 GetShadowCount(uptr p, u32 size) { 409 u64 *shadow = (u64 *)MEM_TO_SHADOW(p); 410 u64 *shadow_end = (u64 *)MEM_TO_SHADOW(p + size); 411 u64 count = 0; 412 for (; shadow <= shadow_end; shadow++) 413 count += *shadow; 414 return count; 415 } 416 417 // Clears the shadow counters (when memory is allocated). 418 void ClearShadow(uptr addr, uptr size) { 419 CHECK(AddrIsAlignedByGranularity(addr)); 420 CHECK(AddrIsInMem(addr)); 421 CHECK(AddrIsAlignedByGranularity(addr + size)); 422 CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY)); 423 CHECK(REAL(memset)); 424 uptr shadow_beg = MEM_TO_SHADOW(addr); 425 uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1; 426 if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) { 427 REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg); 428 } else { 429 uptr page_size = GetPageSizeCached(); 430 uptr page_beg = RoundUpTo(shadow_beg, page_size); 431 uptr page_end = RoundDownTo(shadow_end, page_size); 432 433 if (page_beg >= page_end) { 434 REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg); 435 } else { 436 if (page_beg != shadow_beg) { 437 REAL(memset)((void *)shadow_beg, 0, page_beg - shadow_beg); 438 } 439 if (page_end != shadow_end) { 440 REAL(memset)((void *)page_end, 0, shadow_end - page_end); 441 } 442 ReserveShadowMemoryRange(page_beg, page_end - 1, nullptr); 443 } 444 } 445 } 446 447 struct Allocator { 448 static const uptr kMaxAllowedMallocSize = 1ULL << kMaxAllowedMallocBits; 449 450 MemprofAllocator allocator; 451 StaticSpinMutex fallback_mutex; 452 AllocatorCache fallback_allocator_cache; 453 454 uptr max_user_defined_malloc_size; 455 atomic_uint8_t rss_limit_exceeded; 456 457 MemInfoBlockCache MemInfoBlockTable; 458 bool destructing; 459 460 // ------------------- Initialization ------------------------ 461 explicit Allocator(LinkerInitialized) : destructing(false) {} 462 463 ~Allocator() { FinishAndPrint(); } 464 465 void FinishAndPrint() { 466 if (!flags()->print_terse) 467 Printf("Live on exit:\n"); 468 allocator.ForceLock(); 469 allocator.ForEachChunk( 470 [](uptr chunk, void *alloc) { 471 u64 user_requested_size; 472 MemprofChunk *m = 473 ((Allocator *)alloc) 474 ->GetMemprofChunk((void *)chunk, user_requested_size); 475 if (!m) 476 return; 477 uptr user_beg = ((uptr)m) + kChunkHeaderSize; 478 u64 c = GetShadowCount(user_beg, user_requested_size); 479 long curtime = GetTimestamp(); 480 MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime, 481 m->cpu_id, GetCpuId()); 482 ((Allocator *)alloc) 483 ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); 484 }, 485 this); 486 allocator.ForceUnlock(); 487 488 destructing = true; 489 MemInfoBlockTable.PrintMissRate(); 490 MemInfoBlockTable.PrintAll(); 491 StackDepotPrintAll(); 492 } 493 494 void InitLinkerInitialized() { 495 SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null); 496 allocator.InitLinkerInitialized( 497 common_flags()->allocator_release_to_os_interval_ms); 498 max_user_defined_malloc_size = common_flags()->max_allocation_size_mb 499 ? common_flags()->max_allocation_size_mb 500 << 20 501 : kMaxAllowedMallocSize; 502 } 503 504 bool RssLimitExceeded() { 505 return atomic_load(&rss_limit_exceeded, memory_order_relaxed); 506 } 507 508 void SetRssLimitExceeded(bool limit_exceeded) { 509 atomic_store(&rss_limit_exceeded, limit_exceeded, memory_order_relaxed); 510 } 511 512 // -------------------- Allocation/Deallocation routines --------------- 513 void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack, 514 AllocType alloc_type) { 515 if (UNLIKELY(!memprof_inited)) 516 MemprofInitFromRtl(); 517 if (RssLimitExceeded()) { 518 if (AllocatorMayReturnNull()) 519 return nullptr; 520 ReportRssLimitExceeded(stack); 521 } 522 CHECK(stack); 523 const uptr min_alignment = MEMPROF_ALIGNMENT; 524 if (alignment < min_alignment) 525 alignment = min_alignment; 526 if (size == 0) { 527 // We'd be happy to avoid allocating memory for zero-size requests, but 528 // some programs/tests depend on this behavior and assume that malloc 529 // would not return NULL even for zero-size allocations. Moreover, it 530 // looks like operator new should never return NULL, and results of 531 // consecutive "new" calls must be different even if the allocated size 532 // is zero. 533 size = 1; 534 } 535 CHECK(IsPowerOfTwo(alignment)); 536 uptr rounded_size = RoundUpTo(size, alignment); 537 uptr needed_size = rounded_size + kChunkHeaderSize; 538 if (alignment > min_alignment) 539 needed_size += alignment; 540 CHECK(IsAligned(needed_size, min_alignment)); 541 if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize || 542 size > max_user_defined_malloc_size) { 543 if (AllocatorMayReturnNull()) { 544 Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n", 545 (void *)size); 546 return nullptr; 547 } 548 uptr malloc_limit = 549 Min(kMaxAllowedMallocSize, max_user_defined_malloc_size); 550 ReportAllocationSizeTooBig(size, malloc_limit, stack); 551 } 552 553 MemprofThread *t = GetCurrentThread(); 554 void *allocated; 555 if (t) { 556 AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); 557 allocated = allocator.Allocate(cache, needed_size, 8); 558 } else { 559 SpinMutexLock l(&fallback_mutex); 560 AllocatorCache *cache = &fallback_allocator_cache; 561 allocated = allocator.Allocate(cache, needed_size, 8); 562 } 563 if (UNLIKELY(!allocated)) { 564 SetAllocatorOutOfMemory(); 565 if (AllocatorMayReturnNull()) 566 return nullptr; 567 ReportOutOfMemory(size, stack); 568 } 569 570 uptr alloc_beg = reinterpret_cast<uptr>(allocated); 571 uptr alloc_end = alloc_beg + needed_size; 572 uptr beg_plus_header = alloc_beg + kChunkHeaderSize; 573 uptr user_beg = beg_plus_header; 574 if (!IsAligned(user_beg, alignment)) 575 user_beg = RoundUpTo(user_beg, alignment); 576 uptr user_end = user_beg + size; 577 CHECK_LE(user_end, alloc_end); 578 uptr chunk_beg = user_beg - kChunkHeaderSize; 579 MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg); 580 m->from_memalign = alloc_beg != chunk_beg; 581 CHECK(size); 582 583 m->cpu_id = GetCpuId(); 584 m->timestamp_ms = GetTimestamp(); 585 m->alloc_context_id = StackDepotPut(*stack); 586 587 uptr size_rounded_down_to_granularity = 588 RoundDownTo(size, SHADOW_GRANULARITY); 589 if (size_rounded_down_to_granularity) 590 ClearShadow(user_beg, size_rounded_down_to_granularity); 591 592 MemprofStats &thread_stats = GetCurrentThreadStats(); 593 thread_stats.mallocs++; 594 thread_stats.malloced += size; 595 thread_stats.malloced_overhead += needed_size - size; 596 if (needed_size > SizeClassMap::kMaxSize) 597 thread_stats.malloc_large++; 598 else 599 thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++; 600 601 void *res = reinterpret_cast<void *>(user_beg); 602 atomic_store(&m->user_requested_size, size, memory_order_release); 603 if (alloc_beg != chunk_beg) { 604 CHECK_LE(alloc_beg + sizeof(LargeChunkHeader), chunk_beg); 605 reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(m); 606 } 607 MEMPROF_MALLOC_HOOK(res, size); 608 return res; 609 } 610 611 void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment, 612 BufferedStackTrace *stack, AllocType alloc_type) { 613 uptr p = reinterpret_cast<uptr>(ptr); 614 if (p == 0) 615 return; 616 617 MEMPROF_FREE_HOOK(ptr); 618 619 uptr chunk_beg = p - kChunkHeaderSize; 620 MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg); 621 622 u64 user_requested_size = 623 atomic_exchange(&m->user_requested_size, 0, memory_order_acquire); 624 if (memprof_inited && memprof_init_done && !destructing && 625 MemInfoBlockTable.Constructed) { 626 u64 c = GetShadowCount(p, user_requested_size); 627 long curtime = GetTimestamp(); 628 629 MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime, 630 m->cpu_id, GetCpuId()); 631 { 632 SpinMutexLock l(&fallback_mutex); 633 MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB); 634 } 635 } 636 637 MemprofStats &thread_stats = GetCurrentThreadStats(); 638 thread_stats.frees++; 639 thread_stats.freed += user_requested_size; 640 641 void *alloc_beg = m->AllocBeg(); 642 if (alloc_beg != m) { 643 // Clear the magic value, as allocator internals may overwrite the 644 // contents of deallocated chunk, confusing GetMemprofChunk lookup. 645 reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(nullptr); 646 } 647 648 MemprofThread *t = GetCurrentThread(); 649 if (t) { 650 AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); 651 allocator.Deallocate(cache, alloc_beg); 652 } else { 653 SpinMutexLock l(&fallback_mutex); 654 AllocatorCache *cache = &fallback_allocator_cache; 655 allocator.Deallocate(cache, alloc_beg); 656 } 657 } 658 659 void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) { 660 CHECK(old_ptr && new_size); 661 uptr p = reinterpret_cast<uptr>(old_ptr); 662 uptr chunk_beg = p - kChunkHeaderSize; 663 MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg); 664 665 MemprofStats &thread_stats = GetCurrentThreadStats(); 666 thread_stats.reallocs++; 667 thread_stats.realloced += new_size; 668 669 void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC); 670 if (new_ptr) { 671 CHECK_NE(REAL(memcpy), nullptr); 672 uptr memcpy_size = Min(new_size, m->UsedSize()); 673 REAL(memcpy)(new_ptr, old_ptr, memcpy_size); 674 Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC); 675 } 676 return new_ptr; 677 } 678 679 void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) { 680 if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { 681 if (AllocatorMayReturnNull()) 682 return nullptr; 683 ReportCallocOverflow(nmemb, size, stack); 684 } 685 void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC); 686 // If the memory comes from the secondary allocator no need to clear it 687 // as it comes directly from mmap. 688 if (ptr && allocator.FromPrimary(ptr)) 689 REAL(memset)(ptr, 0, nmemb * size); 690 return ptr; 691 } 692 693 void CommitBack(MemprofThreadLocalMallocStorage *ms, 694 BufferedStackTrace *stack) { 695 AllocatorCache *ac = GetAllocatorCache(ms); 696 allocator.SwallowCache(ac); 697 } 698 699 // -------------------------- Chunk lookup ---------------------- 700 701 // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg). 702 MemprofChunk *GetMemprofChunk(void *alloc_beg, u64 &user_requested_size) { 703 if (!alloc_beg) 704 return nullptr; 705 MemprofChunk *p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get(); 706 if (!p) { 707 if (!allocator.FromPrimary(alloc_beg)) 708 return nullptr; 709 p = reinterpret_cast<MemprofChunk *>(alloc_beg); 710 } 711 // The size is reset to 0 on deallocation (and a min of 1 on 712 // allocation). 713 user_requested_size = 714 atomic_load(&p->user_requested_size, memory_order_acquire); 715 if (user_requested_size) 716 return p; 717 return nullptr; 718 } 719 720 MemprofChunk *GetMemprofChunkByAddr(uptr p, u64 &user_requested_size) { 721 void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast<void *>(p)); 722 return GetMemprofChunk(alloc_beg, user_requested_size); 723 } 724 725 uptr AllocationSize(uptr p) { 726 u64 user_requested_size; 727 MemprofChunk *m = GetMemprofChunkByAddr(p, user_requested_size); 728 if (!m) 729 return 0; 730 if (m->Beg() != p) 731 return 0; 732 return user_requested_size; 733 } 734 735 void Purge(BufferedStackTrace *stack) { allocator.ForceReleaseToOS(); } 736 737 void PrintStats() { allocator.PrintStats(); } 738 739 void ForceLock() NO_THREAD_SAFETY_ANALYSIS { 740 allocator.ForceLock(); 741 fallback_mutex.Lock(); 742 } 743 744 void ForceUnlock() NO_THREAD_SAFETY_ANALYSIS { 745 fallback_mutex.Unlock(); 746 allocator.ForceUnlock(); 747 } 748 }; 749 750 static Allocator instance(LINKER_INITIALIZED); 751 752 static MemprofAllocator &get_allocator() { return instance.allocator; } 753 754 void InitializeAllocator() { instance.InitLinkerInitialized(); } 755 756 void MemprofThreadLocalMallocStorage::CommitBack() { 757 GET_STACK_TRACE_MALLOC; 758 instance.CommitBack(this, &stack); 759 } 760 761 void PrintInternalAllocatorStats() { instance.PrintStats(); } 762 763 void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) { 764 instance.Deallocate(ptr, 0, 0, stack, alloc_type); 765 } 766 767 void memprof_delete(void *ptr, uptr size, uptr alignment, 768 BufferedStackTrace *stack, AllocType alloc_type) { 769 instance.Deallocate(ptr, size, alignment, stack, alloc_type); 770 } 771 772 void *memprof_malloc(uptr size, BufferedStackTrace *stack) { 773 return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC)); 774 } 775 776 void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) { 777 return SetErrnoOnNull(instance.Calloc(nmemb, size, stack)); 778 } 779 780 void *memprof_reallocarray(void *p, uptr nmemb, uptr size, 781 BufferedStackTrace *stack) { 782 if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { 783 errno = errno_ENOMEM; 784 if (AllocatorMayReturnNull()) 785 return nullptr; 786 ReportReallocArrayOverflow(nmemb, size, stack); 787 } 788 return memprof_realloc(p, nmemb * size, stack); 789 } 790 791 void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack) { 792 if (!p) 793 return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC)); 794 if (size == 0) { 795 if (flags()->allocator_frees_and_returns_null_on_realloc_zero) { 796 instance.Deallocate(p, 0, 0, stack, FROM_MALLOC); 797 return nullptr; 798 } 799 // Allocate a size of 1 if we shouldn't free() on Realloc to 0 800 size = 1; 801 } 802 return SetErrnoOnNull(instance.Reallocate(p, size, stack)); 803 } 804 805 void *memprof_valloc(uptr size, BufferedStackTrace *stack) { 806 return SetErrnoOnNull( 807 instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC)); 808 } 809 810 void *memprof_pvalloc(uptr size, BufferedStackTrace *stack) { 811 uptr PageSize = GetPageSizeCached(); 812 if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) { 813 errno = errno_ENOMEM; 814 if (AllocatorMayReturnNull()) 815 return nullptr; 816 ReportPvallocOverflow(size, stack); 817 } 818 // pvalloc(0) should allocate one page. 819 size = size ? RoundUpTo(size, PageSize) : PageSize; 820 return SetErrnoOnNull(instance.Allocate(size, PageSize, stack, FROM_MALLOC)); 821 } 822 823 void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack, 824 AllocType alloc_type) { 825 if (UNLIKELY(!IsPowerOfTwo(alignment))) { 826 errno = errno_EINVAL; 827 if (AllocatorMayReturnNull()) 828 return nullptr; 829 ReportInvalidAllocationAlignment(alignment, stack); 830 } 831 return SetErrnoOnNull(instance.Allocate(size, alignment, stack, alloc_type)); 832 } 833 834 void *memprof_aligned_alloc(uptr alignment, uptr size, 835 BufferedStackTrace *stack) { 836 if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) { 837 errno = errno_EINVAL; 838 if (AllocatorMayReturnNull()) 839 return nullptr; 840 ReportInvalidAlignedAllocAlignment(size, alignment, stack); 841 } 842 return SetErrnoOnNull(instance.Allocate(size, alignment, stack, FROM_MALLOC)); 843 } 844 845 int memprof_posix_memalign(void **memptr, uptr alignment, uptr size, 846 BufferedStackTrace *stack) { 847 if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) { 848 if (AllocatorMayReturnNull()) 849 return errno_EINVAL; 850 ReportInvalidPosixMemalignAlignment(alignment, stack); 851 } 852 void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC); 853 if (UNLIKELY(!ptr)) 854 // OOM error is already taken care of by Allocate. 855 return errno_ENOMEM; 856 CHECK(IsAligned((uptr)ptr, alignment)); 857 *memptr = ptr; 858 return 0; 859 } 860 861 uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp) { 862 if (!ptr) 863 return 0; 864 uptr usable_size = instance.AllocationSize(reinterpret_cast<uptr>(ptr)); 865 return usable_size; 866 } 867 868 void MemprofSoftRssLimitExceededCallback(bool limit_exceeded) { 869 instance.SetRssLimitExceeded(limit_exceeded); 870 } 871 872 } // namespace __memprof 873 874 // ---------------------- Interface ---------------- {{{1 875 using namespace __memprof; 876 877 #if !SANITIZER_SUPPORTS_WEAK_HOOKS 878 // Provide default (no-op) implementation of malloc hooks. 879 SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_malloc_hook, void *ptr, 880 uptr size) { 881 (void)ptr; 882 (void)size; 883 } 884 885 SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_free_hook, void *ptr) { 886 (void)ptr; 887 } 888 #endif 889 890 uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; } 891 892 int __sanitizer_get_ownership(const void *p) { 893 return memprof_malloc_usable_size(p, 0, 0) != 0; 894 } 895 896 uptr __sanitizer_get_allocated_size(const void *p) { 897 return memprof_malloc_usable_size(p, 0, 0); 898 } 899 900 int __memprof_profile_dump() { 901 instance.FinishAndPrint(); 902 // In the future we may want to return non-zero if there are any errors 903 // detected during the dumping process. 904 return 0; 905 } 906