//===-- sanitizer_procmaps_mac.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Information about the process mappings (Mac-specific parts).
//===----------------------------------------------------------------------===//

#include "sanitizer_platform.h"
#if SANITIZER_APPLE
#include "sanitizer_common.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_procmaps.h"

#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach/mach.h>

// These are not available in older macOS SDKs.
#ifndef CPU_SUBTYPE_X86_64_H
#define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */
#endif
#ifndef CPU_SUBTYPE_ARM_V7S
#define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */
#endif
#ifndef CPU_SUBTYPE_ARM_V7K
#define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12)
#endif
#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64)
#endif

namespace __sanitizer {

// Contains information used to iterate through sections.
struct MemoryMappedSegmentData {
  char name[kMaxSegName];
  uptr nsects;
  const char *current_load_cmd_addr;
  u32 lc_type;
  uptr base_virt_addr;
  uptr addr_mask;
};

template <typename Section>
static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
                            bool isWritable) {
  const Section *sc = (const Section *)data->current_load_cmd_addr;
  data->current_load_cmd_addr += sizeof(Section);

  uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr;
  uptr sec_end = sec_start + sc->size;
  module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
                          sc->sectname);
}

void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
  // Don't iterate over sections when the caller hasn't set up the
  // data pointer, when there are no sections, or when the segment
  // is executable. Avoid iterating over executable sections because
  // it will confuse libignore, and because the extra granularity
  // of information is not needed by any sanitizers.
  if (!data_ || !data_->nsects || IsExecutable()) {
    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
                            data_ ? data_->name : nullptr);
    return;
  }

  do {
    if (data_->lc_type == LC_SEGMENT) {
      NextSectionLoad<struct section>(module, data_, IsWritable());
#ifdef MH_MAGIC_64
    } else if (data_->lc_type == LC_SEGMENT_64) {
      NextSectionLoad<struct section_64>(module, data_, IsWritable());
#endif
    }
  } while (--data_->nsects);
}

MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
  Reset();
}

MemoryMappingLayout::~MemoryMappingLayout() {
}

bool MemoryMappingLayout::Error() const {
  return false;
}

// More information about Mach-O headers can be found in mach-o/loader.h
// Each Mach-O image has a header (mach_header or mach_header_64) starting with
// a magic number, and a list of linker load commands directly following the
// header.
// A load command is at least two 32-bit words: the command type and the
// command size in bytes.
// We're interested only in segment load commands (LC_SEGMENT and
// LC_SEGMENT_64), which indicate that a part of the file is mapped into the
// task's address space.
// The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or
// segment_command_64 correspond to the memory address, memory size and the
// file offset of the current memory segment.
// Because these fields are taken from the images as is, one needs to add
// _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime.

void MemoryMappingLayout::Reset() {
  // Count down from the top.
  // TODO(glider): as per man 3 dyld, iterating over the headers with
  // _dyld_image_count is thread-unsafe. We need to register callbacks for
  // adding and removing images which will invalidate the MemoryMappingLayout
  // state.
  data_.current_image = _dyld_image_count();
  data_.current_load_cmd_count = -1;
  data_.current_load_cmd_addr = 0;
  data_.current_magic = 0;
  data_.current_filetype = 0;
  data_.current_arch = kModuleArchUnknown;
  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
}

// The dyld load address should be unchanged throughout process execution,
// and it is expensive to compute once many libraries have been loaded,
// so cache it here and do not reset.
static mach_header *dyld_hdr = 0;
static const char kDyldPath[] = "/usr/lib/dyld";
static const int kDyldImageIdx = -1;

// static
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now.
}

void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now.
}

static bool IsDyldHdr(const mach_header *hdr) {
  return (hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
         hdr->filetype == MH_DYLINKER;
}
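// For orientation, the compiled-out sketch below walks the image layout
// described in the comment above MemoryMappingLayout::Reset() using only the
// public dyld APIs, assuming 64-bit images. It is illustrative only;
// SketchPrintSegments is a hypothetical name, not part of this file's
// interface, and the real iterator below additionally handles 32-bit images,
// dyld itself, per-section data, and UUID/instrumentation detection.
#if 0
static void SketchPrintSegments() {
  for (uint32_t i = 0; i < _dyld_image_count(); i++) {
    const mach_header *hdr = _dyld_get_image_header(i);
    if (!hdr || hdr->magic != MH_MAGIC_64) continue;
    // Runtime address = on-disk vmaddr + this image's ASLR slide.
    uptr slide = (uptr)_dyld_get_image_vmaddr_slide(i);
    const char *lc = (const char *)hdr + sizeof(mach_header_64);
    for (uint32_t cmd_idx = 0; cmd_idx < hdr->ncmds; cmd_idx++) {
      const load_command *generic = (const load_command *)lc;
      if (generic->cmd == LC_SEGMENT_64) {
        const segment_command_64 *sc = (const segment_command_64 *)lc;
        Printf("%s %s: %p-%p\n", _dyld_get_image_name(i), sc->segname,
               (void *)(sc->vmaddr + slide),
               (void *)(sc->vmaddr + slide + sc->vmsize));
      }
      lc += generic->cmdsize;
    }
  }
}
#endif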
// _dyld_get_image_header() and related APIs don't report dyld itself.
// We work around this by manually recursing through the memory map
// until we hit a Mach header matching dyld instead. These recursive
// calls are expensive, but the first memory map generation occurs
// early in the process, when dyld is one of the only images loaded,
// so it will be hit after only a few iterations. These assumptions no longer
// hold on macOS 13+, where dyld itself has moved into the shared cache.
static mach_header *GetDyldImageHeaderViaVMRegion() {
  vm_address_t address = 0;

  while (true) {
    vm_size_t size = 0;
    unsigned depth = 1;
    struct vm_region_submap_info_64 info;
    mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
    kern_return_t err =
        vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                             (vm_region_info_t)&info, &count);
    if (err != KERN_SUCCESS) return nullptr;

    if (size >= sizeof(mach_header) && info.protection & kProtectionRead) {
      mach_header *hdr = (mach_header *)address;
      if (IsDyldHdr(hdr)) {
        return hdr;
      }
    }
    address += size;
  }
}

extern "C" {
struct dyld_shared_cache_dylib_text_info {
  uint64_t version;  // current version 2
  // following fields all exist in version 1
  uint64_t loadAddressUnslid;
  uint64_t textSegmentSize;
  uuid_t dylibUuid;
  const char *path;  // pointer invalid at end of iterations
  // following fields all exist in version 2
  uint64_t textSegmentOffset;  // offset from start of cache
};
typedef struct dyld_shared_cache_dylib_text_info
    dyld_shared_cache_dylib_text_info;

extern bool _dyld_get_shared_cache_uuid(uuid_t uuid);
extern const void *_dyld_get_shared_cache_range(size_t *length);
extern int dyld_shared_cache_iterate_text(
    const uuid_t cacheUuid,
    void (^callback)(const dyld_shared_cache_dylib_text_info *info));
}  // extern "C"

static mach_header *GetDyldImageHeaderViaSharedCache() {
  uuid_t uuid;
  bool hasCache = _dyld_get_shared_cache_uuid(uuid);
  if (!hasCache)
    return nullptr;

  size_t cacheLength;
  __block uptr cacheStart = (uptr)_dyld_get_shared_cache_range(&cacheLength);
  CHECK(cacheStart && cacheLength);

  __block mach_header *dyldHdr = nullptr;
  int res = dyld_shared_cache_iterate_text(
      uuid, ^(const dyld_shared_cache_dylib_text_info *info) {
        CHECK_GE(info->version, 2);
        mach_header *hdr =
            (mach_header *)(cacheStart + info->textSegmentOffset);
        if (IsDyldHdr(hdr))
          dyldHdr = hdr;
      });
  CHECK_EQ(res, 0);

  return dyldHdr;
}

const mach_header *get_dyld_hdr() {
  if (!dyld_hdr) {
    // On macOS 13+, dyld itself has moved into the shared cache. Looking it up
    // via vm_region_recurse_64() causes spins/hangs/crashes.
    if (GetMacosAlignedVersion() >= MacosVersion(13, 0)) {
      dyld_hdr = GetDyldImageHeaderViaSharedCache();
      if (!dyld_hdr) {
        VReport(1,
                "Failed to lookup the dyld image header in the shared cache on "
                "macOS 13+ (or no shared cache in use). Falling back to "
                "lookup via vm_region_recurse_64().\n");
        dyld_hdr = GetDyldImageHeaderViaVMRegion();
      }
    } else {
      dyld_hdr = GetDyldImageHeaderViaVMRegion();
    }
    CHECK(dyld_hdr);
  }

  return dyld_hdr;
}
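// The address arithmetic used by NextSegmentLoad() below can be summarized by
// this compiled-out sketch (SketchSegmentStart is a hypothetical helper, not
// part of this file): for regular images the base is the image's ASLR slide
// and vmaddr is used as-is; for dyld, vmaddr is masked down to its low 20 bits
// (because on macOS < 10.12 it holds an absolute address rather than an
// offset) and rebased onto the in-memory dyld header address.
#if 0
static uptr SketchSegmentStart(const segment_command_64 *sc, bool is_dyld,
                               uptr dyld_hdr_addr, uptr slide) {
  uptr base_virt_addr = is_dyld ? dyld_hdr_addr : slide;
  uptr addr_mask = is_dyld ? 0xfffff : ~(uptr)0;
  return (sc->vmaddr & addr_mask) + base_virt_addr;
}
#endif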
// Next and NextSegmentLoad were inspired by base/sysinfo.cc in
// Google Perftools, https://github.com/gperftools/gperftools.

// NextSegmentLoad scans the current image for the next segment load command
// and returns the start and end addresses and file offset of the corresponding
// segment.
// Note that the segment addresses are not necessarily sorted.
template <u32 kLCSegment, typename SegmentCommand>
static bool NextSegmentLoad(MemoryMappedSegment *segment,
                            MemoryMappedSegmentData *seg_data,
                            MemoryMappingLayoutData *layout_data) {
  const char *lc = layout_data->current_load_cmd_addr;
  layout_data->current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
  if (((const load_command *)lc)->cmd == kLCSegment) {
    const SegmentCommand *sc = (const SegmentCommand *)lc;
    uptr base_virt_addr, addr_mask;
    if (layout_data->current_image == kDyldImageIdx) {
      base_virt_addr = (uptr)get_dyld_hdr();
      // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
      // it contains an absolute address rather than an offset for dyld.
      // To make matters even more complicated, this absolute address
      // isn't actually the absolute segment address, but the offset portion
      // of the address is accurate when combined with the dyld base address,
      // and the mask will give just this offset.
      addr_mask = 0xfffff;
    } else {
      base_virt_addr =
          (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);
      addr_mask = ~0;
    }

    segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
    segment->end = segment->start + sc->vmsize;
    // Most callers don't need section information, so only fill this struct
    // when required.
    if (seg_data) {
      seg_data->nsects = sc->nsects;
      seg_data->current_load_cmd_addr =
          (const char *)lc + sizeof(SegmentCommand);
      seg_data->lc_type = kLCSegment;
      seg_data->base_virt_addr = base_virt_addr;
      seg_data->addr_mask = addr_mask;
      internal_strncpy(seg_data->name, sc->segname,
                       ARRAY_SIZE(seg_data->name));
    }

    // Return the initial protection.
    segment->protection = sc->initprot;
    segment->offset = (layout_data->current_filetype ==
                       /*MH_EXECUTE*/ 0x2)
                          ? sc->vmaddr
                          : sc->fileoff;
    if (segment->filename) {
      const char *src = (layout_data->current_image == kDyldImageIdx)
                            ? kDyldPath
                            : _dyld_get_image_name(layout_data->current_image);
      internal_strncpy(segment->filename, src, segment->filename_size);
    }
    segment->arch = layout_data->current_arch;
    internal_memcpy(segment->uuid, layout_data->current_uuid, kModuleUUIDSize);
    return true;
  }
  return false;
}

ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
  cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
  switch (cputype) {
    case CPU_TYPE_I386:
      return kModuleArchI386;
    case CPU_TYPE_X86_64:
      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64;
      if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H;
      CHECK(0 && "Invalid subtype of x86_64");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM:
      if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K;
      CHECK(0 && "Invalid subtype of ARM");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM64:
      return kModuleArchARM64;
    default:
      CHECK(0 && "Invalid CPU type");
      return kModuleArchUnknown;
  }
}

static const load_command *NextCommand(const load_command *lc) {
  return (const load_command *)((const char *)lc + lc->cmdsize);
}

static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_UUID) continue;

    const uuid_command *uuid_lc = (const uuid_command *)lc;
    const uint8_t *uuid = &uuid_lc->uuid[0];
    internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
    return;
  }
}

static bool IsModuleInstrumented(const load_command *first_lc) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_LOAD_DYLIB) continue;

    const dylib_command *dylib_lc = (const dylib_command *)lc;
    uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
    const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset;
    dylib_name = StripModuleName(dylib_name);
    if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) {
      return true;
    }
  }
  return false;
}

bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
    const mach_header *hdr = (data_.current_image == kDyldImageIdx)
                                 ? get_dyld_hdr()
                                 : _dyld_get_image_header(data_.current_image);
    if (!hdr) continue;
    if (data_.current_load_cmd_count < 0) {
      // Set up for this image.
      data_.current_load_cmd_count = hdr->ncmds;
      data_.current_magic = hdr->magic;
      data_.current_filetype = hdr->filetype;
      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
      switch (data_.current_magic) {
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          data_.current_load_cmd_addr =
              (const char *)hdr + sizeof(mach_header_64);
          break;
        }
#endif
        case MH_MAGIC: {
          data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header);
          break;
        }
        default: {
          continue;
        }
      }
      FindUUID((const load_command *)data_.current_load_cmd_addr,
               data_.current_uuid);
      data_.current_instrumented = IsModuleInstrumented(
          (const load_command *)data_.current_load_cmd_addr);
    }

    for (; data_.current_load_cmd_count >= 0; data_.current_load_cmd_count--) {
      switch (data_.current_magic) {
        // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64.
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
#endif
        case MH_MAGIC: {
          if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
      }
    }
    // If we get here, no more load commands in this image describe segments.
    // Go on to the next image.
  }
  return false;
}

void MemoryMappingLayout::DumpListOfModules(
    InternalMmapVectorNoCtor<LoadedModule> *modules) {
  Reset();
  InternalMmapVector<char> module_name(kMaxPathLength);
  MemoryMappedSegment segment(module_name.data(), module_name.size());
  MemoryMappedSegmentData data;
  segment.data_ = &data;
  while (Next(&segment)) {
    if (segment.filename[0] == '\0') continue;
    LoadedModule *cur_module = nullptr;
    if (!modules->empty() &&
        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
      cur_module = &modules->back();
    } else {
      modules->push_back(LoadedModule());
      cur_module = &modules->back();
      cur_module->set(segment.filename, segment.start, segment.arch,
                      segment.uuid, data_.current_instrumented);
    }
    segment.AddAddressRanges(cur_module);
  }
}

}  // namespace __sanitizer

#endif  // SANITIZER_APPLE
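// Usage sketch (illustration only, mirroring DumpListOfModules() above): the
// iterator is typically driven by constructing a layout and pulling segments
// until Next() returns false, e.g.
//
//   MemoryMappingLayout layout(/*cache_enabled=*/false);
//   InternalMmapVector<char> fname(kMaxPathLength);
//   MemoryMappedSegment segment(fname.data(), fname.size());
//   while (layout.Next(&segment))
//     Printf("%p-%p %s\n", (void *)segment.start, (void *)segment.end,
//            segment.filename);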