1 //===-- sanitizer_procmaps_mac.cpp ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Information about the process mappings (Mac-specific parts). 10 //===----------------------------------------------------------------------===// 11 12 #include "sanitizer_platform.h" 13 #if SANITIZER_MAC 14 #include "sanitizer_common.h" 15 #include "sanitizer_placement_new.h" 16 #include "sanitizer_procmaps.h" 17 18 #include <mach-o/dyld.h> 19 #include <mach-o/loader.h> 20 #include <mach/mach.h> 21 22 // These are not available in older macOS SDKs. 23 #ifndef CPU_SUBTYPE_X86_64_H 24 #define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */ 25 #endif 26 #ifndef CPU_SUBTYPE_ARM_V7S 27 #define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */ 28 #endif 29 #ifndef CPU_SUBTYPE_ARM_V7K 30 #define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12) 31 #endif 32 #ifndef CPU_TYPE_ARM64 33 #define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64) 34 #endif 35 36 namespace __sanitizer { 37 38 // Contains information used to iterate through sections. 39 struct MemoryMappedSegmentData { 40 char name[kMaxSegName]; 41 uptr nsects; 42 const char *current_load_cmd_addr; 43 u32 lc_type; 44 uptr base_virt_addr; 45 uptr addr_mask; 46 }; 47 48 template <typename Section> 49 static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data, 50 bool isWritable) { 51 const Section *sc = (const Section *)data->current_load_cmd_addr; 52 data->current_load_cmd_addr += sizeof(Section); 53 54 uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr; 55 uptr sec_end = sec_start + sc->size; 56 module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable, 57 sc->sectname); 58 } 59 60 void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) { 61 // Don't iterate over sections when the caller hasn't set up the 62 // data pointer, when there are no sections, or when the segment 63 // is executable. Avoid iterating over executable sections because 64 // it will confuse libignore, and because the extra granularity 65 // of information is not needed by any sanitizers. 66 if (!data_ || !data_->nsects || IsExecutable()) { 67 module->addAddressRange(start, end, IsExecutable(), IsWritable(), 68 data_ ? data_->name : nullptr); 69 return; 70 } 71 72 do { 73 if (data_->lc_type == LC_SEGMENT) { 74 NextSectionLoad<struct section>(module, data_, IsWritable()); 75 #ifdef MH_MAGIC_64 76 } else if (data_->lc_type == LC_SEGMENT_64) { 77 NextSectionLoad<struct section_64>(module, data_, IsWritable()); 78 #endif 79 } 80 } while (--data_->nsects); 81 } 82 83 MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) { 84 Reset(); 85 } 86 87 MemoryMappingLayout::~MemoryMappingLayout() { 88 } 89 90 bool MemoryMappingLayout::Error() const { 91 return false; 92 } 93 94 // More information about Mach-O headers can be found in mach-o/loader.h 95 // Each Mach-O image has a header (mach_header or mach_header_64) starting with 96 // a magic number, and a list of linker load commands directly following the 97 // header. 98 // A load command is at least two 32-bit words: the command type and the 99 // command size in bytes. We're interested only in segment load commands 100 // (LC_SEGMENT and LC_SEGMENT_64), which tell that a part of the file is mapped 101 // into the task's address space. 102 // The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or 103 // segment_command_64 correspond to the memory address, memory size and the 104 // file offset of the current memory segment. 105 // Because these fields are taken from the images as is, one needs to add 106 // _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime. 107 108 void MemoryMappingLayout::Reset() { 109 // Count down from the top. 110 // TODO(glider): as per man 3 dyld, iterating over the headers with 111 // _dyld_image_count is thread-unsafe. We need to register callbacks for 112 // adding and removing images which will invalidate the MemoryMappingLayout 113 // state. 114 data_.current_image = _dyld_image_count(); 115 data_.current_load_cmd_count = -1; 116 data_.current_load_cmd_addr = 0; 117 data_.current_magic = 0; 118 data_.current_filetype = 0; 119 data_.current_arch = kModuleArchUnknown; 120 internal_memset(data_.current_uuid, 0, kModuleUUIDSize); 121 } 122 123 // The dyld load address should be unchanged throughout process execution, 124 // and it is expensive to compute once many libraries have been loaded, 125 // so cache it here and do not reset. 126 static mach_header *dyld_hdr = 0; 127 static const char kDyldPath[] = "/usr/lib/dyld"; 128 static const int kDyldImageIdx = -1; 129 130 // static 131 void MemoryMappingLayout::CacheMemoryMappings() { 132 // No-op on Mac for now. 133 } 134 135 void MemoryMappingLayout::LoadFromCache() { 136 // No-op on Mac for now. 137 } 138 139 // _dyld_get_image_header() and related APIs don't report dyld itself. 140 // We work around this by manually recursing through the memory map 141 // until we hit a Mach header matching dyld instead. These recurse 142 // calls are expensive, but the first memory map generation occurs 143 // early in the process, when dyld is one of the only images loaded, 144 // so it will be hit after only a few iterations. 145 static mach_header *get_dyld_image_header() { 146 vm_address_t address = 0; 147 148 while (true) { 149 vm_size_t size = 0; 150 unsigned depth = 1; 151 struct vm_region_submap_info_64 info; 152 mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64; 153 kern_return_t err = 154 vm_region_recurse_64(mach_task_self(), &address, &size, &depth, 155 (vm_region_info_t)&info, &count); 156 if (err != KERN_SUCCESS) return nullptr; 157 158 if (size >= sizeof(mach_header) && info.protection & kProtectionRead) { 159 mach_header *hdr = (mach_header *)address; 160 if ((hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) && 161 hdr->filetype == MH_DYLINKER) { 162 return hdr; 163 } 164 } 165 address += size; 166 } 167 } 168 169 const mach_header *get_dyld_hdr() { 170 if (!dyld_hdr) dyld_hdr = get_dyld_image_header(); 171 172 return dyld_hdr; 173 } 174 175 // Next and NextSegmentLoad were inspired by base/sysinfo.cc in 176 // Google Perftools, https://github.com/gperftools/gperftools. 177 178 // NextSegmentLoad scans the current image for the next segment load command 179 // and returns the start and end addresses and file offset of the corresponding 180 // segment. 181 // Note that the segment addresses are not necessarily sorted. 182 template <u32 kLCSegment, typename SegmentCommand> 183 static bool NextSegmentLoad(MemoryMappedSegment *segment, 184 MemoryMappedSegmentData *seg_data, 185 MemoryMappingLayoutData *layout_data) { 186 const char *lc = layout_data->current_load_cmd_addr; 187 layout_data->current_load_cmd_addr += ((const load_command *)lc)->cmdsize; 188 if (((const load_command *)lc)->cmd == kLCSegment) { 189 const SegmentCommand* sc = (const SegmentCommand *)lc; 190 uptr base_virt_addr, addr_mask; 191 if (layout_data->current_image == kDyldImageIdx) { 192 base_virt_addr = (uptr)get_dyld_hdr(); 193 // vmaddr is masked with 0xfffff because on macOS versions < 10.12, 194 // it contains an absolute address rather than an offset for dyld. 195 // To make matters even more complicated, this absolute address 196 // isn't actually the absolute segment address, but the offset portion 197 // of the address is accurate when combined with the dyld base address, 198 // and the mask will give just this offset. 199 addr_mask = 0xfffff; 200 } else { 201 base_virt_addr = 202 (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image); 203 addr_mask = ~0; 204 } 205 206 segment->start = (sc->vmaddr & addr_mask) + base_virt_addr; 207 segment->end = segment->start + sc->vmsize; 208 // Most callers don't need section information, so only fill this struct 209 // when required. 210 if (seg_data) { 211 seg_data->nsects = sc->nsects; 212 seg_data->current_load_cmd_addr = 213 (const char *)lc + sizeof(SegmentCommand); 214 seg_data->lc_type = kLCSegment; 215 seg_data->base_virt_addr = base_virt_addr; 216 seg_data->addr_mask = addr_mask; 217 internal_strncpy(seg_data->name, sc->segname, 218 ARRAY_SIZE(seg_data->name)); 219 } 220 221 // Return the initial protection. 222 segment->protection = sc->initprot; 223 segment->offset = (layout_data->current_filetype == 224 /*MH_EXECUTE*/ 0x2) 225 ? sc->vmaddr 226 : sc->fileoff; 227 if (segment->filename) { 228 const char *src = (layout_data->current_image == kDyldImageIdx) 229 ? kDyldPath 230 : _dyld_get_image_name(layout_data->current_image); 231 internal_strncpy(segment->filename, src, segment->filename_size); 232 } 233 segment->arch = layout_data->current_arch; 234 internal_memcpy(segment->uuid, layout_data->current_uuid, kModuleUUIDSize); 235 return true; 236 } 237 return false; 238 } 239 240 ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) { 241 cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK; 242 switch (cputype) { 243 case CPU_TYPE_I386: 244 return kModuleArchI386; 245 case CPU_TYPE_X86_64: 246 if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64; 247 if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H; 248 CHECK(0 && "Invalid subtype of x86_64"); 249 return kModuleArchUnknown; 250 case CPU_TYPE_ARM: 251 if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6; 252 if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7; 253 if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S; 254 if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K; 255 CHECK(0 && "Invalid subtype of ARM"); 256 return kModuleArchUnknown; 257 case CPU_TYPE_ARM64: 258 return kModuleArchARM64; 259 default: 260 CHECK(0 && "Invalid CPU type"); 261 return kModuleArchUnknown; 262 } 263 } 264 265 static const load_command *NextCommand(const load_command *lc) { 266 return (const load_command *)((const char *)lc + lc->cmdsize); 267 } 268 269 static void FindUUID(const load_command *first_lc, u8 *uuid_output) { 270 for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) { 271 if (lc->cmd != LC_UUID) continue; 272 273 const uuid_command *uuid_lc = (const uuid_command *)lc; 274 const uint8_t *uuid = &uuid_lc->uuid[0]; 275 internal_memcpy(uuid_output, uuid, kModuleUUIDSize); 276 return; 277 } 278 } 279 280 static bool IsModuleInstrumented(const load_command *first_lc) { 281 for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) { 282 if (lc->cmd != LC_LOAD_DYLIB) continue; 283 284 const dylib_command *dylib_lc = (const dylib_command *)lc; 285 uint32_t dylib_name_offset = dylib_lc->dylib.name.offset; 286 const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset; 287 dylib_name = StripModuleName(dylib_name); 288 if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) { 289 return true; 290 } 291 } 292 return false; 293 } 294 295 bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) { 296 for (; data_.current_image >= kDyldImageIdx; data_.current_image--) { 297 const mach_header *hdr = (data_.current_image == kDyldImageIdx) 298 ? get_dyld_hdr() 299 : _dyld_get_image_header(data_.current_image); 300 if (!hdr) continue; 301 if (data_.current_load_cmd_count < 0) { 302 // Set up for this image; 303 data_.current_load_cmd_count = hdr->ncmds; 304 data_.current_magic = hdr->magic; 305 data_.current_filetype = hdr->filetype; 306 data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype); 307 switch (data_.current_magic) { 308 #ifdef MH_MAGIC_64 309 case MH_MAGIC_64: { 310 data_.current_load_cmd_addr = 311 (const char *)hdr + sizeof(mach_header_64); 312 break; 313 } 314 #endif 315 case MH_MAGIC: { 316 data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header); 317 break; 318 } 319 default: { 320 continue; 321 } 322 } 323 FindUUID((const load_command *)data_.current_load_cmd_addr, 324 data_.current_uuid); 325 data_.current_instrumented = IsModuleInstrumented( 326 (const load_command *)data_.current_load_cmd_addr); 327 } 328 329 for (; data_.current_load_cmd_count >= 0; data_.current_load_cmd_count--) { 330 switch (data_.current_magic) { 331 // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64. 332 #ifdef MH_MAGIC_64 333 case MH_MAGIC_64: { 334 if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>( 335 segment, segment->data_, &data_)) 336 return true; 337 break; 338 } 339 #endif 340 case MH_MAGIC: { 341 if (NextSegmentLoad<LC_SEGMENT, struct segment_command>( 342 segment, segment->data_, &data_)) 343 return true; 344 break; 345 } 346 } 347 } 348 // If we get here, no more load_cmd's in this image talk about 349 // segments. Go on to the next image. 350 } 351 return false; 352 } 353 354 void MemoryMappingLayout::DumpListOfModules( 355 InternalMmapVectorNoCtor<LoadedModule> *modules) { 356 Reset(); 357 InternalMmapVector<char> module_name(kMaxPathLength); 358 MemoryMappedSegment segment(module_name.data(), module_name.size()); 359 MemoryMappedSegmentData data; 360 segment.data_ = &data; 361 while (Next(&segment)) { 362 if (segment.filename[0] == '\0') continue; 363 LoadedModule *cur_module = nullptr; 364 if (!modules->empty() && 365 0 == internal_strcmp(segment.filename, modules->back().full_name())) { 366 cur_module = &modules->back(); 367 } else { 368 modules->push_back(LoadedModule()); 369 cur_module = &modules->back(); 370 cur_module->set(segment.filename, segment.start, segment.arch, 371 segment.uuid, data_.current_instrumented); 372 } 373 segment.AddAddressRanges(cur_module); 374 } 375 } 376 377 } // namespace __sanitizer 378 379 #endif // SANITIZER_MAC 380