1 //===-- dfsan.cpp ---------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of DataFlowSanitizer. 10 // 11 // DataFlowSanitizer runtime. This file defines the public interface to 12 // DataFlowSanitizer as well as the definition of certain runtime functions 13 // called automatically by the compiler (specifically the instrumentation pass 14 // in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp). 15 // 16 // The public interface is defined in include/sanitizer/dfsan_interface.h whose 17 // functions are prefixed dfsan_ while the compiler interface functions are 18 // prefixed __dfsan_. 19 //===----------------------------------------------------------------------===// 20 21 #include "dfsan/dfsan.h" 22 23 #include "dfsan/dfsan_chained_origin_depot.h" 24 #include "dfsan/dfsan_flags.h" 25 #include "dfsan/dfsan_origin.h" 26 #include "dfsan/dfsan_thread.h" 27 #include "sanitizer_common/sanitizer_atomic.h" 28 #include "sanitizer_common/sanitizer_common.h" 29 #include "sanitizer_common/sanitizer_file.h" 30 #include "sanitizer_common/sanitizer_flag_parser.h" 31 #include "sanitizer_common/sanitizer_flags.h" 32 #include "sanitizer_common/sanitizer_internal_defs.h" 33 #include "sanitizer_common/sanitizer_libc.h" 34 #include "sanitizer_common/sanitizer_report_decorator.h" 35 #include "sanitizer_common/sanitizer_stacktrace.h" 36 37 using namespace __dfsan; 38 39 Flags __dfsan::flags_data; 40 41 // The size of TLS variables. These constants must be kept in sync with the ones 42 // in DataFlowSanitizer.cpp. 43 static const int kDFsanArgTlsSize = 800; 44 static const int kDFsanRetvalTlsSize = 800; 45 static const int kDFsanArgOriginTlsSize = 800; 46 47 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64 48 __dfsan_retval_tls[kDFsanRetvalTlsSize / sizeof(u64)]; 49 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 __dfsan_retval_origin_tls; 50 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64 51 __dfsan_arg_tls[kDFsanArgTlsSize / sizeof(u64)]; 52 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 53 __dfsan_arg_origin_tls[kDFsanArgOriginTlsSize / sizeof(u32)]; 54 55 // Instrumented code may set this value in terms of -dfsan-track-origins. 56 // * undefined or 0: do not track origins. 57 // * 1: track origins at memory store operations. 58 // * 2: track origins at memory load and store operations. 59 // TODO: track callsites. 60 extern "C" SANITIZER_WEAK_ATTRIBUTE const int __dfsan_track_origins; 61 62 extern "C" SANITIZER_INTERFACE_ATTRIBUTE int dfsan_get_track_origins() { 63 return &__dfsan_track_origins ? __dfsan_track_origins : 0; 64 } 65 66 // On Linux/x86_64, memory is laid out as follows: 67 // 68 // +--------------------+ 0x800000000000 (top of memory) 69 // | application 3 | 70 // +--------------------+ 0x700000000000 71 // | invalid | 72 // +--------------------+ 0x610000000000 73 // | origin 1 | 74 // +--------------------+ 0x600000000000 75 // | application 2 | 76 // +--------------------+ 0x510000000000 77 // | shadow 1 | 78 // +--------------------+ 0x500000000000 79 // | invalid | 80 // +--------------------+ 0x400000000000 81 // | origin 3 | 82 // +--------------------+ 0x300000000000 83 // | shadow 3 | 84 // +--------------------+ 0x200000000000 85 // | origin 2 | 86 // +--------------------+ 0x110000000000 87 // | invalid | 88 // +--------------------+ 0x100000000000 89 // | shadow 2 | 90 // +--------------------+ 0x010000000000 91 // | application 1 | 92 // +--------------------+ 0x000000000000 93 // 94 // MEM_TO_SHADOW(mem) = mem ^ 0x500000000000 95 // SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000 96 97 extern "C" SANITIZER_INTERFACE_ATTRIBUTE 98 dfsan_label __dfsan_union_load(const dfsan_label *ls, uptr n) { 99 dfsan_label label = ls[0]; 100 for (uptr i = 1; i != n; ++i) 101 label |= ls[i]; 102 return label; 103 } 104 105 // Return the union of all the n labels from addr at the high 32 bit, and the 106 // origin of the first taint byte at the low 32 bit. 107 extern "C" SANITIZER_INTERFACE_ATTRIBUTE u64 108 __dfsan_load_label_and_origin(const void *addr, uptr n) { 109 dfsan_label label = 0; 110 u64 ret = 0; 111 uptr p = (uptr)addr; 112 dfsan_label *s = shadow_for((void *)p); 113 for (uptr i = 0; i < n; ++i) { 114 dfsan_label l = s[i]; 115 if (!l) 116 continue; 117 label |= l; 118 if (!ret) 119 ret = *(dfsan_origin *)origin_for((void *)(p + i)); 120 } 121 return ret | (u64)label << 32; 122 } 123 124 extern "C" SANITIZER_INTERFACE_ATTRIBUTE 125 void __dfsan_unimplemented(char *fname) { 126 if (flags().warn_unimplemented) 127 Report("WARNING: DataFlowSanitizer: call to uninstrumented function %s\n", 128 fname); 129 } 130 131 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_wrapper_extern_weak_null( 132 const void *addr, char *fname) { 133 if (!addr) 134 Report( 135 "ERROR: DataFlowSanitizer: dfsan generated wrapper calling null " 136 "extern_weak function %s\nIf this only happens with dfsan, the " 137 "dfsan instrumentation pass may be accidentally optimizing out a " 138 "null check\n", 139 fname); 140 } 141 142 // Use '-mllvm -dfsan-debug-nonzero-labels' and break on this function 143 // to try to figure out where labels are being introduced in a nominally 144 // label-free program. 145 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_nonzero_label() { 146 if (flags().warn_nonzero_labels) 147 Report("WARNING: DataFlowSanitizer: saw nonzero label\n"); 148 } 149 150 // Indirect call to an uninstrumented vararg function. We don't have a way of 151 // handling these at the moment. 152 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void 153 __dfsan_vararg_wrapper(const char *fname) { 154 Report("FATAL: DataFlowSanitizer: unsupported indirect call to vararg " 155 "function %s\n", fname); 156 Die(); 157 } 158 159 // Resolves the union of two labels. 160 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label 161 dfsan_union(dfsan_label l1, dfsan_label l2) { 162 return l1 | l2; 163 } 164 165 static const uptr kOriginAlign = sizeof(dfsan_origin); 166 static const uptr kOriginAlignMask = ~(kOriginAlign - 1UL); 167 168 static uptr OriginAlignUp(uptr u) { 169 return (u + kOriginAlign - 1) & kOriginAlignMask; 170 } 171 172 static uptr OriginAlignDown(uptr u) { return u & kOriginAlignMask; } 173 174 // Return the origin of the first taint byte in the size bytes from the address 175 // addr. 176 static dfsan_origin GetOriginIfTainted(uptr addr, uptr size) { 177 for (uptr i = 0; i < size; ++i, ++addr) { 178 dfsan_label *s = shadow_for((void *)addr); 179 180 if (*s) { 181 // Validate address region. 182 CHECK(MEM_IS_SHADOW(s)); 183 return *(dfsan_origin *)origin_for((void *)addr); 184 } 185 } 186 return 0; 187 } 188 189 // For platforms which support slow unwinder only, we need to restrict the store 190 // context size to 1, basically only storing the current pc, because the slow 191 // unwinder which is based on libunwind is not async signal safe and causes 192 // random freezes in forking applications as well as in signal handlers. 193 // DFSan supports only Linux. So we do not restrict the store context size. 194 #define GET_STORE_STACK_TRACE_PC_BP(pc, bp) \ 195 BufferedStackTrace stack; \ 196 stack.Unwind(pc, bp, nullptr, true, flags().store_context_size); 197 198 #define PRINT_CALLER_STACK_TRACE \ 199 { \ 200 GET_CALLER_PC_BP_SP; \ 201 (void)sp; \ 202 GET_STORE_STACK_TRACE_PC_BP(pc, bp) \ 203 stack.Print(); \ 204 } 205 206 // Return a chain with the previous ID id and the current stack. 207 // from_init = true if this is the first chain of an origin tracking path. 208 static u32 ChainOrigin(u32 id, StackTrace *stack, bool from_init = false) { 209 // StackDepot is not async signal safe. Do not create new chains in a signal 210 // handler. 211 DFsanThread *t = GetCurrentThread(); 212 if (t && t->InSignalHandler()) 213 return id; 214 215 // As an optimization the origin of an application byte is updated only when 216 // its shadow is non-zero. Because we are only interested in the origins of 217 // taint labels, it does not matter what origin a zero label has. This reduces 218 // memory write cost. MSan does similar optimization. The following invariant 219 // may not hold because of some bugs. We check the invariant to help debug. 220 if (!from_init && id == 0 && flags().check_origin_invariant) { 221 Printf(" DFSan found invalid origin invariant\n"); 222 PRINT_CALLER_STACK_TRACE 223 } 224 225 Origin o = Origin::FromRawId(id); 226 stack->tag = StackTrace::TAG_UNKNOWN; 227 Origin chained = Origin::CreateChainedOrigin(o, stack); 228 return chained.raw_id(); 229 } 230 231 static void ChainAndWriteOriginIfTainted(uptr src, uptr size, uptr dst, 232 StackTrace *stack) { 233 dfsan_origin o = GetOriginIfTainted(src, size); 234 if (o) { 235 o = ChainOrigin(o, stack); 236 *(dfsan_origin *)origin_for((void *)dst) = o; 237 } 238 } 239 240 // Copy the origins of the size bytes from src to dst. The source and target 241 // memory ranges cannot be overlapped. This is used by memcpy. stack records the 242 // stack trace of the memcpy. When dst and src are not 4-byte aligned properly, 243 // origins at the unaligned address boundaries may be overwritten because four 244 // contiguous bytes share the same origin. 245 static void CopyOrigin(const void *dst, const void *src, uptr size, 246 StackTrace *stack) { 247 uptr d = (uptr)dst; 248 uptr beg = OriginAlignDown(d); 249 // Copy left unaligned origin if that memory is tainted. 250 if (beg < d) { 251 ChainAndWriteOriginIfTainted((uptr)src, beg + kOriginAlign - d, beg, stack); 252 beg += kOriginAlign; 253 } 254 255 uptr end = OriginAlignDown(d + size); 256 // If both ends fall into the same 4-byte slot, we are done. 257 if (end < beg) 258 return; 259 260 // Copy right unaligned origin if that memory is tainted. 261 if (end < d + size) 262 ChainAndWriteOriginIfTainted((uptr)src + (end - d), (d + size) - end, end, 263 stack); 264 265 if (beg >= end) 266 return; 267 268 // Align src up. 269 uptr src_a = OriginAlignUp((uptr)src); 270 dfsan_origin *src_o = origin_for((void *)src_a); 271 u32 *src_s = (u32 *)shadow_for((void *)src_a); 272 dfsan_origin *src_end = origin_for((void *)(src_a + (end - beg))); 273 dfsan_origin *dst_o = origin_for((void *)beg); 274 dfsan_origin last_src_o = 0; 275 dfsan_origin last_dst_o = 0; 276 for (; src_o < src_end; ++src_o, ++src_s, ++dst_o) { 277 if (!*src_s) 278 continue; 279 if (*src_o != last_src_o) { 280 last_src_o = *src_o; 281 last_dst_o = ChainOrigin(last_src_o, stack); 282 } 283 *dst_o = last_dst_o; 284 } 285 } 286 287 // Copy the origins of the size bytes from src to dst. The source and target 288 // memory ranges may be overlapped. So the copy is done in a reverse order. 289 // This is used by memmove. stack records the stack trace of the memmove. 290 static void ReverseCopyOrigin(const void *dst, const void *src, uptr size, 291 StackTrace *stack) { 292 uptr d = (uptr)dst; 293 uptr end = OriginAlignDown(d + size); 294 295 // Copy right unaligned origin if that memory is tainted. 296 if (end < d + size) 297 ChainAndWriteOriginIfTainted((uptr)src + (end - d), (d + size) - end, end, 298 stack); 299 300 uptr beg = OriginAlignDown(d); 301 302 if (beg + kOriginAlign < end) { 303 // Align src up. 304 uptr src_a = OriginAlignUp((uptr)src); 305 void *src_end = (void *)(src_a + end - beg - kOriginAlign); 306 dfsan_origin *src_end_o = origin_for(src_end); 307 u32 *src_end_s = (u32 *)shadow_for(src_end); 308 dfsan_origin *src_begin_o = origin_for((void *)src_a); 309 dfsan_origin *dst = origin_for((void *)(end - kOriginAlign)); 310 dfsan_origin last_src_o = 0; 311 dfsan_origin last_dst_o = 0; 312 for (; src_end_o >= src_begin_o; --src_end_o, --src_end_s, --dst) { 313 if (!*src_end_s) 314 continue; 315 if (*src_end_o != last_src_o) { 316 last_src_o = *src_end_o; 317 last_dst_o = ChainOrigin(last_src_o, stack); 318 } 319 *dst = last_dst_o; 320 } 321 } 322 323 // Copy left unaligned origin if that memory is tainted. 324 if (beg < d) 325 ChainAndWriteOriginIfTainted((uptr)src, beg + kOriginAlign - d, beg, stack); 326 } 327 328 // Copy or move the origins of the len bytes from src to dst. The source and 329 // target memory ranges may or may not be overlapped. This is used by memory 330 // transfer operations. stack records the stack trace of the memory transfer 331 // operation. 332 static void MoveOrigin(const void *dst, const void *src, uptr size, 333 StackTrace *stack) { 334 // Validate address regions. 335 if (!MEM_IS_SHADOW(shadow_for(dst)) || 336 !MEM_IS_SHADOW(shadow_for((void *)((uptr)dst + size))) || 337 !MEM_IS_SHADOW(shadow_for(src)) || 338 !MEM_IS_SHADOW(shadow_for((void *)((uptr)src + size)))) { 339 CHECK(false); 340 return; 341 } 342 // If destination origin range overlaps with source origin range, move 343 // origins by copying origins in a reverse order; otherwise, copy origins in 344 // a normal order. The orders of origin transfer are consistent with the 345 // orders of how memcpy and memmove transfer user data. 346 uptr src_aligned_beg = OriginAlignDown((uptr)src); 347 uptr src_aligned_end = OriginAlignDown((uptr)src + size); 348 uptr dst_aligned_beg = OriginAlignDown((uptr)dst); 349 if (dst_aligned_beg < src_aligned_end && dst_aligned_beg >= src_aligned_beg) 350 return ReverseCopyOrigin(dst, src, size, stack); 351 return CopyOrigin(dst, src, size, stack); 352 } 353 354 // Set the size bytes from the addres dst to be the origin value. 355 static void SetOrigin(const void *dst, uptr size, u32 origin) { 356 if (size == 0) 357 return; 358 359 // Origin mapping is 4 bytes per 4 bytes of application memory. 360 // Here we extend the range such that its left and right bounds are both 361 // 4 byte aligned. 362 uptr x = unaligned_origin_for((uptr)dst); 363 uptr beg = OriginAlignDown(x); 364 uptr end = OriginAlignUp(x + size); // align up. 365 u64 origin64 = ((u64)origin << 32) | origin; 366 // This is like memset, but the value is 32-bit. We unroll by 2 to write 367 // 64 bits at once. May want to unroll further to get 128-bit stores. 368 if (beg & 7ULL) { 369 if (*(u32 *)beg != origin) 370 *(u32 *)beg = origin; 371 beg += 4; 372 } 373 for (uptr addr = beg; addr < (end & ~7UL); addr += 8) { 374 if (*(u64 *)addr == origin64) 375 continue; 376 *(u64 *)addr = origin64; 377 } 378 if (end & 7ULL) 379 if (*(u32 *)(end - kOriginAlign) != origin) 380 *(u32 *)(end - kOriginAlign) = origin; 381 } 382 383 #define RET_CHAIN_ORIGIN(id) \ 384 GET_CALLER_PC_BP_SP; \ 385 (void)sp; \ 386 GET_STORE_STACK_TRACE_PC_BP(pc, bp); \ 387 return ChainOrigin(id, &stack); 388 389 // Return a new origin chain with the previous ID id and the current stack 390 // trace. 391 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin 392 __dfsan_chain_origin(dfsan_origin id) { 393 RET_CHAIN_ORIGIN(id) 394 } 395 396 // Return a new origin chain with the previous ID id and the current stack 397 // trace if the label is tainted. 398 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin 399 __dfsan_chain_origin_if_tainted(dfsan_label label, dfsan_origin id) { 400 if (!label) 401 return id; 402 RET_CHAIN_ORIGIN(id) 403 } 404 405 // Copy or move the origins of the len bytes from src to dst. 406 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_mem_origin_transfer( 407 const void *dst, const void *src, uptr len) { 408 if (src == dst) 409 return; 410 GET_CALLER_PC_BP; 411 GET_STORE_STACK_TRACE_PC_BP(pc, bp); 412 MoveOrigin(dst, src, len, &stack); 413 } 414 415 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_mem_origin_transfer( 416 const void *dst, const void *src, uptr len) { 417 __dfsan_mem_origin_transfer(dst, src, len); 418 } 419 420 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_mem_shadow_transfer( 421 void *dst, const void *src, uptr len) { 422 internal_memcpy((void *)__dfsan::shadow_for(dst), 423 (const void *)__dfsan::shadow_for(src), 424 len * sizeof(dfsan_label)); 425 } 426 427 namespace __dfsan { 428 429 bool dfsan_inited = false; 430 bool dfsan_init_is_running = false; 431 432 void dfsan_copy_memory(void *dst, const void *src, uptr size) { 433 internal_memcpy(dst, src, size); 434 dfsan_mem_shadow_transfer(dst, src, size); 435 if (dfsan_get_track_origins()) 436 dfsan_mem_origin_transfer(dst, src, size); 437 } 438 439 // Releases the pages within the origin address range. 440 static void ReleaseOrigins(void *addr, uptr size) { 441 const uptr beg_origin_addr = (uptr)__dfsan::origin_for(addr); 442 const void *end_addr = (void *)((uptr)addr + size); 443 const uptr end_origin_addr = (uptr)__dfsan::origin_for(end_addr); 444 445 if (end_origin_addr - beg_origin_addr < 446 common_flags()->clear_shadow_mmap_threshold) 447 return; 448 449 const uptr page_size = GetPageSizeCached(); 450 const uptr beg_aligned = RoundUpTo(beg_origin_addr, page_size); 451 const uptr end_aligned = RoundDownTo(end_origin_addr, page_size); 452 453 if (!MmapFixedSuperNoReserve(beg_aligned, end_aligned - beg_aligned)) 454 Die(); 455 } 456 457 static void WriteZeroShadowInRange(uptr beg, uptr end) { 458 // Don't write the label if it is already the value we need it to be. 459 // In a program where most addresses are not labeled, it is common that 460 // a page of shadow memory is entirely zeroed. The Linux copy-on-write 461 // implementation will share all of the zeroed pages, making a copy of a 462 // page when any value is written. The un-sharing will happen even if 463 // the value written does not change the value in memory. Avoiding the 464 // write when both |label| and |*labelp| are zero dramatically reduces 465 // the amount of real memory used by large programs. 466 if (!mem_is_zero((const char *)beg, end - beg)) 467 internal_memset((void *)beg, 0, end - beg); 468 } 469 470 // Releases the pages within the shadow address range, and sets 471 // the shadow addresses not on the pages to be 0. 472 static void ReleaseOrClearShadows(void *addr, uptr size) { 473 const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr); 474 const void *end_addr = (void *)((uptr)addr + size); 475 const uptr end_shadow_addr = (uptr)__dfsan::shadow_for(end_addr); 476 477 if (end_shadow_addr - beg_shadow_addr < 478 common_flags()->clear_shadow_mmap_threshold) { 479 WriteZeroShadowInRange(beg_shadow_addr, end_shadow_addr); 480 return; 481 } 482 483 const uptr page_size = GetPageSizeCached(); 484 const uptr beg_aligned = RoundUpTo(beg_shadow_addr, page_size); 485 const uptr end_aligned = RoundDownTo(end_shadow_addr, page_size); 486 487 if (beg_aligned >= end_aligned) { 488 WriteZeroShadowInRange(beg_shadow_addr, end_shadow_addr); 489 } else { 490 if (beg_aligned != beg_shadow_addr) 491 WriteZeroShadowInRange(beg_shadow_addr, beg_aligned); 492 if (end_aligned != end_shadow_addr) 493 WriteZeroShadowInRange(end_aligned, end_shadow_addr); 494 if (!MmapFixedSuperNoReserve(beg_aligned, end_aligned - beg_aligned)) 495 Die(); 496 } 497 } 498 499 void SetShadow(dfsan_label label, void *addr, uptr size, dfsan_origin origin) { 500 if (0 != label) { 501 const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr); 502 internal_memset((void *)beg_shadow_addr, label, size); 503 if (dfsan_get_track_origins()) 504 SetOrigin(addr, size, origin); 505 return; 506 } 507 508 if (dfsan_get_track_origins()) 509 ReleaseOrigins(addr, size); 510 511 ReleaseOrClearShadows(addr, size); 512 } 513 514 } // namespace __dfsan 515 516 // If the label s is tainted, set the size bytes from the address p to be a new 517 // origin chain with the previous ID o and the current stack trace. This is 518 // used by instrumentation to reduce code size when too much code is inserted. 519 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_maybe_store_origin( 520 dfsan_label s, void *p, uptr size, dfsan_origin o) { 521 if (UNLIKELY(s)) { 522 GET_CALLER_PC_BP_SP; 523 (void)sp; 524 GET_STORE_STACK_TRACE_PC_BP(pc, bp); 525 SetOrigin(p, size, ChainOrigin(o, &stack)); 526 } 527 } 528 529 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_set_label( 530 dfsan_label label, dfsan_origin origin, void *addr, uptr size) { 531 __dfsan::SetShadow(label, addr, size, origin); 532 } 533 534 SANITIZER_INTERFACE_ATTRIBUTE 535 void dfsan_set_label(dfsan_label label, void *addr, uptr size) { 536 dfsan_origin init_origin = 0; 537 if (label && dfsan_get_track_origins()) { 538 GET_CALLER_PC_BP; 539 GET_STORE_STACK_TRACE_PC_BP(pc, bp); 540 init_origin = ChainOrigin(0, &stack, true); 541 } 542 __dfsan::SetShadow(label, addr, size, init_origin); 543 } 544 545 SANITIZER_INTERFACE_ATTRIBUTE 546 void dfsan_add_label(dfsan_label label, void *addr, uptr size) { 547 if (0 == label) 548 return; 549 550 if (dfsan_get_track_origins()) { 551 GET_CALLER_PC_BP; 552 GET_STORE_STACK_TRACE_PC_BP(pc, bp); 553 dfsan_origin init_origin = ChainOrigin(0, &stack, true); 554 SetOrigin(addr, size, init_origin); 555 } 556 557 for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp) 558 *labelp |= label; 559 } 560 561 // Unlike the other dfsan interface functions the behavior of this function 562 // depends on the label of one of its arguments. Hence it is implemented as a 563 // custom function. 564 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label 565 __dfsw_dfsan_get_label(long data, dfsan_label data_label, 566 dfsan_label *ret_label) { 567 *ret_label = 0; 568 return data_label; 569 } 570 571 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label __dfso_dfsan_get_label( 572 long data, dfsan_label data_label, dfsan_label *ret_label, 573 dfsan_origin data_origin, dfsan_origin *ret_origin) { 574 *ret_label = 0; 575 *ret_origin = 0; 576 return data_label; 577 } 578 579 // This function is used if dfsan_get_origin is called when origin tracking is 580 // off. 581 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin __dfsw_dfsan_get_origin( 582 long data, dfsan_label data_label, dfsan_label *ret_label) { 583 *ret_label = 0; 584 return 0; 585 } 586 587 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin __dfso_dfsan_get_origin( 588 long data, dfsan_label data_label, dfsan_label *ret_label, 589 dfsan_origin data_origin, dfsan_origin *ret_origin) { 590 *ret_label = 0; 591 *ret_origin = 0; 592 return data_origin; 593 } 594 595 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label 596 dfsan_read_label(const void *addr, uptr size) { 597 if (size == 0) 598 return 0; 599 return __dfsan_union_load(shadow_for(addr), size); 600 } 601 602 SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin 603 dfsan_read_origin_of_first_taint(const void *addr, uptr size) { 604 return GetOriginIfTainted((uptr)addr, size); 605 } 606 607 SANITIZER_INTERFACE_ATTRIBUTE void dfsan_set_label_origin(dfsan_label label, 608 dfsan_origin origin, 609 void *addr, 610 uptr size) { 611 __dfsan_set_label(label, origin, addr, size); 612 } 613 614 extern "C" SANITIZER_INTERFACE_ATTRIBUTE int 615 dfsan_has_label(dfsan_label label, dfsan_label elem) { 616 return (label & elem) == elem; 617 } 618 619 namespace __dfsan { 620 621 typedef void (*dfsan_conditional_callback_t)(dfsan_label label, 622 dfsan_origin origin); 623 static dfsan_conditional_callback_t conditional_callback = nullptr; 624 static dfsan_label labels_in_signal_conditional = 0; 625 626 static void ConditionalCallback(dfsan_label label, dfsan_origin origin) { 627 // Programs have many branches. For efficiency the conditional sink callback 628 // handler needs to ignore as many as possible as early as possible. 629 if (label == 0) { 630 return; 631 } 632 if (conditional_callback == nullptr) { 633 return; 634 } 635 636 // This initial ConditionalCallback handler needs to be in here in dfsan 637 // runtime (rather than being an entirely user implemented hook) so that it 638 // has access to dfsan thread information. 639 DFsanThread *t = GetCurrentThread(); 640 // A callback operation which does useful work (like record the flow) will 641 // likely be too long executed in a signal handler. 642 if (t && t->InSignalHandler()) { 643 // Record set of labels used in signal handler for completeness. 644 labels_in_signal_conditional |= label; 645 return; 646 } 647 648 conditional_callback(label, origin); 649 } 650 651 } // namespace __dfsan 652 653 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void 654 __dfsan_conditional_callback_origin(dfsan_label label, dfsan_origin origin) { 655 __dfsan::ConditionalCallback(label, origin); 656 } 657 658 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_conditional_callback( 659 dfsan_label label) { 660 __dfsan::ConditionalCallback(label, 0); 661 } 662 663 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_set_conditional_callback( 664 __dfsan::dfsan_conditional_callback_t callback) { 665 __dfsan::conditional_callback = callback; 666 } 667 668 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label 669 dfsan_get_labels_in_signal_conditional() { 670 return __dfsan::labels_in_signal_conditional; 671 } 672 673 class Decorator : public __sanitizer::SanitizerCommonDecorator { 674 public: 675 Decorator() : SanitizerCommonDecorator() {} 676 const char *Origin() const { return Magenta(); } 677 }; 678 679 namespace { 680 681 void PrintNoOriginTrackingWarning() { 682 Decorator d; 683 Printf( 684 " %sDFSan: origin tracking is not enabled. Did you specify the " 685 "-dfsan-track-origins=1 option?%s\n", 686 d.Warning(), d.Default()); 687 } 688 689 void PrintNoTaintWarning(const void *address) { 690 Decorator d; 691 Printf(" %sDFSan: no tainted value at %x%s\n", d.Warning(), address, 692 d.Default()); 693 } 694 695 void PrintInvalidOriginWarning(dfsan_label label, const void *address) { 696 Decorator d; 697 Printf( 698 " %sTaint value 0x%x (at %p) has invalid origin tracking. This can " 699 "be a DFSan bug.%s\n", 700 d.Warning(), label, address, d.Default()); 701 } 702 703 void PrintInvalidOriginIdWarning(dfsan_origin origin) { 704 Decorator d; 705 Printf( 706 " %sOrigin Id %d has invalid origin tracking. This can " 707 "be a DFSan bug.%s\n", 708 d.Warning(), origin, d.Default()); 709 } 710 711 bool PrintOriginTraceFramesToStr(Origin o, InternalScopedString *out) { 712 Decorator d; 713 bool found = false; 714 715 while (o.isChainedOrigin()) { 716 StackTrace stack; 717 dfsan_origin origin_id = o.raw_id(); 718 o = o.getNextChainedOrigin(&stack); 719 if (o.isChainedOrigin()) 720 out->append( 721 " %sOrigin value: 0x%x, Taint value was stored to memory at%s\n", 722 d.Origin(), origin_id, d.Default()); 723 else 724 out->append(" %sOrigin value: 0x%x, Taint value was created at%s\n", 725 d.Origin(), origin_id, d.Default()); 726 727 // Includes a trailing newline, so no need to add it again. 728 stack.PrintTo(out); 729 found = true; 730 } 731 732 return found; 733 } 734 735 bool PrintOriginTraceToStr(const void *addr, const char *description, 736 InternalScopedString *out) { 737 CHECK(out); 738 CHECK(dfsan_get_track_origins()); 739 Decorator d; 740 741 const dfsan_label label = *__dfsan::shadow_for(addr); 742 CHECK(label); 743 744 const dfsan_origin origin = *__dfsan::origin_for(addr); 745 746 out->append(" %sTaint value 0x%x (at %p) origin tracking (%s)%s\n", 747 d.Origin(), label, addr, description ? description : "", 748 d.Default()); 749 750 Origin o = Origin::FromRawId(origin); 751 return PrintOriginTraceFramesToStr(o, out); 752 } 753 754 } // namespace 755 756 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_trace( 757 const void *addr, const char *description) { 758 if (!dfsan_get_track_origins()) { 759 PrintNoOriginTrackingWarning(); 760 return; 761 } 762 763 const dfsan_label label = *__dfsan::shadow_for(addr); 764 if (!label) { 765 PrintNoTaintWarning(addr); 766 return; 767 } 768 769 InternalScopedString trace; 770 bool success = PrintOriginTraceToStr(addr, description, &trace); 771 772 if (trace.length()) 773 Printf("%s", trace.data()); 774 775 if (!success) 776 PrintInvalidOriginWarning(label, addr); 777 } 778 779 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr 780 dfsan_sprint_origin_trace(const void *addr, const char *description, 781 char *out_buf, uptr out_buf_size) { 782 CHECK(out_buf); 783 784 if (!dfsan_get_track_origins()) { 785 PrintNoOriginTrackingWarning(); 786 return 0; 787 } 788 789 const dfsan_label label = *__dfsan::shadow_for(addr); 790 if (!label) { 791 PrintNoTaintWarning(addr); 792 return 0; 793 } 794 795 InternalScopedString trace; 796 bool success = PrintOriginTraceToStr(addr, description, &trace); 797 798 if (!success) { 799 PrintInvalidOriginWarning(label, addr); 800 return 0; 801 } 802 803 if (out_buf_size) { 804 internal_strncpy(out_buf, trace.data(), out_buf_size - 1); 805 out_buf[out_buf_size - 1] = '\0'; 806 } 807 808 return trace.length(); 809 } 810 811 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_id_trace( 812 dfsan_origin origin) { 813 if (!dfsan_get_track_origins()) { 814 PrintNoOriginTrackingWarning(); 815 return; 816 } 817 Origin o = Origin::FromRawId(origin); 818 819 InternalScopedString trace; 820 bool success = PrintOriginTraceFramesToStr(o, &trace); 821 822 if (trace.length()) 823 Printf("%s", trace.data()); 824 825 if (!success) 826 PrintInvalidOriginIdWarning(origin); 827 } 828 829 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr dfsan_sprint_origin_id_trace( 830 dfsan_origin origin, char *out_buf, uptr out_buf_size) { 831 CHECK(out_buf); 832 833 if (!dfsan_get_track_origins()) { 834 PrintNoOriginTrackingWarning(); 835 return 0; 836 } 837 Origin o = Origin::FromRawId(origin); 838 839 InternalScopedString trace; 840 bool success = PrintOriginTraceFramesToStr(o, &trace); 841 842 if (!success) { 843 PrintInvalidOriginIdWarning(origin); 844 return 0; 845 } 846 847 if (out_buf_size) { 848 internal_strncpy(out_buf, trace.data(), out_buf_size - 1); 849 out_buf[out_buf_size - 1] = '\0'; 850 } 851 852 return trace.length(); 853 } 854 855 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin 856 dfsan_get_init_origin(const void *addr) { 857 if (!dfsan_get_track_origins()) 858 return 0; 859 860 const dfsan_label label = *__dfsan::shadow_for(addr); 861 if (!label) 862 return 0; 863 864 const dfsan_origin origin = *__dfsan::origin_for(addr); 865 866 Origin o = Origin::FromRawId(origin); 867 dfsan_origin origin_id = o.raw_id(); 868 while (o.isChainedOrigin()) { 869 StackTrace stack; 870 origin_id = o.raw_id(); 871 o = o.getNextChainedOrigin(&stack); 872 } 873 return origin_id; 874 } 875 876 void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp, 877 void *context, 878 bool request_fast, 879 u32 max_depth) { 880 using namespace __dfsan; 881 DFsanThread *t = GetCurrentThread(); 882 if (!t || !StackTrace::WillUseFastUnwind(request_fast)) { 883 return Unwind(max_depth, pc, bp, context, 0, 0, false); 884 } 885 Unwind(max_depth, pc, bp, nullptr, t->stack_top(), t->stack_bottom(), true); 886 } 887 888 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_print_stack_trace() { 889 GET_CALLER_PC_BP; 890 GET_STORE_STACK_TRACE_PC_BP(pc, bp); 891 stack.Print(); 892 } 893 894 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr 895 dfsan_sprint_stack_trace(char *out_buf, uptr out_buf_size) { 896 CHECK(out_buf); 897 GET_CALLER_PC_BP; 898 GET_STORE_STACK_TRACE_PC_BP(pc, bp); 899 return stack.PrintTo(out_buf, out_buf_size); 900 } 901 902 void Flags::SetDefaults() { 903 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; 904 #include "dfsan_flags.inc" 905 #undef DFSAN_FLAG 906 } 907 908 static void RegisterDfsanFlags(FlagParser *parser, Flags *f) { 909 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) \ 910 RegisterFlag(parser, #Name, Description, &f->Name); 911 #include "dfsan_flags.inc" 912 #undef DFSAN_FLAG 913 } 914 915 static void InitializeFlags() { 916 SetCommonFlagsDefaults(); 917 { 918 CommonFlags cf; 919 cf.CopyFrom(*common_flags()); 920 cf.intercept_tls_get_addr = true; 921 OverrideCommonFlags(cf); 922 } 923 flags().SetDefaults(); 924 925 FlagParser parser; 926 RegisterCommonFlags(&parser); 927 RegisterDfsanFlags(&parser, &flags()); 928 parser.ParseStringFromEnv("DFSAN_OPTIONS"); 929 InitializeCommonFlags(); 930 if (Verbosity()) ReportUnrecognizedFlags(); 931 if (common_flags()->help) parser.PrintFlagDescriptions(); 932 } 933 934 SANITIZER_INTERFACE_ATTRIBUTE 935 void dfsan_clear_arg_tls(uptr offset, uptr size) { 936 internal_memset((void *)((uptr)__dfsan_arg_tls + offset), 0, size); 937 } 938 939 SANITIZER_INTERFACE_ATTRIBUTE 940 void dfsan_clear_thread_local_state() { 941 internal_memset(__dfsan_arg_tls, 0, sizeof(__dfsan_arg_tls)); 942 internal_memset(__dfsan_retval_tls, 0, sizeof(__dfsan_retval_tls)); 943 944 if (dfsan_get_track_origins()) { 945 internal_memset(__dfsan_arg_origin_tls, 0, sizeof(__dfsan_arg_origin_tls)); 946 internal_memset(&__dfsan_retval_origin_tls, 0, 947 sizeof(__dfsan_retval_origin_tls)); 948 } 949 } 950 951 SANITIZER_INTERFACE_ATTRIBUTE 952 void dfsan_set_arg_tls(uptr offset, dfsan_label label) { 953 // 2x to match ShadowTLSAlignment. 954 // ShadowTLSAlignment should probably be changed. 955 // TODO: Consider reducing ShadowTLSAlignment to 1. 956 // Aligning to 2 bytes is probably a remnant of fast16 mode. 957 ((dfsan_label *)__dfsan_arg_tls)[offset * 2] = label; 958 } 959 960 SANITIZER_INTERFACE_ATTRIBUTE 961 void dfsan_set_arg_origin_tls(uptr offset, dfsan_origin o) { 962 __dfsan_arg_origin_tls[offset] = o; 963 } 964 965 extern "C" void dfsan_flush() { 966 const uptr maxVirtualAddress = GetMaxUserVirtualAddress(); 967 for (unsigned i = 0; i < kMemoryLayoutSize; ++i) { 968 uptr start = kMemoryLayout[i].start; 969 uptr end = kMemoryLayout[i].end; 970 uptr size = end - start; 971 MappingDesc::Type type = kMemoryLayout[i].type; 972 973 if (type != MappingDesc::SHADOW && type != MappingDesc::ORIGIN) 974 continue; 975 976 // Check if the segment should be mapped based on platform constraints. 977 if (start >= maxVirtualAddress) 978 continue; 979 980 if (!MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name)) { 981 Printf("FATAL: DataFlowSanitizer: failed to clear memory region\n"); 982 Die(); 983 } 984 } 985 __dfsan::labels_in_signal_conditional = 0; 986 } 987 988 // TODO: CheckMemoryLayoutSanity is based on msan. 989 // Consider refactoring these into a shared implementation. 990 static void CheckMemoryLayoutSanity() { 991 uptr prev_end = 0; 992 for (unsigned i = 0; i < kMemoryLayoutSize; ++i) { 993 uptr start = kMemoryLayout[i].start; 994 uptr end = kMemoryLayout[i].end; 995 MappingDesc::Type type = kMemoryLayout[i].type; 996 CHECK_LT(start, end); 997 CHECK_EQ(prev_end, start); 998 CHECK(addr_is_type(start, type)); 999 CHECK(addr_is_type((start + end) / 2, type)); 1000 CHECK(addr_is_type(end - 1, type)); 1001 if (type == MappingDesc::APP) { 1002 uptr addr = start; 1003 CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr))); 1004 CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr))); 1005 CHECK_EQ(MEM_TO_ORIGIN(addr), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr))); 1006 1007 addr = (start + end) / 2; 1008 CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr))); 1009 CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr))); 1010 CHECK_EQ(MEM_TO_ORIGIN(addr), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr))); 1011 1012 addr = end - 1; 1013 CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr))); 1014 CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr))); 1015 CHECK_EQ(MEM_TO_ORIGIN(addr), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr))); 1016 } 1017 prev_end = end; 1018 } 1019 } 1020 1021 // TODO: CheckMemoryRangeAvailability is based on msan. 1022 // Consider refactoring these into a shared implementation. 1023 static bool CheckMemoryRangeAvailability(uptr beg, uptr size) { 1024 if (size > 0) { 1025 uptr end = beg + size - 1; 1026 if (!MemoryRangeIsAvailable(beg, end)) { 1027 Printf("FATAL: Memory range %p - %p is not available.\n", beg, end); 1028 return false; 1029 } 1030 } 1031 return true; 1032 } 1033 1034 // TODO: ProtectMemoryRange is based on msan. 1035 // Consider refactoring these into a shared implementation. 1036 static bool ProtectMemoryRange(uptr beg, uptr size, const char *name) { 1037 if (size > 0) { 1038 void *addr = MmapFixedNoAccess(beg, size, name); 1039 if (beg == 0 && addr) { 1040 // Depending on the kernel configuration, we may not be able to protect 1041 // the page at address zero. 1042 uptr gap = 16 * GetPageSizeCached(); 1043 beg += gap; 1044 size -= gap; 1045 addr = MmapFixedNoAccess(beg, size, name); 1046 } 1047 if ((uptr)addr != beg) { 1048 uptr end = beg + size - 1; 1049 Printf("FATAL: Cannot protect memory range %p - %p (%s).\n", beg, end, 1050 name); 1051 return false; 1052 } 1053 } 1054 return true; 1055 } 1056 1057 // TODO: InitShadow is based on msan. 1058 // Consider refactoring these into a shared implementation. 1059 bool InitShadow(bool init_origins) { 1060 // Let user know mapping parameters first. 1061 VPrintf(1, "dfsan_init %p\n", (void *)&__dfsan::dfsan_init); 1062 for (unsigned i = 0; i < kMemoryLayoutSize; ++i) 1063 VPrintf(1, "%s: %zx - %zx\n", kMemoryLayout[i].name, kMemoryLayout[i].start, 1064 kMemoryLayout[i].end - 1); 1065 1066 CheckMemoryLayoutSanity(); 1067 1068 if (!MEM_IS_APP(&__dfsan::dfsan_init)) { 1069 Printf("FATAL: Code %p is out of application range. Non-PIE build?\n", 1070 (uptr)&__dfsan::dfsan_init); 1071 return false; 1072 } 1073 1074 const uptr maxVirtualAddress = GetMaxUserVirtualAddress(); 1075 1076 for (unsigned i = 0; i < kMemoryLayoutSize; ++i) { 1077 uptr start = kMemoryLayout[i].start; 1078 uptr end = kMemoryLayout[i].end; 1079 uptr size = end - start; 1080 MappingDesc::Type type = kMemoryLayout[i].type; 1081 1082 // Check if the segment should be mapped based on platform constraints. 1083 if (start >= maxVirtualAddress) 1084 continue; 1085 1086 bool map = type == MappingDesc::SHADOW || 1087 (init_origins && type == MappingDesc::ORIGIN); 1088 bool protect = type == MappingDesc::INVALID || 1089 (!init_origins && type == MappingDesc::ORIGIN); 1090 CHECK(!(map && protect)); 1091 if (!map && !protect) 1092 CHECK(type == MappingDesc::APP); 1093 if (map) { 1094 if (!CheckMemoryRangeAvailability(start, size)) 1095 return false; 1096 if (!MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name)) 1097 return false; 1098 if (common_flags()->use_madv_dontdump) 1099 DontDumpShadowMemory(start, size); 1100 } 1101 if (protect) { 1102 if (!CheckMemoryRangeAvailability(start, size)) 1103 return false; 1104 if (!ProtectMemoryRange(start, size, kMemoryLayout[i].name)) 1105 return false; 1106 } 1107 } 1108 1109 return true; 1110 } 1111 1112 static void DFsanInit(int argc, char **argv, char **envp) { 1113 CHECK(!dfsan_init_is_running); 1114 if (dfsan_inited) 1115 return; 1116 dfsan_init_is_running = true; 1117 SanitizerToolName = "DataflowSanitizer"; 1118 1119 AvoidCVE_2016_2143(); 1120 1121 InitializeFlags(); 1122 1123 CheckASLR(); 1124 1125 InitShadow(dfsan_get_track_origins()); 1126 1127 initialize_interceptors(); 1128 1129 // Set up threads 1130 DFsanTSDInit(DFsanTSDDtor); 1131 1132 dfsan_allocator_init(); 1133 1134 DFsanThread *main_thread = DFsanThread::Create(nullptr, nullptr); 1135 SetCurrentThread(main_thread); 1136 main_thread->Init(); 1137 1138 dfsan_init_is_running = false; 1139 dfsan_inited = true; 1140 } 1141 1142 namespace __dfsan { 1143 1144 void dfsan_init() { DFsanInit(0, nullptr, nullptr); } 1145 1146 } // namespace __dfsan 1147 1148 #if SANITIZER_CAN_USE_PREINIT_ARRAY 1149 __attribute__((section(".preinit_array"), 1150 used)) static void (*dfsan_init_ptr)(int, char **, 1151 char **) = DFsanInit; 1152 #endif 1153