xref: /freebsd/contrib/llvm-project/compiler-rt/lib/dfsan/dfsan.cpp (revision 79ac3c12a714bcd3f2354c52d948aed9575c46d6)
1 //===-- dfsan.cpp ---------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of DataFlowSanitizer.
10 //
11 // DataFlowSanitizer runtime.  This file defines the public interface to
12 // DataFlowSanitizer as well as the definition of certain runtime functions
13 // called automatically by the compiler (specifically the instrumentation pass
14 // in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp).
15 //
16 // The public interface is defined in include/sanitizer/dfsan_interface.h whose
17 // functions are prefixed dfsan_ while the compiler interface functions are
18 // prefixed __dfsan_.
19 //===----------------------------------------------------------------------===//
20 
21 #include "dfsan/dfsan.h"
22 
23 #include "sanitizer_common/sanitizer_atomic.h"
24 #include "sanitizer_common/sanitizer_common.h"
25 #include "sanitizer_common/sanitizer_file.h"
26 #include "sanitizer_common/sanitizer_flag_parser.h"
27 #include "sanitizer_common/sanitizer_flags.h"
28 #include "sanitizer_common/sanitizer_internal_defs.h"
29 #include "sanitizer_common/sanitizer_libc.h"
30 #include "sanitizer_common/sanitizer_stacktrace.h"
31 
32 using namespace __dfsan;
33 
34 typedef atomic_uint16_t atomic_dfsan_label;
35 static const dfsan_label kInitializingLabel = -1;
36 
37 static const uptr kNumLabels = 1 << (sizeof(dfsan_label) * 8);
38 
39 static atomic_dfsan_label __dfsan_last_label;
40 static dfsan_label_info __dfsan_label_info[kNumLabels];
41 
42 Flags __dfsan::flags_data;
43 
44 // The size of TLS variables. These constants must be kept in sync with the ones
45 // in DataFlowSanitizer.cpp.
46 static const int kDFsanArgTlsSize = 800;
47 static const int kDFsanRetvalTlsSize = 800;
48 
49 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64
50     __dfsan_retval_tls[kDFsanRetvalTlsSize / sizeof(u64)];
51 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64
52     __dfsan_arg_tls[kDFsanArgTlsSize / sizeof(u64)];
53 
54 SANITIZER_INTERFACE_ATTRIBUTE uptr __dfsan_shadow_ptr_mask;
55 
56 // On Linux/x86_64, memory is laid out as follows:
57 //
58 // +--------------------+ 0x800000000000 (top of memory)
59 // | application memory |
60 // +--------------------+ 0x700000008000 (kAppAddr)
61 // |                    |
62 // |       unused       |
63 // |                    |
64 // +--------------------+ 0x200200000000 (kUnusedAddr)
65 // |    union table     |
66 // +--------------------+ 0x200000000000 (kUnionTableAddr)
67 // |   shadow memory    |
68 // +--------------------+ 0x000000010000 (kShadowAddr)
69 // | reserved by kernel |
70 // +--------------------+ 0x000000000000
71 //
72 // To derive a shadow memory address from an application memory address,
73 // bits 44-46 are cleared to bring the address into the range
74 // [0x000000008000,0x100000000000).  Then the address is shifted left by 1 to
75 // account for the double byte representation of shadow labels and move the
76 // address into the shadow memory range.  See the function shadow_for below.
77 
78 // On Linux/MIPS64, memory is laid out as follows:
79 //
80 // +--------------------+ 0x10000000000 (top of memory)
81 // | application memory |
82 // +--------------------+ 0xF000008000 (kAppAddr)
83 // |                    |
84 // |       unused       |
85 // |                    |
86 // +--------------------+ 0x2200000000 (kUnusedAddr)
87 // |    union table     |
88 // +--------------------+ 0x2000000000 (kUnionTableAddr)
89 // |   shadow memory    |
90 // +--------------------+ 0x0000010000 (kShadowAddr)
91 // | reserved by kernel |
92 // +--------------------+ 0x0000000000
93 
// On Linux/AArch64 (39-bit VMA), memory is laid out as follows:
95 //
96 // +--------------------+ 0x8000000000 (top of memory)
97 // | application memory |
98 // +--------------------+ 0x7000008000 (kAppAddr)
99 // |                    |
100 // |       unused       |
101 // |                    |
102 // +--------------------+ 0x1200000000 (kUnusedAddr)
103 // |    union table     |
104 // +--------------------+ 0x1000000000 (kUnionTableAddr)
105 // |   shadow memory    |
106 // +--------------------+ 0x0000010000 (kShadowAddr)
107 // | reserved by kernel |
108 // +--------------------+ 0x0000000000
109 
// On Linux/AArch64 (42-bit VMA), memory is laid out as follows:
111 //
112 // +--------------------+ 0x40000000000 (top of memory)
113 // | application memory |
114 // +--------------------+ 0x3ff00008000 (kAppAddr)
115 // |                    |
116 // |       unused       |
117 // |                    |
// +--------------------+ 0x8200000000 (kUnusedAddr)
119 // |    union table     |
120 // +--------------------+ 0x8000000000 (kUnionTableAddr)
121 // |   shadow memory    |
122 // +--------------------+ 0x0000010000 (kShadowAddr)
123 // | reserved by kernel |
124 // +--------------------+ 0x0000000000
125 
// On Linux/AArch64 (48-bit VMA), memory is laid out as follows:
127 //
128 // +--------------------+ 0x1000000000000 (top of memory)
129 // | application memory |
130 // +--------------------+ 0xffff00008000 (kAppAddr)
131 // |       unused       |
132 // +--------------------+ 0xaaaab0000000 (top of PIE address)
133 // | application PIE    |
// +--------------------+ 0xaaaaa0000000 (bottom of PIE address)
135 // |                    |
136 // |       unused       |
137 // |                    |
// +--------------------+ 0x8200000000 (kUnusedAddr)
139 // |    union table     |
140 // +--------------------+ 0x8000000000 (kUnionTableAddr)
141 // |   shadow memory    |
142 // +--------------------+ 0x0000010000 (kShadowAddr)
143 // | reserved by kernel |
144 // +--------------------+ 0x0000000000
145 
146 typedef atomic_dfsan_label dfsan_union_table_t[kNumLabels][kNumLabels];
147 
148 #ifdef DFSAN_RUNTIME_VMA
149 // Runtime detected VMA size.
150 int __dfsan::vmaSize;
151 #endif
152 
153 static uptr UnusedAddr() {
154   return UnionTableAddr() + sizeof(dfsan_union_table_t);
155 }
156 
157 static atomic_dfsan_label *union_table(dfsan_label l1, dfsan_label l2) {
158   return &(*(dfsan_union_table_t *) UnionTableAddr())[l1][l2];
159 }
160 
161 // Checks we do not run out of labels.
162 static void dfsan_check_label(dfsan_label label) {
163   if (label == kInitializingLabel) {
164     Report("FATAL: DataFlowSanitizer: out of labels\n");
165     Die();
166   }
167 }
168 
169 // Resolves the union of two unequal labels.  Nonequality is a precondition for
170 // this function (the instrumentation pass inlines the equality test).
171 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
172 dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) {
173   DCHECK_NE(l1, l2);
174 
175   if (l1 == 0)
176     return l2;
177   if (l2 == 0)
178     return l1;
179 
180   // If no labels have been created, yet l1 and l2 are non-zero, we are using
181   // fast16labels mode.
182   if (atomic_load(&__dfsan_last_label, memory_order_relaxed) == 0)
183     return l1 | l2;
184 
185   if (l1 > l2)
186     Swap(l1, l2);
187 
188   atomic_dfsan_label *table_ent = union_table(l1, l2);
189   // We need to deal with the case where two threads concurrently request
190   // a union of the same pair of labels.  If the table entry is uninitialized,
191   // (i.e. 0) use a compare-exchange to set the entry to kInitializingLabel
192   // (i.e. -1) to mark that we are initializing it.
193   dfsan_label label = 0;
194   if (atomic_compare_exchange_strong(table_ent, &label, kInitializingLabel,
195                                      memory_order_acquire)) {
196     // Check whether l2 subsumes l1.  We don't need to check whether l1
197     // subsumes l2 because we are guaranteed here that l1 < l2, and (at least
198     // in the cases we are interested in) a label may only subsume labels
199     // created earlier (i.e. with a lower numerical value).
200     if (__dfsan_label_info[l2].l1 == l1 ||
201         __dfsan_label_info[l2].l2 == l1) {
202       label = l2;
203     } else {
204       label =
205         atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
206       dfsan_check_label(label);
207       __dfsan_label_info[label].l1 = l1;
208       __dfsan_label_info[label].l2 = l2;
209     }
210     atomic_store(table_ent, label, memory_order_release);
211   } else if (label == kInitializingLabel) {
212     // Another thread is initializing the entry.  Wait until it is finished.
213     do {
214       internal_sched_yield();
215       label = atomic_load(table_ent, memory_order_acquire);
216     } while (label == kInitializingLabel);
217   }
218   return label;
219 }
220 
221 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
222 dfsan_label __dfsan_union_load(const dfsan_label *ls, uptr n) {
223   dfsan_label label = ls[0];
224   for (uptr i = 1; i != n; ++i) {
225     dfsan_label next_label = ls[i];
226     if (label != next_label)
227       label = __dfsan_union(label, next_label);
228   }
229   return label;
230 }
231 
232 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
233 dfsan_label __dfsan_union_load_fast16labels(const dfsan_label *ls, uptr n) {
234   dfsan_label label = ls[0];
235   for (uptr i = 1; i != n; ++i)
236     label |= ls[i];
237   return label;
238 }
239 
240 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
241 void __dfsan_unimplemented(char *fname) {
242   if (flags().warn_unimplemented)
243     Report("WARNING: DataFlowSanitizer: call to uninstrumented function %s\n",
244            fname);
245 }
246 
247 // Use '-mllvm -dfsan-debug-nonzero-labels' and break on this function
248 // to try to figure out where labels are being introduced in a nominally
249 // label-free program.
250 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_nonzero_label() {
251   if (flags().warn_nonzero_labels)
252     Report("WARNING: DataFlowSanitizer: saw nonzero label\n");
253 }
254 
255 // Indirect call to an uninstrumented vararg function. We don't have a way of
256 // handling these at the moment.
257 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
258 __dfsan_vararg_wrapper(const char *fname) {
259   Report("FATAL: DataFlowSanitizer: unsupported indirect call to vararg "
260          "function %s\n", fname);
261   Die();
262 }
263 
264 // Like __dfsan_union, but for use from the client or custom functions.  Hence
265 // the equality comparison is done here before calling __dfsan_union.
266 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
267 dfsan_union(dfsan_label l1, dfsan_label l2) {
268   if (l1 == l2)
269     return l1;
270   return __dfsan_union(l1, l2);
271 }
272 
273 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
274 dfsan_label dfsan_create_label(const char *desc, void *userdata) {
275   dfsan_label label =
276       atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
277   dfsan_check_label(label);
278   __dfsan_label_info[label].l1 = __dfsan_label_info[label].l2 = 0;
279   __dfsan_label_info[label].desc = desc;
280   __dfsan_label_info[label].userdata = userdata;
281   return label;
282 }
283 
284 static void WriteShadowIfDifferent(dfsan_label label, uptr shadow_addr,
285                                    uptr size) {
286   dfsan_label *labelp = (dfsan_label *)shadow_addr;
287   for (; size != 0; --size, ++labelp) {
288     // Don't write the label if it is already the value we need it to be.
289     // In a program where most addresses are not labeled, it is common that
290     // a page of shadow memory is entirely zeroed.  The Linux copy-on-write
291     // implementation will share all of the zeroed pages, making a copy of a
292     // page when any value is written.  The un-sharing will happen even if
293     // the value written does not change the value in memory.  Avoiding the
294     // write when both |label| and |*labelp| are zero dramatically reduces
295     // the amount of real memory used by large programs.
296     if (label == *labelp)
297       continue;
298 
299     *labelp = label;
300   }
301 }
302 
303 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_set_label(
304     dfsan_label label, void *addr, uptr size) {
305   const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr);
306 
307   if (0 != label) {
308     WriteShadowIfDifferent(label, beg_shadow_addr, size);
309     return;
310   }
311 
312   // If label is 0, releases the pages within the shadow address range, and sets
313   // the shadow addresses not on the pages to be 0.
314   const void *end_addr = (void *)((uptr)addr + size);
315   const uptr end_shadow_addr = (uptr)__dfsan::shadow_for(end_addr);
316   const uptr page_size = GetPageSizeCached();
317   const uptr beg_aligned = RoundUpTo(beg_shadow_addr, page_size);
318   const uptr end_aligned = RoundDownTo(end_shadow_addr, page_size);
319 
320   // dfsan_set_label can be called from the following cases
321   // 1) mapped ranges by new/delete and malloc/free. This case has shadow memory
322   // size > 100k, and happens less frequently.
323   // 2) zero-filling internal data structures by utility libraries. This case
324   // has shadow memory size < 32k, and happens more often.
325   // Set kNumPagesThreshold to be 8 to avoid releasing small pages.
326   const int kNumPagesThreshold = 8;
327   if (beg_aligned + kNumPagesThreshold * page_size >= end_aligned)
328     return WriteShadowIfDifferent(label, beg_shadow_addr, size);
329 
330   WriteShadowIfDifferent(label, beg_shadow_addr, beg_aligned - beg_shadow_addr);
331   ReleaseMemoryPagesToOS(beg_aligned, end_aligned);
332   WriteShadowIfDifferent(label, end_aligned, end_shadow_addr - end_aligned);
333 }
334 
335 SANITIZER_INTERFACE_ATTRIBUTE
336 void dfsan_set_label(dfsan_label label, void *addr, uptr size) {
337   __dfsan_set_label(label, addr, size);
338 }
339 
340 SANITIZER_INTERFACE_ATTRIBUTE
341 void dfsan_add_label(dfsan_label label, void *addr, uptr size) {
342   for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp)
343     if (*labelp != label)
344       *labelp = __dfsan_union(*labelp, label);
345 }
346 
347 // Unlike the other dfsan interface functions the behavior of this function
348 // depends on the label of one of its arguments.  Hence it is implemented as a
349 // custom function.
350 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
351 __dfsw_dfsan_get_label(long data, dfsan_label data_label,
352                        dfsan_label *ret_label) {
353   *ret_label = 0;
354   return data_label;
355 }
356 
357 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
358 dfsan_read_label(const void *addr, uptr size) {
359   if (size == 0)
360     return 0;
361   return __dfsan_union_load(shadow_for(addr), size);
362 }
363 
364 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
365 const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) {
366   return &__dfsan_label_info[label];
367 }
368 
369 extern "C" SANITIZER_INTERFACE_ATTRIBUTE int
370 dfsan_has_label(dfsan_label label, dfsan_label elem) {
371   if (label == elem)
372     return true;
373   const dfsan_label_info *info = dfsan_get_label_info(label);
374   if (info->l1 != 0) {
375     return dfsan_has_label(info->l1, elem) || dfsan_has_label(info->l2, elem);
376   } else {
377     return false;
378   }
379 }
380 
381 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
382 dfsan_has_label_with_desc(dfsan_label label, const char *desc) {
383   const dfsan_label_info *info = dfsan_get_label_info(label);
384   if (info->l1 != 0) {
385     return dfsan_has_label_with_desc(info->l1, desc) ||
386            dfsan_has_label_with_desc(info->l2, desc);
387   } else {
388     return internal_strcmp(desc, info->desc) == 0;
389   }
390 }
391 
392 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
393 dfsan_get_label_count(void) {
394   dfsan_label max_label_allocated =
395       atomic_load(&__dfsan_last_label, memory_order_relaxed);
396 
397   return static_cast<uptr>(max_label_allocated);
398 }
399 
400 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
401 dfsan_dump_labels(int fd) {
402   dfsan_label last_label =
403       atomic_load(&__dfsan_last_label, memory_order_relaxed);
404   for (uptr l = 1; l <= last_label; ++l) {
405     char buf[64];
406     internal_snprintf(buf, sizeof(buf), "%u %u %u ", l,
407                       __dfsan_label_info[l].l1, __dfsan_label_info[l].l2);
408     WriteToFile(fd, buf, internal_strlen(buf));
409     if (__dfsan_label_info[l].l1 == 0 && __dfsan_label_info[l].desc) {
410       WriteToFile(fd, __dfsan_label_info[l].desc,
411                   internal_strlen(__dfsan_label_info[l].desc));
412     }
413     WriteToFile(fd, "\n", 1);
414   }
415 }
416 
// Declares a BufferedStackTrace named `stack` in the enclosing scope and
// unwinds it from (pc, bp), honouring the fast_unwind_on_fatal common flag.
// Expands to two statements, so it must not be used as the unbraced body of
// an if/else or loop.
#define GET_FATAL_STACK_TRACE_PC_BP(pc, bp)                                  \
  BufferedStackTrace stack;                                                  \
  stack.Unwind(pc, bp, nullptr, common_flags()->fast_unwind_on_fatal);
420 
421 void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp,
422                                                  void *context,
423                                                  bool request_fast,
424                                                  u32 max_depth) {
425   Unwind(max_depth, pc, bp, context, 0, 0, false);
426 }
427 
428 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_print_stack_trace() {
429   GET_FATAL_STACK_TRACE_PC_BP(StackTrace::GetCurrentPc(), GET_CURRENT_FRAME());
430   stack.Print();
431 }
432 
433 void Flags::SetDefaults() {
434 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
435 #include "dfsan_flags.inc"
436 #undef DFSAN_FLAG
437 }
438 
439 static void RegisterDfsanFlags(FlagParser *parser, Flags *f) {
440 #define DFSAN_FLAG(Type, Name, DefaultValue, Description) \
441   RegisterFlag(parser, #Name, Description, &f->Name);
442 #include "dfsan_flags.inc"
443 #undef DFSAN_FLAG
444 }
445 
446 static void InitializeFlags() {
447   SetCommonFlagsDefaults();
448   flags().SetDefaults();
449 
450   FlagParser parser;
451   RegisterCommonFlags(&parser);
452   RegisterDfsanFlags(&parser, &flags());
453   parser.ParseStringFromEnv("DFSAN_OPTIONS");
454   InitializeCommonFlags();
455   if (Verbosity()) ReportUnrecognizedFlags();
456   if (common_flags()->help) parser.PrintFlagDescriptions();
457 }
458 
459 static void InitializePlatformEarly() {
460   AvoidCVE_2016_2143();
461 #ifdef DFSAN_RUNTIME_VMA
462   __dfsan::vmaSize =
463     (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);
464   if (__dfsan::vmaSize == 39 || __dfsan::vmaSize == 42 ||
465       __dfsan::vmaSize == 48) {
466     __dfsan_shadow_ptr_mask = ShadowMask();
467   } else {
468     Printf("FATAL: DataFlowSanitizer: unsupported VMA range\n");
469     Printf("FATAL: Found %d - Supported 39, 42, and 48\n", __dfsan::vmaSize);
470     Die();
471   }
472 #endif
473 }
474 
475 static void dfsan_fini() {
476   if (internal_strcmp(flags().dump_labels_at_exit, "") != 0) {
477     fd_t fd = OpenFile(flags().dump_labels_at_exit, WrOnly);
478     if (fd == kInvalidFd) {
479       Report("WARNING: DataFlowSanitizer: unable to open output file %s\n",
480              flags().dump_labels_at_exit);
481       return;
482     }
483 
484     Report("INFO: DataFlowSanitizer: dumping labels to %s\n",
485            flags().dump_labels_at_exit);
486     dfsan_dump_labels(fd);
487     CloseFile(fd);
488   }
489 }
490 
491 extern "C" void dfsan_flush() {
492   if (!MmapFixedNoReserve(ShadowAddr(), UnusedAddr() - ShadowAddr()))
493     Die();
494 }
495 
496 static void dfsan_init(int argc, char **argv, char **envp) {
497   InitializeFlags();
498 
499   ::InitializePlatformEarly();
500 
501   if (!MmapFixedSuperNoReserve(ShadowAddr(), UnusedAddr() - ShadowAddr()))
502     Die();
503   if (common_flags()->use_madv_dontdump)
504     DontDumpShadowMemory(ShadowAddr(), UnusedAddr() - ShadowAddr());
505 
506   // Protect the region of memory we don't use, to preserve the one-to-one
507   // mapping from application to shadow memory. But if ASLR is disabled, Linux
508   // will load our executable in the middle of our unused region. This mostly
509   // works so long as the program doesn't use too much memory. We support this
510   // case by disabling memory protection when ASLR is disabled.
511   uptr init_addr = (uptr)&dfsan_init;
512   if (!(init_addr >= UnusedAddr() && init_addr < AppAddr()))
513     MmapFixedNoAccess(UnusedAddr(), AppAddr() - UnusedAddr());
514 
515   InitializeInterceptors();
516 
517   // Register the fini callback to run when the program terminates successfully
518   // or it is killed by the runtime.
519   Atexit(dfsan_fini);
520   AddDieCallback(dfsan_fini);
521 
522   __dfsan_label_info[kInitializingLabel].desc = "<init label>";
523 }
524 
#if SANITIZER_CAN_USE_PREINIT_ARRAY
// Places a pointer to dfsan_init in the .preinit_array section.  The `used`
// attribute keeps the otherwise unreferenced variable from being discarded.
__attribute__((section(".preinit_array"), used))
static void (*dfsan_init_ptr)(int, char **, char **) = dfsan_init;
#endif
529