1 //===-- dfsan_interface.h -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of DataFlowSanitizer. 10 // 11 // Public interface header. 12 //===----------------------------------------------------------------------===// 13 #ifndef DFSAN_INTERFACE_H 14 #define DFSAN_INTERFACE_H 15 16 #include <sanitizer/common_interface_defs.h> 17 #include <stddef.h> 18 #include <stdint.h> 19 20 #ifdef __cplusplus 21 extern "C" { 22 #endif 23 24 typedef uint8_t dfsan_label; 25 typedef uint32_t dfsan_origin; 26 27 /// Signature of the callback argument to dfsan_set_write_callback(). 28 typedef void(SANITIZER_CDECL *dfsan_write_callback_t)(int fd, const void *buf, 29 size_t count); 30 31 /// Signature of the callback argument to dfsan_set_conditional_callback(). 32 typedef void(SANITIZER_CDECL *dfsan_conditional_callback_t)( 33 dfsan_label label, dfsan_origin origin); 34 35 /// Signature of the callback argument to dfsan_set_reaches_function_callback(). 36 /// The description is intended to hold the name of the variable. 37 typedef void(SANITIZER_CDECL *dfsan_reaches_function_callback_t)( 38 dfsan_label label, dfsan_origin origin, const char *file, unsigned int line, 39 const char *function); 40 41 /// Computes the union of \c l1 and \c l2, resulting in a union label. 42 dfsan_label SANITIZER_CDECL dfsan_union(dfsan_label l1, dfsan_label l2); 43 44 /// Sets the label for each address in [addr,addr+size) to \c label. 45 void SANITIZER_CDECL dfsan_set_label(dfsan_label label, void *addr, 46 size_t size); 47 48 /// Sets the label for each address in [addr,addr+size) to the union of the 49 /// current label for that address and \c label. 50 void SANITIZER_CDECL dfsan_add_label(dfsan_label label, void *addr, 51 size_t size); 52 53 /// Retrieves the label associated with the given data. 54 /// 55 /// The type of 'data' is arbitrary. The function accepts a value of any type, 56 /// which can be truncated or extended (implicitly or explicitly) as necessary. 57 /// The truncation/extension operations will preserve the label of the original 58 /// value. 59 dfsan_label SANITIZER_CDECL dfsan_get_label(long data); 60 61 /// Retrieves the immediate origin associated with the given data. The returned 62 /// origin may point to another origin. 63 /// 64 /// The type of 'data' is arbitrary. 65 dfsan_origin SANITIZER_CDECL dfsan_get_origin(long data); 66 67 /// Retrieves the label associated with the data at the given address. 68 dfsan_label SANITIZER_CDECL dfsan_read_label(const void *addr, size_t size); 69 70 /// Return the origin associated with the first taint byte in the size bytes 71 /// from the address addr. 72 dfsan_origin SANITIZER_CDECL dfsan_read_origin_of_first_taint(const void *addr, 73 size_t size); 74 75 /// Returns whether the given label contains the label elem. 76 int SANITIZER_CDECL dfsan_has_label(dfsan_label label, dfsan_label elem); 77 78 /// Flushes the DFSan shadow, i.e. forgets about all labels currently associated 79 /// with the application memory. Use this call to start over the taint tracking 80 /// within the same process. 81 /// 82 /// Note: If another thread is working with tainted data during the flush, that 83 /// taint could still be written to shadow after the flush. 84 void SANITIZER_CDECL dfsan_flush(void); 85 86 /// Sets a callback to be invoked on calls to write(). The callback is invoked 87 /// before the write is done. The write is not guaranteed to succeed when the 88 /// callback executes. Pass in NULL to remove any callback. 89 void SANITIZER_CDECL 90 dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback); 91 92 /// Sets a callback to be invoked on any conditional expressions which have a 93 /// taint label set. This can be used to find where tainted data influences 94 /// the behavior of the program. 95 /// These callbacks will only be added when -dfsan-conditional-callbacks=true. 96 void SANITIZER_CDECL 97 dfsan_set_conditional_callback(dfsan_conditional_callback_t callback); 98 99 /// Conditional expressions occur during signal handlers. 100 /// Making callbacks that handle signals well is tricky, so when 101 /// -dfsan-conditional-callbacks=true, conditional expressions used in signal 102 /// handlers will add the labels they see into a global (bitwise-or together). 103 /// This function returns all label bits seen in signal handler conditions. 104 dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_conditional(); 105 106 /// Sets a callback to be invoked when tainted data reaches a function. 107 /// This could occur at function entry, or at a load instruction. 108 /// These callbacks will only be added if -dfsan-reaches-function-callbacks=1. 109 void SANITIZER_CDECL 110 dfsan_set_reaches_function_callback(dfsan_reaches_function_callback_t callback); 111 112 /// Making callbacks that handle signals well is tricky, so when 113 /// -dfsan-reaches-function-callbacks=true, functions reached in signal 114 /// handlers will add the labels they see into a global (bitwise-or together). 115 /// This function returns all label bits seen during signal handlers. 116 dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_reaches_function(); 117 118 /// Interceptor hooks. 119 /// Whenever a dfsan's custom function is called the corresponding 120 /// hook is called it non-zero. The hooks should be defined by the user. 121 /// The primary use case is taint-guided fuzzing, where the fuzzer 122 /// needs to see the parameters of the function and the labels. 123 /// FIXME: implement more hooks. 124 void SANITIZER_CDECL dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, 125 const void *s2, size_t n, 126 dfsan_label s1_label, 127 dfsan_label s2_label, 128 dfsan_label n_label); 129 void SANITIZER_CDECL dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, 130 const char *s2, size_t n, 131 dfsan_label s1_label, 132 dfsan_label s2_label, 133 dfsan_label n_label); 134 135 /// Prints the origin trace of the label at the address addr to stderr. It also 136 /// prints description at the beginning of the trace. If origin tracking is not 137 /// on, or the address is not labeled, it prints nothing. 138 void SANITIZER_CDECL dfsan_print_origin_trace(const void *addr, 139 const char *description); 140 /// As above, but use an origin id from dfsan_get_origin() instead of address. 141 /// Does not include header line with taint label and address information. 142 void SANITIZER_CDECL dfsan_print_origin_id_trace(dfsan_origin origin); 143 144 /// Prints the origin trace of the label at the address \p addr to a 145 /// pre-allocated output buffer. If origin tracking is not on, or the address is 146 /// not labeled, it prints nothing. 147 /// 148 /// Typical usage: 149 /// \code 150 /// char kDescription[] = "..."; 151 /// char buf[1024]; 152 /// dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf)); 153 /// \endcode 154 /// 155 /// Typical usage that handles truncation: 156 /// \code 157 /// char buf[1024]; 158 /// int len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf)); 159 /// 160 /// if (len < sizeof(buf)) { 161 /// ProcessOriginTrace(buf); 162 /// } else { 163 /// char *tmpbuf = new char[len + 1]; 164 /// dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1); 165 /// ProcessOriginTrace(tmpbuf); 166 /// delete[] tmpbuf; 167 /// } 168 /// \endcode 169 /// 170 /// \param addr The tainted memory address whose origin we are printing. 171 /// \param description A description printed at the beginning of the trace. 172 /// \param [out] out_buf The output buffer to write the results to. 173 /// \param out_buf_size The size of \p out_buf. 174 /// 175 /// \returns The number of symbols that should have been written to \p out_buf 176 /// (not including trailing null byte '\0'). Thus, the string is truncated iff 177 /// return value is not less than \p out_buf_size. 178 size_t SANITIZER_CDECL dfsan_sprint_origin_trace(const void *addr, 179 const char *description, 180 char *out_buf, 181 size_t out_buf_size); 182 /// As above, but use an origin id from dfsan_get_origin() instead of address. 183 /// Does not include header line with taint label and address information. 184 size_t SANITIZER_CDECL dfsan_sprint_origin_id_trace(dfsan_origin origin, 185 char *out_buf, 186 size_t out_buf_size); 187 188 /// Prints the stack trace leading to this call to a pre-allocated output 189 /// buffer. 190 /// 191 /// For usage examples, see dfsan_sprint_origin_trace. 192 /// 193 /// \param [out] out_buf The output buffer to write the results to. 194 /// \param out_buf_size The size of \p out_buf. 195 /// 196 /// \returns The number of symbols that should have been written to \p out_buf 197 /// (not including trailing null byte '\0'). Thus, the string is truncated iff 198 /// return value is not less than \p out_buf_size. 199 size_t SANITIZER_CDECL dfsan_sprint_stack_trace(char *out_buf, 200 size_t out_buf_size); 201 202 /// Retrieves the very first origin associated with the data at the given 203 /// address. 204 dfsan_origin SANITIZER_CDECL dfsan_get_init_origin(const void *addr); 205 206 /// Returns the value of -dfsan-track-origins. 207 /// * 0: do not track origins. 208 /// * 1: track origins at memory store operations. 209 /// * 2: track origins at memory load and store operations. 210 int SANITIZER_CDECL dfsan_get_track_origins(void); 211 #ifdef __cplusplus 212 } // extern "C" 213 214 template <typename T> void dfsan_set_label(dfsan_label label, T &data) { 215 dfsan_set_label(label, (void *)&data, sizeof(T)); 216 } 217 218 #endif 219 220 #endif // DFSAN_INTERFACE_H 221