10b57cec5SDimitry Andric //===-- dfsan_interface.h -------------------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file is a part of DataFlowSanitizer. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric // Public interface header. 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric #ifndef DFSAN_INTERFACE_H 140b57cec5SDimitry Andric #define DFSAN_INTERFACE_H 150b57cec5SDimitry Andric 16*5f757f3fSDimitry Andric #include <sanitizer/common_interface_defs.h> 170b57cec5SDimitry Andric #include <stddef.h> 180b57cec5SDimitry Andric #include <stdint.h> 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric #ifdef __cplusplus 210b57cec5SDimitry Andric extern "C" { 220b57cec5SDimitry Andric #endif 230b57cec5SDimitry Andric 24fe6060f1SDimitry Andric typedef uint8_t dfsan_label; 25fe6060f1SDimitry Andric typedef uint32_t dfsan_origin; 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric /// Signature of the callback argument to dfsan_set_write_callback(). 28*5f757f3fSDimitry Andric typedef void(SANITIZER_CDECL *dfsan_write_callback_t)(int fd, const void *buf, 29*5f757f3fSDimitry Andric size_t count); 300b57cec5SDimitry Andric 3104eeddc0SDimitry Andric /// Signature of the callback argument to dfsan_set_conditional_callback(). 32*5f757f3fSDimitry Andric typedef void(SANITIZER_CDECL *dfsan_conditional_callback_t)( 33*5f757f3fSDimitry Andric dfsan_label label, dfsan_origin origin); 3404eeddc0SDimitry Andric 35bdd1243dSDimitry Andric /// Signature of the callback argument to dfsan_set_reaches_function_callback(). 36bdd1243dSDimitry Andric /// The description is intended to hold the name of the variable. 37*5f757f3fSDimitry Andric typedef void(SANITIZER_CDECL *dfsan_reaches_function_callback_t)( 38*5f757f3fSDimitry Andric dfsan_label label, dfsan_origin origin, const char *file, unsigned int line, 39bdd1243dSDimitry Andric const char *function); 40bdd1243dSDimitry Andric 41fe6060f1SDimitry Andric /// Computes the union of \c l1 and \c l2, resulting in a union label. 42*5f757f3fSDimitry Andric dfsan_label SANITIZER_CDECL dfsan_union(dfsan_label l1, dfsan_label l2); 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric /// Sets the label for each address in [addr,addr+size) to \c label. 45*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_set_label(dfsan_label label, void *addr, 46*5f757f3fSDimitry Andric size_t size); 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric /// Sets the label for each address in [addr,addr+size) to the union of the 490b57cec5SDimitry Andric /// current label for that address and \c label. 50*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_add_label(dfsan_label label, void *addr, 51*5f757f3fSDimitry Andric size_t size); 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric /// Retrieves the label associated with the given data. 540b57cec5SDimitry Andric /// 550b57cec5SDimitry Andric /// The type of 'data' is arbitrary. The function accepts a value of any type, 560b57cec5SDimitry Andric /// which can be truncated or extended (implicitly or explicitly) as necessary. 570b57cec5SDimitry Andric /// The truncation/extension operations will preserve the label of the original 580b57cec5SDimitry Andric /// value. 59*5f757f3fSDimitry Andric dfsan_label SANITIZER_CDECL dfsan_get_label(long data); 600b57cec5SDimitry Andric 61fe6060f1SDimitry Andric /// Retrieves the immediate origin associated with the given data. The returned 62fe6060f1SDimitry Andric /// origin may point to another origin. 63fe6060f1SDimitry Andric /// 64fe6060f1SDimitry Andric /// The type of 'data' is arbitrary. 65*5f757f3fSDimitry Andric dfsan_origin SANITIZER_CDECL dfsan_get_origin(long data); 66fe6060f1SDimitry Andric 670b57cec5SDimitry Andric /// Retrieves the label associated with the data at the given address. 68*5f757f3fSDimitry Andric dfsan_label SANITIZER_CDECL dfsan_read_label(const void *addr, size_t size); 690b57cec5SDimitry Andric 700eae32dcSDimitry Andric /// Return the origin associated with the first taint byte in the size bytes 710eae32dcSDimitry Andric /// from the address addr. 72*5f757f3fSDimitry Andric dfsan_origin SANITIZER_CDECL dfsan_read_origin_of_first_taint(const void *addr, 73*5f757f3fSDimitry Andric size_t size); 740eae32dcSDimitry Andric 75*5f757f3fSDimitry Andric /// Returns whether the given label contains the label elem. 76*5f757f3fSDimitry Andric int SANITIZER_CDECL dfsan_has_label(dfsan_label label, dfsan_label elem); 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric /// Flushes the DFSan shadow, i.e. forgets about all labels currently associated 79e8d8bef9SDimitry Andric /// with the application memory. Use this call to start over the taint tracking 80e8d8bef9SDimitry Andric /// within the same process. 81e8d8bef9SDimitry Andric /// 82e8d8bef9SDimitry Andric /// Note: If another thread is working with tainted data during the flush, that 83e8d8bef9SDimitry Andric /// taint could still be written to shadow after the flush. 84*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_flush(void); 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric /// Sets a callback to be invoked on calls to write(). The callback is invoked 870b57cec5SDimitry Andric /// before the write is done. The write is not guaranteed to succeed when the 880b57cec5SDimitry Andric /// callback executes. Pass in NULL to remove any callback. 89*5f757f3fSDimitry Andric void SANITIZER_CDECL 90*5f757f3fSDimitry Andric dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback); 910b57cec5SDimitry Andric 9204eeddc0SDimitry Andric /// Sets a callback to be invoked on any conditional expressions which have a 9304eeddc0SDimitry Andric /// taint label set. This can be used to find where tainted data influences 9404eeddc0SDimitry Andric /// the behavior of the program. 9504eeddc0SDimitry Andric /// These callbacks will only be added when -dfsan-conditional-callbacks=true. 96*5f757f3fSDimitry Andric void SANITIZER_CDECL 97*5f757f3fSDimitry Andric dfsan_set_conditional_callback(dfsan_conditional_callback_t callback); 9804eeddc0SDimitry Andric 9904eeddc0SDimitry Andric /// Conditional expressions occur during signal handlers. 10004eeddc0SDimitry Andric /// Making callbacks that handle signals well is tricky, so when 10104eeddc0SDimitry Andric /// -dfsan-conditional-callbacks=true, conditional expressions used in signal 10204eeddc0SDimitry Andric /// handlers will add the labels they see into a global (bitwise-or together). 10304eeddc0SDimitry Andric /// This function returns all label bits seen in signal handler conditions. 104*5f757f3fSDimitry Andric dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_conditional(); 10504eeddc0SDimitry Andric 106bdd1243dSDimitry Andric /// Sets a callback to be invoked when tainted data reaches a function. 107bdd1243dSDimitry Andric /// This could occur at function entry, or at a load instruction. 108bdd1243dSDimitry Andric /// These callbacks will only be added if -dfsan-reaches-function-callbacks=1. 109*5f757f3fSDimitry Andric void SANITIZER_CDECL 110*5f757f3fSDimitry Andric dfsan_set_reaches_function_callback(dfsan_reaches_function_callback_t callback); 111bdd1243dSDimitry Andric 112bdd1243dSDimitry Andric /// Making callbacks that handle signals well is tricky, so when 113bdd1243dSDimitry Andric /// -dfsan-reaches-function-callbacks=true, functions reached in signal 114bdd1243dSDimitry Andric /// handlers will add the labels they see into a global (bitwise-or together). 115bdd1243dSDimitry Andric /// This function returns all label bits seen during signal handlers. 116*5f757f3fSDimitry Andric dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_reaches_function(); 117bdd1243dSDimitry Andric 1180b57cec5SDimitry Andric /// Interceptor hooks. 1190b57cec5SDimitry Andric /// Whenever a dfsan's custom function is called the corresponding 1200b57cec5SDimitry Andric /// hook is called it non-zero. The hooks should be defined by the user. 1210b57cec5SDimitry Andric /// The primary use case is taint-guided fuzzing, where the fuzzer 1220b57cec5SDimitry Andric /// needs to see the parameters of the function and the labels. 1230b57cec5SDimitry Andric /// FIXME: implement more hooks. 124*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, 125*5f757f3fSDimitry Andric const void *s2, size_t n, 126*5f757f3fSDimitry Andric dfsan_label s1_label, 127*5f757f3fSDimitry Andric dfsan_label s2_label, 128*5f757f3fSDimitry Andric dfsan_label n_label); 129*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, 130*5f757f3fSDimitry Andric const char *s2, size_t n, 131*5f757f3fSDimitry Andric dfsan_label s1_label, 132*5f757f3fSDimitry Andric dfsan_label s2_label, 133*5f757f3fSDimitry Andric dfsan_label n_label); 134fe6060f1SDimitry Andric 135fe6060f1SDimitry Andric /// Prints the origin trace of the label at the address addr to stderr. It also 136fe6060f1SDimitry Andric /// prints description at the beginning of the trace. If origin tracking is not 137fe6060f1SDimitry Andric /// on, or the address is not labeled, it prints nothing. 138*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_print_origin_trace(const void *addr, 139*5f757f3fSDimitry Andric const char *description); 1400eae32dcSDimitry Andric /// As above, but use an origin id from dfsan_get_origin() instead of address. 1410eae32dcSDimitry Andric /// Does not include header line with taint label and address information. 142*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_print_origin_id_trace(dfsan_origin origin); 143fe6060f1SDimitry Andric 144fe6060f1SDimitry Andric /// Prints the origin trace of the label at the address \p addr to a 145fe6060f1SDimitry Andric /// pre-allocated output buffer. If origin tracking is not on, or the address is 146fe6060f1SDimitry Andric /// not labeled, it prints nothing. 147fe6060f1SDimitry Andric /// 148fe6060f1SDimitry Andric /// Typical usage: 149fe6060f1SDimitry Andric /// \code 150fe6060f1SDimitry Andric /// char kDescription[] = "..."; 151fe6060f1SDimitry Andric /// char buf[1024]; 152fe6060f1SDimitry Andric /// dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf)); 153fe6060f1SDimitry Andric /// \endcode 154fe6060f1SDimitry Andric /// 155fe6060f1SDimitry Andric /// Typical usage that handles truncation: 156fe6060f1SDimitry Andric /// \code 157fe6060f1SDimitry Andric /// char buf[1024]; 158fe6060f1SDimitry Andric /// int len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf)); 159fe6060f1SDimitry Andric /// 160fe6060f1SDimitry Andric /// if (len < sizeof(buf)) { 161fe6060f1SDimitry Andric /// ProcessOriginTrace(buf); 162fe6060f1SDimitry Andric /// } else { 163fe6060f1SDimitry Andric /// char *tmpbuf = new char[len + 1]; 164fe6060f1SDimitry Andric /// dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1); 165fe6060f1SDimitry Andric /// ProcessOriginTrace(tmpbuf); 166fe6060f1SDimitry Andric /// delete[] tmpbuf; 167fe6060f1SDimitry Andric /// } 168fe6060f1SDimitry Andric /// \endcode 169fe6060f1SDimitry Andric /// 170fe6060f1SDimitry Andric /// \param addr The tainted memory address whose origin we are printing. 171fe6060f1SDimitry Andric /// \param description A description printed at the beginning of the trace. 172fe6060f1SDimitry Andric /// \param [out] out_buf The output buffer to write the results to. 173fe6060f1SDimitry Andric /// \param out_buf_size The size of \p out_buf. 174fe6060f1SDimitry Andric /// 175fe6060f1SDimitry Andric /// \returns The number of symbols that should have been written to \p out_buf 176fe6060f1SDimitry Andric /// (not including trailing null byte '\0'). Thus, the string is truncated iff 177fe6060f1SDimitry Andric /// return value is not less than \p out_buf_size. 178*5f757f3fSDimitry Andric size_t SANITIZER_CDECL dfsan_sprint_origin_trace(const void *addr, 179*5f757f3fSDimitry Andric const char *description, 180*5f757f3fSDimitry Andric char *out_buf, 181*5f757f3fSDimitry Andric size_t out_buf_size); 1820eae32dcSDimitry Andric /// As above, but use an origin id from dfsan_get_origin() instead of address. 1830eae32dcSDimitry Andric /// Does not include header line with taint label and address information. 184*5f757f3fSDimitry Andric size_t SANITIZER_CDECL dfsan_sprint_origin_id_trace(dfsan_origin origin, 185*5f757f3fSDimitry Andric char *out_buf, 1860eae32dcSDimitry Andric size_t out_buf_size); 187fe6060f1SDimitry Andric 188fe6060f1SDimitry Andric /// Prints the stack trace leading to this call to a pre-allocated output 189fe6060f1SDimitry Andric /// buffer. 190fe6060f1SDimitry Andric /// 191fe6060f1SDimitry Andric /// For usage examples, see dfsan_sprint_origin_trace. 192fe6060f1SDimitry Andric /// 193fe6060f1SDimitry Andric /// \param [out] out_buf The output buffer to write the results to. 194fe6060f1SDimitry Andric /// \param out_buf_size The size of \p out_buf. 195fe6060f1SDimitry Andric /// 196fe6060f1SDimitry Andric /// \returns The number of symbols that should have been written to \p out_buf 197fe6060f1SDimitry Andric /// (not including trailing null byte '\0'). Thus, the string is truncated iff 198fe6060f1SDimitry Andric /// return value is not less than \p out_buf_size. 199*5f757f3fSDimitry Andric size_t SANITIZER_CDECL dfsan_sprint_stack_trace(char *out_buf, 200*5f757f3fSDimitry Andric size_t out_buf_size); 201fe6060f1SDimitry Andric 202fe6060f1SDimitry Andric /// Retrieves the very first origin associated with the data at the given 203fe6060f1SDimitry Andric /// address. 204*5f757f3fSDimitry Andric dfsan_origin SANITIZER_CDECL dfsan_get_init_origin(const void *addr); 205fe6060f1SDimitry Andric 206fe6060f1SDimitry Andric /// Returns the value of -dfsan-track-origins. 207fe6060f1SDimitry Andric /// * 0: do not track origins. 208fe6060f1SDimitry Andric /// * 1: track origins at memory store operations. 209fe6060f1SDimitry Andric /// * 2: track origins at memory load and store operations. 210*5f757f3fSDimitry Andric int SANITIZER_CDECL dfsan_get_track_origins(void); 2110b57cec5SDimitry Andric #ifdef __cplusplus 2120b57cec5SDimitry Andric } // extern "C" 2130b57cec5SDimitry Andric 214349cc55cSDimitry Andric template <typename T> void dfsan_set_label(dfsan_label label, T &data) { 2150b57cec5SDimitry Andric dfsan_set_label(label, (void *)&data, sizeof(T)); 2160b57cec5SDimitry Andric } 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric #endif 2190b57cec5SDimitry Andric 2200b57cec5SDimitry Andric #endif // DFSAN_INTERFACE_H 221