//===-- dfsan_interface.h -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of DataFlowSanitizer.
//
// Public interface header.
//===----------------------------------------------------------------------===//
#ifndef DFSAN_INTERFACE_H
#define DFSAN_INTERFACE_H

#include <stddef.h>
#include <stdint.h>
#include <sanitizer/common_interface_defs.h>

#ifdef __cplusplus
extern "C" {
#endif

/// Type of a taint label.
typedef uint8_t dfsan_label;
/// Type of an origin id.
typedef uint32_t dfsan_origin;

/// Signature of the callback argument to dfsan_set_write_callback().
typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count);

/// Computes the union of \c l1 and \c l2, resulting in a union label.
dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);

/// Sets the label for each address in [addr,addr+size) to \c label.
void dfsan_set_label(dfsan_label label, void *addr, size_t size);

/// Sets the label for each address in [addr,addr+size) to the union of the
/// current label for that address and \c label.
void dfsan_add_label(dfsan_label label, void *addr, size_t size);

/// Retrieves the label associated with the given data.
///
/// The type of 'data' is arbitrary.  The function accepts a value of any type,
/// which can be truncated or extended (implicitly or explicitly) as necessary.
/// The truncation/extension operations will preserve the label of the original
/// value.
dfsan_label dfsan_get_label(long data);

/// Retrieves the immediate origin associated with the given data. The returned
/// origin may point to another origin.
///
/// The type of 'data' is arbitrary.
dfsan_origin dfsan_get_origin(long data);

/// Retrieves the label associated with the data at the given address.
dfsan_label dfsan_read_label(const void *addr, size_t size);

/// Returns the origin associated with the first tainted byte in the \c size
/// bytes starting at the address \c addr.
dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, size_t size);

/// Returns whether the given label \c label contains the label \c elem.
int dfsan_has_label(dfsan_label label, dfsan_label elem);

/// Flushes the DFSan shadow, i.e. forgets about all labels currently associated
/// with the application memory.  Use this call to start over the taint tracking
/// within the same process.
///
/// Note: If another thread is working with tainted data during the flush, that
/// taint could still be written to shadow after the flush.
void dfsan_flush(void);

/// Sets a callback to be invoked on calls to write().  The callback is invoked
/// before the write is done.  The write is not guaranteed to succeed when the
/// callback executes.  Pass in NULL to remove any callback.
void dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback);

/// Interceptor hooks.
/// Whenever a dfsan's custom function is called the corresponding
/// hook is called if non-zero. The hooks should be defined by the user.
/// The primary use case is taint-guided fuzzing, where the fuzzer
/// needs to see the parameters of the function and the labels.
/// FIXME: implement more hooks.
void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
                            size_t n, dfsan_label s1_label,
                            dfsan_label s2_label, dfsan_label n_label);
void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
                             size_t n, dfsan_label s1_label,
                             dfsan_label s2_label, dfsan_label n_label);

/// Prints the origin trace of the label at the address addr to stderr. It also
/// prints description at the beginning of the trace. If origin tracking is not
/// on, or the address is not labeled, it prints nothing.
void dfsan_print_origin_trace(const void *addr, const char *description);

/// Like dfsan_print_origin_trace(), but uses an origin id from
/// dfsan_get_origin() instead of an address.
/// Does not include the header line with taint label and address information.
void dfsan_print_origin_id_trace(dfsan_origin origin);

/// Prints the origin trace of the label at the address \p addr to a
/// pre-allocated output buffer. If origin tracking is not on, or the address is
/// not labeled, it prints nothing.
///
/// Typical usage:
/// \code
///   char kDescription[] = "...";
///   char buf[1024];
///   dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf));
/// \endcode
///
/// Typical usage that handles truncation:
/// \code
///   char buf[1024];
///   size_t len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf));
///
///   if (len < sizeof(buf)) {
///     ProcessOriginTrace(buf);
///   } else {
///     char *tmpbuf = new char[len + 1];
///     dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1);
///     ProcessOriginTrace(tmpbuf);
///     delete[] tmpbuf;
///   }
/// \endcode
///
/// \param addr The tainted memory address whose origin we are printing.
/// \param description A description printed at the beginning of the trace.
/// \param [out] out_buf The output buffer to write the results to.
/// \param out_buf_size The size of \p out_buf.
///
/// \returns The number of symbols that should have been written to \p out_buf
/// (not including trailing null byte '\0'). Thus, the string is truncated iff
/// return value is not less than \p out_buf_size.
size_t dfsan_sprint_origin_trace(const void *addr, const char *description,
                                 char *out_buf, size_t out_buf_size);

/// Like dfsan_sprint_origin_trace(), but uses an origin id from
/// dfsan_get_origin() instead of an address.
/// Does not include the header line with taint label and address information.
size_t dfsan_sprint_origin_id_trace(dfsan_origin origin, char *out_buf,
                                    size_t out_buf_size);

/// Prints the stack trace leading to this call to a pre-allocated output
/// buffer.
///
/// For usage examples, see dfsan_sprint_origin_trace.
///
/// \param [out] out_buf The output buffer to write the results to.
/// \param out_buf_size The size of \p out_buf.
///
/// \returns The number of symbols that should have been written to \p out_buf
/// (not including trailing null byte '\0'). Thus, the string is truncated iff
/// return value is not less than \p out_buf_size.
size_t dfsan_sprint_stack_trace(char *out_buf, size_t out_buf_size);

/// Retrieves the very first origin associated with the data at the given
/// address.
dfsan_origin dfsan_get_init_origin(const void *addr);

/// Returns the value of -dfsan-track-origins.
/// * 0: do not track origins.
/// * 1: track origins at memory store operations.
/// * 2: track origins at memory load and store operations.
int dfsan_get_track_origins(void);

#ifdef __cplusplus
} // extern "C"

/// C++ convenience overload: applies \c label to the object \c data by
/// forwarding to dfsan_set_label(label, &data, sizeof(T)).
template <typename T> void dfsan_set_label(dfsan_label label, T &data) {
  dfsan_set_label(label, (void *)&data, sizeof(T));
}

#endif

#endif // DFSAN_INTERFACE_H