xref: /freebsd/contrib/llvm-project/compiler-rt/include/sanitizer/dfsan_interface.h (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
10b57cec5SDimitry Andric //===-- dfsan_interface.h -------------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file is a part of DataFlowSanitizer.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric // Public interface header.
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric #ifndef DFSAN_INTERFACE_H
140b57cec5SDimitry Andric #define DFSAN_INTERFACE_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include <stddef.h>
170b57cec5SDimitry Andric #include <stdint.h>
180b57cec5SDimitry Andric #include <sanitizer/common_interface_defs.h>
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric #ifdef __cplusplus
210b57cec5SDimitry Andric extern "C" {
220b57cec5SDimitry Andric #endif
230b57cec5SDimitry Andric 
/// Taint label associated with application data; an unsigned 8-bit value.
24fe6060f1SDimitry Andric typedef uint8_t dfsan_label;
/// Origin id, as returned by dfsan_get_origin(); an unsigned 32-bit value.
25fe6060f1SDimitry Andric typedef uint32_t dfsan_origin;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric /// Signature of the callback argument to dfsan_set_write_callback().
280b57cec5SDimitry Andric typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count);
290b57cec5SDimitry Andric 
30fe6060f1SDimitry Andric /// Computes the union of \c l1 and \c l2, resulting in a union label.
310b57cec5SDimitry Andric dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric /// Sets the label for each address in [addr,addr+size) to \c label.
340b57cec5SDimitry Andric void dfsan_set_label(dfsan_label label, void *addr, size_t size);
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric /// Sets the label for each address in [addr,addr+size) to the union of the
370b57cec5SDimitry Andric /// current label for that address and \c label.
380b57cec5SDimitry Andric void dfsan_add_label(dfsan_label label, void *addr, size_t size);
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric /// Retrieves the label associated with the given data.
410b57cec5SDimitry Andric ///
420b57cec5SDimitry Andric /// The type of 'data' is arbitrary.  The function accepts a value of any type,
430b57cec5SDimitry Andric /// which can be truncated or extended (implicitly or explicitly) as necessary.
440b57cec5SDimitry Andric /// The truncation/extension operations will preserve the label of the original
450b57cec5SDimitry Andric /// value.
460b57cec5SDimitry Andric dfsan_label dfsan_get_label(long data);
470b57cec5SDimitry Andric 
48fe6060f1SDimitry Andric /// Retrieves the immediate origin associated with the given data. The returned
49fe6060f1SDimitry Andric /// origin may point to another origin.
50fe6060f1SDimitry Andric ///
51fe6060f1SDimitry Andric /// The type of 'data' is arbitrary.
52fe6060f1SDimitry Andric dfsan_origin dfsan_get_origin(long data);
53fe6060f1SDimitry Andric 
540b57cec5SDimitry Andric /// Retrieves the label associated with the data at the given address.
550b57cec5SDimitry Andric dfsan_label dfsan_read_label(const void *addr, size_t size);
560b57cec5SDimitry Andric 
57*0eae32dcSDimitry Andric /// Returns the origin associated with the first tainted byte among the
58*0eae32dcSDimitry Andric /// \c size bytes starting at the address \c addr.
59*0eae32dcSDimitry Andric dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, size_t size);
60*0eae32dcSDimitry Andric 
610b57cec5SDimitry Andric /// Returns whether the given label \c label contains the label \c elem.
620b57cec5SDimitry Andric int dfsan_has_label(dfsan_label label, dfsan_label elem);
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric /// Flushes the DFSan shadow, i.e. forgets about all labels currently associated
65e8d8bef9SDimitry Andric /// with the application memory.  Use this call to restart taint tracking within
66e8d8bef9SDimitry Andric /// the same process.
67e8d8bef9SDimitry Andric ///
68e8d8bef9SDimitry Andric /// Note: If another thread is working with tainted data during the flush, that
69e8d8bef9SDimitry Andric /// taint could still be written to shadow after the flush.
700b57cec5SDimitry Andric void dfsan_flush(void);
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric /// Sets a callback to be invoked on calls to write().  The callback is invoked
730b57cec5SDimitry Andric /// before the write is done.  The write is not guaranteed to succeed when the
740b57cec5SDimitry Andric /// callback executes.  Pass in NULL to remove any callback.
750b57cec5SDimitry Andric void dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback);
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric /// Interceptor hooks.
780b57cec5SDimitry Andric /// Whenever one of dfsan's custom functions is called, the corresponding
790b57cec5SDimitry Andric /// hook is called if it is non-zero. The hooks should be defined by the user.
800b57cec5SDimitry Andric /// The primary use case is taint-guided fuzzing, where the fuzzer
810b57cec5SDimitry Andric /// needs to see the parameters of the function and the labels.
820b57cec5SDimitry Andric /// FIXME: implement more hooks.
830b57cec5SDimitry Andric void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
840b57cec5SDimitry Andric                             size_t n, dfsan_label s1_label,
850b57cec5SDimitry Andric                             dfsan_label s2_label, dfsan_label n_label);
860b57cec5SDimitry Andric void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
870b57cec5SDimitry Andric                              size_t n, dfsan_label s1_label,
880b57cec5SDimitry Andric                              dfsan_label s2_label, dfsan_label n_label);
89fe6060f1SDimitry Andric 
90fe6060f1SDimitry Andric /// Prints the origin trace of the label at the address \p addr to stderr. It
91fe6060f1SDimitry Andric /// also prints \p description at the beginning of the trace. If origin tracking
92fe6060f1SDimitry Andric /// is not on, or the address is not labeled, it prints nothing.
93fe6060f1SDimitry Andric void dfsan_print_origin_trace(const void *addr, const char *description);
94*0eae32dcSDimitry Andric /// As above, but use an origin id from dfsan_get_origin() instead of address.
95*0eae32dcSDimitry Andric /// Does not include header line with taint label and address information.
96*0eae32dcSDimitry Andric void dfsan_print_origin_id_trace(dfsan_origin origin);
97fe6060f1SDimitry Andric 
98fe6060f1SDimitry Andric /// Prints the origin trace of the label at the address \p addr to a
99fe6060f1SDimitry Andric /// pre-allocated output buffer. If origin tracking is not on, or the address is
100fe6060f1SDimitry Andric /// not labeled, it prints nothing.
101fe6060f1SDimitry Andric ///
102fe6060f1SDimitry Andric /// Typical usage:
103fe6060f1SDimitry Andric /// \code
104fe6060f1SDimitry Andric ///   char kDescription[] = "...";
105fe6060f1SDimitry Andric ///   char buf[1024];
106fe6060f1SDimitry Andric ///   dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf));
107fe6060f1SDimitry Andric /// \endcode
108fe6060f1SDimitry Andric ///
109fe6060f1SDimitry Andric /// Typical usage that handles truncation:
110fe6060f1SDimitry Andric /// \code
111fe6060f1SDimitry Andric ///   char buf[1024];
112fe6060f1SDimitry Andric ///   size_t len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf));
113fe6060f1SDimitry Andric ///
114fe6060f1SDimitry Andric ///   if (len < sizeof(buf)) {
115fe6060f1SDimitry Andric ///     ProcessOriginTrace(buf);
116fe6060f1SDimitry Andric ///   } else {
117fe6060f1SDimitry Andric ///     char *tmpbuf = new char[len + 1];
118fe6060f1SDimitry Andric ///     dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1);
119fe6060f1SDimitry Andric ///     ProcessOriginTrace(tmpbuf);
120fe6060f1SDimitry Andric ///     delete[] tmpbuf;
121fe6060f1SDimitry Andric ///   }
122fe6060f1SDimitry Andric /// \endcode
123fe6060f1SDimitry Andric ///
124fe6060f1SDimitry Andric /// \param addr The tainted memory address whose origin we are printing.
125fe6060f1SDimitry Andric /// \param description A description printed at the beginning of the trace.
126fe6060f1SDimitry Andric /// \param [out] out_buf The output buffer to write the results to.
127fe6060f1SDimitry Andric /// \param out_buf_size The size of \p out_buf.
128fe6060f1SDimitry Andric ///
129fe6060f1SDimitry Andric /// \returns The number of characters that should have been written to
130fe6060f1SDimitry Andric /// \p out_buf (not including the trailing null byte '\0'). Thus, the string is
131fe6060f1SDimitry Andric /// truncated iff the return value is not less than \p out_buf_size.
132fe6060f1SDimitry Andric size_t dfsan_sprint_origin_trace(const void *addr, const char *description,
133fe6060f1SDimitry Andric                                  char *out_buf, size_t out_buf_size);
134*0eae32dcSDimitry Andric /// As above, but use an origin id from dfsan_get_origin() instead of address.
135*0eae32dcSDimitry Andric /// Does not include header line with taint label and address information.
136*0eae32dcSDimitry Andric size_t dfsan_sprint_origin_id_trace(dfsan_origin origin, char *out_buf,
137*0eae32dcSDimitry Andric                                     size_t out_buf_size);
138fe6060f1SDimitry Andric 
139fe6060f1SDimitry Andric /// Prints the stack trace leading to this call to a pre-allocated output
140fe6060f1SDimitry Andric /// buffer.
141fe6060f1SDimitry Andric ///
142fe6060f1SDimitry Andric /// For usage examples, see dfsan_sprint_origin_trace.
143fe6060f1SDimitry Andric ///
144fe6060f1SDimitry Andric /// \param [out] out_buf The output buffer to write the results to.
145fe6060f1SDimitry Andric /// \param out_buf_size The size of \p out_buf.
146fe6060f1SDimitry Andric ///
147fe6060f1SDimitry Andric /// \returns The number of characters that should have been written to
148fe6060f1SDimitry Andric /// \p out_buf (not including the trailing null byte '\0'). Thus, the string is
149fe6060f1SDimitry Andric /// truncated iff the return value is not less than \p out_buf_size.
150fe6060f1SDimitry Andric size_t dfsan_sprint_stack_trace(char *out_buf, size_t out_buf_size);
151fe6060f1SDimitry Andric 
152fe6060f1SDimitry Andric /// Retrieves the very first origin associated with the data at the given
153fe6060f1SDimitry Andric /// address.
154fe6060f1SDimitry Andric dfsan_origin dfsan_get_init_origin(const void *addr);
155fe6060f1SDimitry Andric 
156fe6060f1SDimitry Andric /// Returns the value of -dfsan-track-origins.
157fe6060f1SDimitry Andric /// * 0: do not track origins.
158fe6060f1SDimitry Andric /// * 1: track origins at memory store operations.
159fe6060f1SDimitry Andric /// * 2: track origins at memory load and store operations.
160fe6060f1SDimitry Andric int dfsan_get_track_origins(void);
1610b57cec5SDimitry Andric #ifdef __cplusplus
1620b57cec5SDimitry Andric }  // extern "C"
1630b57cec5SDimitry Andric 
164349cc55cSDimitry Andric template <typename T> void dfsan_set_label(dfsan_label label, T &data) {
1650b57cec5SDimitry Andric   dfsan_set_label(label, (void *)&data, sizeof(T));
1660b57cec5SDimitry Andric }
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric #endif
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric #endif  // DFSAN_INTERFACE_H
171