xref: /freebsd/contrib/llvm-project/compiler-rt/include/sanitizer/dfsan_interface.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
10b57cec5SDimitry Andric //===-- dfsan_interface.h -------------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file is a part of DataFlowSanitizer.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric // Public interface header.
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric #ifndef DFSAN_INTERFACE_H
140b57cec5SDimitry Andric #define DFSAN_INTERFACE_H
150b57cec5SDimitry Andric 
16*5f757f3fSDimitry Andric #include <sanitizer/common_interface_defs.h>
170b57cec5SDimitry Andric #include <stddef.h>
180b57cec5SDimitry Andric #include <stdint.h>
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric #ifdef __cplusplus
210b57cec5SDimitry Andric extern "C" {
220b57cec5SDimitry Andric #endif
230b57cec5SDimitry Andric 
/// Taint label attached to application data; an 8-bit unsigned value.
typedef uint8_t dfsan_label;
/// Identifier of a taint origin; a 32-bit unsigned value.
typedef uint32_t dfsan_origin;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric /// Signature of the callback argument to dfsan_set_write_callback().
28*5f757f3fSDimitry Andric typedef void(SANITIZER_CDECL *dfsan_write_callback_t)(int fd, const void *buf,
29*5f757f3fSDimitry Andric                                                       size_t count);
300b57cec5SDimitry Andric 
3104eeddc0SDimitry Andric /// Signature of the callback argument to dfsan_set_conditional_callback().
32*5f757f3fSDimitry Andric typedef void(SANITIZER_CDECL *dfsan_conditional_callback_t)(
33*5f757f3fSDimitry Andric     dfsan_label label, dfsan_origin origin);
3404eeddc0SDimitry Andric 
35bdd1243dSDimitry Andric /// Signature of the callback argument to dfsan_set_reaches_function_callback().
36bdd1243dSDimitry Andric /// The description is intended to hold the name of the variable.
37*5f757f3fSDimitry Andric typedef void(SANITIZER_CDECL *dfsan_reaches_function_callback_t)(
38*5f757f3fSDimitry Andric     dfsan_label label, dfsan_origin origin, const char *file, unsigned int line,
39bdd1243dSDimitry Andric     const char *function);
40bdd1243dSDimitry Andric 
41fe6060f1SDimitry Andric /// Computes the union of \c l1 and \c l2, resulting in a union label.
42*5f757f3fSDimitry Andric dfsan_label SANITIZER_CDECL dfsan_union(dfsan_label l1, dfsan_label l2);
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric /// Sets the label for each address in [addr,addr+size) to \c label.
45*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_set_label(dfsan_label label, void *addr,
46*5f757f3fSDimitry Andric                                      size_t size);
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric /// Sets the label for each address in [addr,addr+size) to the union of the
490b57cec5SDimitry Andric /// current label for that address and \c label.
50*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_add_label(dfsan_label label, void *addr,
51*5f757f3fSDimitry Andric                                      size_t size);
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric /// Retrieves the label associated with the given data.
540b57cec5SDimitry Andric ///
550b57cec5SDimitry Andric /// The type of 'data' is arbitrary.  The function accepts a value of any type,
560b57cec5SDimitry Andric /// which can be truncated or extended (implicitly or explicitly) as necessary.
570b57cec5SDimitry Andric /// The truncation/extension operations will preserve the label of the original
580b57cec5SDimitry Andric /// value.
59*5f757f3fSDimitry Andric dfsan_label SANITIZER_CDECL dfsan_get_label(long data);
600b57cec5SDimitry Andric 
61fe6060f1SDimitry Andric /// Retrieves the immediate origin associated with the given data. The returned
62fe6060f1SDimitry Andric /// origin may point to another origin.
63fe6060f1SDimitry Andric ///
64fe6060f1SDimitry Andric /// The type of 'data' is arbitrary.
65*5f757f3fSDimitry Andric dfsan_origin SANITIZER_CDECL dfsan_get_origin(long data);
66fe6060f1SDimitry Andric 
670b57cec5SDimitry Andric /// Retrieves the label associated with the data at the given address.
68*5f757f3fSDimitry Andric dfsan_label SANITIZER_CDECL dfsan_read_label(const void *addr, size_t size);
690b57cec5SDimitry Andric 
700eae32dcSDimitry Andric /// Return the origin associated with the first taint byte in the size bytes
710eae32dcSDimitry Andric /// from the address addr.
72*5f757f3fSDimitry Andric dfsan_origin SANITIZER_CDECL dfsan_read_origin_of_first_taint(const void *addr,
73*5f757f3fSDimitry Andric                                                               size_t size);
740eae32dcSDimitry Andric 
75*5f757f3fSDimitry Andric /// Returns whether the given label contains the label elem.
76*5f757f3fSDimitry Andric int SANITIZER_CDECL dfsan_has_label(dfsan_label label, dfsan_label elem);
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric /// Flushes the DFSan shadow, i.e. forgets about all labels currently associated
79e8d8bef9SDimitry Andric /// with the application memory.  Use this call to start over the taint tracking
80e8d8bef9SDimitry Andric /// within the same process.
81e8d8bef9SDimitry Andric ///
82e8d8bef9SDimitry Andric /// Note: If another thread is working with tainted data during the flush, that
83e8d8bef9SDimitry Andric /// taint could still be written to shadow after the flush.
84*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_flush(void);
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric /// Sets a callback to be invoked on calls to write().  The callback is invoked
870b57cec5SDimitry Andric /// before the write is done.  The write is not guaranteed to succeed when the
880b57cec5SDimitry Andric /// callback executes.  Pass in NULL to remove any callback.
89*5f757f3fSDimitry Andric void SANITIZER_CDECL
90*5f757f3fSDimitry Andric dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback);
910b57cec5SDimitry Andric 
9204eeddc0SDimitry Andric /// Sets a callback to be invoked on any conditional expressions which have a
9304eeddc0SDimitry Andric /// taint label set. This can be used to find where tainted data influences
9404eeddc0SDimitry Andric /// the behavior of the program.
9504eeddc0SDimitry Andric /// These callbacks will only be added when -dfsan-conditional-callbacks=true.
96*5f757f3fSDimitry Andric void SANITIZER_CDECL
97*5f757f3fSDimitry Andric dfsan_set_conditional_callback(dfsan_conditional_callback_t callback);
9804eeddc0SDimitry Andric 
9904eeddc0SDimitry Andric /// Conditional expressions occur during signal handlers.
10004eeddc0SDimitry Andric /// Making callbacks that handle signals well is tricky, so when
10104eeddc0SDimitry Andric /// -dfsan-conditional-callbacks=true, conditional expressions used in signal
10204eeddc0SDimitry Andric /// handlers will add the labels they see into a global (bitwise-or together).
10304eeddc0SDimitry Andric /// This function returns all label bits seen in signal handler conditions.
104*5f757f3fSDimitry Andric dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_conditional();
10504eeddc0SDimitry Andric 
106bdd1243dSDimitry Andric /// Sets a callback to be invoked when tainted data reaches a function.
107bdd1243dSDimitry Andric /// This could occur at function entry, or at a load instruction.
108bdd1243dSDimitry Andric /// These callbacks will only be added if -dfsan-reaches-function-callbacks=1.
109*5f757f3fSDimitry Andric void SANITIZER_CDECL
110*5f757f3fSDimitry Andric dfsan_set_reaches_function_callback(dfsan_reaches_function_callback_t callback);
111bdd1243dSDimitry Andric 
112bdd1243dSDimitry Andric /// Making callbacks that handle signals well is tricky, so when
113bdd1243dSDimitry Andric /// -dfsan-reaches-function-callbacks=true, functions reached in signal
114bdd1243dSDimitry Andric /// handlers will add the labels they see into a global (bitwise-or together).
115bdd1243dSDimitry Andric /// This function returns all label bits seen during signal handlers.
116*5f757f3fSDimitry Andric dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_reaches_function();
117bdd1243dSDimitry Andric 
1180b57cec5SDimitry Andric /// Interceptor hooks.
1190b57cec5SDimitry Andric /// Whenever a dfsan's custom function is called the corresponding
1200b57cec5SDimitry Andric /// hook is called it non-zero. The hooks should be defined by the user.
1210b57cec5SDimitry Andric /// The primary use case is taint-guided fuzzing, where the fuzzer
1220b57cec5SDimitry Andric /// needs to see the parameters of the function and the labels.
1230b57cec5SDimitry Andric /// FIXME: implement more hooks.
124*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_weak_hook_memcmp(void *caller_pc, const void *s1,
125*5f757f3fSDimitry Andric                                             const void *s2, size_t n,
126*5f757f3fSDimitry Andric                                             dfsan_label s1_label,
127*5f757f3fSDimitry Andric                                             dfsan_label s2_label,
128*5f757f3fSDimitry Andric                                             dfsan_label n_label);
129*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_weak_hook_strncmp(void *caller_pc, const char *s1,
130*5f757f3fSDimitry Andric                                              const char *s2, size_t n,
131*5f757f3fSDimitry Andric                                              dfsan_label s1_label,
132*5f757f3fSDimitry Andric                                              dfsan_label s2_label,
133*5f757f3fSDimitry Andric                                              dfsan_label n_label);
134fe6060f1SDimitry Andric 
135fe6060f1SDimitry Andric /// Prints the origin trace of the label at the address addr to stderr. It also
136fe6060f1SDimitry Andric /// prints description at the beginning of the trace. If origin tracking is not
137fe6060f1SDimitry Andric /// on, or the address is not labeled, it prints nothing.
138*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_print_origin_trace(const void *addr,
139*5f757f3fSDimitry Andric                                               const char *description);
1400eae32dcSDimitry Andric /// As above, but use an origin id from dfsan_get_origin() instead of address.
1410eae32dcSDimitry Andric /// Does not include header line with taint label and address information.
142*5f757f3fSDimitry Andric void SANITIZER_CDECL dfsan_print_origin_id_trace(dfsan_origin origin);
143fe6060f1SDimitry Andric 
144fe6060f1SDimitry Andric /// Prints the origin trace of the label at the address \p addr to a
145fe6060f1SDimitry Andric /// pre-allocated output buffer. If origin tracking is not on, or the address is
146fe6060f1SDimitry Andric /// not labeled, it prints nothing.
147fe6060f1SDimitry Andric ///
148fe6060f1SDimitry Andric /// Typical usage:
149fe6060f1SDimitry Andric /// \code
150fe6060f1SDimitry Andric ///   char kDescription[] = "...";
151fe6060f1SDimitry Andric ///   char buf[1024];
152fe6060f1SDimitry Andric ///   dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf));
153fe6060f1SDimitry Andric /// \endcode
154fe6060f1SDimitry Andric ///
155fe6060f1SDimitry Andric /// Typical usage that handles truncation:
156fe6060f1SDimitry Andric /// \code
157fe6060f1SDimitry Andric ///   char buf[1024];
158fe6060f1SDimitry Andric ///   int len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf));
159fe6060f1SDimitry Andric ///
160fe6060f1SDimitry Andric ///   if (len < sizeof(buf)) {
161fe6060f1SDimitry Andric ///     ProcessOriginTrace(buf);
162fe6060f1SDimitry Andric ///   } else {
163fe6060f1SDimitry Andric ///     char *tmpbuf = new char[len + 1];
164fe6060f1SDimitry Andric ///     dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1);
165fe6060f1SDimitry Andric ///     ProcessOriginTrace(tmpbuf);
166fe6060f1SDimitry Andric ///     delete[] tmpbuf;
167fe6060f1SDimitry Andric ///   }
168fe6060f1SDimitry Andric /// \endcode
169fe6060f1SDimitry Andric ///
170fe6060f1SDimitry Andric /// \param addr The tainted memory address whose origin we are printing.
171fe6060f1SDimitry Andric /// \param description A description printed at the beginning of the trace.
172fe6060f1SDimitry Andric /// \param [out] out_buf The output buffer to write the results to.
173fe6060f1SDimitry Andric /// \param out_buf_size The size of \p out_buf.
174fe6060f1SDimitry Andric ///
175fe6060f1SDimitry Andric /// \returns The number of symbols that should have been written to \p out_buf
176fe6060f1SDimitry Andric /// (not including trailing null byte '\0'). Thus, the string is truncated iff
177fe6060f1SDimitry Andric /// return value is not less than \p out_buf_size.
178*5f757f3fSDimitry Andric size_t SANITIZER_CDECL dfsan_sprint_origin_trace(const void *addr,
179*5f757f3fSDimitry Andric                                                  const char *description,
180*5f757f3fSDimitry Andric                                                  char *out_buf,
181*5f757f3fSDimitry Andric                                                  size_t out_buf_size);
1820eae32dcSDimitry Andric /// As above, but use an origin id from dfsan_get_origin() instead of address.
1830eae32dcSDimitry Andric /// Does not include header line with taint label and address information.
184*5f757f3fSDimitry Andric size_t SANITIZER_CDECL dfsan_sprint_origin_id_trace(dfsan_origin origin,
185*5f757f3fSDimitry Andric                                                     char *out_buf,
1860eae32dcSDimitry Andric                                                     size_t out_buf_size);
187fe6060f1SDimitry Andric 
188fe6060f1SDimitry Andric /// Prints the stack trace leading to this call to a pre-allocated output
189fe6060f1SDimitry Andric /// buffer.
190fe6060f1SDimitry Andric ///
191fe6060f1SDimitry Andric /// For usage examples, see dfsan_sprint_origin_trace.
192fe6060f1SDimitry Andric ///
193fe6060f1SDimitry Andric /// \param [out] out_buf The output buffer to write the results to.
194fe6060f1SDimitry Andric /// \param out_buf_size The size of \p out_buf.
195fe6060f1SDimitry Andric ///
196fe6060f1SDimitry Andric /// \returns The number of symbols that should have been written to \p out_buf
197fe6060f1SDimitry Andric /// (not including trailing null byte '\0'). Thus, the string is truncated iff
198fe6060f1SDimitry Andric /// return value is not less than \p out_buf_size.
199*5f757f3fSDimitry Andric size_t SANITIZER_CDECL dfsan_sprint_stack_trace(char *out_buf,
200*5f757f3fSDimitry Andric                                                 size_t out_buf_size);
201fe6060f1SDimitry Andric 
202fe6060f1SDimitry Andric /// Retrieves the very first origin associated with the data at the given
203fe6060f1SDimitry Andric /// address.
204*5f757f3fSDimitry Andric dfsan_origin SANITIZER_CDECL dfsan_get_init_origin(const void *addr);
205fe6060f1SDimitry Andric 
206fe6060f1SDimitry Andric /// Returns the value of -dfsan-track-origins.
207fe6060f1SDimitry Andric /// * 0: do not track origins.
208fe6060f1SDimitry Andric /// * 1: track origins at memory store operations.
209fe6060f1SDimitry Andric /// * 2: track origins at memory load and store operations.
210*5f757f3fSDimitry Andric int SANITIZER_CDECL dfsan_get_track_origins(void);
2110b57cec5SDimitry Andric #ifdef __cplusplus
2120b57cec5SDimitry Andric } // extern "C"
2130b57cec5SDimitry Andric 
214349cc55cSDimitry Andric template <typename T> void dfsan_set_label(dfsan_label label, T &data) {
2150b57cec5SDimitry Andric   dfsan_set_label(label, (void *)&data, sizeof(T));
2160b57cec5SDimitry Andric }
2170b57cec5SDimitry Andric 
2180b57cec5SDimitry Andric #endif
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric #endif // DFSAN_INTERFACE_H
221