1 //===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares helper functions for running LLVM in a multi-threaded
10 // environment.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_SUPPORT_THREADING_H
15 #define LLVM_SUPPORT_THREADING_H
16
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
20 #include "llvm/Support/Compiler.h"
21 #include <optional>
22
// Select the implementation behind llvm::call_once:
//   LLVM_THREADING_USE_STD_CALL_ONCE == 1  -> forward to std::call_once.
//   LLVM_THREADING_USE_STD_CALL_ONCE == 0  -> use the hand-rolled fallback
//                                             built on llvm/Support/Atomic.h.
#if defined(_MSC_VER)
// MSVC's call_once implementation has worked since VS 2015, which is the
// minimum supported version as of this writing.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) && \
    (defined(_LIBCPP_VERSION) || \
     !(defined(__NetBSD__) || defined(__OpenBSD__) || defined(__powerpc__)))
// std::call_once from libc++ is used on all Unix platforms. Other
// implementations like libstdc++ are known to have problems on NetBSD,
// OpenBSD and PowerPC.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) && \
    (defined(__powerpc__) && defined(__LITTLE_ENDIAN__))
// Little-endian PowerPC is exempted from the PowerPC exclusion above and
// uses std::call_once as well.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#else
// Every remaining configuration gets the fallback implementation.
#define LLVM_THREADING_USE_STD_CALL_ONCE 0
#endif
40
41 #if LLVM_THREADING_USE_STD_CALL_ONCE
42 #include <mutex>
43 #else
44 #include "llvm/Support/Atomic.h"
45 #endif
46
47 namespace llvm {
48 class Twine;
49
50 /// Returns true if LLVM is compiled with support for multi-threading, and
51 /// false otherwise.
llvm_is_multithreaded()52 constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
53
54 #if LLVM_THREADING_USE_STD_CALL_ONCE
55
56 typedef std::once_flag once_flag;
57
58 #else
59
60 enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };
61
62 /// The llvm::once_flag structure
63 ///
64 /// This type is modeled after std::once_flag to use with llvm::call_once.
65 /// This structure must be used as an opaque object. It is a struct to force
66 /// autoinitialization and behave like std::once_flag.
67 struct once_flag {
68 volatile sys::cas_flag status = Uninitialized;
69 };
70
71 #endif
72
73 /// Execute the function specified as a parameter once.
74 ///
75 /// Typical usage:
76 /// \code
77 /// void foo() {...};
78 /// ...
79 /// static once_flag flag;
80 /// call_once(flag, foo);
81 /// \endcode
82 ///
83 /// \param flag Flag used for tracking whether or not this has run.
84 /// \param F Function to call once.
85 template <typename Function, typename... Args>
call_once(once_flag & flag,Function && F,Args &&...ArgList)86 void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
87 #if LLVM_THREADING_USE_STD_CALL_ONCE
88 std::call_once(flag, std::forward<Function>(F),
89 std::forward<Args>(ArgList)...);
90 #else
91 // For other platforms we use a generic (if brittle) version based on our
92 // atomics.
93 sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
94 if (old_val == Uninitialized) {
95 std::forward<Function>(F)(std::forward<Args>(ArgList)...);
96 sys::MemoryFence();
97 TsanIgnoreWritesBegin();
98 TsanHappensBefore(&flag.status);
99 flag.status = Done;
100 TsanIgnoreWritesEnd();
101 } else {
102 // Wait until any thread doing the call has finished.
103 sys::cas_flag tmp = flag.status;
104 sys::MemoryFence();
105 while (tmp != Done) {
106 tmp = flag.status;
107 sys::MemoryFence();
108 }
109 }
110 TsanHappensAfter(&flag.status);
111 #endif
112 }
113
/// Describes how a thread pool should use the machine: how many threads are
/// wanted, whether hyper-threads count, and whether the request is capped by
/// the hardware. Plain data members with defaults; a default-constructed
/// strategy requests all available threads, hyper-threads included.
class ThreadPoolStrategy {
public:
  /// The default value (0) means all available threads should be used,
  /// taking the affinity mask into account. If set, this value only represents
  /// a suggested high bound, the runtime might choose a lower value (not
  /// higher).
  unsigned ThreadsRequested = 0;

  /// If SMT is active, use hyper threads. If false, there will be only one
  /// std::thread per core.
  bool UseHyperThreads = true;

  /// If set, will constrain 'ThreadsRequested' to the number of hardware
  /// threads, or hardware cores.
  bool Limit = false;

  /// Retrieves the max available threads for the current strategy. This
  /// accounts for affinity masks and takes advantage of all CPU sockets.
  LLVM_ABI unsigned compute_thread_count() const;

  /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
  /// multi-socket system, this ensures threads are assigned to all CPU
  /// sockets.
  ///
  /// \param ThreadPoolNum Index of the calling thread within the pool, in the
  /// range [0, compute_thread_count()).
  LLVM_ABI void apply_thread_strategy(unsigned ThreadPoolNum) const;

  /// Finds the CPU socket where a thread should go.
  ///
  /// \param ThreadPoolNum Index of the thread within the pool.
  /// \returns the socket number, or 'std::nullopt' if the thread shall remain
  /// on the actual CPU socket.
  LLVM_ABI std::optional<unsigned>
  compute_cpu_socket(unsigned ThreadPoolNum) const;
};
146
/// Build a strategy from a number of threads as a string provided in \p Num.
/// When Num is above the max number of threads specified by the \p Default
/// strategy, we attempt to equally allocate the threads on all CPU sockets.
///   - "0" or an empty string returns the \p Default strategy.
///   - "all" requests all hardware threads.
///
/// \param Num Textual thread count (or one of the special values above).
/// \param Default Strategy used as the baseline; defaults to a
/// default-constructed ThreadPoolStrategy.
/// \returns the resulting strategy, or an empty optional; the StringRef
/// overloads of hardware_concurrency() treat the empty case as an invalid
/// \p Num.
LLVM_ABI std::optional<ThreadPoolStrategy>
get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {});
154
155 /// Returns a thread strategy for tasks requiring significant memory or other
156 /// resources. To be used for workloads where hardware_concurrency() proves to
157 /// be less efficient. Avoid this strategy if doing lots of I/O. Currently
158 /// based on physical cores, if available for the host system, otherwise falls
159 /// back to hardware_concurrency(). Returns 1 when LLVM is configured with
160 /// LLVM_ENABLE_THREADS = OFF.
161 inline ThreadPoolStrategy
162 heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
163 ThreadPoolStrategy S;
164 S.UseHyperThreads = false;
165 S.ThreadsRequested = ThreadCount;
166 return S;
167 }
168
169 /// Like heavyweight_hardware_concurrency() above, but builds a strategy
170 /// based on the rules described for get_threadpool_strategy().
171 /// If \p Num is invalid, returns a default strategy where one thread per
172 /// hardware core is used.
heavyweight_hardware_concurrency(StringRef Num)173 inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) {
174 std::optional<ThreadPoolStrategy> S =
175 get_threadpool_strategy(Num, heavyweight_hardware_concurrency());
176 if (S)
177 return *S;
178 return heavyweight_hardware_concurrency();
179 }
180
181 /// Returns a default thread strategy where all available hardware resources
182 /// are to be used, except for those initially excluded by an affinity mask.
183 /// This function takes affinity into consideration. Returns 1 when LLVM is
184 /// configured with LLVM_ENABLE_THREADS=OFF.
185 inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
186 ThreadPoolStrategy S;
187 S.ThreadsRequested = ThreadCount;
188 return S;
189 }
190
191 /// Like hardware_concurrency() above, but builds a strategy
192 /// based on the rules described for get_threadpool_strategy().
193 /// If \p Num is invalid, returns a default strategy where one thread per
194 /// hardware core is used.
hardware_concurrency(StringRef Num)195 inline ThreadPoolStrategy hardware_concurrency(StringRef Num) {
196 std::optional<ThreadPoolStrategy> S =
197 get_threadpool_strategy(Num, hardware_concurrency());
198 if (S)
199 return *S;
200 return hardware_concurrency();
201 }
202
203 /// Returns an optimal thread strategy to execute specified amount of tasks.
204 /// This strategy should prevent us from creating too many threads if we
205 /// occasionaly have an unexpectedly small amount of tasks.
206 inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
207 ThreadPoolStrategy S;
208 S.Limit = true;
209 S.ThreadsRequested = TaskCount;
210 return S;
211 }
212
/// Return the current thread id, as used in various OS system calls.
/// Note that not all platforms guarantee that the value returned will be
/// unique across the entire system, so portable code should not assume
/// this.
///
/// \returns the id of the calling thread as a 64-bit unsigned value.
LLVM_ABI uint64_t get_threadid();
218
/// Get the maximum length of a thread name on this platform.
///
/// \returns the maximum length, or 0 if there is no limit.
LLVM_ABI uint32_t get_max_thread_name_length();
222
/// Set the name of the current thread. Setting a thread's name can
/// be helpful for enabling useful diagnostics under a debugger or when
/// logging. The level of support for setting a thread's name varies
/// wildly across operating systems, and we only make a best effort to
/// perform the operation on supported platforms. No indication of success
/// or failure is returned.
///
/// \param Name The name to give to the calling thread.
LLVM_ABI void set_thread_name(const Twine &Name);
230
/// Get the name of the current thread. The level of support for
/// getting a thread's name varies wildly across operating systems, and it
/// is not even guaranteed that if you can successfully set a thread's name
/// that you can later get it back. This function is intended for diagnostic
/// purposes, and as with setting a thread's name no indication of whether
/// the operation succeeded or failed is returned.
///
/// \param Name Output buffer for the thread name. NOTE(review): its contents
/// when the name cannot be retrieved are not specified here; confirm against
/// the implementation.
LLVM_ABI void get_thread_name(SmallVectorImpl<char> &Name);
238
/// Returns a mask that represents on which hardware thread, core, CPU, or
/// NUMA group the calling thread can be executed. On Windows, threads cannot
/// cross CPU socket boundaries.
LLVM_ABI llvm::BitVector get_thread_affinity_mask();
243
/// Returns how many physical CPUs (sockets) or NUMA groups the system has.
LLVM_ABI unsigned get_cpus();
246
/// Returns how many physical cores the host has, as opposed to the logical
/// cores returned from thread::hardware_concurrency(), which include
/// hyperthreads.
///
/// \returns the number of physical cores, or -1 if unknown for the current
/// host system.
LLVM_ABI int get_physical_cores();
251
/// Scheduling priorities that can be requested for the current thread via
/// set_thread_priority().
enum class ThreadPriority {
  /// Lower the current thread's priority as much as possible. Can be used
  /// for long-running tasks that are not time critical; more energy-
  /// efficient than Low.
  Background = 0,

  /// Lower the current thread's priority such that it does not affect
  /// foreground tasks significantly. This is a good default for long-
  /// running, latency-insensitive tasks to make sure cpu is not hogged
  /// by this task.
  Low = 1,

  /// Restore the current thread's priority to default scheduling priority.
  Default = 2,
};
/// Outcome reported by set_thread_priority().
enum class SetThreadPriorityResult { FAILURE, SUCCESS };

/// Request the scheduling priority \p Priority for the current thread; see
/// ThreadPriority for the meaning of each level.
///
/// \returns a SetThreadPriorityResult; presumably SUCCESS when the priority
/// was applied and FAILURE otherwise (confirm against the per-platform
/// implementation).
LLVM_ABI SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
269 }
270
271 #endif
272