xref: /freebsd/contrib/llvm-project/llvm/lib/Support/Windows/Threading.inc (revision 2c2ec6bbc9cc7762a250ffe903bda6c2e44d25ff)
1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the Win32 specific implementation of Threading functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/SmallString.h"
14#include "llvm/ADT/Twine.h"
15#include "llvm/Support/thread.h"
16
17#include "llvm/Support/Windows/WindowsSupport.h"
18#include <process.h>
19
20#include <bitset>
21
22// Windows will at times define MemoryFence.
23#ifdef MemoryFence
24#undef MemoryFence
25#endif
26
27namespace llvm {
28HANDLE
29llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
30                            std::optional<unsigned> StackSizeInBytes) {
31  HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0),
32                                            ThreadFunc, Arg, 0, NULL);
33
34  if (!hThread) {
35    ReportLastErrorFatal("_beginthreadex failed");
36  }
37
38  return hThread;
39}
40
41void llvm_thread_join_impl(HANDLE hThread) {
42  if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
43    ReportLastErrorFatal("WaitForSingleObject failed");
44  }
45}
46
47void llvm_thread_detach_impl(HANDLE hThread) {
48  if (::CloseHandle(hThread) == FALSE) {
49    ReportLastErrorFatal("CloseHandle failed");
50  }
51}
52
53DWORD llvm_thread_get_id_impl(HANDLE hThread) { return ::GetThreadId(hThread); }
54
55DWORD llvm_thread_get_current_id_impl() { return ::GetCurrentThreadId(); }
56
57} // namespace llvm
58
59uint64_t llvm::get_threadid() { return uint64_t(::GetCurrentThreadId()); }
60
61uint32_t llvm::get_max_thread_name_length() { return 0; }
62
#if defined(_MSC_VER)
// Announces a name for thread Id by raising the MS_VC_EXCEPTION structured
// exception with a THREADNAME_INFO payload. The exception is swallowed by the
// local handler, so the call has no other visible effect on the program.
static void SetThreadName(DWORD Id, LPCSTR Name) {
  constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;

#pragma pack(push, 8)
  struct THREADNAME_INFO {
    DWORD dwType;     // Must be 0x1000.
    LPCSTR szName;    // Pointer to thread name
    DWORD dwThreadId; // Thread ID (-1 == current thread)
    DWORD dwFlags;    // Reserved.  Do not use.
  };
#pragma pack(pop)

  THREADNAME_INFO Payload = {/*dwType=*/0x1000, /*szName=*/Name,
                             /*dwThreadId=*/Id, /*dwFlags=*/0};

  // The payload is handed over as an array of ULONG_PTR-sized arguments.
  const DWORD NumArguments = sizeof(Payload) / sizeof(ULONG_PTR);

  __try {
    ::RaiseException(MS_VC_EXCEPTION, 0, NumArguments, (ULONG_PTR *)&Payload);
  } __except (EXCEPTION_EXECUTE_HANDLER) {
    // Intentionally empty: the exception only carries the notification.
  }
}
#endif
89
90void llvm::set_thread_name(const Twine &Name) {
91#if defined(_MSC_VER)
92  // Make sure the input is null terminated.
93  SmallString<64> Storage;
94  StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
95  SetThreadName(::GetCurrentThreadId(), NameStr.data());
96#endif
97}
98
99void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
100  // "Name" is not an inherent property of a thread on Windows.  In fact, when
101  // you "set" the name, you are only firing a one-time message to a debugger
102  // which it interprets as a program setting its threads' name.  We may be
103  // able to get fancy by creating a TLS entry when someone calls
104  // set_thread_name so that subsequent calls to get_thread_name return this
105  // value.
106  Name.clear();
107}
108
109SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
110  // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
111  // Begin background processing mode. The system lowers the resource scheduling
112  // priorities of the thread so that it can perform background work without
113  // significantly affecting activity in the foreground.
114  // End background processing mode. The system restores the resource scheduling
115  // priorities of the thread as they were before the thread entered background
116  // processing mode.
117  //
118  // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low
119  return SetThreadPriority(GetCurrentThread(),
120                           Priority != ThreadPriority::Default
121                               ? THREAD_MODE_BACKGROUND_BEGIN
122                               : THREAD_MODE_BACKGROUND_END)
123             ? SetThreadPriorityResult::SUCCESS
124             : SetThreadPriorityResult::FAILURE;
125}
126
// Describes one Windows processor group as seen by this process.
//
// Every member has a default so that a freshly constructed group is in a
// well-defined state; in particular ThreadsPerCore defaults to 1 because it is
// used as a divisor.
struct ProcessorGroup {
  unsigned ID = 0;             // Index of the group.
  unsigned AllThreads = 0;     // Hardware threads the group can hold.
  unsigned UsableThreads = 0;  // Hardware threads currently usable.
  unsigned ThreadsPerCore = 1; // Hardware threads sharing one core.
  uint64_t Affinity = 0;       // Bit mask of the usable hardware threads.

  // Returns the number of usable cores in this group, never less than 1.
  unsigned useableCores() const {
    // Guard the divisor so that a zero ThreadsPerCore cannot trap.
    const unsigned PerCore = std::max(1U, ThreadsPerCore);
    return std::max(1U, UsableThreads / PerCore);
  }
};
138
139template <typename F>
140static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
141  DWORD Len = 0;
142  BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
143  if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
144    return false;
145  }
146  auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
147  R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
148  if (R) {
149    auto *End =
150        (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
151    for (auto *Curr = Info; Curr < End;
152         Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
153                                                            Curr->Size)) {
154      if (Curr->Relationship != Relationship)
155        continue;
156      Fn(Curr);
157    }
158  }
159  free(Info);
160  return true;
161}
162
163static std::optional<std::vector<USHORT>> getActiveGroups() {
164  USHORT Count = 0;
165  if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr))
166    return std::nullopt;
167
168  if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
169    return std::nullopt;
170
171  std::vector<USHORT> Groups;
172  Groups.resize(Count);
173  if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data()))
174    return std::nullopt;
175
176  return Groups;
177}
178
179static ArrayRef<ProcessorGroup> getProcessorGroups() {
180  auto computeGroups = []() {
181    SmallVector<ProcessorGroup, 4> Groups;
182
183    auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
184      GROUP_RELATIONSHIP &El = ProcInfo->Group;
185      for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
186        ProcessorGroup G;
187        G.ID = Groups.size();
188        G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
189        G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
190        assert(G.UsableThreads <= 64);
191        G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
192        Groups.push_back(G);
193      }
194    };
195
196    if (!IterateProcInfo(RelationGroup, HandleGroup))
197      return std::vector<ProcessorGroup>();
198
199    auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
200      PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
201      assert(El.GroupCount == 1);
202      unsigned NumHyperThreads = 1;
203      // If the flag is set, each core supports more than one hyper-thread.
204      if (El.Flags & LTP_PC_SMT)
205        NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
206      unsigned I = El.GroupMask[0].Group;
207      Groups[I].ThreadsPerCore = NumHyperThreads;
208    };
209
210    if (!IterateProcInfo(RelationProcessorCore, HandleProc))
211      return std::vector<ProcessorGroup>();
212
213    auto ActiveGroups = getActiveGroups();
214    if (!ActiveGroups)
215      return std::vector<ProcessorGroup>();
216
217    // If there's an affinity mask set, assume the user wants to constrain the
218    // current process to only a single CPU group. On Windows, it is not
219    // possible for affinity masks to cross CPU group boundaries.
220    DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
221    if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
222                                 &SystemAffinityMask)) {
223
224      if (ProcessAffinityMask != SystemAffinityMask) {
225        if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) {
226          // The process affinity mask is spurious, due to an OS bug, ignore it.
227          return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
228        }
229
230        assert(ActiveGroups->size() == 1 &&
231               "When an affinity mask is set, the process is expected to be "
232               "assigned to a single processor group!");
233
234        unsigned CurrentGroupID = (*ActiveGroups)[0];
235        ProcessorGroup NewG{Groups[CurrentGroupID]};
236        NewG.Affinity = ProcessAffinityMask;
237        NewG.UsableThreads = llvm::popcount(ProcessAffinityMask);
238        Groups.clear();
239        Groups.push_back(NewG);
240      }
241    }
242    return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
243  };
244  static auto Groups = computeGroups();
245  return ArrayRef<ProcessorGroup>(Groups);
246}
247
// Sums P(Element) over every element of Range and returns the total as an
// unsigned value; an empty range yields 0.
template <typename R, typename UnaryPredicate>
static unsigned aggregate(R &&Range, UnaryPredicate P) {
  unsigned Total = 0;
  for (const auto &Element : Range) {
    Total += P(Element);
  }
  return Total;
}
255
256int llvm::get_physical_cores() {
257  static unsigned Cores =
258      aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
259        return G.UsableThreads / G.ThreadsPerCore;
260      });
261  return Cores;
262}
263
264static int computeHostNumHardwareThreads() {
265  static unsigned Threads =
266      aggregate(getProcessorGroups(),
267                [](const ProcessorGroup &G) { return G.UsableThreads; });
268  return Threads;
269}
270
271// Finds the proper CPU socket where a thread number should go. Returns
272// 'std::nullopt' if the thread shall remain on the actual CPU socket.
273std::optional<unsigned>
274llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
275  ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
276  // Only one CPU socket in the system or process affinity was set, no need to
277  // move the thread(s) to another CPU socket.
278  if (Groups.size() <= 1)
279    return std::nullopt;
280
281  // We ask for less threads than there are hardware threads per CPU socket, no
282  // need to dispatch threads to other CPU sockets.
283  unsigned MaxThreadsPerSocket =
284      UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
285  if (compute_thread_count() <= MaxThreadsPerSocket)
286    return std::nullopt;
287
288  assert(ThreadPoolNum < compute_thread_count() &&
289         "The thread index is not within thread strategy's range!");
290
291  // Assumes the same number of hardware threads per CPU socket.
292  return (ThreadPoolNum * Groups.size()) / compute_thread_count();
293}
294
295// Assign the current thread to a more appropriate CPU socket or CPU group
296void llvm::ThreadPoolStrategy::apply_thread_strategy(
297    unsigned ThreadPoolNum) const {
298
299  // After Windows 11 and Windows Server 2022, let the OS do the scheduling,
300  // since a process automatically gains access to all processor groups.
301  if (llvm::RunningWindows11OrGreater())
302    return;
303
304  std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
305  if (!Socket)
306    return;
307  ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
308  GROUP_AFFINITY Affinity{};
309  Affinity.Group = Groups[*Socket].ID;
310  Affinity.Mask = Groups[*Socket].Affinity;
311  SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
312}
313
314llvm::BitVector llvm::get_thread_affinity_mask() {
315  GROUP_AFFINITY Affinity{};
316  GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
317
318  static unsigned All =
319      aggregate(getProcessorGroups(),
320                [](const ProcessorGroup &G) { return G.AllThreads; });
321
322  unsigned StartOffset =
323      aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
324        return G.ID < Affinity.Group ? G.AllThreads : 0;
325      });
326
327  llvm::BitVector V;
328  V.resize(All);
329  for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
330    if ((Affinity.Mask >> I) & 1)
331      V.set(StartOffset + I);
332  }
333  return V;
334}
335
336unsigned llvm::get_cpus() { return getProcessorGroups().size(); }
337