1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file provides the Win32 specific implementation of Threading functions. 10// 11//===----------------------------------------------------------------------===// 12 13#include "llvm/ADT/SmallString.h" 14#include "llvm/ADT/Twine.h" 15#include "llvm/Support/thread.h" 16 17#include "llvm/Support/Windows/WindowsSupport.h" 18#include <process.h> 19 20#include <bitset> 21 22// Windows will at times define MemoryFence. 23#ifdef MemoryFence 24#undef MemoryFence 25#endif 26 27namespace llvm { 28HANDLE 29llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg, 30 std::optional<unsigned> StackSizeInBytes) { 31 HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0), 32 ThreadFunc, Arg, 0, NULL); 33 34 if (!hThread) { 35 ReportLastErrorFatal("_beginthreadex failed"); 36 } 37 38 return hThread; 39} 40 41void llvm_thread_join_impl(HANDLE hThread) { 42 if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) { 43 ReportLastErrorFatal("WaitForSingleObject failed"); 44 } 45} 46 47void llvm_thread_detach_impl(HANDLE hThread) { 48 if (::CloseHandle(hThread) == FALSE) { 49 ReportLastErrorFatal("CloseHandle failed"); 50 } 51} 52 53DWORD llvm_thread_get_id_impl(HANDLE hThread) { return ::GetThreadId(hThread); } 54 55DWORD llvm_thread_get_current_id_impl() { return ::GetCurrentThreadId(); } 56 57} // namespace llvm 58 59uint64_t llvm::get_threadid() { return uint64_t(::GetCurrentThreadId()); } 60 61uint32_t llvm::get_max_thread_name_length() { return 0; } 62 63#if defined(_MSC_VER) 64static void SetThreadName(DWORD Id, LPCSTR Name) { 65 constexpr DWORD MS_VC_EXCEPTION = 0x406D1388; 66 67#pragma pack(push, 8) 68 struct THREADNAME_INFO { 69 DWORD dwType; // Must be 0x1000. 70 LPCSTR szName; // Pointer to thread name 71 DWORD dwThreadId; // Thread ID (-1 == current thread) 72 DWORD dwFlags; // Reserved. Do not use. 73 }; 74#pragma pack(pop) 75 76 THREADNAME_INFO info; 77 info.dwType = 0x1000; 78 info.szName = Name; 79 info.dwThreadId = Id; 80 info.dwFlags = 0; 81 82 __try { 83 ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), 84 (ULONG_PTR *)&info); 85 } __except (EXCEPTION_EXECUTE_HANDLER) { 86 } 87} 88#endif 89 90void llvm::set_thread_name(const Twine &Name) { 91#if defined(_MSC_VER) 92 // Make sure the input is null terminated. 93 SmallString<64> Storage; 94 StringRef NameStr = Name.toNullTerminatedStringRef(Storage); 95 SetThreadName(::GetCurrentThreadId(), NameStr.data()); 96#endif 97} 98 99void llvm::get_thread_name(SmallVectorImpl<char> &Name) { 100 // "Name" is not an inherent property of a thread on Windows. In fact, when 101 // you "set" the name, you are only firing a one-time message to a debugger 102 // which it interprets as a program setting its threads' name. We may be 103 // able to get fancy by creating a TLS entry when someone calls 104 // set_thread_name so that subsequent calls to get_thread_name return this 105 // value. 106 Name.clear(); 107} 108 109SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { 110 // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority 111 // Begin background processing mode. The system lowers the resource scheduling 112 // priorities of the thread so that it can perform background work without 113 // significantly affecting activity in the foreground. 114 // End background processing mode. The system restores the resource scheduling 115 // priorities of the thread as they were before the thread entered background 116 // processing mode. 117 // 118 // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low 119 return SetThreadPriority(GetCurrentThread(), 120 Priority != ThreadPriority::Default 121 ? THREAD_MODE_BACKGROUND_BEGIN 122 : THREAD_MODE_BACKGROUND_END) 123 ? SetThreadPriorityResult::SUCCESS 124 : SetThreadPriorityResult::FAILURE; 125} 126 127struct ProcessorGroup { 128 unsigned ID; 129 unsigned AllThreads; 130 unsigned UsableThreads; 131 unsigned ThreadsPerCore; 132 uint64_t Affinity; 133 134 unsigned useableCores() const { 135 return std::max(1U, UsableThreads / ThreadsPerCore); 136 } 137}; 138 139template <typename F> 140static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) { 141 DWORD Len = 0; 142 BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len); 143 if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) { 144 return false; 145 } 146 auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len); 147 R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len); 148 if (R) { 149 auto *End = 150 (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len); 151 for (auto *Curr = Info; Curr < End; 152 Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr + 153 Curr->Size)) { 154 if (Curr->Relationship != Relationship) 155 continue; 156 Fn(Curr); 157 } 158 } 159 free(Info); 160 return true; 161} 162 163static std::optional<std::vector<USHORT>> getActiveGroups() { 164 USHORT Count = 0; 165 if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr)) 166 return std::nullopt; 167 168 if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) 169 return std::nullopt; 170 171 std::vector<USHORT> Groups; 172 Groups.resize(Count); 173 if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data())) 174 return std::nullopt; 175 176 return Groups; 177} 178 179static ArrayRef<ProcessorGroup> getProcessorGroups() { 180 auto computeGroups = []() { 181 SmallVector<ProcessorGroup, 4> Groups; 182 183 auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 184 GROUP_RELATIONSHIP &El = ProcInfo->Group; 185 for (unsigned J = 0; J < El.ActiveGroupCount; ++J) { 186 ProcessorGroup G; 187 G.ID = Groups.size(); 188 G.AllThreads = El.GroupInfo[J].MaximumProcessorCount; 189 G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount; 190 assert(G.UsableThreads <= 64); 191 G.Affinity = El.GroupInfo[J].ActiveProcessorMask; 192 Groups.push_back(G); 193 } 194 }; 195 196 if (!IterateProcInfo(RelationGroup, HandleGroup)) 197 return std::vector<ProcessorGroup>(); 198 199 auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 200 PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor; 201 assert(El.GroupCount == 1); 202 unsigned NumHyperThreads = 1; 203 // If the flag is set, each core supports more than one hyper-thread. 204 if (El.Flags & LTP_PC_SMT) 205 NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count(); 206 unsigned I = El.GroupMask[0].Group; 207 Groups[I].ThreadsPerCore = NumHyperThreads; 208 }; 209 210 if (!IterateProcInfo(RelationProcessorCore, HandleProc)) 211 return std::vector<ProcessorGroup>(); 212 213 auto ActiveGroups = getActiveGroups(); 214 if (!ActiveGroups) 215 return std::vector<ProcessorGroup>(); 216 217 // If there's an affinity mask set, assume the user wants to constrain the 218 // current process to only a single CPU group. On Windows, it is not 219 // possible for affinity masks to cross CPU group boundaries. 220 DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0; 221 if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask, 222 &SystemAffinityMask)) { 223 224 if (ProcessAffinityMask != SystemAffinityMask) { 225 if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) { 226 // The process affinity mask is spurious, due to an OS bug, ignore it. 227 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end()); 228 } 229 230 assert(ActiveGroups->size() == 1 && 231 "When an affinity mask is set, the process is expected to be " 232 "assigned to a single processor group!"); 233 234 unsigned CurrentGroupID = (*ActiveGroups)[0]; 235 ProcessorGroup NewG{Groups[CurrentGroupID]}; 236 NewG.Affinity = ProcessAffinityMask; 237 NewG.UsableThreads = llvm::popcount(ProcessAffinityMask); 238 Groups.clear(); 239 Groups.push_back(NewG); 240 } 241 } 242 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end()); 243 }; 244 static auto Groups = computeGroups(); 245 return ArrayRef<ProcessorGroup>(Groups); 246} 247 248template <typename R, typename UnaryPredicate> 249static unsigned aggregate(R &&Range, UnaryPredicate P) { 250 unsigned I{}; 251 for (const auto &It : Range) 252 I += P(It); 253 return I; 254} 255 256int llvm::get_physical_cores() { 257 static unsigned Cores = 258 aggregate(getProcessorGroups(), [](const ProcessorGroup &G) { 259 return G.UsableThreads / G.ThreadsPerCore; 260 }); 261 return Cores; 262} 263 264static int computeHostNumHardwareThreads() { 265 static unsigned Threads = 266 aggregate(getProcessorGroups(), 267 [](const ProcessorGroup &G) { return G.UsableThreads; }); 268 return Threads; 269} 270 271// Finds the proper CPU socket where a thread number should go. Returns 272// 'std::nullopt' if the thread shall remain on the actual CPU socket. 273std::optional<unsigned> 274llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const { 275 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 276 // Only one CPU socket in the system or process affinity was set, no need to 277 // move the thread(s) to another CPU socket. 278 if (Groups.size() <= 1) 279 return std::nullopt; 280 281 // We ask for less threads than there are hardware threads per CPU socket, no 282 // need to dispatch threads to other CPU sockets. 283 unsigned MaxThreadsPerSocket = 284 UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores(); 285 if (compute_thread_count() <= MaxThreadsPerSocket) 286 return std::nullopt; 287 288 assert(ThreadPoolNum < compute_thread_count() && 289 "The thread index is not within thread strategy's range!"); 290 291 // Assumes the same number of hardware threads per CPU socket. 292 return (ThreadPoolNum * Groups.size()) / compute_thread_count(); 293} 294 295// Assign the current thread to a more appropriate CPU socket or CPU group 296void llvm::ThreadPoolStrategy::apply_thread_strategy( 297 unsigned ThreadPoolNum) const { 298 299 // After Windows 11 and Windows Server 2022, let the OS do the scheduling, 300 // since a process automatically gains access to all processor groups. 301 if (llvm::RunningWindows11OrGreater()) 302 return; 303 304 std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum); 305 if (!Socket) 306 return; 307 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 308 GROUP_AFFINITY Affinity{}; 309 Affinity.Group = Groups[*Socket].ID; 310 Affinity.Mask = Groups[*Socket].Affinity; 311 SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr); 312} 313 314llvm::BitVector llvm::get_thread_affinity_mask() { 315 GROUP_AFFINITY Affinity{}; 316 GetThreadGroupAffinity(GetCurrentThread(), &Affinity); 317 318 static unsigned All = 319 aggregate(getProcessorGroups(), 320 [](const ProcessorGroup &G) { return G.AllThreads; }); 321 322 unsigned StartOffset = 323 aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) { 324 return G.ID < Affinity.Group ? G.AllThreads : 0; 325 }); 326 327 llvm::BitVector V; 328 V.resize(All); 329 for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) { 330 if ((Affinity.Mask >> I) & 1) 331 V.set(StartOffset + I); 332 } 333 return V; 334} 335 336unsigned llvm::get_cpus() { return getProcessorGroups().size(); } 337