1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file provides the Win32 specific implementation of Threading functions. 10// 11//===----------------------------------------------------------------------===// 12 13#include "llvm/ADT/SmallString.h" 14#include "llvm/ADT/Twine.h" 15 16#include "llvm/Support/Windows/WindowsSupport.h" 17#include <process.h> 18 19#include <bitset> 20 21// Windows will at times define MemoryFence. 22#ifdef MemoryFence 23#undef MemoryFence 24#endif 25 26namespace llvm { 27HANDLE 28llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg, 29 std::optional<unsigned> StackSizeInBytes) { 30 HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0), 31 ThreadFunc, Arg, 0, NULL); 32 33 if (!hThread) { 34 ReportLastErrorFatal("_beginthreadex failed"); 35 } 36 37 return hThread; 38} 39 40void llvm_thread_join_impl(HANDLE hThread) { 41 if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) { 42 ReportLastErrorFatal("WaitForSingleObject failed"); 43 } 44} 45 46void llvm_thread_detach_impl(HANDLE hThread) { 47 if (::CloseHandle(hThread) == FALSE) { 48 ReportLastErrorFatal("CloseHandle failed"); 49 } 50} 51 52DWORD llvm_thread_get_id_impl(HANDLE hThread) { return ::GetThreadId(hThread); } 53 54DWORD llvm_thread_get_current_id_impl() { return ::GetCurrentThreadId(); } 55 56} // namespace llvm 57 58uint64_t llvm::get_threadid() { return uint64_t(::GetCurrentThreadId()); } 59 60uint32_t llvm::get_max_thread_name_length() { return 0; } 61 62#if defined(_MSC_VER) 63static void SetThreadName(DWORD Id, LPCSTR Name) { 64 constexpr DWORD MS_VC_EXCEPTION = 0x406D1388; 65 66#pragma pack(push, 8) 67 struct THREADNAME_INFO { 68 DWORD dwType; // Must be 0x1000. 69 LPCSTR szName; // Pointer to thread name 70 DWORD dwThreadId; // Thread ID (-1 == current thread) 71 DWORD dwFlags; // Reserved. Do not use. 72 }; 73#pragma pack(pop) 74 75 THREADNAME_INFO info; 76 info.dwType = 0x1000; 77 info.szName = Name; 78 info.dwThreadId = Id; 79 info.dwFlags = 0; 80 81 __try { 82 ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), 83 (ULONG_PTR *)&info); 84 } __except (EXCEPTION_EXECUTE_HANDLER) { 85 } 86} 87#endif 88 89void llvm::set_thread_name(const Twine &Name) { 90#if defined(_MSC_VER) 91 // Make sure the input is null terminated. 92 SmallString<64> Storage; 93 StringRef NameStr = Name.toNullTerminatedStringRef(Storage); 94 SetThreadName(::GetCurrentThreadId(), NameStr.data()); 95#endif 96} 97 98void llvm::get_thread_name(SmallVectorImpl<char> &Name) { 99 // "Name" is not an inherent property of a thread on Windows. In fact, when 100 // you "set" the name, you are only firing a one-time message to a debugger 101 // which it interprets as a program setting its threads' name. We may be 102 // able to get fancy by creating a TLS entry when someone calls 103 // set_thread_name so that subsequent calls to get_thread_name return this 104 // value. 105 Name.clear(); 106} 107 108SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { 109 // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority 110 // Begin background processing mode. The system lowers the resource scheduling 111 // priorities of the thread so that it can perform background work without 112 // significantly affecting activity in the foreground. 113 // End background processing mode. The system restores the resource scheduling 114 // priorities of the thread as they were before the thread entered background 115 // processing mode. 116 // 117 // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low 118 return SetThreadPriority(GetCurrentThread(), 119 Priority != ThreadPriority::Default 120 ? THREAD_MODE_BACKGROUND_BEGIN 121 : THREAD_MODE_BACKGROUND_END) 122 ? SetThreadPriorityResult::SUCCESS 123 : SetThreadPriorityResult::FAILURE; 124} 125 126struct ProcessorGroup { 127 unsigned ID; 128 unsigned AllThreads; 129 unsigned UsableThreads; 130 unsigned ThreadsPerCore; 131 uint64_t Affinity; 132 133 unsigned useableCores() const { 134 return std::max(1U, UsableThreads / ThreadsPerCore); 135 } 136}; 137 138template <typename F> 139static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) { 140 DWORD Len = 0; 141 BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len); 142 if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) { 143 return false; 144 } 145 auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len); 146 R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len); 147 if (R) { 148 auto *End = 149 (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len); 150 for (auto *Curr = Info; Curr < End; 151 Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr + 152 Curr->Size)) { 153 if (Curr->Relationship != Relationship) 154 continue; 155 Fn(Curr); 156 } 157 } 158 free(Info); 159 return true; 160} 161 162static std::optional<std::vector<USHORT>> getActiveGroups() { 163 USHORT Count = 0; 164 if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr)) 165 return std::nullopt; 166 167 if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) 168 return std::nullopt; 169 170 std::vector<USHORT> Groups; 171 Groups.resize(Count); 172 if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data())) 173 return std::nullopt; 174 175 return Groups; 176} 177 178static ArrayRef<ProcessorGroup> getProcessorGroups() { 179 auto computeGroups = []() { 180 SmallVector<ProcessorGroup, 4> Groups; 181 182 auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 183 GROUP_RELATIONSHIP &El = ProcInfo->Group; 184 for (unsigned J = 0; J < El.ActiveGroupCount; ++J) { 185 ProcessorGroup G; 186 G.ID = Groups.size(); 187 G.AllThreads = El.GroupInfo[J].MaximumProcessorCount; 188 G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount; 189 assert(G.UsableThreads <= 64); 190 G.Affinity = El.GroupInfo[J].ActiveProcessorMask; 191 Groups.push_back(G); 192 } 193 }; 194 195 if (!IterateProcInfo(RelationGroup, HandleGroup)) 196 return std::vector<ProcessorGroup>(); 197 198 auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 199 PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor; 200 assert(El.GroupCount == 1); 201 unsigned NumHyperThreads = 1; 202 // If the flag is set, each core supports more than one hyper-thread. 203 if (El.Flags & LTP_PC_SMT) 204 NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count(); 205 unsigned I = El.GroupMask[0].Group; 206 Groups[I].ThreadsPerCore = NumHyperThreads; 207 }; 208 209 if (!IterateProcInfo(RelationProcessorCore, HandleProc)) 210 return std::vector<ProcessorGroup>(); 211 212 auto ActiveGroups = getActiveGroups(); 213 if (!ActiveGroups) 214 return std::vector<ProcessorGroup>(); 215 216 // If there's an affinity mask set, assume the user wants to constrain the 217 // current process to only a single CPU group. On Windows, it is not 218 // possible for affinity masks to cross CPU group boundaries. 219 DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0; 220 if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask, 221 &SystemAffinityMask)) { 222 223 if (ProcessAffinityMask != SystemAffinityMask) { 224 if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) { 225 // The process affinity mask is spurious, due to an OS bug, ignore it. 226 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end()); 227 } 228 229 assert(ActiveGroups->size() == 1 && 230 "When an affinity mask is set, the process is expected to be " 231 "assigned to a single processor group!"); 232 233 unsigned CurrentGroupID = (*ActiveGroups)[0]; 234 ProcessorGroup NewG{Groups[CurrentGroupID]}; 235 NewG.Affinity = ProcessAffinityMask; 236 NewG.UsableThreads = llvm::popcount(ProcessAffinityMask); 237 Groups.clear(); 238 Groups.push_back(NewG); 239 } 240 } 241 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end()); 242 }; 243 static auto Groups = computeGroups(); 244 return ArrayRef<ProcessorGroup>(Groups); 245} 246 247template <typename R, typename UnaryPredicate> 248static unsigned aggregate(R &&Range, UnaryPredicate P) { 249 unsigned I{}; 250 for (const auto &It : Range) 251 I += P(It); 252 return I; 253} 254 255int llvm::get_physical_cores() { 256 static unsigned Cores = 257 aggregate(getProcessorGroups(), [](const ProcessorGroup &G) { 258 return G.UsableThreads / G.ThreadsPerCore; 259 }); 260 return Cores; 261} 262 263static int computeHostNumHardwareThreads() { 264 static unsigned Threads = 265 aggregate(getProcessorGroups(), 266 [](const ProcessorGroup &G) { return G.UsableThreads; }); 267 return Threads; 268} 269 270// Finds the proper CPU socket where a thread number should go. Returns 271// 'std::nullopt' if the thread shall remain on the actual CPU socket. 272std::optional<unsigned> 273llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const { 274 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 275 // Only one CPU socket in the system or process affinity was set, no need to 276 // move the thread(s) to another CPU socket. 277 if (Groups.size() <= 1) 278 return std::nullopt; 279 280 // We ask for less threads than there are hardware threads per CPU socket, no 281 // need to dispatch threads to other CPU sockets. 282 unsigned MaxThreadsPerSocket = 283 UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores(); 284 if (compute_thread_count() <= MaxThreadsPerSocket) 285 return std::nullopt; 286 287 assert(ThreadPoolNum < compute_thread_count() && 288 "The thread index is not within thread strategy's range!"); 289 290 // Assumes the same number of hardware threads per CPU socket. 291 return (ThreadPoolNum * Groups.size()) / compute_thread_count(); 292} 293 294// Assign the current thread to a more appropriate CPU socket or CPU group 295void llvm::ThreadPoolStrategy::apply_thread_strategy( 296 unsigned ThreadPoolNum) const { 297 298 // After Windows 11 and Windows Server 2022, let the OS do the scheduling, 299 // since a process automatically gains access to all processor groups. 300 if (llvm::RunningWindows11OrGreater()) 301 return; 302 303 std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum); 304 if (!Socket) 305 return; 306 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 307 GROUP_AFFINITY Affinity{}; 308 Affinity.Group = Groups[*Socket].ID; 309 Affinity.Mask = Groups[*Socket].Affinity; 310 SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr); 311} 312 313llvm::BitVector llvm::get_thread_affinity_mask() { 314 GROUP_AFFINITY Affinity{}; 315 GetThreadGroupAffinity(GetCurrentThread(), &Affinity); 316 317 static unsigned All = 318 aggregate(getProcessorGroups(), 319 [](const ProcessorGroup &G) { return G.AllThreads; }); 320 321 unsigned StartOffset = 322 aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) { 323 return G.ID < Affinity.Group ? G.AllThreads : 0; 324 }); 325 326 llvm::BitVector V; 327 V.resize(All); 328 for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) { 329 if ((Affinity.Mask >> I) & 1) 330 V.set(StartOffset + I); 331 } 332 return V; 333} 334 335unsigned llvm::get_cpus() { return getProcessorGroups().size(); } 336