1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file provides the Win32 specific implementation of Threading functions. 10// 11//===----------------------------------------------------------------------===// 12 13#include "llvm/ADT/SmallString.h" 14#include "llvm/ADT/Twine.h" 15 16#include "llvm/Support/Windows/WindowsSupport.h" 17#include <process.h> 18 19#include <bitset> 20 21// Windows will at times define MemoryFence. 22#ifdef MemoryFence 23#undef MemoryFence 24#endif 25 26static unsigned __stdcall threadFuncSync(void *Arg) { 27 SyncThreadInfo *TI = static_cast<SyncThreadInfo *>(Arg); 28 TI->UserFn(TI->UserData); 29 return 0; 30} 31 32static unsigned __stdcall threadFuncAsync(void *Arg) { 33 std::unique_ptr<AsyncThreadInfo> Info(static_cast<AsyncThreadInfo *>(Arg)); 34 (*Info)(); 35 return 0; 36} 37 38static void 39llvm_execute_on_thread_impl(unsigned (__stdcall *ThreadFunc)(void *), void *Arg, 40 llvm::Optional<unsigned> StackSizeInBytes, 41 JoiningPolicy JP) { 42 HANDLE hThread = (HANDLE)::_beginthreadex( 43 NULL, StackSizeInBytes.getValueOr(0), ThreadFunc, Arg, 0, NULL); 44 45 if (!hThread) { 46 ReportLastErrorFatal("_beginthreadex failed"); 47 } 48 49 if (JP == JoiningPolicy::Join) { 50 if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) { 51 ReportLastErrorFatal("WaitForSingleObject failed"); 52 } 53 } 54 if (::CloseHandle(hThread) == FALSE) { 55 ReportLastErrorFatal("CloseHandle failed"); 56 } 57} 58 59uint64_t llvm::get_threadid() { 60 return uint64_t(::GetCurrentThreadId()); 61} 62 63uint32_t llvm::get_max_thread_name_length() { return 0; } 64 65#if defined(_MSC_VER) 66static void SetThreadName(DWORD Id, LPCSTR Name) { 67 constexpr DWORD MS_VC_EXCEPTION = 0x406D1388; 68 69#pragma pack(push, 8) 70 struct THREADNAME_INFO { 71 DWORD dwType; // Must be 0x1000. 72 LPCSTR szName; // Pointer to thread name 73 DWORD dwThreadId; // Thread ID (-1 == current thread) 74 DWORD dwFlags; // Reserved. Do not use. 75 }; 76#pragma pack(pop) 77 78 THREADNAME_INFO info; 79 info.dwType = 0x1000; 80 info.szName = Name; 81 info.dwThreadId = Id; 82 info.dwFlags = 0; 83 84 __try { 85 ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), 86 (ULONG_PTR *)&info); 87 } 88 __except (EXCEPTION_EXECUTE_HANDLER) { 89 } 90} 91#endif 92 93void llvm::set_thread_name(const Twine &Name) { 94#if defined(_MSC_VER) 95 // Make sure the input is null terminated. 96 SmallString<64> Storage; 97 StringRef NameStr = Name.toNullTerminatedStringRef(Storage); 98 SetThreadName(::GetCurrentThreadId(), NameStr.data()); 99#endif 100} 101 102void llvm::get_thread_name(SmallVectorImpl<char> &Name) { 103 // "Name" is not an inherent property of a thread on Windows. In fact, when 104 // you "set" the name, you are only firing a one-time message to a debugger 105 // which it interprets as a program setting its threads' name. We may be 106 // able to get fancy by creating a TLS entry when someone calls 107 // set_thread_name so that subsequent calls to get_thread_name return this 108 // value. 109 Name.clear(); 110} 111 112SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { 113 // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority 114 // Begin background processing mode. The system lowers the resource scheduling 115 // priorities of the thread so that it can perform background work without 116 // significantly affecting activity in the foreground. 117 // End background processing mode. The system restores the resource scheduling 118 // priorities of the thread as they were before the thread entered background 119 // processing mode. 120 return SetThreadPriority(GetCurrentThread(), 121 Priority == ThreadPriority::Background 122 ? THREAD_MODE_BACKGROUND_BEGIN 123 : THREAD_MODE_BACKGROUND_END) 124 ? SetThreadPriorityResult::SUCCESS 125 : SetThreadPriorityResult::FAILURE; 126} 127 128struct ProcessorGroup { 129 unsigned ID; 130 unsigned AllThreads; 131 unsigned UsableThreads; 132 unsigned ThreadsPerCore; 133 uint64_t Affinity; 134 135 unsigned useableCores() const { 136 return std::max(1U, UsableThreads / ThreadsPerCore); 137 } 138}; 139 140template <typename F> 141static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) { 142 DWORD Len = 0; 143 BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len); 144 if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) { 145 return false; 146 } 147 auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len); 148 R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len); 149 if (R) { 150 auto *End = 151 (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len); 152 for (auto *Curr = Info; Curr < End; 153 Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr + 154 Curr->Size)) { 155 if (Curr->Relationship != Relationship) 156 continue; 157 Fn(Curr); 158 } 159 } 160 free(Info); 161 return true; 162} 163 164static ArrayRef<ProcessorGroup> getProcessorGroups() { 165 auto computeGroups = []() { 166 SmallVector<ProcessorGroup, 4> Groups; 167 168 auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 169 GROUP_RELATIONSHIP &El = ProcInfo->Group; 170 for (unsigned J = 0; J < El.ActiveGroupCount; ++J) { 171 ProcessorGroup G; 172 G.ID = Groups.size(); 173 G.AllThreads = El.GroupInfo[J].MaximumProcessorCount; 174 G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount; 175 assert(G.UsableThreads <= 64); 176 G.Affinity = El.GroupInfo[J].ActiveProcessorMask; 177 Groups.push_back(G); 178 } 179 }; 180 181 if (!IterateProcInfo(RelationGroup, HandleGroup)) 182 return std::vector<ProcessorGroup>(); 183 184 auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 185 PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor; 186 assert(El.GroupCount == 1); 187 unsigned NumHyperThreads = 1; 188 // If the flag is set, each core supports more than one hyper-thread. 189 if (El.Flags & LTP_PC_SMT) 190 NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count(); 191 unsigned I = El.GroupMask[0].Group; 192 Groups[I].ThreadsPerCore = NumHyperThreads; 193 }; 194 195 if (!IterateProcInfo(RelationProcessorCore, HandleProc)) 196 return std::vector<ProcessorGroup>(); 197 198 // If there's an affinity mask set, assume the user wants to constrain the 199 // current process to only a single CPU group. On Windows, it is not 200 // possible for affinity masks to cross CPU group boundaries. 201 DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0; 202 if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask, 203 &SystemAffinityMask) && 204 ProcessAffinityMask != SystemAffinityMask) { 205 // We don't expect more that 4 CPU groups on Windows (256 processors). 206 USHORT GroupCount = 4; 207 USHORT GroupArray[4]{}; 208 if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount, 209 GroupArray)) { 210 assert(GroupCount == 1 && 211 "On startup, a program is expected to be assigned only to " 212 "one processor group!"); 213 unsigned CurrentGroupID = GroupArray[0]; 214 ProcessorGroup NewG{Groups[CurrentGroupID]}; 215 NewG.Affinity = ProcessAffinityMask; 216 NewG.UsableThreads = countPopulation(ProcessAffinityMask); 217 Groups.clear(); 218 Groups.push_back(NewG); 219 } 220 } 221 222 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end()); 223 }; 224 static auto Groups = computeGroups(); 225 return ArrayRef<ProcessorGroup>(Groups); 226} 227 228template <typename R, typename UnaryPredicate> 229static unsigned aggregate(R &&Range, UnaryPredicate P) { 230 unsigned I{}; 231 for (const auto &It : Range) 232 I += P(It); 233 return I; 234} 235 236// for sys::getHostNumPhysicalCores 237int computeHostNumPhysicalCores() { 238 static unsigned Cores = 239 aggregate(getProcessorGroups(), [](const ProcessorGroup &G) { 240 return G.UsableThreads / G.ThreadsPerCore; 241 }); 242 return Cores; 243} 244 245int computeHostNumHardwareThreads() { 246 static unsigned Threads = 247 aggregate(getProcessorGroups(), 248 [](const ProcessorGroup &G) { return G.UsableThreads; }); 249 return Threads; 250} 251 252// Finds the proper CPU socket where a thread number should go. Returns 'None' 253// if the thread shall remain on the actual CPU socket. 254Optional<unsigned> 255llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const { 256 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 257 // Only one CPU socket in the system or process affinity was set, no need to 258 // move the thread(s) to another CPU socket. 259 if (Groups.size() <= 1) 260 return None; 261 262 // We ask for less threads than there are hardware threads per CPU socket, no 263 // need to dispatch threads to other CPU sockets. 264 unsigned MaxThreadsPerSocket = 265 UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores(); 266 if (compute_thread_count() <= MaxThreadsPerSocket) 267 return None; 268 269 assert(ThreadPoolNum < compute_thread_count() && 270 "The thread index is not within thread strategy's range!"); 271 272 // Assumes the same number of hardware threads per CPU socket. 273 return (ThreadPoolNum * Groups.size()) / compute_thread_count(); 274} 275 276// Assign the current thread to a more appropriate CPU socket or CPU group 277void llvm::ThreadPoolStrategy::apply_thread_strategy( 278 unsigned ThreadPoolNum) const { 279 Optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum); 280 if (!Socket) 281 return; 282 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 283 GROUP_AFFINITY Affinity{}; 284 Affinity.Group = Groups[*Socket].ID; 285 Affinity.Mask = Groups[*Socket].Affinity; 286 SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr); 287} 288 289llvm::BitVector llvm::get_thread_affinity_mask() { 290 GROUP_AFFINITY Affinity{}; 291 GetThreadGroupAffinity(GetCurrentThread(), &Affinity); 292 293 static unsigned All = 294 aggregate(getProcessorGroups(), 295 [](const ProcessorGroup &G) { return G.AllThreads; }); 296 297 unsigned StartOffset = 298 aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) { 299 return G.ID < Affinity.Group ? G.AllThreads : 0; 300 }); 301 302 llvm::BitVector V; 303 V.resize(All); 304 for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) { 305 if ((Affinity.Mask >> I) & 1) 306 V.set(StartOffset + I); 307 } 308 return V; 309} 310 311unsigned llvm::get_cpus() { return getProcessorGroups().size(); } 312