1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file provides the Win32 specific implementation of Threading functions. 10// 11//===----------------------------------------------------------------------===// 12 13#include "llvm/ADT/SmallString.h" 14#include "llvm/ADT/Twine.h" 15 16#include "llvm/Support/Windows/WindowsSupport.h" 17#include <process.h> 18 19#include <bitset> 20 21// Windows will at times define MemoryFence. 22#ifdef MemoryFence 23#undef MemoryFence 24#endif 25 26namespace llvm { 27HANDLE 28llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg, 29 llvm::Optional<unsigned> StackSizeInBytes) { 30 HANDLE hThread = (HANDLE)::_beginthreadex( 31 NULL, StackSizeInBytes.getValueOr(0), ThreadFunc, Arg, 0, NULL); 32 33 if (!hThread) { 34 ReportLastErrorFatal("_beginthreadex failed"); 35 } 36 37 return hThread; 38} 39 40void llvm_thread_join_impl(HANDLE hThread) { 41 if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) { 42 ReportLastErrorFatal("WaitForSingleObject failed"); 43 } 44} 45 46void llvm_thread_detach_impl(HANDLE hThread) { 47 if (::CloseHandle(hThread) == FALSE) { 48 ReportLastErrorFatal("CloseHandle failed"); 49 } 50} 51 52DWORD llvm_thread_get_id_impl(HANDLE hThread) { 53 return ::GetThreadId(hThread); 54} 55 56DWORD llvm_thread_get_current_id_impl() { 57 return ::GetCurrentThreadId(); 58} 59 60} // namespace llvm 61 62uint64_t llvm::get_threadid() { 63 return uint64_t(::GetCurrentThreadId()); 64} 65 66uint32_t llvm::get_max_thread_name_length() { return 0; } 67 68#if defined(_MSC_VER) 69static void SetThreadName(DWORD Id, LPCSTR Name) { 70 constexpr DWORD MS_VC_EXCEPTION = 0x406D1388; 71 72#pragma pack(push, 8) 73 struct THREADNAME_INFO { 74 DWORD dwType; // Must be 0x1000. 75 LPCSTR szName; // Pointer to thread name 76 DWORD dwThreadId; // Thread ID (-1 == current thread) 77 DWORD dwFlags; // Reserved. Do not use. 78 }; 79#pragma pack(pop) 80 81 THREADNAME_INFO info; 82 info.dwType = 0x1000; 83 info.szName = Name; 84 info.dwThreadId = Id; 85 info.dwFlags = 0; 86 87 __try { 88 ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), 89 (ULONG_PTR *)&info); 90 } 91 __except (EXCEPTION_EXECUTE_HANDLER) { 92 } 93} 94#endif 95 96void llvm::set_thread_name(const Twine &Name) { 97#if defined(_MSC_VER) 98 // Make sure the input is null terminated. 99 SmallString<64> Storage; 100 StringRef NameStr = Name.toNullTerminatedStringRef(Storage); 101 SetThreadName(::GetCurrentThreadId(), NameStr.data()); 102#endif 103} 104 105void llvm::get_thread_name(SmallVectorImpl<char> &Name) { 106 // "Name" is not an inherent property of a thread on Windows. In fact, when 107 // you "set" the name, you are only firing a one-time message to a debugger 108 // which it interprets as a program setting its threads' name. We may be 109 // able to get fancy by creating a TLS entry when someone calls 110 // set_thread_name so that subsequent calls to get_thread_name return this 111 // value. 112 Name.clear(); 113} 114 115SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { 116 // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority 117 // Begin background processing mode. The system lowers the resource scheduling 118 // priorities of the thread so that it can perform background work without 119 // significantly affecting activity in the foreground. 120 // End background processing mode. The system restores the resource scheduling 121 // priorities of the thread as they were before the thread entered background 122 // processing mode. 123 return SetThreadPriority(GetCurrentThread(), 124 Priority == ThreadPriority::Background 125 ? THREAD_MODE_BACKGROUND_BEGIN 126 : THREAD_MODE_BACKGROUND_END) 127 ? SetThreadPriorityResult::SUCCESS 128 : SetThreadPriorityResult::FAILURE; 129} 130 131struct ProcessorGroup { 132 unsigned ID; 133 unsigned AllThreads; 134 unsigned UsableThreads; 135 unsigned ThreadsPerCore; 136 uint64_t Affinity; 137 138 unsigned useableCores() const { 139 return std::max(1U, UsableThreads / ThreadsPerCore); 140 } 141}; 142 143template <typename F> 144static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) { 145 DWORD Len = 0; 146 BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len); 147 if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) { 148 return false; 149 } 150 auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len); 151 R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len); 152 if (R) { 153 auto *End = 154 (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len); 155 for (auto *Curr = Info; Curr < End; 156 Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr + 157 Curr->Size)) { 158 if (Curr->Relationship != Relationship) 159 continue; 160 Fn(Curr); 161 } 162 } 163 free(Info); 164 return true; 165} 166 167static ArrayRef<ProcessorGroup> getProcessorGroups() { 168 auto computeGroups = []() { 169 SmallVector<ProcessorGroup, 4> Groups; 170 171 auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 172 GROUP_RELATIONSHIP &El = ProcInfo->Group; 173 for (unsigned J = 0; J < El.ActiveGroupCount; ++J) { 174 ProcessorGroup G; 175 G.ID = Groups.size(); 176 G.AllThreads = El.GroupInfo[J].MaximumProcessorCount; 177 G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount; 178 assert(G.UsableThreads <= 64); 179 G.Affinity = El.GroupInfo[J].ActiveProcessorMask; 180 Groups.push_back(G); 181 } 182 }; 183 184 if (!IterateProcInfo(RelationGroup, HandleGroup)) 185 return std::vector<ProcessorGroup>(); 186 187 auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 188 PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor; 189 assert(El.GroupCount == 1); 190 unsigned NumHyperThreads = 1; 191 // If the flag is set, each core supports more than one hyper-thread. 192 if (El.Flags & LTP_PC_SMT) 193 NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count(); 194 unsigned I = El.GroupMask[0].Group; 195 Groups[I].ThreadsPerCore = NumHyperThreads; 196 }; 197 198 if (!IterateProcInfo(RelationProcessorCore, HandleProc)) 199 return std::vector<ProcessorGroup>(); 200 201 // If there's an affinity mask set, assume the user wants to constrain the 202 // current process to only a single CPU group. On Windows, it is not 203 // possible for affinity masks to cross CPU group boundaries. 204 DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0; 205 if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask, 206 &SystemAffinityMask) && 207 ProcessAffinityMask != SystemAffinityMask) { 208 // We don't expect more that 4 CPU groups on Windows (256 processors). 209 USHORT GroupCount = 4; 210 USHORT GroupArray[4]{}; 211 if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount, 212 GroupArray)) { 213 assert(GroupCount == 1 && 214 "On startup, a program is expected to be assigned only to " 215 "one processor group!"); 216 unsigned CurrentGroupID = GroupArray[0]; 217 ProcessorGroup NewG{Groups[CurrentGroupID]}; 218 NewG.Affinity = ProcessAffinityMask; 219 NewG.UsableThreads = countPopulation(ProcessAffinityMask); 220 Groups.clear(); 221 Groups.push_back(NewG); 222 } 223 } 224 225 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end()); 226 }; 227 static auto Groups = computeGroups(); 228 return ArrayRef<ProcessorGroup>(Groups); 229} 230 231template <typename R, typename UnaryPredicate> 232static unsigned aggregate(R &&Range, UnaryPredicate P) { 233 unsigned I{}; 234 for (const auto &It : Range) 235 I += P(It); 236 return I; 237} 238 239// for sys::getHostNumPhysicalCores 240int computeHostNumPhysicalCores() { 241 static unsigned Cores = 242 aggregate(getProcessorGroups(), [](const ProcessorGroup &G) { 243 return G.UsableThreads / G.ThreadsPerCore; 244 }); 245 return Cores; 246} 247 248int computeHostNumHardwareThreads() { 249 static unsigned Threads = 250 aggregate(getProcessorGroups(), 251 [](const ProcessorGroup &G) { return G.UsableThreads; }); 252 return Threads; 253} 254 255// Finds the proper CPU socket where a thread number should go. Returns 'None' 256// if the thread shall remain on the actual CPU socket. 257Optional<unsigned> 258llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const { 259 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 260 // Only one CPU socket in the system or process affinity was set, no need to 261 // move the thread(s) to another CPU socket. 262 if (Groups.size() <= 1) 263 return None; 264 265 // We ask for less threads than there are hardware threads per CPU socket, no 266 // need to dispatch threads to other CPU sockets. 267 unsigned MaxThreadsPerSocket = 268 UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores(); 269 if (compute_thread_count() <= MaxThreadsPerSocket) 270 return None; 271 272 assert(ThreadPoolNum < compute_thread_count() && 273 "The thread index is not within thread strategy's range!"); 274 275 // Assumes the same number of hardware threads per CPU socket. 276 return (ThreadPoolNum * Groups.size()) / compute_thread_count(); 277} 278 279// Assign the current thread to a more appropriate CPU socket or CPU group 280void llvm::ThreadPoolStrategy::apply_thread_strategy( 281 unsigned ThreadPoolNum) const { 282 Optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum); 283 if (!Socket) 284 return; 285 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 286 GROUP_AFFINITY Affinity{}; 287 Affinity.Group = Groups[*Socket].ID; 288 Affinity.Mask = Groups[*Socket].Affinity; 289 SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr); 290} 291 292llvm::BitVector llvm::get_thread_affinity_mask() { 293 GROUP_AFFINITY Affinity{}; 294 GetThreadGroupAffinity(GetCurrentThread(), &Affinity); 295 296 static unsigned All = 297 aggregate(getProcessorGroups(), 298 [](const ProcessorGroup &G) { return G.AllThreads; }); 299 300 unsigned StartOffset = 301 aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) { 302 return G.ID < Affinity.Group ? G.AllThreads : 0; 303 }); 304 305 llvm::BitVector V; 306 V.resize(All); 307 for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) { 308 if ((Affinity.Mask >> I) & 1) 309 V.set(StartOffset + I); 310 } 311 return V; 312} 313 314unsigned llvm::get_cpus() { return getProcessorGroups().size(); } 315