1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file provides the Win32 specific implementation of Threading functions. 10// 11//===----------------------------------------------------------------------===// 12 13#include "llvm/ADT/SmallString.h" 14#include "llvm/ADT/Twine.h" 15 16#include "llvm/Support/Windows/WindowsSupport.h" 17#include <process.h> 18 19#include <bitset> 20 21// Windows will at times define MemoryFence. 22#ifdef MemoryFence 23#undef MemoryFence 24#endif 25 26namespace llvm { 27HANDLE 28llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg, 29 llvm::Optional<unsigned> StackSizeInBytes) { 30 HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0), 31 ThreadFunc, Arg, 0, NULL); 32 33 if (!hThread) { 34 ReportLastErrorFatal("_beginthreadex failed"); 35 } 36 37 return hThread; 38} 39 40void llvm_thread_join_impl(HANDLE hThread) { 41 if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) { 42 ReportLastErrorFatal("WaitForSingleObject failed"); 43 } 44} 45 46void llvm_thread_detach_impl(HANDLE hThread) { 47 if (::CloseHandle(hThread) == FALSE) { 48 ReportLastErrorFatal("CloseHandle failed"); 49 } 50} 51 52DWORD llvm_thread_get_id_impl(HANDLE hThread) { 53 return ::GetThreadId(hThread); 54} 55 56DWORD llvm_thread_get_current_id_impl() { 57 return ::GetCurrentThreadId(); 58} 59 60} // namespace llvm 61 62uint64_t llvm::get_threadid() { 63 return uint64_t(::GetCurrentThreadId()); 64} 65 66uint32_t llvm::get_max_thread_name_length() { return 0; } 67 68#if defined(_MSC_VER) 69static void SetThreadName(DWORD Id, LPCSTR Name) { 70 constexpr DWORD MS_VC_EXCEPTION = 0x406D1388; 71 72#pragma pack(push, 8) 73 struct THREADNAME_INFO { 74 DWORD dwType; // Must be 0x1000. 75 LPCSTR szName; // Pointer to thread name 76 DWORD dwThreadId; // Thread ID (-1 == current thread) 77 DWORD dwFlags; // Reserved. Do not use. 78 }; 79#pragma pack(pop) 80 81 THREADNAME_INFO info; 82 info.dwType = 0x1000; 83 info.szName = Name; 84 info.dwThreadId = Id; 85 info.dwFlags = 0; 86 87 __try { 88 ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), 89 (ULONG_PTR *)&info); 90 } 91 __except (EXCEPTION_EXECUTE_HANDLER) { 92 } 93} 94#endif 95 96void llvm::set_thread_name(const Twine &Name) { 97#if defined(_MSC_VER) 98 // Make sure the input is null terminated. 99 SmallString<64> Storage; 100 StringRef NameStr = Name.toNullTerminatedStringRef(Storage); 101 SetThreadName(::GetCurrentThreadId(), NameStr.data()); 102#endif 103} 104 105void llvm::get_thread_name(SmallVectorImpl<char> &Name) { 106 // "Name" is not an inherent property of a thread on Windows. In fact, when 107 // you "set" the name, you are only firing a one-time message to a debugger 108 // which it interprets as a program setting its threads' name. We may be 109 // able to get fancy by creating a TLS entry when someone calls 110 // set_thread_name so that subsequent calls to get_thread_name return this 111 // value. 112 Name.clear(); 113} 114 115SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { 116 // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority 117 // Begin background processing mode. The system lowers the resource scheduling 118 // priorities of the thread so that it can perform background work without 119 // significantly affecting activity in the foreground. 120 // End background processing mode. The system restores the resource scheduling 121 // priorities of the thread as they were before the thread entered background 122 // processing mode. 123 // 124 // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low 125 return SetThreadPriority(GetCurrentThread(), 126 Priority != ThreadPriority::Default 127 ? THREAD_MODE_BACKGROUND_BEGIN 128 : THREAD_MODE_BACKGROUND_END) 129 ? SetThreadPriorityResult::SUCCESS 130 : SetThreadPriorityResult::FAILURE; 131} 132 133struct ProcessorGroup { 134 unsigned ID; 135 unsigned AllThreads; 136 unsigned UsableThreads; 137 unsigned ThreadsPerCore; 138 uint64_t Affinity; 139 140 unsigned useableCores() const { 141 return std::max(1U, UsableThreads / ThreadsPerCore); 142 } 143}; 144 145template <typename F> 146static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) { 147 DWORD Len = 0; 148 BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len); 149 if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) { 150 return false; 151 } 152 auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len); 153 R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len); 154 if (R) { 155 auto *End = 156 (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len); 157 for (auto *Curr = Info; Curr < End; 158 Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr + 159 Curr->Size)) { 160 if (Curr->Relationship != Relationship) 161 continue; 162 Fn(Curr); 163 } 164 } 165 free(Info); 166 return true; 167} 168 169static ArrayRef<ProcessorGroup> getProcessorGroups() { 170 auto computeGroups = []() { 171 SmallVector<ProcessorGroup, 4> Groups; 172 173 auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 174 GROUP_RELATIONSHIP &El = ProcInfo->Group; 175 for (unsigned J = 0; J < El.ActiveGroupCount; ++J) { 176 ProcessorGroup G; 177 G.ID = Groups.size(); 178 G.AllThreads = El.GroupInfo[J].MaximumProcessorCount; 179 G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount; 180 assert(G.UsableThreads <= 64); 181 G.Affinity = El.GroupInfo[J].ActiveProcessorMask; 182 Groups.push_back(G); 183 } 184 }; 185 186 if (!IterateProcInfo(RelationGroup, HandleGroup)) 187 return std::vector<ProcessorGroup>(); 188 189 auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 190 PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor; 191 assert(El.GroupCount == 1); 192 unsigned NumHyperThreads = 1; 193 // If the flag is set, each core supports more than one hyper-thread. 194 if (El.Flags & LTP_PC_SMT) 195 NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count(); 196 unsigned I = El.GroupMask[0].Group; 197 Groups[I].ThreadsPerCore = NumHyperThreads; 198 }; 199 200 if (!IterateProcInfo(RelationProcessorCore, HandleProc)) 201 return std::vector<ProcessorGroup>(); 202 203 // If there's an affinity mask set, assume the user wants to constrain the 204 // current process to only a single CPU group. On Windows, it is not 205 // possible for affinity masks to cross CPU group boundaries. 206 DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0; 207 if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask, 208 &SystemAffinityMask) && 209 ProcessAffinityMask != SystemAffinityMask) { 210 // We don't expect more that 4 CPU groups on Windows (256 processors). 211 USHORT GroupCount = 4; 212 USHORT GroupArray[4]{}; 213 if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount, 214 GroupArray)) { 215 assert(GroupCount == 1 && 216 "On startup, a program is expected to be assigned only to " 217 "one processor group!"); 218 unsigned CurrentGroupID = GroupArray[0]; 219 ProcessorGroup NewG{Groups[CurrentGroupID]}; 220 NewG.Affinity = ProcessAffinityMask; 221 NewG.UsableThreads = countPopulation(ProcessAffinityMask); 222 Groups.clear(); 223 Groups.push_back(NewG); 224 } 225 } 226 227 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end()); 228 }; 229 static auto Groups = computeGroups(); 230 return ArrayRef<ProcessorGroup>(Groups); 231} 232 233template <typename R, typename UnaryPredicate> 234static unsigned aggregate(R &&Range, UnaryPredicate P) { 235 unsigned I{}; 236 for (const auto &It : Range) 237 I += P(It); 238 return I; 239} 240 241// for sys::getHostNumPhysicalCores 242int computeHostNumPhysicalCores() { 243 static unsigned Cores = 244 aggregate(getProcessorGroups(), [](const ProcessorGroup &G) { 245 return G.UsableThreads / G.ThreadsPerCore; 246 }); 247 return Cores; 248} 249 250int computeHostNumHardwareThreads() { 251 static unsigned Threads = 252 aggregate(getProcessorGroups(), 253 [](const ProcessorGroup &G) { return G.UsableThreads; }); 254 return Threads; 255} 256 257// Finds the proper CPU socket where a thread number should go. Returns 'None' 258// if the thread shall remain on the actual CPU socket. 259Optional<unsigned> 260llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const { 261 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 262 // Only one CPU socket in the system or process affinity was set, no need to 263 // move the thread(s) to another CPU socket. 264 if (Groups.size() <= 1) 265 return None; 266 267 // We ask for less threads than there are hardware threads per CPU socket, no 268 // need to dispatch threads to other CPU sockets. 269 unsigned MaxThreadsPerSocket = 270 UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores(); 271 if (compute_thread_count() <= MaxThreadsPerSocket) 272 return None; 273 274 assert(ThreadPoolNum < compute_thread_count() && 275 "The thread index is not within thread strategy's range!"); 276 277 // Assumes the same number of hardware threads per CPU socket. 278 return (ThreadPoolNum * Groups.size()) / compute_thread_count(); 279} 280 281// Assign the current thread to a more appropriate CPU socket or CPU group 282void llvm::ThreadPoolStrategy::apply_thread_strategy( 283 unsigned ThreadPoolNum) const { 284 Optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum); 285 if (!Socket) 286 return; 287 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 288 GROUP_AFFINITY Affinity{}; 289 Affinity.Group = Groups[*Socket].ID; 290 Affinity.Mask = Groups[*Socket].Affinity; 291 SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr); 292} 293 294llvm::BitVector llvm::get_thread_affinity_mask() { 295 GROUP_AFFINITY Affinity{}; 296 GetThreadGroupAffinity(GetCurrentThread(), &Affinity); 297 298 static unsigned All = 299 aggregate(getProcessorGroups(), 300 [](const ProcessorGroup &G) { return G.AllThreads; }); 301 302 unsigned StartOffset = 303 aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) { 304 return G.ID < Affinity.Group ? G.AllThreads : 0; 305 }); 306 307 llvm::BitVector V; 308 V.resize(All); 309 for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) { 310 if ((Affinity.Mask >> I) & 1) 311 V.set(StartOffset + I); 312 } 313 return V; 314} 315 316unsigned llvm::get_cpus() { return getProcessorGroups().size(); } 317