1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file provides the Win32 specific implementation of Threading functions. 10// 11//===----------------------------------------------------------------------===// 12 13#include "llvm/ADT/SmallString.h" 14#include "llvm/ADT/Twine.h" 15 16#include "llvm/Support/Windows/WindowsSupport.h" 17#include <process.h> 18 19#include <bitset> 20 21// Windows will at times define MemoryFence. 22#ifdef MemoryFence 23#undef MemoryFence 24#endif 25 26static unsigned __stdcall threadFuncSync(void *Arg) { 27 SyncThreadInfo *TI = static_cast<SyncThreadInfo *>(Arg); 28 TI->UserFn(TI->UserData); 29 return 0; 30} 31 32static unsigned __stdcall threadFuncAsync(void *Arg) { 33 std::unique_ptr<AsyncThreadInfo> Info(static_cast<AsyncThreadInfo *>(Arg)); 34 (*Info)(); 35 return 0; 36} 37 38static void 39llvm_execute_on_thread_impl(unsigned (__stdcall *ThreadFunc)(void *), void *Arg, 40 llvm::Optional<unsigned> StackSizeInBytes, 41 JoiningPolicy JP) { 42 HANDLE hThread = (HANDLE)::_beginthreadex( 43 NULL, StackSizeInBytes.getValueOr(0), ThreadFunc, Arg, 0, NULL); 44 45 if (!hThread) { 46 ReportLastErrorFatal("_beginthreadex failed"); 47 } 48 49 if (JP == JoiningPolicy::Join) { 50 if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) { 51 ReportLastErrorFatal("WaitForSingleObject failed"); 52 } 53 } 54 if (::CloseHandle(hThread) == FALSE) { 55 ReportLastErrorFatal("CloseHandle failed"); 56 } 57} 58 59uint64_t llvm::get_threadid() { 60 return uint64_t(::GetCurrentThreadId()); 61} 62 63uint32_t llvm::get_max_thread_name_length() { return 0; } 64 65#if defined(_MSC_VER) 66static void SetThreadName(DWORD Id, LPCSTR Name) { 67 constexpr DWORD MS_VC_EXCEPTION = 0x406D1388; 68 69#pragma pack(push, 8) 70 struct THREADNAME_INFO { 71 DWORD dwType; // Must be 0x1000. 72 LPCSTR szName; // Pointer to thread name 73 DWORD dwThreadId; // Thread ID (-1 == current thread) 74 DWORD dwFlags; // Reserved. Do not use. 75 }; 76#pragma pack(pop) 77 78 THREADNAME_INFO info; 79 info.dwType = 0x1000; 80 info.szName = Name; 81 info.dwThreadId = Id; 82 info.dwFlags = 0; 83 84 __try { 85 ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), 86 (ULONG_PTR *)&info); 87 } 88 __except (EXCEPTION_EXECUTE_HANDLER) { 89 } 90} 91#endif 92 93void llvm::set_thread_name(const Twine &Name) { 94#if defined(_MSC_VER) 95 // Make sure the input is null terminated. 96 SmallString<64> Storage; 97 StringRef NameStr = Name.toNullTerminatedStringRef(Storage); 98 SetThreadName(::GetCurrentThreadId(), NameStr.data()); 99#endif 100} 101 102void llvm::get_thread_name(SmallVectorImpl<char> &Name) { 103 // "Name" is not an inherent property of a thread on Windows. In fact, when 104 // you "set" the name, you are only firing a one-time message to a debugger 105 // which it interprets as a program setting its threads' name. We may be 106 // able to get fancy by creating a TLS entry when someone calls 107 // set_thread_name so that subsequent calls to get_thread_name return this 108 // value. 109 Name.clear(); 110} 111 112SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { 113 // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority 114 // Begin background processing mode. The system lowers the resource scheduling 115 // priorities of the thread so that it can perform background work without 116 // significantly affecting activity in the foreground. 117 // End background processing mode. The system restores the resource scheduling 118 // priorities of the thread as they were before the thread entered background 119 // processing mode. 120 return SetThreadPriority(GetCurrentThread(), 121 Priority == ThreadPriority::Background 122 ? THREAD_MODE_BACKGROUND_BEGIN 123 : THREAD_MODE_BACKGROUND_END) 124 ? SetThreadPriorityResult::SUCCESS 125 : SetThreadPriorityResult::FAILURE; 126} 127 128struct ProcessorGroup { 129 unsigned ID; 130 unsigned AllThreads; 131 unsigned UsableThreads; 132 unsigned ThreadsPerCore; 133 uint64_t Affinity; 134 135 unsigned useableCores() const { 136 return std::max(1U, UsableThreads / ThreadsPerCore); 137 } 138}; 139 140template <typename F> 141static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) { 142 DWORD Len = 0; 143 BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len); 144 if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) { 145 return false; 146 } 147 auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len); 148 R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len); 149 if (R) { 150 auto *End = 151 (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len); 152 for (auto *Curr = Info; Curr < End; 153 Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr + 154 Curr->Size)) { 155 if (Curr->Relationship != Relationship) 156 continue; 157 Fn(Curr); 158 } 159 } 160 free(Info); 161 return true; 162} 163 164static ArrayRef<ProcessorGroup> getProcessorGroups() { 165 auto computeGroups = []() { 166 SmallVector<ProcessorGroup, 4> Groups; 167 168 auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 169 GROUP_RELATIONSHIP &El = ProcInfo->Group; 170 for (unsigned J = 0; J < El.ActiveGroupCount; ++J) { 171 ProcessorGroup G; 172 G.ID = Groups.size(); 173 G.AllThreads = El.GroupInfo[J].MaximumProcessorCount; 174 G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount; 175 assert(G.UsableThreads <= 64); 176 G.Affinity = El.GroupInfo[J].ActiveProcessorMask; 177 Groups.push_back(G); 178 } 179 }; 180 181 if (!IterateProcInfo(RelationGroup, HandleGroup)) 182 return std::vector<ProcessorGroup>(); 183 184 auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { 185 PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor; 186 assert(El.GroupCount == 1); 187 unsigned NumHyperThreads = 1; 188 // If the flag is set, each core supports more than one hyper-thread. 189 if (El.Flags & LTP_PC_SMT) 190 NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count(); 191 unsigned I = El.GroupMask[0].Group; 192 Groups[I].ThreadsPerCore = NumHyperThreads; 193 }; 194 195 if (!IterateProcInfo(RelationProcessorCore, HandleProc)) 196 return std::vector<ProcessorGroup>(); 197 198 // If there's an affinity mask set on one of the CPUs, then assume the user 199 // wants to constrain the current process to only a single CPU. 200 for (auto &G : Groups) { 201 if (G.UsableThreads != G.AllThreads) { 202 ProcessorGroup NewG{G}; 203 Groups.clear(); 204 Groups.push_back(NewG); 205 break; 206 } 207 } 208 209 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end()); 210 }; 211 static auto Groups = computeGroups(); 212 return ArrayRef<ProcessorGroup>(Groups); 213} 214 215template <typename R, typename UnaryPredicate> 216static unsigned aggregate(R &&Range, UnaryPredicate P) { 217 unsigned I{}; 218 for (const auto &It : Range) 219 I += P(It); 220 return I; 221} 222 223// for sys::getHostNumPhysicalCores 224int computeHostNumPhysicalCores() { 225 static unsigned Cores = 226 aggregate(getProcessorGroups(), [](const ProcessorGroup &G) { 227 return G.UsableThreads / G.ThreadsPerCore; 228 }); 229 return Cores; 230} 231 232int computeHostNumHardwareThreads() { 233 static unsigned Threads = 234 aggregate(getProcessorGroups(), 235 [](const ProcessorGroup &G) { return G.UsableThreads; }); 236 return Threads; 237} 238 239// Finds the proper CPU socket where a thread number should go. Returns 'None' 240// if the thread shall remain on the actual CPU socket. 241Optional<unsigned> 242llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const { 243 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 244 // Only one CPU socket in the system or process affinity was set, no need to 245 // move the thread(s) to another CPU socket. 246 if (Groups.size() <= 1) 247 return None; 248 249 // We ask for less threads than there are hardware threads per CPU socket, no 250 // need to dispatch threads to other CPU sockets. 251 unsigned MaxThreadsPerSocket = 252 UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores(); 253 if (compute_thread_count() <= MaxThreadsPerSocket) 254 return None; 255 256 assert(ThreadPoolNum < compute_thread_count() && 257 "The thread index is not within thread strategy's range!"); 258 259 // Assumes the same number of hardware threads per CPU socket. 260 return (ThreadPoolNum * Groups.size()) / compute_thread_count(); 261} 262 263// Assign the current thread to a more appropriate CPU socket or CPU group 264void llvm::ThreadPoolStrategy::apply_thread_strategy( 265 unsigned ThreadPoolNum) const { 266 Optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum); 267 if (!Socket) 268 return; 269 ArrayRef<ProcessorGroup> Groups = getProcessorGroups(); 270 GROUP_AFFINITY Affinity{}; 271 Affinity.Group = Groups[*Socket].ID; 272 Affinity.Mask = Groups[*Socket].Affinity; 273 SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr); 274} 275 276llvm::BitVector llvm::get_thread_affinity_mask() { 277 GROUP_AFFINITY Affinity{}; 278 GetThreadGroupAffinity(GetCurrentThread(), &Affinity); 279 280 static unsigned All = 281 aggregate(getProcessorGroups(), 282 [](const ProcessorGroup &G) { return G.AllThreads; }); 283 284 unsigned StartOffset = 285 aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) { 286 return G.ID < Affinity.Group ? G.AllThreads : 0; 287 }); 288 289 llvm::BitVector V; 290 V.resize(All); 291 for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) { 292 if ((Affinity.Mask >> I) & 1) 293 V.set(StartOffset + I); 294 } 295 return V; 296} 297 298unsigned llvm::get_cpus() { return getProcessorGroups().size(); } 299