1//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file provides the Unix specific implementation of Threading functions. 10// 11//===----------------------------------------------------------------------===// 12 13#include "Unix.h" 14#include "llvm/ADT/ScopeExit.h" 15#include "llvm/ADT/SmallString.h" 16#include "llvm/ADT/SmallVector.h" 17#include "llvm/ADT/StringRef.h" 18#include "llvm/ADT/Twine.h" 19#include "llvm/Support/MemoryBuffer.h" 20#include "llvm/Support/raw_ostream.h" 21 22#if defined(__APPLE__) 23#include <mach/mach_init.h> 24#include <mach/mach_port.h> 25#include <pthread/qos.h> 26#include <sys/sysctl.h> 27#include <sys/types.h> 28#endif 29 30#include <pthread.h> 31 32#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) 33#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np() 34#endif 35 36// Must be included after Threading.inc to provide definition for llvm::thread 37// because FreeBSD's condvar.h (included by user.h) misuses the "thread" 38// keyword. 39#ifndef __FreeBSD__ 40#include "llvm/Support/thread.h" 41#endif 42 43#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 44#include <errno.h> 45#include <sys/cpuset.h> 46#include <sys/sysctl.h> 47#include <sys/user.h> 48#include <unistd.h> 49#endif 50 51#if defined(__NetBSD__) 52#include <lwp.h> // For _lwp_self() 53#endif 54 55#if defined(__OpenBSD__) 56#include <unistd.h> // For getthrid() 57#endif 58 59#if defined(__linux__) 60#include <sched.h> // For sched_getaffinity 61#include <sys/syscall.h> // For syscall codes 62#include <unistd.h> // For syscall() 63#endif 64 65#if defined(__CYGWIN__) 66#include <sys/cpuset.h> 67#endif 68 69#if defined(__HAIKU__) 70#include <OS.h> // For B_OS_NAME_LENGTH 71#endif 72 73namespace llvm { 74pthread_t 75llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg, 76 std::optional<unsigned> StackSizeInBytes) { 77 int errnum; 78 79 // Construct the attributes object. 80 pthread_attr_t Attr; 81 if ((errnum = ::pthread_attr_init(&Attr)) != 0) { 82 ReportErrnumFatal("pthread_attr_init failed", errnum); 83 } 84 85 auto AttrGuard = llvm::make_scope_exit([&] { 86 if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) { 87 ReportErrnumFatal("pthread_attr_destroy failed", errnum); 88 } 89 }); 90 91 // Set the requested stack size, if given. 92 if (StackSizeInBytes) { 93 if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) { 94 ReportErrnumFatal("pthread_attr_setstacksize failed", errnum); 95 } 96 } 97 98 // Construct and execute the thread. 99 pthread_t Thread; 100 if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0) 101 ReportErrnumFatal("pthread_create failed", errnum); 102 103 return Thread; 104} 105 106void llvm_thread_detach_impl(pthread_t Thread) { 107 int errnum; 108 109 if ((errnum = ::pthread_detach(Thread)) != 0) { 110 ReportErrnumFatal("pthread_detach failed", errnum); 111 } 112} 113 114void llvm_thread_join_impl(pthread_t Thread) { 115 int errnum; 116 117 if ((errnum = ::pthread_join(Thread, nullptr)) != 0) { 118 ReportErrnumFatal("pthread_join failed", errnum); 119 } 120} 121 122pthread_t llvm_thread_get_id_impl(pthread_t Thread) { return Thread; } 123 124pthread_t llvm_thread_get_current_id_impl() { return ::pthread_self(); } 125 126} // namespace llvm 127 128uint64_t llvm::get_threadid() { 129#if defined(__APPLE__) 130 // Calling "mach_thread_self()" bumps the reference count on the thread 131 // port, so we need to deallocate it. mach_task_self() doesn't bump the ref 132 // count. 133 static thread_local thread_port_t Self = [] { 134 thread_port_t InitSelf = mach_thread_self(); 135 mach_port_deallocate(mach_task_self(), Self); 136 return InitSelf; 137 }(); 138 return Self; 139#elif defined(__FreeBSD__) || defined(__DragonFly__) 140 return uint64_t(pthread_getthreadid_np()); 141#elif defined(__NetBSD__) 142 return uint64_t(_lwp_self()); 143#elif defined(__OpenBSD__) 144 return uint64_t(getthrid()); 145#elif defined(__ANDROID__) 146 return uint64_t(gettid()); 147#elif defined(__linux__) 148 return uint64_t(syscall(__NR_gettid)); 149#elif defined(_AIX) 150 return uint64_t(thread_self()); 151#else 152 return uint64_t(pthread_self()); 153#endif 154} 155 156static constexpr uint32_t get_max_thread_name_length_impl() { 157#if defined(PTHREAD_MAX_NAMELEN_NP) 158 return PTHREAD_MAX_NAMELEN_NP; 159#elif defined(__HAIKU__) 160 return B_OS_NAME_LENGTH; 161#elif defined(__APPLE__) 162 return 64; 163#elif defined(__sun__) && defined(__svr4__) 164 return 31; 165#elif defined(__linux__) && HAVE_PTHREAD_SETNAME_NP 166 return 16; 167#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ 168 defined(__DragonFly__) 169 return 16; 170#elif defined(__OpenBSD__) 171 return 24; 172#elif defined(__CYGWIN__) 173 return 16; 174#else 175 return 0; 176#endif 177} 178 179uint32_t llvm::get_max_thread_name_length() { 180 return get_max_thread_name_length_impl(); 181} 182 183void llvm::set_thread_name(const Twine &Name) { 184 // Make sure the input is null terminated. 185 SmallString<64> Storage; 186 StringRef NameStr = Name.toNullTerminatedStringRef(Storage); 187 188 // Truncate from the beginning, not the end, if the specified name is too 189 // long. For one, this ensures that the resulting string is still null 190 // terminated, but additionally the end of a long thread name will usually 191 // be more unique than the beginning, since a common pattern is for similar 192 // threads to share a common prefix. 193 // Note that the name length includes the null terminator. 194 if (get_max_thread_name_length() > 0) 195 NameStr = NameStr.take_back(get_max_thread_name_length() - 1); 196 (void)NameStr; 197#if defined(HAVE_PTHREAD_SET_NAME_NP) 198 ::pthread_set_name_np(::pthread_self(), NameStr.data()); 199#elif defined(HAVE_PTHREAD_SETNAME_NP) 200#if defined(__NetBSD__) 201 ::pthread_setname_np(::pthread_self(), "%s", 202 const_cast<char *>(NameStr.data())); 203#elif defined(__APPLE__) 204 ::pthread_setname_np(NameStr.data()); 205#else 206 ::pthread_setname_np(::pthread_self(), NameStr.data()); 207#endif 208#endif 209} 210 211void llvm::get_thread_name(SmallVectorImpl<char> &Name) { 212 Name.clear(); 213 214#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 215 int pid = ::getpid(); 216 uint64_t tid = get_threadid(); 217 218 struct kinfo_proc *kp = nullptr, *nkp; 219 size_t len = 0; 220 int error; 221 int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD, 222 (int)pid}; 223 224 while (1) { 225 error = sysctl(ctl, 4, kp, &len, nullptr, 0); 226 if (kp == nullptr || (error != 0 && errno == ENOMEM)) { 227 // Add extra space in case threads are added before next call. 228 len += sizeof(*kp) + len / 10; 229 nkp = (struct kinfo_proc *)::realloc(kp, len); 230 if (nkp == nullptr) { 231 free(kp); 232 return; 233 } 234 kp = nkp; 235 continue; 236 } 237 if (error != 0) 238 len = 0; 239 break; 240 } 241 242 for (size_t i = 0; i < len / sizeof(*kp); i++) { 243 if (kp[i].ki_tid == (lwpid_t)tid) { 244 Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname)); 245 break; 246 } 247 } 248 free(kp); 249 return; 250#elif (defined(__linux__) || defined(__CYGWIN__)) && HAVE_PTHREAD_GETNAME_NP 251 constexpr uint32_t len = get_max_thread_name_length_impl(); 252 char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive. 253 if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len)) 254 Name.append(Buffer, Buffer + strlen(Buffer)); 255#elif defined(HAVE_PTHREAD_GET_NAME_NP) && HAVE_PTHREAD_GET_NAME_NP 256 constexpr uint32_t len = get_max_thread_name_length_impl(); 257 char buf[len]; 258 ::pthread_get_name_np(::pthread_self(), buf, len); 259 260 Name.append(buf, buf + strlen(buf)); 261 262#elif defined(HAVE_PTHREAD_GETNAME_NP) && HAVE_PTHREAD_GETNAME_NP 263 constexpr uint32_t len = get_max_thread_name_length_impl(); 264 char buf[len]; 265 ::pthread_getname_np(::pthread_self(), buf, len); 266 267 Name.append(buf, buf + strlen(buf)); 268#endif 269} 270 271SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { 272#if (defined(__linux__) || defined(__CYGWIN__)) && defined(SCHED_IDLE) 273 // Some *really* old glibcs are missing SCHED_IDLE. 274 // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html 275 // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html 276 sched_param priority; 277 // For each of the above policies, param->sched_priority must be 0. 278 priority.sched_priority = 0; 279 // SCHED_IDLE for running very low priority background jobs. 280 // SCHED_OTHER the standard round-robin time-sharing policy; 281 return !pthread_setschedparam( 282 pthread_self(), 283 // FIXME: consider SCHED_BATCH for Low 284 Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE, 285 &priority) 286 ? SetThreadPriorityResult::SUCCESS 287 : SetThreadPriorityResult::FAILURE; 288#elif defined(__APPLE__) 289 // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon 290 // 291 // Background - Applies to work that isn’t visible to the user and may take 292 // significant time to complete. Examples include indexing, backing up, or 293 // synchronizing data. This class emphasizes energy efficiency. 294 // 295 // Utility - Applies to work that takes anywhere from a few seconds to a few 296 // minutes to complete. Examples include downloading a document or importing 297 // data. This class offers a balance between responsiveness, performance, and 298 // energy efficiency. 299 const auto qosClass = [&]() { 300 switch (Priority) { 301 case ThreadPriority::Background: 302 return QOS_CLASS_BACKGROUND; 303 case ThreadPriority::Low: 304 return QOS_CLASS_UTILITY; 305 case ThreadPriority::Default: 306 return QOS_CLASS_DEFAULT; 307 } 308 }(); 309 return !pthread_set_qos_class_self_np(qosClass, 0) 310 ? SetThreadPriorityResult::SUCCESS 311 : SetThreadPriorityResult::FAILURE; 312#endif 313 return SetThreadPriorityResult::FAILURE; 314} 315 316#include <thread> 317 318static int computeHostNumHardwareThreads() { 319#if defined(__FreeBSD__) 320 cpuset_t mask; 321 CPU_ZERO(&mask); 322 if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mask), 323 &mask) == 0) 324 return CPU_COUNT(&mask); 325#elif (defined(__linux__) || defined(__CYGWIN__)) 326 cpu_set_t Set; 327 CPU_ZERO(&Set); 328 if (sched_getaffinity(0, sizeof(Set), &Set) == 0) 329 return CPU_COUNT(&Set); 330#endif 331 // Guard against std::thread::hardware_concurrency() returning 0. 332 if (unsigned Val = std::thread::hardware_concurrency()) 333 return Val; 334 return 1; 335} 336 337void llvm::ThreadPoolStrategy::apply_thread_strategy( 338 unsigned ThreadPoolNum) const {} 339 340llvm::BitVector llvm::get_thread_affinity_mask() { 341 // FIXME: Implement 342 llvm_unreachable("Not implemented!"); 343} 344 345unsigned llvm::get_cpus() { return 1; } 346 347#if (defined(__linux__) || defined(__CYGWIN__)) && \ 348 (defined(__i386__) || defined(__x86_64__)) 349// On Linux, the number of physical cores can be computed from /proc/cpuinfo, 350// using the number of unique physical/core id pairs. The following 351// implementation reads the /proc/cpuinfo format on an x86_64 system. 352static int computeHostNumPhysicalCores() { 353 // Enabled represents the number of physical id/core id pairs with at least 354 // one processor id enabled by the CPU affinity mask. 355 cpu_set_t Affinity, Enabled; 356 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0) 357 return -1; 358 CPU_ZERO(&Enabled); 359 360 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be 361 // mmapped because it appears to have 0 size. 362 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 363 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 364 if (std::error_code EC = Text.getError()) { 365 llvm::errs() << "Can't read " 366 << "/proc/cpuinfo: " << EC.message() << "\n"; 367 return -1; 368 } 369 SmallVector<StringRef, 8> strs; 370 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, 371 /*KeepEmpty=*/false); 372 int CurProcessor = -1; 373 int CurPhysicalId = -1; 374 int CurSiblings = -1; 375 int CurCoreId = -1; 376 for (StringRef Line : strs) { 377 std::pair<StringRef, StringRef> Data = Line.split(':'); 378 auto Name = Data.first.trim(); 379 auto Val = Data.second.trim(); 380 // These fields are available if the kernel is configured with CONFIG_SMP. 381 if (Name == "processor") 382 Val.getAsInteger(10, CurProcessor); 383 else if (Name == "physical id") 384 Val.getAsInteger(10, CurPhysicalId); 385 else if (Name == "siblings") 386 Val.getAsInteger(10, CurSiblings); 387 else if (Name == "core id") { 388 Val.getAsInteger(10, CurCoreId); 389 // The processor id corresponds to an index into cpu_set_t. 390 if (CPU_ISSET(CurProcessor, &Affinity)) 391 CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled); 392 } 393 } 394 return CPU_COUNT(&Enabled); 395} 396#elif (defined(__linux__) && defined(__s390x__)) || defined(_AIX) 397static int computeHostNumPhysicalCores() { 398 return sysconf(_SC_NPROCESSORS_ONLN); 399} 400#elif defined(__linux__) 401static int computeHostNumPhysicalCores() { 402 cpu_set_t Affinity; 403 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0) 404 return CPU_COUNT(&Affinity); 405 406 // The call to sched_getaffinity() may have failed because the Affinity 407 // mask is too small for the number of CPU's on the system (i.e. the 408 // system has more than 1024 CPUs). Allocate a mask large enough for 409 // twice as many CPUs. 410 cpu_set_t *DynAffinity; 411 DynAffinity = CPU_ALLOC(2048); 412 if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) { 413 int NumCPUs = CPU_COUNT(DynAffinity); 414 CPU_FREE(DynAffinity); 415 return NumCPUs; 416 } 417 return -1; 418} 419#elif defined(__APPLE__) 420// Gets the number of *physical cores* on the machine. 421static int computeHostNumPhysicalCores() { 422 uint32_t count; 423 size_t len = sizeof(count); 424 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); 425 if (count < 1) { 426 int nm[2]; 427 nm[0] = CTL_HW; 428 nm[1] = HW_AVAILCPU; 429 sysctl(nm, 2, &count, &len, NULL, 0); 430 if (count < 1) 431 return -1; 432 } 433 return count; 434} 435#elif defined(__MVS__) 436static int computeHostNumPhysicalCores() { 437 enum { 438 // Byte offset of the pointer to the Communications Vector Table (CVT) in 439 // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and 440 // will be zero-extended to uintptr_t. 441 FLCCVT = 16, 442 // Byte offset of the pointer to the Common System Data Area (CSD) in the 443 // CVT. The table entry is a 31-bit pointer and will be zero-extended to 444 // uintptr_t. 445 CVTCSD = 660, 446 // Byte offset to the number of live CPs in the LPAR, stored as a signed 447 // 32-bit value in the table. 448 CSD_NUMBER_ONLINE_STANDARD_CPS = 264, 449 }; 450 char *PSA = 0; 451 char *CVT = reinterpret_cast<char *>( 452 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT]))); 453 char *CSD = reinterpret_cast<char *>( 454 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD]))); 455 return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]); 456} 457#else 458// On other systems, return -1 to indicate unknown. 459static int computeHostNumPhysicalCores() { return -1; } 460#endif 461 462int llvm::get_physical_cores() { 463 static int NumCores = computeHostNumPhysicalCores(); 464 return NumCores; 465} 466