1//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file provides the Unix specific implementation of Threading functions. 10// 11//===----------------------------------------------------------------------===// 12 13#include "Unix.h" 14#include "llvm/ADT/ScopeExit.h" 15#include "llvm/ADT/SmallString.h" 16#include "llvm/ADT/SmallVector.h" 17#include "llvm/ADT/StringRef.h" 18#include "llvm/ADT/Twine.h" 19#include "llvm/Support/MemoryBuffer.h" 20#include "llvm/Support/raw_ostream.h" 21 22#if defined(__APPLE__) 23#include <mach/mach_init.h> 24#include <mach/mach_port.h> 25#include <pthread/qos.h> 26#include <sys/sysctl.h> 27#include <sys/types.h> 28#endif 29 30#include <pthread.h> 31 32#if defined(__FreeBSD__) || defined(__OpenBSD__) 33#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np() 34#endif 35 36#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 37#include <errno.h> 38#include <sys/cpuset.h> 39#include <sys/sysctl.h> 40#include <sys/user.h> 41#include <unistd.h> 42#endif 43 44#if defined(__NetBSD__) 45#include <lwp.h> // For _lwp_self() 46#endif 47 48#if defined(__OpenBSD__) 49#include <unistd.h> // For getthrid() 50#endif 51 52#if defined(__linux__) 53#include <sched.h> // For sched_getaffinity 54#include <sys/syscall.h> // For syscall codes 55#include <unistd.h> // For syscall() 56#endif 57 58namespace llvm { 59pthread_t 60llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg, 61 std::optional<unsigned> StackSizeInBytes) { 62 int errnum; 63 64 // Construct the attributes object. 65 pthread_attr_t Attr; 66 if ((errnum = ::pthread_attr_init(&Attr)) != 0) { 67 ReportErrnumFatal("pthread_attr_init failed", errnum); 68 } 69 70 auto AttrGuard = llvm::make_scope_exit([&] { 71 if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) { 72 ReportErrnumFatal("pthread_attr_destroy failed", errnum); 73 } 74 }); 75 76 // Set the requested stack size, if given. 77 if (StackSizeInBytes) { 78 if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) { 79 ReportErrnumFatal("pthread_attr_setstacksize failed", errnum); 80 } 81 } 82 83 // Construct and execute the thread. 84 pthread_t Thread; 85 if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0) 86 ReportErrnumFatal("pthread_create failed", errnum); 87 88 return Thread; 89} 90 91void llvm_thread_detach_impl(pthread_t Thread) { 92 int errnum; 93 94 if ((errnum = ::pthread_detach(Thread)) != 0) { 95 ReportErrnumFatal("pthread_detach failed", errnum); 96 } 97} 98 99void llvm_thread_join_impl(pthread_t Thread) { 100 int errnum; 101 102 if ((errnum = ::pthread_join(Thread, nullptr)) != 0) { 103 ReportErrnumFatal("pthread_join failed", errnum); 104 } 105} 106 107pthread_t llvm_thread_get_id_impl(pthread_t Thread) { return Thread; } 108 109pthread_t llvm_thread_get_current_id_impl() { return ::pthread_self(); } 110 111} // namespace llvm 112 113uint64_t llvm::get_threadid() { 114#if defined(__APPLE__) 115 // Calling "mach_thread_self()" bumps the reference count on the thread 116 // port, so we need to deallocate it. mach_task_self() doesn't bump the ref 117 // count. 118 static thread_local thread_port_t Self = [] { 119 thread_port_t InitSelf = mach_thread_self(); 120 mach_port_deallocate(mach_task_self(), Self); 121 return InitSelf; 122 }(); 123 return Self; 124#elif defined(__FreeBSD__) 125 return uint64_t(pthread_getthreadid_np()); 126#elif defined(__NetBSD__) 127 return uint64_t(_lwp_self()); 128#elif defined(__OpenBSD__) 129 return uint64_t(getthrid()); 130#elif defined(__ANDROID__) 131 return uint64_t(gettid()); 132#elif defined(__linux__) 133 return uint64_t(syscall(SYS_gettid)); 134#else 135 return uint64_t(pthread_self()); 136#endif 137} 138 139static constexpr uint32_t get_max_thread_name_length_impl() { 140#if defined(__NetBSD__) 141 return PTHREAD_MAX_NAMELEN_NP; 142#elif defined(__APPLE__) 143 return 64; 144#elif defined(__linux__) 145#if HAVE_PTHREAD_SETNAME_NP 146 return 16; 147#else 148 return 0; 149#endif 150#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 151 return 16; 152#elif defined(__OpenBSD__) 153 return 32; 154#else 155 return 0; 156#endif 157} 158 159uint32_t llvm::get_max_thread_name_length() { 160 return get_max_thread_name_length_impl(); 161} 162 163void llvm::set_thread_name(const Twine &Name) { 164 // Make sure the input is null terminated. 165 SmallString<64> Storage; 166 StringRef NameStr = Name.toNullTerminatedStringRef(Storage); 167 168 // Truncate from the beginning, not the end, if the specified name is too 169 // long. For one, this ensures that the resulting string is still null 170 // terminated, but additionally the end of a long thread name will usually 171 // be more unique than the beginning, since a common pattern is for similar 172 // threads to share a common prefix. 173 // Note that the name length includes the null terminator. 174 if (get_max_thread_name_length() > 0) 175 NameStr = NameStr.take_back(get_max_thread_name_length() - 1); 176 (void)NameStr; 177#if defined(__linux__) 178#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__) 179#if HAVE_PTHREAD_SETNAME_NP 180 ::pthread_setname_np(::pthread_self(), NameStr.data()); 181#endif 182#endif 183#elif defined(__FreeBSD__) || defined(__OpenBSD__) 184 ::pthread_set_name_np(::pthread_self(), NameStr.data()); 185#elif defined(__NetBSD__) 186 ::pthread_setname_np(::pthread_self(), "%s", 187 const_cast<char *>(NameStr.data())); 188#elif defined(__APPLE__) 189 ::pthread_setname_np(NameStr.data()); 190#endif 191} 192 193void llvm::get_thread_name(SmallVectorImpl<char> &Name) { 194 Name.clear(); 195 196#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 197 int pid = ::getpid(); 198 uint64_t tid = get_threadid(); 199 200 struct kinfo_proc *kp = nullptr, *nkp; 201 size_t len = 0; 202 int error; 203 int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD, 204 (int)pid}; 205 206 while (1) { 207 error = sysctl(ctl, 4, kp, &len, nullptr, 0); 208 if (kp == nullptr || (error != 0 && errno == ENOMEM)) { 209 // Add extra space in case threads are added before next call. 210 len += sizeof(*kp) + len / 10; 211 nkp = (struct kinfo_proc *)::realloc(kp, len); 212 if (nkp == nullptr) { 213 free(kp); 214 return; 215 } 216 kp = nkp; 217 continue; 218 } 219 if (error != 0) 220 len = 0; 221 break; 222 } 223 224 for (size_t i = 0; i < len / sizeof(*kp); i++) { 225 if (kp[i].ki_tid == (lwpid_t)tid) { 226 Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname)); 227 break; 228 } 229 } 230 free(kp); 231 return; 232#elif defined(__NetBSD__) 233 constexpr uint32_t len = get_max_thread_name_length_impl(); 234 char buf[len]; 235 ::pthread_getname_np(::pthread_self(), buf, len); 236 237 Name.append(buf, buf + strlen(buf)); 238#elif defined(__OpenBSD__) 239 constexpr uint32_t len = get_max_thread_name_length_impl(); 240 char buf[len]; 241 ::pthread_get_name_np(::pthread_self(), buf, len); 242 243 Name.append(buf, buf + strlen(buf)); 244#elif defined(__linux__) 245#if HAVE_PTHREAD_GETNAME_NP 246 constexpr uint32_t len = get_max_thread_name_length_impl(); 247 char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive. 248 if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len)) 249 Name.append(Buffer, Buffer + strlen(Buffer)); 250#endif 251#endif 252} 253 254SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { 255#if defined(__linux__) && defined(SCHED_IDLE) 256 // Some *really* old glibcs are missing SCHED_IDLE. 257 // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html 258 // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html 259 sched_param priority; 260 // For each of the above policies, param->sched_priority must be 0. 261 priority.sched_priority = 0; 262 // SCHED_IDLE for running very low priority background jobs. 263 // SCHED_OTHER the standard round-robin time-sharing policy; 264 return !pthread_setschedparam( 265 pthread_self(), 266 // FIXME: consider SCHED_BATCH for Low 267 Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE, 268 &priority) 269 ? SetThreadPriorityResult::SUCCESS 270 : SetThreadPriorityResult::FAILURE; 271#elif defined(__APPLE__) 272 // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon 273 // 274 // Background - Applies to work that isn’t visible to the user and may take 275 // significant time to complete. Examples include indexing, backing up, or 276 // synchronizing data. This class emphasizes energy efficiency. 277 // 278 // Utility - Applies to work that takes anywhere from a few seconds to a few 279 // minutes to complete. Examples include downloading a document or importing 280 // data. This class offers a balance between responsiveness, performance, and 281 // energy efficiency. 282 const auto qosClass = [&]() { 283 switch (Priority) { 284 case ThreadPriority::Background: 285 return QOS_CLASS_BACKGROUND; 286 case ThreadPriority::Low: 287 return QOS_CLASS_UTILITY; 288 case ThreadPriority::Default: 289 return QOS_CLASS_DEFAULT; 290 } 291 }(); 292 return !pthread_set_qos_class_self_np(qosClass, 0) 293 ? SetThreadPriorityResult::SUCCESS 294 : SetThreadPriorityResult::FAILURE; 295#endif 296 return SetThreadPriorityResult::FAILURE; 297} 298 299#include <thread> 300 301static int computeHostNumHardwareThreads() { 302#if defined(__FreeBSD__) 303 cpuset_t mask; 304 CPU_ZERO(&mask); 305 if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mask), 306 &mask) == 0) 307 return CPU_COUNT(&mask); 308#elif defined(__linux__) 309 cpu_set_t Set; 310 if (sched_getaffinity(0, sizeof(Set), &Set) == 0) 311 return CPU_COUNT(&Set); 312#endif 313 // Guard against std::thread::hardware_concurrency() returning 0. 314 if (unsigned Val = std::thread::hardware_concurrency()) 315 return Val; 316 return 1; 317} 318 319void llvm::ThreadPoolStrategy::apply_thread_strategy( 320 unsigned ThreadPoolNum) const {} 321 322llvm::BitVector llvm::get_thread_affinity_mask() { 323 // FIXME: Implement 324 llvm_unreachable("Not implemented!"); 325} 326 327unsigned llvm::get_cpus() { return 1; } 328 329#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) 330// On Linux, the number of physical cores can be computed from /proc/cpuinfo, 331// using the number of unique physical/core id pairs. The following 332// implementation reads the /proc/cpuinfo format on an x86_64 system. 333static int computeHostNumPhysicalCores() { 334 // Enabled represents the number of physical id/core id pairs with at least 335 // one processor id enabled by the CPU affinity mask. 336 cpu_set_t Affinity, Enabled; 337 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0) 338 return -1; 339 CPU_ZERO(&Enabled); 340 341 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be 342 // mmapped because it appears to have 0 size. 343 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 344 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 345 if (std::error_code EC = Text.getError()) { 346 llvm::errs() << "Can't read " 347 << "/proc/cpuinfo: " << EC.message() << "\n"; 348 return -1; 349 } 350 SmallVector<StringRef, 8> strs; 351 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, 352 /*KeepEmpty=*/false); 353 int CurProcessor = -1; 354 int CurPhysicalId = -1; 355 int CurSiblings = -1; 356 int CurCoreId = -1; 357 for (StringRef Line : strs) { 358 std::pair<StringRef, StringRef> Data = Line.split(':'); 359 auto Name = Data.first.trim(); 360 auto Val = Data.second.trim(); 361 // These fields are available if the kernel is configured with CONFIG_SMP. 362 if (Name == "processor") 363 Val.getAsInteger(10, CurProcessor); 364 else if (Name == "physical id") 365 Val.getAsInteger(10, CurPhysicalId); 366 else if (Name == "siblings") 367 Val.getAsInteger(10, CurSiblings); 368 else if (Name == "core id") { 369 Val.getAsInteger(10, CurCoreId); 370 // The processor id corresponds to an index into cpu_set_t. 371 if (CPU_ISSET(CurProcessor, &Affinity)) 372 CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled); 373 } 374 } 375 return CPU_COUNT(&Enabled); 376} 377#elif (defined(__linux__) && defined(__s390x__)) || defined(_AIX) 378static int computeHostNumPhysicalCores() { 379 return sysconf(_SC_NPROCESSORS_ONLN); 380} 381#elif defined(__linux__) && !defined(__ANDROID__) 382static int computeHostNumPhysicalCores() { 383 cpu_set_t Affinity; 384 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0) 385 return CPU_COUNT(&Affinity); 386 387 // The call to sched_getaffinity() may have failed because the Affinity 388 // mask is too small for the number of CPU's on the system (i.e. the 389 // system has more than 1024 CPUs). Allocate a mask large enough for 390 // twice as many CPUs. 391 cpu_set_t *DynAffinity; 392 DynAffinity = CPU_ALLOC(2048); 393 if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) { 394 int NumCPUs = CPU_COUNT(DynAffinity); 395 CPU_FREE(DynAffinity); 396 return NumCPUs; 397 } 398 return -1; 399} 400#elif defined(__APPLE__) 401// Gets the number of *physical cores* on the machine. 402static int computeHostNumPhysicalCores() { 403 uint32_t count; 404 size_t len = sizeof(count); 405 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); 406 if (count < 1) { 407 int nm[2]; 408 nm[0] = CTL_HW; 409 nm[1] = HW_AVAILCPU; 410 sysctl(nm, 2, &count, &len, NULL, 0); 411 if (count < 1) 412 return -1; 413 } 414 return count; 415} 416#elif defined(__MVS__) 417static int computeHostNumPhysicalCores() { 418 enum { 419 // Byte offset of the pointer to the Communications Vector Table (CVT) in 420 // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and 421 // will be zero-extended to uintptr_t. 422 FLCCVT = 16, 423 // Byte offset of the pointer to the Common System Data Area (CSD) in the 424 // CVT. The table entry is a 31-bit pointer and will be zero-extended to 425 // uintptr_t. 426 CVTCSD = 660, 427 // Byte offset to the number of live CPs in the LPAR, stored as a signed 428 // 32-bit value in the table. 429 CSD_NUMBER_ONLINE_STANDARD_CPS = 264, 430 }; 431 char *PSA = 0; 432 char *CVT = reinterpret_cast<char *>( 433 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT]))); 434 char *CSD = reinterpret_cast<char *>( 435 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD]))); 436 return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]); 437} 438#else 439// On other systems, return -1 to indicate unknown. 440static int computeHostNumPhysicalCores() { return -1; } 441#endif 442 443int llvm::get_physical_cores() { 444 static int NumCores = computeHostNumPhysicalCores(); 445 return NumCores; 446} 447