xref: /freebsd/contrib/llvm-project/llvm/lib/Support/Unix/Threading.inc (revision 700637cbb5e582861067a11aaca4d053546871d2)
1//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the Unix specific implementation of Threading functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "Unix.h"
14#include "llvm/ADT/ScopeExit.h"
15#include "llvm/ADT/SmallString.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/Twine.h"
19#include "llvm/Support/MemoryBuffer.h"
20#include "llvm/Support/raw_ostream.h"
21
22#if defined(__APPLE__)
23#include <mach/mach_init.h>
24#include <mach/mach_port.h>
25#include <pthread/qos.h>
26#include <sys/sysctl.h>
27#include <sys/types.h>
28#endif
29
30#include <pthread.h>
31
32#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
33#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
34#endif
35
36// Must be included after Threading.inc to provide definition for llvm::thread
37// because FreeBSD's condvar.h (included by user.h) misuses the "thread"
38// keyword.
39#ifndef __FreeBSD__
40#include "llvm/Support/thread.h"
41#endif
42
43#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
44#include <errno.h>
45#include <sys/cpuset.h>
46#include <sys/sysctl.h>
47#include <sys/user.h>
48#include <unistd.h>
49#endif
50
51#if defined(__NetBSD__)
52#include <lwp.h> // For _lwp_self()
53#endif
54
55#if defined(__OpenBSD__)
56#include <unistd.h> // For getthrid()
57#endif
58
59#if defined(__linux__)
60#include <sched.h>       // For sched_getaffinity
61#include <sys/syscall.h> // For syscall codes
62#include <unistd.h>      // For syscall()
63#endif
64
65#if defined(__CYGWIN__)
66#include <sys/cpuset.h>
67#endif
68
69#if defined(__HAIKU__)
70#include <OS.h> // For B_OS_NAME_LENGTH
71#endif
72
73namespace llvm {
74pthread_t
75llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
76                            std::optional<unsigned> StackSizeInBytes) {
77  int errnum;
78
79  // Construct the attributes object.
80  pthread_attr_t Attr;
81  if ((errnum = ::pthread_attr_init(&Attr)) != 0) {
82    ReportErrnumFatal("pthread_attr_init failed", errnum);
83  }
84
85  auto AttrGuard = llvm::make_scope_exit([&] {
86    if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) {
87      ReportErrnumFatal("pthread_attr_destroy failed", errnum);
88    }
89  });
90
91  // Set the requested stack size, if given.
92  if (StackSizeInBytes) {
93    if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) {
94      ReportErrnumFatal("pthread_attr_setstacksize failed", errnum);
95    }
96  }
97
98  // Construct and execute the thread.
99  pthread_t Thread;
100  if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0)
101    ReportErrnumFatal("pthread_create failed", errnum);
102
103  return Thread;
104}
105
106void llvm_thread_detach_impl(pthread_t Thread) {
107  int errnum;
108
109  if ((errnum = ::pthread_detach(Thread)) != 0) {
110    ReportErrnumFatal("pthread_detach failed", errnum);
111  }
112}
113
114void llvm_thread_join_impl(pthread_t Thread) {
115  int errnum;
116
117  if ((errnum = ::pthread_join(Thread, nullptr)) != 0) {
118    ReportErrnumFatal("pthread_join failed", errnum);
119  }
120}
121
122pthread_t llvm_thread_get_id_impl(pthread_t Thread) { return Thread; }
123
124pthread_t llvm_thread_get_current_id_impl() { return ::pthread_self(); }
125
126} // namespace llvm
127
128uint64_t llvm::get_threadid() {
129#if defined(__APPLE__)
130  // Calling "mach_thread_self()" bumps the reference count on the thread
131  // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
132  // count.
133  static thread_local thread_port_t Self = [] {
134    thread_port_t InitSelf = mach_thread_self();
135    mach_port_deallocate(mach_task_self(), Self);
136    return InitSelf;
137  }();
138  return Self;
139#elif defined(__FreeBSD__) || defined(__DragonFly__)
140  return uint64_t(pthread_getthreadid_np());
141#elif defined(__NetBSD__)
142  return uint64_t(_lwp_self());
143#elif defined(__OpenBSD__)
144  return uint64_t(getthrid());
145#elif defined(__ANDROID__)
146  return uint64_t(gettid());
147#elif defined(__linux__)
148  return uint64_t(syscall(__NR_gettid));
149#elif defined(_AIX)
150  return uint64_t(thread_self());
151#else
152  return uint64_t(pthread_self());
153#endif
154}
155
/// Compile-time lookup of the longest thread name the target platform
/// accepts. The limit counts the terminating null character; 0 means no limit
/// is known for this platform.
static constexpr uint32_t get_max_thread_name_length_impl() {
  // Exactly one branch below defines MaxLen; the first matching one wins.
#if defined(PTHREAD_MAX_NAMELEN_NP)
  constexpr uint32_t MaxLen = PTHREAD_MAX_NAMELEN_NP;
#elif defined(__HAIKU__)
  constexpr uint32_t MaxLen = B_OS_NAME_LENGTH;
#elif defined(__APPLE__)
  constexpr uint32_t MaxLen = 64;
#elif defined(__sun__) && defined(__svr4__)
  constexpr uint32_t MaxLen = 31;
#elif defined(__linux__) && HAVE_PTHREAD_SETNAME_NP
  constexpr uint32_t MaxLen = 16;
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) ||                   \
    defined(__DragonFly__)
  constexpr uint32_t MaxLen = 16;
#elif defined(__OpenBSD__)
  constexpr uint32_t MaxLen = 24;
#elif defined(__CYGWIN__)
  constexpr uint32_t MaxLen = 16;
#else
  constexpr uint32_t MaxLen = 0;
#endif
  return MaxLen;
}
178
179uint32_t llvm::get_max_thread_name_length() {
180  return get_max_thread_name_length_impl();
181}
182
183void llvm::set_thread_name(const Twine &Name) {
184  // Make sure the input is null terminated.
185  SmallString<64> Storage;
186  StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
187
188  // Truncate from the beginning, not the end, if the specified name is too
189  // long.  For one, this ensures that the resulting string is still null
190  // terminated, but additionally the end of a long thread name will usually
191  // be more unique than the beginning, since a common pattern is for similar
192  // threads to share a common prefix.
193  // Note that the name length includes the null terminator.
194  if (get_max_thread_name_length() > 0)
195    NameStr = NameStr.take_back(get_max_thread_name_length() - 1);
196  (void)NameStr;
197#if defined(HAVE_PTHREAD_SET_NAME_NP)
198  ::pthread_set_name_np(::pthread_self(), NameStr.data());
199#elif defined(HAVE_PTHREAD_SETNAME_NP)
200#if defined(__NetBSD__)
201  ::pthread_setname_np(::pthread_self(), "%s",
202                       const_cast<char *>(NameStr.data()));
203#elif defined(__APPLE__)
204  ::pthread_setname_np(NameStr.data());
205#else
206  ::pthread_setname_np(::pthread_self(), NameStr.data());
207#endif
208#endif
209}
210
211void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
212  Name.clear();
213
214#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
215  int pid = ::getpid();
216  uint64_t tid = get_threadid();
217
218  struct kinfo_proc *kp = nullptr, *nkp;
219  size_t len = 0;
220  int error;
221  int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
222                (int)pid};
223
224  while (1) {
225    error = sysctl(ctl, 4, kp, &len, nullptr, 0);
226    if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
227      // Add extra space in case threads are added before next call.
228      len += sizeof(*kp) + len / 10;
229      nkp = (struct kinfo_proc *)::realloc(kp, len);
230      if (nkp == nullptr) {
231        free(kp);
232        return;
233      }
234      kp = nkp;
235      continue;
236    }
237    if (error != 0)
238      len = 0;
239    break;
240  }
241
242  for (size_t i = 0; i < len / sizeof(*kp); i++) {
243    if (kp[i].ki_tid == (lwpid_t)tid) {
244      Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
245      break;
246    }
247  }
248  free(kp);
249  return;
250#elif (defined(__linux__) || defined(__CYGWIN__)) && HAVE_PTHREAD_GETNAME_NP
251  constexpr uint32_t len = get_max_thread_name_length_impl();
252  char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
253  if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
254    Name.append(Buffer, Buffer + strlen(Buffer));
255#elif defined(HAVE_PTHREAD_GET_NAME_NP) && HAVE_PTHREAD_GET_NAME_NP
256  constexpr uint32_t len = get_max_thread_name_length_impl();
257  char buf[len];
258  ::pthread_get_name_np(::pthread_self(), buf, len);
259
260  Name.append(buf, buf + strlen(buf));
261
262#elif defined(HAVE_PTHREAD_GETNAME_NP) && HAVE_PTHREAD_GETNAME_NP
263  constexpr uint32_t len = get_max_thread_name_length_impl();
264  char buf[len];
265  ::pthread_getname_np(::pthread_self(), buf, len);
266
267  Name.append(buf, buf + strlen(buf));
268#endif
269}
270
271SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
272#if (defined(__linux__) || defined(__CYGWIN__)) && defined(SCHED_IDLE)
273  // Some *really* old glibcs are missing SCHED_IDLE.
274  // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
275  // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
276  sched_param priority;
277  // For each of the above policies, param->sched_priority must be 0.
278  priority.sched_priority = 0;
279  // SCHED_IDLE    for running very low priority background jobs.
280  // SCHED_OTHER   the standard round-robin time-sharing policy;
281  return !pthread_setschedparam(
282             pthread_self(),
283             // FIXME: consider SCHED_BATCH for Low
284             Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
285             &priority)
286             ? SetThreadPriorityResult::SUCCESS
287             : SetThreadPriorityResult::FAILURE;
288#elif defined(__APPLE__)
289  // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
290  //
291  // Background - Applies to work that isn’t visible to the user and may take
292  // significant time to complete. Examples include indexing, backing up, or
293  // synchronizing data. This class emphasizes energy efficiency.
294  //
295  // Utility - Applies to work that takes anywhere from a few seconds to a few
296  // minutes to complete. Examples include downloading a document or importing
297  // data. This class offers a balance between responsiveness, performance, and
298  // energy efficiency.
299  const auto qosClass = [&]() {
300    switch (Priority) {
301    case ThreadPriority::Background:
302      return QOS_CLASS_BACKGROUND;
303    case ThreadPriority::Low:
304      return QOS_CLASS_UTILITY;
305    case ThreadPriority::Default:
306      return QOS_CLASS_DEFAULT;
307    }
308  }();
309  return !pthread_set_qos_class_self_np(qosClass, 0)
310             ? SetThreadPriorityResult::SUCCESS
311             : SetThreadPriorityResult::FAILURE;
312#endif
313  return SetThreadPriorityResult::FAILURE;
314}
315
316#include <thread>
317
/// Counts the hardware threads usable here. On FreeBSD, Linux and Cygwin the
/// size of the affinity mask is returned when it can be queried. Otherwise
/// std::thread::hardware_concurrency() is used, with 1 as the floor.
static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
  cpuset_t ThreadCpus;
  CPU_ZERO(&ThreadCpus);
  const int QueryRc = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
                                         sizeof(ThreadCpus), &ThreadCpus);
  if (QueryRc == 0)
    return CPU_COUNT(&ThreadCpus);
#elif (defined(__linux__) || defined(__CYGWIN__))
  cpu_set_t AllowedCpus;
  CPU_ZERO(&AllowedCpus);
  const int QueryRc = sched_getaffinity(0, sizeof(AllowedCpus), &AllowedCpus);
  if (QueryRc == 0)
    return CPU_COUNT(&AllowedCpus);
#endif
  // hardware_concurrency() may return 0 when it cannot tell; never report
  // fewer than one thread.
  const unsigned Reported = std::thread::hardware_concurrency();
  if (Reported == 0)
    return 1;
  return Reported;
}
336
337void llvm::ThreadPoolStrategy::apply_thread_strategy(
338    unsigned ThreadPoolNum) const {}
339
340llvm::BitVector llvm::get_thread_affinity_mask() {
341  // FIXME: Implement
342  llvm_unreachable("Not implemented!");
343}
344
345unsigned llvm::get_cpus() { return 1; }
346
347#if (defined(__linux__) || defined(__CYGWIN__)) &&                             \
348    (defined(__i386__) || defined(__x86_64__))
349// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
350// using the number of unique physical/core id pairs. The following
351// implementation reads the /proc/cpuinfo format on an x86_64 system.
352static int computeHostNumPhysicalCores() {
353  // Enabled represents the number of physical id/core id pairs with at least
354  // one processor id enabled by the CPU affinity mask.
355  cpu_set_t Affinity, Enabled;
356  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
357    return -1;
358  CPU_ZERO(&Enabled);
359
360  // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
361  // mmapped because it appears to have 0 size.
362  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
363      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
364  if (std::error_code EC = Text.getError()) {
365    llvm::errs() << "Can't read "
366                 << "/proc/cpuinfo: " << EC.message() << "\n";
367    return -1;
368  }
369  SmallVector<StringRef, 8> strs;
370  (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
371                             /*KeepEmpty=*/false);
372  int CurProcessor = -1;
373  int CurPhysicalId = -1;
374  int CurSiblings = -1;
375  int CurCoreId = -1;
376  for (StringRef Line : strs) {
377    std::pair<StringRef, StringRef> Data = Line.split(':');
378    auto Name = Data.first.trim();
379    auto Val = Data.second.trim();
380    // These fields are available if the kernel is configured with CONFIG_SMP.
381    if (Name == "processor")
382      Val.getAsInteger(10, CurProcessor);
383    else if (Name == "physical id")
384      Val.getAsInteger(10, CurPhysicalId);
385    else if (Name == "siblings")
386      Val.getAsInteger(10, CurSiblings);
387    else if (Name == "core id") {
388      Val.getAsInteger(10, CurCoreId);
389      // The processor id corresponds to an index into cpu_set_t.
390      if (CPU_ISSET(CurProcessor, &Affinity))
391        CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
392    }
393  }
394  return CPU_COUNT(&Enabled);
395}
396#elif (defined(__linux__) && defined(__s390x__)) || defined(_AIX)
/// Returns the sysconf(_SC_NPROCESSORS_ONLN) value as the core count.
static int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return OnlineProcessors;
}
400#elif defined(__linux__)
/// Returns the number of CPUs in the calling thread's affinity mask, or -1
/// when the mask cannot be obtained.
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  constexpr int MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (DynAffinity == nullptr)
    return -1;

  const size_t DynSize = CPU_ALLOC_SIZE(MaxCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with CPU_COUNT_S; plain CPU_COUNT
  // only looks at the first sizeof(cpu_set_t) bytes.
  if (sched_getaffinity(0, DynSize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynSize, DynAffinity);

  // Release the set on the failure path as well.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
419#elif defined(__APPLE__)
420// Gets the number of *physical cores* on the machine.
/// Queries the "hw.physicalcpu" sysctl for the core count and falls back to
/// the CTL_HW/HW_AVAILCPU sysctl when that fails or reports less than one.
/// Returns -1 when neither query yields a positive count.
static int computeHostNumPhysicalCores() {
  // Start from 0 so a failed query can never leave an indeterminate value.
  uint32_t count = 0;
  size_t len = sizeof(count);
  if (sysctlbyname("hw.physicalcpu", &count, &len, nullptr, 0) != 0)
    count = 0;
  if (count < 1) {
    int nm[2] = {CTL_HW, HW_AVAILCPU};
    // The first call may have modified len; restore the buffer size.
    len = sizeof(count);
    if (sysctl(nm, 2, &count, &len, nullptr, 0) != 0 || count < 1)
      return -1;
  }
  return count;
}
435#elif defined(__MVS__)
/// Reads the number of online standard CPs by following two table pointers,
/// starting from the Prefixed Save Area at address 0, and loading a 32-bit
/// count from the final table.
static int computeHostNumPhysicalCores() {
  enum {
    // Byte offset, within the Prefixed Save Area (PSA), of the pointer to the
    // Communications Vector Table (CVT). The entry is a 31-bit pointer that
    // gets zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset, within the CVT, of the pointer to the Common System Data
    // Area (CSD). The entry is a 31-bit pointer that gets zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset, within the CSD, of the number of live CPs in the LPAR,
    // stored as a signed 32-bit value.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };

  // Loads the unsigned 32-bit table entry at Base[Offset] and widens it into
  // a pointer.
  const auto LoadTablePointer = [](char *Base, int Offset) {
    return reinterpret_cast<char *>(static_cast<uintptr_t>(
        reinterpret_cast<unsigned int &>(Base[Offset])));
  };

  char *PSA = nullptr;
  char *CVT = LoadTablePointer(PSA, FLCCVT);
  char *CSD = LoadTablePointer(CVT, CVTCSD);
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
457#else
458// On other systems, return -1 to indicate unknown.
static int computeHostNumPhysicalCores() {
  // No way to determine the core count on this platform.
  constexpr int Unknown = -1;
  return Unknown;
}
460#endif
461
462int llvm::get_physical_cores() {
463  static int NumCores = computeHostNumPhysicalCores();
464  return NumCores;
465}
466