/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
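  // Example (illustrative): on Windows a processor group holds up to 64
  // logical CPUs and hwloc exposes the bitmap as 32-bit ulongs, so group i
  // corresponds to ulong indices 2*i and 2*i+1 in get_proc_group() above.
  // A mask containing only logical CPU 70 (group 1, bit 6) makes
  // first_32_bits nonzero at i == 1, so the routine returns 1; a mask
  // spanning more than one group returns -1.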
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
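
/* Usage sketch (illustrative; the dispatch object and env-var name below are
   assumptions about the surrounding runtime, not definitions made in this
   header):

     KMPAffinity *affinity = new KMPHwlocAffinity();
     affinity->determine_capable("KMP_AFFINITY");
     if (KMP_AFFINITY_CAPABLE())
       affinity->bind_thread(0); // pin the calling thread to logical CPU 0

   bind_thread() builds a one-bit mask internally and applies it via
   __kmp_set_system_affinity(), as in KMPHwlocAffinity::bind_thread above. */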

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change: they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
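/* Example (illustrative): on x86-64 the table above fixes
   __NR_sched_getaffinity to 204, so the getter below reduces to
     syscall(204, 0, __kmp_affin_mask_size, mask);
   where the pid argument 0 denotes the calling thread. */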
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
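  // Example (illustrative): mask_t is a single byte, so BITS_PER_MASK_T == 8
  // and bit i of the affinity mask lives at mask[i / 8], bit (i % 8); e.g.
  // set(10) sets bit 2 of mask[1]. end() is therefore
  // __kmp_affin_mask_size * 8, the total number of representable CPUs.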
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
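    // Example (illustrative, assuming 64-bit mask_t): mask[g] holds the bits
    // of processor group g, so a mask containing only logical CPU 70 has
    // mask[1] == ((mask_t)1 << 6). set_system_affinity() above then calls
    // __kmp_SetThreadGroupAffinity with ga.Group == 1 and ga.Mask == mask[1];
    // masks spanning more than one group are rejected because
    // get_proc_group() below returns -1 for them.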
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
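
/* Example (illustrative): an Address of depth 3 might carry
   labels = {package, core, thread}, e.g. {1, 2, 0}. isClose(b, 1) compares
   only the first depth - level == 2 labels, so two hardware threads on the
   same core compare as "close" at level 1 even if their thread labels
   differ. */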

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
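
/* Example (illustrative): __kmp_affinity_cmp_Address_labels orders addresses
   lexicographically by label, so {0, 1, 0} sorts before {0, 1, 1}, which
   sorts before {1, 0, 0}. hierarchy_info::init() below relies on this when
   it qsorts the Address/OS-proc table before deriving per-level counts. */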

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't
      want to get specific with nomenclature. When the machine is
      oversubscribed we add levels to duplicate the hierarchy, doubling the
      thread capacity of the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent use of
       dirty values observed when the static library is re-initialized
       multiple times (e.g. when a non-OpenMP thread repeatedly launches/joins
       a thread that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
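
  /* Example (illustrative): after init(), skipPerLevel[i] is the cumulative
     product of numPerLevel[0..i-1], i.e. the number of leaves spanned by one
     node at level i. For the numPerLevel = {2, 4, 4, 1, 1} machine above this
     yields skipPerLevel = {1, 2, 8, 32, ...}; entries at and beyond 'depth'
     keep doubling (64, 128, ...) to cover oversubscription. */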

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H