/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
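    // Iteration note: hwloc_bitmap_first()/hwloc_bitmap_next() return -1 once
    // the set bits are exhausted, which is why end() is simply -1. A mask can
    // therefore be walked with a sketch like (assuming an allocated,
    // populated Mask *m):
    //   for (int i = m->begin(); i != m->end(); i = m->next(i))
    //     ... // i is a set OS proc id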
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
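
// Callers elsewhere in the runtime do not use these classes directly; they go
// through the KMP_CPU_* helpers, in the same pattern bind_thread() uses above.
// A minimal sketch, assuming the KMP_CPU_* macros in kmp.h forward to the
// active KMPAffinity implementation:
//   KMPAffinity::Mask *mask;
//   KMP_CPU_ALLOC(mask);                    // allocate_mask()
//   KMP_CPU_ZERO(mask);                     // zero()
//   KMP_CPU_SET(proc, mask);                // set(proc)
//   __kmp_set_system_affinity(mask, TRUE);  // set_system_affinity()
//   KMP_CPU_FREE(mask);                     // deallocate_mask()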

#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change; they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
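    // The flat OS proc index is split into an array element and a bit offset:
    // with mask_t being unsigned char (BITS_PER_MASK_T == 8), proc 11 lives
    // in mask[11 / 8] == mask[1], at bit 11 % 8 == 3, for example.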
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;
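    // One mask_t element is kept per Windows processor group; a flat proc
    // index therefore maps to group i / BITS_PER_MASK_T and bit
    // i % BITS_PER_MASK_T (e.g. proc 70 is bit 6 of group 1 on a 64-bit
    // build, where BITS_PER_MASK_T == 64).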

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
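
// An Address records one thread's coordinates in the machine topology as
// filled in by the topology-discovery code: labels[] holds the id at each
// level, outermost level first (the order the comparison helpers below rely
// on), and childNums[] holds the node's index among its siblings at the
// corresponding level. On a typical three-level machine labels[] would
// plausibly be {package id, core id, hardware-thread id}.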
class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
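
// The comparator assumes equal depths and orders addresses lexicographically
// by labels[], outermost level first, e.g. {0, 1, 1} < {0, 2, 0} < {1, 0, 0};
// qsort'ing an AddrUnsPair table with it groups topological siblings
// together, which is what hierarchy_info::init() below relies on.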

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't
      want to get specific with nomenclature. When the machine is
      oversubscribed we add levels to duplicate the hierarchy, doubling the
      thread capacity of the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
                                   // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
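  // skipPerLevel[i] is, in effect, the distance in leaf (thread) slots
  // between consecutive subtrees rooted at level i; init() computes it as
  // skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1]. For the
  // example machine above (numPerLevel = {2, 4, 4, 1, 1}) this gives
  // skipPerLevel = {1, 2, 8, 32, ...}, with any remaining oversubscription
  // levels doubling the previous value.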

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty values observed when the static library is re-initialized
       multiple times (e.g. when a non-OpenMP thread repeatedly launches/joins
       a thread that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
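
  // Oversubscription illustration: if the top level of an existing hierarchy
  // spans 32 threads (skipPerLevel[depth - 1] == 32), a later resize(100)
  // appends two doubling levels, 32 -> 64 -> 128, so the tree can cover the
  // extra threads without rebuilding the lower levels.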

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H