/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
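    // Note: begin()/end()/next() implement the mask-iteration convention that
    // callers rely on: for (int i = m->begin(); i != m->end(); i = m->next(i)).
    // For this hwloc-backed mask, end() is -1 because hwloc_bitmap_next()
    // (and hwloc_bitmap_first() on an empty bitmap) returns -1 once no higher
    // bit is set.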
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot change;
   they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
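// Note: on Linux the implementation below invokes the sched_getaffinity /
// sched_setaffinity system calls directly through syscall(), passing a
// byte-addressed mask of __kmp_affin_mask_size bytes, rather than going
// through a C library wrapper (presumably so it keeps working on older C
// libraries whose wrappers are missing or differ in prototype). On FreeBSD it
// uses pthread_getaffinity_np/pthread_setaffinity_np with a cpuset_t instead.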
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
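    // The mask is stored as an array of bytes: bit i lives in
    // mask[i / BITS_PER_MASK_T] at bit position i % BITS_PER_MASK_T. For
    // example, set(13) sets bit 5 of mask[1]. The array holds
    // __kmp_affin_mask_size bytes, so it covers
    // __kmp_affin_mask_size * CHAR_BIT logical processors (see end()).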

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int retval = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                          reinterpret_cast<cpuset_t *>(mask));
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int retval = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                          reinterpret_cast<cpuset_t *>(mask));
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */

#if KMP_OS_WINDOWS
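// Note: Windows exposes logical processors in processor groups of at most 64,
// so this implementation keeps one mask_t (ULONG_PTR, the width of a group's
// KAFFINITY mask) per processor group. A thread's affinity can only name
// processors within a single group, which is why set_system_affinity() and
// get_proc_group() below require the mask to resolve to exactly one group
// whenever more than one group is present.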
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
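    // Retrieve the current thread's affinity. With multiple processor groups,
    // the group-aware GetThreadGroupAffinity() is used and the result is
    // recorded for that single group. With one group, the legacy API has no
    // direct "get thread affinity" call, so the code reads the process mask
    // and uses a pair of SetThreadAffinityMask() calls: the first temporarily
    // replaces the thread mask and returns the previous one, the second
    // restores it, and that previous mask is then stored in *mask.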
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

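// An Address records where one hardware thread sits in the machine topology:
// labels[i] is its id at hierarchy level i (outermost level first, e.g.
// package, then core, then hardware thread), childNums[i] is its index among
// siblings sharing the same parent, and depth is the number of levels in use.
// AddrUnsPair pairs an Address with the corresponding OS processor id, so
// sorting an array of pairs by labels (see __kmp_affinity_cmp_Address_labels
// below) groups hardware threads that share a package, core, etc.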
class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't want
      to get specific with nomenclature. When the machine is oversubscribed we
      add levels to duplicate the hierarchy, doubling the thread capacity of
      the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy, in
      terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
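  // skipPerLevel[i] is derived in init()/resize() as the cumulative product of
  // numPerLevel[0..i-1], i.e. the number of leaf threads covered by one
  // subtree rooted at level i (and therefore the stride, in thread ids,
  // between adjacent level-i subtrees). For the example above, numPerLevel =
  // {2, 4, 4, 1, ...} gives skipPerLevel = {1, 2, 8, 32, ...}; each level
  // added for oversubscription doubles the preceding stride.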

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent use of
       dirty values observed when the static library is re-initialized multiple
       times (e.g. when a non-OpenMP thread repeatedly launches/joins a thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
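  // This is grow-only: threads race on the 'resizing' flag with a CAS; a
  // thread that loses the race keeps pausing and returns as soon as another
  // thread's resize already covers nproc, otherwise it retries the CAS.
  // Growth first reuses the spare preallocated levels (up to maxLevels), then
  // reallocates the combined numPerLevel/skipPerLevel block; each added level
  // doubles the capacity of the previous one so existing entries stay valid.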
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H