/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      long retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      long retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#if KMP_OS_WINDOWS
    int set_process_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set process affinity operation when not capable");
      int error = 0;
      const hwloc_topology_support *support =
          hwloc_topology_get_support(__kmp_hwloc_topology);
      if (support->cpubind->set_proc_cpubind) {
        int retval;
        retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
                                   HWLOC_CPUBIND_PROCESS);
        if (retval >= 0)
          return 0;
        error = errno;
        if (abort_on_error)
          __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#endif
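    // Note on the Windows path below: each 64-bit processor group corresponds
    // to two consecutive 32-bit ulongs of the hwloc bitmap, so group i is
    // examined via ulongs 2 * i and 2 * i + 1.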
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change; they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
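// These syscall numbers are used directly by the native mask code below,
// e.g. syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask)
// in Mask::get_system_affinity().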
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned long mask_t;
    typedef decltype(__kmp_affin_mask_size) mask_size_type;
    static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    static const mask_t ONE = 1;
    mask_size_type get_num_mask_types() const {
      return __kmp_affin_mask_size / sizeof(mask_t);
    }

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
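    // Bit layout: OS proc i lives in word mask[i / BITS_PER_MASK_T] at bit
    // position i % BITS_PER_MASK_T. For example, with a 64-bit mask_t,
    // set(70) sets bit 6 of mask[1].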
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = (mask_t)0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override {
      int e;
      __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
      return e;
    }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
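    // begin()/end()/next() provide simple set-bit iteration; a typical
    // traversal (sketch) is: for (int i = m.begin(); i != m.end();
    // i = m.next(i)) { ... }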
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_process_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups <= 1) {
        if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
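        // The legacy (single-group) Win32 API has no direct "get thread
        // affinity" call, so the code below reads the process mask,
        // temporarily installs it on the thread (SetThreadAffinityMask
        // returns the thread's previous mask), then restores and records
        // that previous mask.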
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

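// An Address identifies a thread's position in the machine topology: labels[]
// holds the per-level hardware ids (outermost level first, as compared by
// operator== and isClose), and childNums[] holds each node's index among its
// siblings (used by hierarchy_info::deriveLevels below to size each level).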
class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't
      want to get specific with nomenclature. When the machine is
      oversubscribed we add levels to duplicate the hierarchy, doubling the
      thread capacity of the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
                                   // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
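  // Worked example (illustrative): for numPerLevel = {2, 4, 4, 1, ...} above,
  // init() computes skipPerLevel = {1, 2, 8, 32, ...}; skipPerLevel[i] is
  // roughly the number of leaves (threads) spanned by one subtree rooted at
  // level i.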

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty value observed when static library is re-initialized multiple
       times (e.g. when non-OpenMP thread repeatedly launches/joins thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

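    // Illustrative example of the width-limiting loop below: an overly wide
    // level is repeatedly halved while the level above it is doubled, so
    // numPerLevel = {16, 1, ...} (16 threads in one flat level) becomes
    // {4, 4, ...}, gaining one level of depth.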
    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H