/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"
#include <limits>

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      long retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      long retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#if KMP_OS_WINDOWS
    int set_process_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set process affinity operation when not capable");
      int error = 0;
      const hwloc_topology_support *support =
          hwloc_topology_get_support(__kmp_hwloc_topology);
      if (support->cpubind->set_proc_cpubind) {
        int retval;
        retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
                                   HWLOC_CPUBIND_PROCESS);
        if (retval >= 0)
          return 0;
        error = errno;
        if (abort_on_error)
          __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#endif
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
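  // Editor's sketch (not part of the upstream header): begin()/next()/end()
  // above form an iteration protocol over the set bits of a mask. This hwloc
  // mask uses hwloc's sentinel (-1) as end(), while the native masks later in
  // this file return the total bit count, so callers must always compare
  // against end() of the *same* mask object. A loop such as the
  // KMP_CPU_SET_ITERATE macro in kmp.h amounts to:
  //
  //   for (int i = m->begin(); i != m->end(); i = m->next(i)) {
  //     // i is the OS proc id of a set bit in mask m
  //   }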
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OSes that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change; they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned long mask_t;
    typedef decltype(__kmp_affin_mask_size) mask_size_type;
    static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    static const mask_t ONE = 1;
    mask_size_type get_num_mask_types() const {
      return __kmp_affin_mask_size / sizeof(mask_t);
    }

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = (mask_t)0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override {
      int e;
      __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
      return e;
    }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
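    // Editor's sketch (not in the upstream header): set/is_set/clear above
    // index the flat word array by splitting the OS proc id into a word index
    // and a bit offset. With a 64-bit mask_t, proc 70 lands in word
    // 70 / 64 == 1 at bit 70 % 64 == 6, so set(70) is equivalent to:
    //
    //   mask[1] |= (mask_t)1 << 6;
    //
    // ONE is declared as a mask_t constant so the shift is performed at the
    // full mask_t width rather than as a 32-bit int shift.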
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */

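// Editor's sketch (not in the upstream header): regardless of which backend
// is compiled in, callers work through the abstract KMPAffinity interface, so
// pinning the current thread to a single OS proc looks roughly like the
// following (assuming `affinity` points at the KMPAffinity instance the
// runtime selected at startup):
//
//   KMPAffinity::Mask *m = affinity->allocate_mask();
//   m->zero();
//   m->set(os_proc_id);
//   m->set_system_affinity(/*abort_on_error=*/true);
//   affinity->deallocate_mask(m);
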
#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_process_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups <= 1) {
        if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

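// Editor's note (not in the upstream header): every backend above pairs
// allocate_mask_array() with index_mask_array() instead of letting callers
// index a KMPAffinity::Mask* directly. The arrays hold concrete Mask objects
// whose size the abstract base pointer does not know, so pointer arithmetic
// on the base type would stride by the wrong amount; indexing must go through
// the backend that allocated the array. A sketch of the intended access
// pattern, assuming `affinity` is the active KMPAffinity instance:
//
//   KMPAffinity::Mask *masks = affinity->allocate_mask_array(n);
//   for (int i = 0; i < n; ++i) {
//     KMPAffinity::Mask *m = affinity->index_mask_array(masks, i);
//     m->zero();
//   }
//   affinity->deallocate_mask_array(masks);
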
// Describe an attribute for a level in the machine topology
struct kmp_hw_attr_t {
  int core_type : 8;
  int core_eff : 8;
  unsigned valid : 1;
  unsigned reserved : 15;

  static const int UNKNOWN_CORE_EFF = -1;

  kmp_hw_attr_t()
      : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
        valid(0), reserved(0) {}
  void set_core_type(kmp_hw_core_type_t type) {
    valid = 1;
    core_type = type;
  }
  void set_core_eff(int eff) {
    valid = 1;
    core_eff = eff;
  }
  kmp_hw_core_type_t get_core_type() const {
    return (kmp_hw_core_type_t)core_type;
  }
  int get_core_eff() const { return core_eff; }
  bool is_core_type_valid() const {
    return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
  }
  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
  operator bool() const { return valid; }
  void clear() {
    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
    core_eff = UNKNOWN_CORE_EFF;
    valid = 0;
  }
  bool contains(const kmp_hw_attr_t &other) const {
    if (!valid && !other.valid)
      return true;
    if (valid && other.valid) {
      if (other.is_core_type_valid()) {
        if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
          return false;
      }
      if (other.is_core_eff_valid()) {
        if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
          return false;
      }
      return true;
    }
    return false;
  }
  bool operator==(const kmp_hw_attr_t &rhs) const {
    return (rhs.valid == valid && rhs.core_eff == core_eff &&
            rhs.core_type == core_type);
  }
  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
};
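
// Editor's sketch (not in the upstream header): contains() is a one-way
// "covers" relation, not an equality test. `a.contains(b)` is true when every
// field b specifies is specified by a with the same value; a may carry extra
// fields. For example (KMP_HW_CORE_TYPE_ATOM is assumed to be one of the
// kmp_hw_core_type_t enumerators available on hybrid x86 builds):
//
//   kmp_hw_attr_t a, b;
//   a.set_core_type(KMP_HW_CORE_TYPE_ATOM);
//   a.set_core_eff(0);
//   b.set_core_type(KMP_HW_CORE_TYPE_ATOM);
//   // a.contains(b) -> true  (a matches everything b asks for)
//   // b.contains(a) -> false (b says nothing about core efficiency 0)
//   // a == b        -> false (operator== compares all fields exactly)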

class kmp_hw_thread_t {
public:
  static const int UNKNOWN_ID = -1;
  static int compare_ids(const void *a, const void *b);
  static int compare_compact(const void *a, const void *b);
  int ids[KMP_HW_LAST];
  int sub_ids[KMP_HW_LAST];
  bool leader;
  int os_id;
  kmp_hw_attr_t attrs;

  void print() const;
  void clear() {
    for (int i = 0; i < (int)KMP_HW_LAST; ++i)
      ids[i] = UNKNOWN_ID;
    leader = false;
    attrs.clear();
  }
};

class kmp_topology_t {

  struct flags_t {
    int uniform : 1;
    int reserved : 31;
  };

  int depth;

  // The following arrays are all 'depth' long and have been
  // allocated to hold up to KMP_HW_LAST number of objects if
  // needed so layers can be added without reallocation of any array

  // Ordered array of the types in the topology
  kmp_hw_t *types;

  // Keep quick topology ratios; for non-uniform topologies,
  // this ratio holds the max number of itemAs per itemB
  // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
  int *ratio;

  // Storage containing the absolute number of each topology layer
  int *count;

  // The number of core efficiencies. This is only useful for hybrid
  // topologies. Core efficiencies will range from 0 to num efficiencies - 1
  int num_core_efficiencies;
  int num_core_types;
  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];

  // The hardware threads array
  // hw_threads is num_hw_threads long
  // Each hw_thread's ids and sub_ids are depth deep
  int num_hw_threads;
  kmp_hw_thread_t *hw_threads;

  // Equivalence hash where the key is the hardware topology item
  // and the value is the equivalent hardware topology type in the
  // types[] array; if the value is KMP_HW_UNKNOWN, then there is no
  // known equivalence for the topology type
  kmp_hw_t equivalent[KMP_HW_LAST];

  // Flags describing the topology
  flags_t flags;

  // Insert a new topology layer after allocation
  void _insert_layer(kmp_hw_t type, const int *ids);

#if KMP_GROUP_AFFINITY
  // Insert topology information about Windows Processor groups
  void _insert_windows_proc_groups();
#endif

  // Count each item & get the num x's per y
  // e.g., get the number of cores and the number of threads per core
  // for each (x, y) in (KMP_HW_*, KMP_HW_*)
  void _gather_enumeration_information();

  // Remove layers that don't add information to the topology.
  // This is done by having the layer take on the id = UNKNOWN_ID (-1)
  void _remove_radix1_layers();

  // Find out if the topology is uniform
  void _discover_uniformity();

  // Set all the sub_ids for each hardware thread
  void _set_sub_ids();

  // Set global affinity variables describing the number of threads per
  // core, the number of packages, the number of cores per package, and
  // the number of cores.
  void _set_globals();

  // Set the last level cache equivalent type
  void _set_last_level_cache();

  // Return the number of cores with a particular attribute, 'attr'.
  // If 'find_all' is true, then find all cores on the machine; otherwise find
  // all cores per the layer 'above'
  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
                            bool find_all = false) const;

public:
  // Force use of allocate()/deallocate()
  kmp_topology_t() = delete;
  kmp_topology_t(const kmp_topology_t &t) = delete;
  kmp_topology_t(kmp_topology_t &&t) = delete;
  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;

  static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
  static void deallocate(kmp_topology_t *);

  // Functions used in create_map() routines
  kmp_hw_thread_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  const kmp_hw_thread_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  int get_num_hw_threads() const { return num_hw_threads; }
  void sort_ids() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_ids);
  }
  // Check if the hardware ids are unique. If they are,
  // return true; otherwise return false
  bool check_ids() const;

  // Function to call after the create_map() routine
  void canonicalize();
  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);

  // Functions used after canonicalize() is called
  bool filter_hw_subset();
  bool is_close(int hwt1, int hwt2, int level) const;
  bool is_uniform() const { return flags.uniform; }
  // Tell whether a type is a valid type in the topology;
  // returns KMP_HW_UNKNOWN when there is no equivalent type
  kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
  // Set type1 = type2
  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
    kmp_hw_t real_type2 = equivalent[type2];
    if (real_type2 == KMP_HW_UNKNOWN)
      real_type2 = type2;
    equivalent[type1] = real_type2;
    // This loop is required since any of the types may have been set to
    // be equivalent to type1. They all must be checked and reset to type2.
    KMP_FOREACH_HW_TYPE(type) {
      if (equivalent[type] == type1) {
        equivalent[type] = real_type2;
      }
    }
  }
  // Calculate number of types corresponding to level1
  // per types corresponding to level2 (e.g., number of threads per core)
  int calculate_ratio(int level1, int level2) const {
    KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
    KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
    int r = 1;
    for (int level = level1; level > level2; --level)
      r *= ratio[level];
    return r;
  }
  int get_ratio(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return ratio[level];
  }
  int get_depth() const { return depth; }
  kmp_hw_t get_type(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return types[level];
  }
  int get_level(kmp_hw_t type) const {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
    int eq_type = equivalent[type];
    if (eq_type == KMP_HW_UNKNOWN)
      return -1;
    for (int i = 0; i < depth; ++i)
      if (types[i] == eq_type)
        return i;
    return -1;
  }
  int get_count(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return count[level];
  }
  // Return the total number of cores with attribute 'attr'
  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
    return _get_ncores_with_attr(attr, -1, true);
  }
  // Return the number of cores with attribute
  // 'attr' per topology level 'above'
  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
    return _get_ncores_with_attr(attr, above, false);
  }

#if KMP_AFFINITY_SUPPORTED
  void sort_compact() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_compact);
  }
#endif
  void print(const char *env_var = "KMP_AFFINITY") const;
  void dump() const;
};
extern kmp_topology_t *__kmp_topology;

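// Editor's sketch (not in the upstream header): calculate_ratio() multiplies
// the per-level ratios between two levels. For the example topology in the
// comments above (4 packages, 6 cores/package, 2 threads/core, with the
// package level at index 0 and the thread level at index 2), and assuming the
// KMP_HW_SOCKET/KMP_HW_THREAD enumerators from kmp.h:
//
//   kmp_topology_t *topo = __kmp_topology;
//   int pkg_lvl = topo->get_level(KMP_HW_SOCKET); // 0
//   int thr_lvl = topo->get_level(KMP_HW_THREAD); // 2
//   int threads_per_pkg = topo->calculate_ratio(thr_lvl, pkg_lvl); // 6*2 = 12
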
class kmp_hw_subset_t {
  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;

public:
  // Describe a machine topology item in KMP_HW_SUBSET
  struct item_t {
    kmp_hw_t type;
    int num_attrs;
    int num[MAX_ATTRS];
    int offset[MAX_ATTRS];
    kmp_hw_attr_t attr[MAX_ATTRS];
  };
  // Parenthesize max to avoid accidental use of the Windows max macro.
  const static int USE_ALL = (std::numeric_limits<int>::max)();

private:
  int depth;
  int capacity;
  item_t *items;
  kmp_uint64 set;
  bool absolute;
  // The set must be able to handle up to KMP_HW_LAST number of layers
  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
  // Sorting the KMP_HW_SUBSET items to follow topology order
  // All unknown topology types will be at the beginning of the subset
  static int hw_subset_compare(const void *i1, const void *i2) {
    kmp_hw_t type1 = ((const item_t *)i1)->type;
    kmp_hw_t type2 = ((const item_t *)i2)->type;
    int level1 = __kmp_topology->get_level(type1);
    int level2 = __kmp_topology->get_level(type2);
    return level1 - level2;
  }

public:
  // Force use of allocate()/deallocate()
  kmp_hw_subset_t() = delete;
  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;

  static kmp_hw_subset_t *allocate() {
    int initial_capacity = 5;
    kmp_hw_subset_t *retval =
        (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
    retval->depth = 0;
    retval->capacity = initial_capacity;
    retval->set = 0ull;
    retval->absolute = false;
    retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
    return retval;
  }
  static void deallocate(kmp_hw_subset_t *subset) {
    __kmp_free(subset->items);
    __kmp_free(subset);
  }
  void set_absolute() { absolute = true; }
  bool is_absolute() const { return absolute; }
  void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
    for (int i = 0; i < depth; ++i) {
      // Found an existing item for this layer type
      // Add the num, offset, and attr to this item
      if (items[i].type == type) {
        int idx = items[i].num_attrs++;
        if ((size_t)idx >= MAX_ATTRS)
          return;
        items[i].num[idx] = num;
        items[i].offset[idx] = offset;
        items[i].attr[idx] = attr;
        return;
      }
    }
    if (depth == capacity - 1) {
      capacity *= 2;
      item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
      for (int i = 0; i < depth; ++i)
        new_items[i] = items[i];
      __kmp_free(items);
      items = new_items;
    }
    items[depth].num_attrs = 1;
    items[depth].type = type;
    items[depth].num[0] = num;
    items[depth].offset[0] = offset;
    items[depth].attr[0] = attr;
    depth++;
    set |= (1ull << type);
  }
  int get_depth() const { return depth; }
  const item_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  item_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  void remove(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    set &= ~(1ull << items[index].type);
    for (int j = index + 1; j < depth; ++j) {
      items[j - 1] = items[j];
    }
    depth--;
  }
  void sort() {
    KMP_DEBUG_ASSERT(__kmp_topology);
    qsort(items, depth, sizeof(item_t), hw_subset_compare);
  }
  bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
  void dump() const {
    printf("**********************\n");
    printf("*** kmp_hw_subset: ***\n");
    printf("* depth: %d\n", depth);
    printf("* items:\n");
    for (int i = 0; i < depth; ++i) {
      printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
      for (int j = 0; j < items[i].num_attrs; ++j) {
        printf(" num: %d, offset: %d, attr: ", items[i].num[j],
               items[i].offset[j]);
        if (!items[i].attr[j]) {
          printf(" (none)\n");
        } else {
          printf(
              " core_type = %s, core_eff = %d\n",
              __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
              items[i].attr[j].get_core_eff());
        }
      }
    }
    printf("* set: 0x%llx\n", set);
    printf("* absolute: %d\n", absolute);
    printf("**********************\n");
  }
};
extern kmp_hw_subset_t *__kmp_hw_subset;
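
// Editor's sketch (not in the upstream header): the KMP_HW_SUBSET parser
// builds this structure one item per topology layer. A value such as
// KMP_HW_SUBSET=2s,4c,2t would amount to roughly the following calls,
// assuming the usual kmp_hw_t enumerators from kmp.h and a default
// (unspecified) attribute and offset:
//
//   kmp_hw_subset_t *s = kmp_hw_subset_t::allocate();
//   kmp_hw_attr_t no_attr;
//   s->push_back(2, KMP_HW_SOCKET, 0, no_attr);
//   s->push_back(4, KMP_HW_CORE, 0, no_attr);
//   s->push_back(2, KMP_HW_THREAD, 0, no_attr);
//   s->sort(); // needs __kmp_topology to order items by topology level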

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't want
      to get specific with nomenclature. When the machine is oversubscribed we
      add levels to duplicate the hierarchy, doubling the thread capacity of
      the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy, in
      terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels() {
    int hier_depth = __kmp_topology->get_depth();
    for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
      numPerLevel[level] = __kmp_topology->get_ratio(i);
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty value observed when static library is re-initialized multiple
       times (e.g. when a non-OpenMP thread repeatedly launches/joins a thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Derive levels from the machine topology if it is available
    if (__kmp_topology && __kmp_topology->get_depth() > 0) {
      deriveLevels();
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
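    // Editor's note (not in the upstream header): the loop below narrows any
    // level that is wider than the target branching factor by repeatedly
    // halving it (rounding up to even first) and doubling the level above,
    // opening a new level when the one above was still 1. For example, a flat
    // machine that starts as numPerLevel = {32, 1, 1, ...} with
    // minBranch == maxLeaves == 4 evolves as
    //   {32, 1, ...} -> {16, 2, ...} -> {8, 4, ...} -> {4, 8, ...}
    // at level 0; level 1 (now 8 wide) is then halved in the next pass,
    // ending at {4, 4, 2, ...}, i.e. at most 4 children per node at the leaf
    // level while still covering 4 * 4 * 2 = 32 threads.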
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H