xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_dispatch_hier.h (revision fe6060f10f634930ff71b7c50291ddc610da2475)
/*
 * kmp_dispatch_hier.h -- hierarchical scheduling methods and data structures
 */
40b57cec5SDimitry Andric 
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef KMP_DISPATCH_HIER_H
140b57cec5SDimitry Andric #define KMP_DISPATCH_HIER_H
150b57cec5SDimitry Andric #include "kmp.h"
160b57cec5SDimitry Andric #include "kmp_dispatch.h"
170b57cec5SDimitry Andric 
// Layer type for scheduling hierarchy.
// Enumerator values run consecutively from LAYER_THREAD (-1) up to
// LAYER_LAST (5). LAYER_LAST is also used in this file as an element count
// when sizing per-layer arrays.
enum kmp_hier_layer_e {
  LAYER_THREAD = -1,
  LAYER_L1 = 0,
  LAYER_L2 = 1,
  LAYER_L3 = 2,
  LAYER_NUMA = 3,
  LAYER_LOOP = 4,
  LAYER_LAST = 5
};
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric // Convert hierarchy type (LAYER_L1, LAYER_L2, etc.) to C-style string
__kmp_get_hier_str(kmp_hier_layer_e type)300b57cec5SDimitry Andric static inline const char *__kmp_get_hier_str(kmp_hier_layer_e type) {
310b57cec5SDimitry Andric   switch (type) {
320b57cec5SDimitry Andric   case kmp_hier_layer_e::LAYER_THREAD:
330b57cec5SDimitry Andric     return "THREAD";
340b57cec5SDimitry Andric   case kmp_hier_layer_e::LAYER_L1:
350b57cec5SDimitry Andric     return "L1";
360b57cec5SDimitry Andric   case kmp_hier_layer_e::LAYER_L2:
370b57cec5SDimitry Andric     return "L2";
380b57cec5SDimitry Andric   case kmp_hier_layer_e::LAYER_L3:
390b57cec5SDimitry Andric     return "L3";
400b57cec5SDimitry Andric   case kmp_hier_layer_e::LAYER_NUMA:
410b57cec5SDimitry Andric     return "NUMA";
420b57cec5SDimitry Andric   case kmp_hier_layer_e::LAYER_LOOP:
430b57cec5SDimitry Andric     return "WHOLE_LOOP";
440b57cec5SDimitry Andric   case kmp_hier_layer_e::LAYER_LAST:
450b57cec5SDimitry Andric     return "LAST";
460b57cec5SDimitry Andric   }
470b57cec5SDimitry Andric   KMP_ASSERT(0);
480b57cec5SDimitry Andric   // Appease compilers, should never get here
490b57cec5SDimitry Andric   return "ERROR";
500b57cec5SDimitry Andric }
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric // Structure to store values parsed from OMP_SCHEDULE for scheduling hierarchy
530b57cec5SDimitry Andric typedef struct kmp_hier_sched_env_t {
540b57cec5SDimitry Andric   int size;
550b57cec5SDimitry Andric   int capacity;
560b57cec5SDimitry Andric   enum sched_type *scheds;
570b57cec5SDimitry Andric   kmp_int32 *small_chunks;
580b57cec5SDimitry Andric   kmp_int64 *large_chunks;
590b57cec5SDimitry Andric   kmp_hier_layer_e *layers;
600b57cec5SDimitry Andric   // Append a level of the hierarchy
appendkmp_hier_sched_env_t610b57cec5SDimitry Andric   void append(enum sched_type sched, kmp_int32 chunk, kmp_hier_layer_e layer) {
620b57cec5SDimitry Andric     if (capacity == 0) {
630b57cec5SDimitry Andric       scheds = (enum sched_type *)__kmp_allocate(sizeof(enum sched_type) *
640b57cec5SDimitry Andric                                                  kmp_hier_layer_e::LAYER_LAST);
650b57cec5SDimitry Andric       small_chunks = (kmp_int32 *)__kmp_allocate(sizeof(kmp_int32) *
660b57cec5SDimitry Andric                                                  kmp_hier_layer_e::LAYER_LAST);
670b57cec5SDimitry Andric       large_chunks = (kmp_int64 *)__kmp_allocate(sizeof(kmp_int64) *
680b57cec5SDimitry Andric                                                  kmp_hier_layer_e::LAYER_LAST);
690b57cec5SDimitry Andric       layers = (kmp_hier_layer_e *)__kmp_allocate(sizeof(kmp_hier_layer_e) *
700b57cec5SDimitry Andric                                                   kmp_hier_layer_e::LAYER_LAST);
710b57cec5SDimitry Andric       capacity = kmp_hier_layer_e::LAYER_LAST;
720b57cec5SDimitry Andric     }
730b57cec5SDimitry Andric     int current_size = size;
740b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(current_size < kmp_hier_layer_e::LAYER_LAST);
750b57cec5SDimitry Andric     scheds[current_size] = sched;
760b57cec5SDimitry Andric     layers[current_size] = layer;
770b57cec5SDimitry Andric     small_chunks[current_size] = chunk;
780b57cec5SDimitry Andric     large_chunks[current_size] = (kmp_int64)chunk;
790b57cec5SDimitry Andric     size++;
800b57cec5SDimitry Andric   }
810b57cec5SDimitry Andric   // Sort the hierarchy using selection sort, size will always be small
820b57cec5SDimitry Andric   // (less than LAYER_LAST) so it is not necessary to use an nlog(n) algorithm
sortkmp_hier_sched_env_t830b57cec5SDimitry Andric   void sort() {
840b57cec5SDimitry Andric     if (size <= 1)
850b57cec5SDimitry Andric       return;
860b57cec5SDimitry Andric     for (int i = 0; i < size; ++i) {
870b57cec5SDimitry Andric       int switch_index = i;
880b57cec5SDimitry Andric       for (int j = i + 1; j < size; ++j) {
890b57cec5SDimitry Andric         if (layers[j] < layers[switch_index])
900b57cec5SDimitry Andric           switch_index = j;
910b57cec5SDimitry Andric       }
920b57cec5SDimitry Andric       if (switch_index != i) {
930b57cec5SDimitry Andric         kmp_hier_layer_e temp1 = layers[i];
940b57cec5SDimitry Andric         enum sched_type temp2 = scheds[i];
950b57cec5SDimitry Andric         kmp_int32 temp3 = small_chunks[i];
960b57cec5SDimitry Andric         kmp_int64 temp4 = large_chunks[i];
970b57cec5SDimitry Andric         layers[i] = layers[switch_index];
980b57cec5SDimitry Andric         scheds[i] = scheds[switch_index];
990b57cec5SDimitry Andric         small_chunks[i] = small_chunks[switch_index];
1000b57cec5SDimitry Andric         large_chunks[i] = large_chunks[switch_index];
1010b57cec5SDimitry Andric         layers[switch_index] = temp1;
1020b57cec5SDimitry Andric         scheds[switch_index] = temp2;
1030b57cec5SDimitry Andric         small_chunks[switch_index] = temp3;
1040b57cec5SDimitry Andric         large_chunks[switch_index] = temp4;
1050b57cec5SDimitry Andric       }
1060b57cec5SDimitry Andric     }
1070b57cec5SDimitry Andric   }
1080b57cec5SDimitry Andric   // Free all memory
deallocatekmp_hier_sched_env_t1090b57cec5SDimitry Andric   void deallocate() {
1100b57cec5SDimitry Andric     if (capacity > 0) {
1110b57cec5SDimitry Andric       __kmp_free(scheds);
1120b57cec5SDimitry Andric       __kmp_free(layers);
1130b57cec5SDimitry Andric       __kmp_free(small_chunks);
1140b57cec5SDimitry Andric       __kmp_free(large_chunks);
1150b57cec5SDimitry Andric       scheds = NULL;
1160b57cec5SDimitry Andric       layers = NULL;
1170b57cec5SDimitry Andric       small_chunks = NULL;
1180b57cec5SDimitry Andric       large_chunks = NULL;
1190b57cec5SDimitry Andric     }
1200b57cec5SDimitry Andric     size = 0;
1210b57cec5SDimitry Andric     capacity = 0;
1220b57cec5SDimitry Andric   }
1230b57cec5SDimitry Andric } kmp_hier_sched_env_t;
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric extern int __kmp_dispatch_hand_threading;
1260b57cec5SDimitry Andric extern kmp_hier_sched_env_t __kmp_hier_scheds;
1270b57cec5SDimitry Andric 
1280b57cec5SDimitry Andric // Sizes of layer arrays bounded by max number of detected L1s, L2s, etc.
1290b57cec5SDimitry Andric extern int __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LAST + 1];
1300b57cec5SDimitry Andric extern int __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LAST + 1];
1310b57cec5SDimitry Andric 
1320b57cec5SDimitry Andric extern int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type);
1330b57cec5SDimitry Andric extern int __kmp_dispatch_get_id(int gtid, kmp_hier_layer_e type);
1340b57cec5SDimitry Andric extern int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1,
1350b57cec5SDimitry Andric                                         kmp_hier_layer_e t2);
1360b57cec5SDimitry Andric extern void __kmp_dispatch_free_hierarchies(kmp_team_t *team);
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric template <typename T> struct kmp_hier_shared_bdata_t {
1390b57cec5SDimitry Andric   typedef typename traits_t<T>::signed_t ST;
1400b57cec5SDimitry Andric   volatile kmp_uint64 val[2];
1410b57cec5SDimitry Andric   kmp_int32 status[2];
1420b57cec5SDimitry Andric   T lb[2];
1430b57cec5SDimitry Andric   T ub[2];
1440b57cec5SDimitry Andric   ST st[2];
1450b57cec5SDimitry Andric   dispatch_shared_info_template<T> sh[2];
zerokmp_hier_shared_bdata_t1460b57cec5SDimitry Andric   void zero() {
1470b57cec5SDimitry Andric     val[0] = val[1] = 0;
1480b57cec5SDimitry Andric     status[0] = status[1] = 0;
1490b57cec5SDimitry Andric     lb[0] = lb[1] = 0;
1500b57cec5SDimitry Andric     ub[0] = ub[1] = 0;
1510b57cec5SDimitry Andric     st[0] = st[1] = 0;
1520b57cec5SDimitry Andric     sh[0].u.s.iteration = sh[1].u.s.iteration = 0;
1530b57cec5SDimitry Andric   }
set_next_hand_threadkmp_hier_shared_bdata_t1540b57cec5SDimitry Andric   void set_next_hand_thread(T nlb, T nub, ST nst, kmp_int32 nstatus,
1550b57cec5SDimitry Andric                             kmp_uint64 index) {
1560b57cec5SDimitry Andric     lb[1 - index] = nlb;
1570b57cec5SDimitry Andric     ub[1 - index] = nub;
1580b57cec5SDimitry Andric     st[1 - index] = nst;
1590b57cec5SDimitry Andric     status[1 - index] = nstatus;
1600b57cec5SDimitry Andric   }
set_nextkmp_hier_shared_bdata_t1610b57cec5SDimitry Andric   void set_next(T nlb, T nub, ST nst, kmp_int32 nstatus, kmp_uint64 index) {
1620b57cec5SDimitry Andric     lb[1 - index] = nlb;
1630b57cec5SDimitry Andric     ub[1 - index] = nub;
1640b57cec5SDimitry Andric     st[1 - index] = nst;
1650b57cec5SDimitry Andric     status[1 - index] = nstatus;
1660b57cec5SDimitry Andric     sh[1 - index].u.s.iteration = 0;
1670b57cec5SDimitry Andric   }
1680b57cec5SDimitry Andric 
get_next_statuskmp_hier_shared_bdata_t1690b57cec5SDimitry Andric   kmp_int32 get_next_status(kmp_uint64 index) const {
1700b57cec5SDimitry Andric     return status[1 - index];
1710b57cec5SDimitry Andric   }
get_next_lbkmp_hier_shared_bdata_t1720b57cec5SDimitry Andric   T get_next_lb(kmp_uint64 index) const { return lb[1 - index]; }
get_next_ubkmp_hier_shared_bdata_t1730b57cec5SDimitry Andric   T get_next_ub(kmp_uint64 index) const { return ub[1 - index]; }
get_next_stkmp_hier_shared_bdata_t1740b57cec5SDimitry Andric   ST get_next_st(kmp_uint64 index) const { return st[1 - index]; }
get_next_shkmp_hier_shared_bdata_t1750b57cec5SDimitry Andric   dispatch_shared_info_template<T> volatile *get_next_sh(kmp_uint64 index) {
1760b57cec5SDimitry Andric     return &(sh[1 - index]);
1770b57cec5SDimitry Andric   }
1780b57cec5SDimitry Andric 
get_curr_statuskmp_hier_shared_bdata_t1790b57cec5SDimitry Andric   kmp_int32 get_curr_status(kmp_uint64 index) const { return status[index]; }
get_curr_lbkmp_hier_shared_bdata_t1800b57cec5SDimitry Andric   T get_curr_lb(kmp_uint64 index) const { return lb[index]; }
get_curr_ubkmp_hier_shared_bdata_t1810b57cec5SDimitry Andric   T get_curr_ub(kmp_uint64 index) const { return ub[index]; }
get_curr_stkmp_hier_shared_bdata_t1820b57cec5SDimitry Andric   ST get_curr_st(kmp_uint64 index) const { return st[index]; }
get_curr_shkmp_hier_shared_bdata_t1830b57cec5SDimitry Andric   dispatch_shared_info_template<T> volatile *get_curr_sh(kmp_uint64 index) {
1840b57cec5SDimitry Andric     return &(sh[index]);
1850b57cec5SDimitry Andric   }
1860b57cec5SDimitry Andric };
1870b57cec5SDimitry Andric 
/*
 * In the barrier implementations, num_active is the number of threads that are
 * attached to the kmp_hier_top_unit_t structure in the scheduling hierarchy.
 * bdata is the shared barrier data that resides on the kmp_hier_top_unit_t
 * structure. tdata is the thread private data that resides on the thread
 * data structure.
 *
 * The reset_shared() method is used to initialize the barrier data on the
 * kmp_hier_top_unit_t hierarchy structure
 *
 * The reset_private() method is used to initialize the barrier data on the
 * thread's private dispatch buffer structure
 *
 * The barrier() method takes an id, which is that thread's id for the
 * kmp_hier_top_unit_t structure, and implements the barrier.  All threads wait
 * inside barrier() until all fellow threads who are attached to that
 * kmp_hier_top_unit_t structure have arrived.
 */
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric // Core barrier implementation
2080b57cec5SDimitry Andric // Can be used in a unit with between 2 to 8 threads
2090b57cec5SDimitry Andric template <typename T> class core_barrier_impl {
get_wait_val(int num_active)2100b57cec5SDimitry Andric   static inline kmp_uint64 get_wait_val(int num_active) {
2110b57cec5SDimitry Andric     kmp_uint64 wait_val = 0LL;
2120b57cec5SDimitry Andric     switch (num_active) {
2130b57cec5SDimitry Andric     case 2:
2140b57cec5SDimitry Andric       wait_val = 0x0101LL;
2150b57cec5SDimitry Andric       break;
2160b57cec5SDimitry Andric     case 3:
2170b57cec5SDimitry Andric       wait_val = 0x010101LL;
2180b57cec5SDimitry Andric       break;
2190b57cec5SDimitry Andric     case 4:
2200b57cec5SDimitry Andric       wait_val = 0x01010101LL;
2210b57cec5SDimitry Andric       break;
2220b57cec5SDimitry Andric     case 5:
2230b57cec5SDimitry Andric       wait_val = 0x0101010101LL;
2240b57cec5SDimitry Andric       break;
2250b57cec5SDimitry Andric     case 6:
2260b57cec5SDimitry Andric       wait_val = 0x010101010101LL;
2270b57cec5SDimitry Andric       break;
2280b57cec5SDimitry Andric     case 7:
2290b57cec5SDimitry Andric       wait_val = 0x01010101010101LL;
2300b57cec5SDimitry Andric       break;
2310b57cec5SDimitry Andric     case 8:
2320b57cec5SDimitry Andric       wait_val = 0x0101010101010101LL;
2330b57cec5SDimitry Andric       break;
2340b57cec5SDimitry Andric     default:
2350b57cec5SDimitry Andric       // don't use the core_barrier_impl for more than 8 threads
2360b57cec5SDimitry Andric       KMP_ASSERT(0);
2370b57cec5SDimitry Andric     }
2380b57cec5SDimitry Andric     return wait_val;
2390b57cec5SDimitry Andric   }
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric public:
2420b57cec5SDimitry Andric   static void reset_private(kmp_int32 num_active,
2430b57cec5SDimitry Andric                             kmp_hier_private_bdata_t *tdata);
2440b57cec5SDimitry Andric   static void reset_shared(kmp_int32 num_active,
2450b57cec5SDimitry Andric                            kmp_hier_shared_bdata_t<T> *bdata);
2460b57cec5SDimitry Andric   static void barrier(kmp_int32 id, kmp_hier_shared_bdata_t<T> *bdata,
2470b57cec5SDimitry Andric                       kmp_hier_private_bdata_t *tdata);
2480b57cec5SDimitry Andric };
2490b57cec5SDimitry Andric 
2500b57cec5SDimitry Andric template <typename T>
reset_private(kmp_int32 num_active,kmp_hier_private_bdata_t * tdata)2510b57cec5SDimitry Andric void core_barrier_impl<T>::reset_private(kmp_int32 num_active,
2520b57cec5SDimitry Andric                                          kmp_hier_private_bdata_t *tdata) {
2530b57cec5SDimitry Andric   tdata->num_active = num_active;
2540b57cec5SDimitry Andric   tdata->index = 0;
2550b57cec5SDimitry Andric   tdata->wait_val[0] = tdata->wait_val[1] = get_wait_val(num_active);
2560b57cec5SDimitry Andric }
2570b57cec5SDimitry Andric template <typename T>
reset_shared(kmp_int32 num_active,kmp_hier_shared_bdata_t<T> * bdata)2580b57cec5SDimitry Andric void core_barrier_impl<T>::reset_shared(kmp_int32 num_active,
2590b57cec5SDimitry Andric                                         kmp_hier_shared_bdata_t<T> *bdata) {
2600b57cec5SDimitry Andric   bdata->val[0] = bdata->val[1] = 0LL;
2610b57cec5SDimitry Andric   bdata->status[0] = bdata->status[1] = 0LL;
2620b57cec5SDimitry Andric }
2630b57cec5SDimitry Andric template <typename T>
barrier(kmp_int32 id,kmp_hier_shared_bdata_t<T> * bdata,kmp_hier_private_bdata_t * tdata)2640b57cec5SDimitry Andric void core_barrier_impl<T>::barrier(kmp_int32 id,
2650b57cec5SDimitry Andric                                    kmp_hier_shared_bdata_t<T> *bdata,
2660b57cec5SDimitry Andric                                    kmp_hier_private_bdata_t *tdata) {
2670b57cec5SDimitry Andric   kmp_uint64 current_index = tdata->index;
2680b57cec5SDimitry Andric   kmp_uint64 next_index = 1 - current_index;
2690b57cec5SDimitry Andric   kmp_uint64 current_wait_value = tdata->wait_val[current_index];
2700b57cec5SDimitry Andric   kmp_uint64 next_wait_value =
2710b57cec5SDimitry Andric       (current_wait_value ? 0 : get_wait_val(tdata->num_active));
2720b57cec5SDimitry Andric   KD_TRACE(10, ("core_barrier_impl::barrier(): T#%d current_index:%llu "
2730b57cec5SDimitry Andric                 "next_index:%llu curr_wait:%llu next_wait:%llu\n",
2740b57cec5SDimitry Andric                 __kmp_get_gtid(), current_index, next_index, current_wait_value,
2750b57cec5SDimitry Andric                 next_wait_value));
276e8d8bef9SDimitry Andric   char v = (current_wait_value ? '\1' : '\0');
2770b57cec5SDimitry Andric   (RCAST(volatile char *, &(bdata->val[current_index])))[id] = v;
2780b57cec5SDimitry Andric   __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
2790b57cec5SDimitry Andric                          __kmp_eq<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
2800b57cec5SDimitry Andric   tdata->wait_val[current_index] = next_wait_value;
2810b57cec5SDimitry Andric   tdata->index = next_index;
2820b57cec5SDimitry Andric }
2830b57cec5SDimitry Andric 
2840b57cec5SDimitry Andric // Counter barrier implementation
2850b57cec5SDimitry Andric // Can be used in a unit with arbitrary number of active threads
2860b57cec5SDimitry Andric template <typename T> class counter_barrier_impl {
2870b57cec5SDimitry Andric public:
2880b57cec5SDimitry Andric   static void reset_private(kmp_int32 num_active,
2890b57cec5SDimitry Andric                             kmp_hier_private_bdata_t *tdata);
2900b57cec5SDimitry Andric   static void reset_shared(kmp_int32 num_active,
2910b57cec5SDimitry Andric                            kmp_hier_shared_bdata_t<T> *bdata);
2920b57cec5SDimitry Andric   static void barrier(kmp_int32 id, kmp_hier_shared_bdata_t<T> *bdata,
2930b57cec5SDimitry Andric                       kmp_hier_private_bdata_t *tdata);
2940b57cec5SDimitry Andric };
2950b57cec5SDimitry Andric 
2960b57cec5SDimitry Andric template <typename T>
reset_private(kmp_int32 num_active,kmp_hier_private_bdata_t * tdata)2970b57cec5SDimitry Andric void counter_barrier_impl<T>::reset_private(kmp_int32 num_active,
2980b57cec5SDimitry Andric                                             kmp_hier_private_bdata_t *tdata) {
2990b57cec5SDimitry Andric   tdata->num_active = num_active;
3000b57cec5SDimitry Andric   tdata->index = 0;
3010b57cec5SDimitry Andric   tdata->wait_val[0] = tdata->wait_val[1] = (kmp_uint64)num_active;
3020b57cec5SDimitry Andric }
3030b57cec5SDimitry Andric template <typename T>
reset_shared(kmp_int32 num_active,kmp_hier_shared_bdata_t<T> * bdata)3040b57cec5SDimitry Andric void counter_barrier_impl<T>::reset_shared(kmp_int32 num_active,
3050b57cec5SDimitry Andric                                            kmp_hier_shared_bdata_t<T> *bdata) {
3060b57cec5SDimitry Andric   bdata->val[0] = bdata->val[1] = 0LL;
3070b57cec5SDimitry Andric   bdata->status[0] = bdata->status[1] = 0LL;
3080b57cec5SDimitry Andric }
3090b57cec5SDimitry Andric template <typename T>
barrier(kmp_int32 id,kmp_hier_shared_bdata_t<T> * bdata,kmp_hier_private_bdata_t * tdata)3100b57cec5SDimitry Andric void counter_barrier_impl<T>::barrier(kmp_int32 id,
3110b57cec5SDimitry Andric                                       kmp_hier_shared_bdata_t<T> *bdata,
3120b57cec5SDimitry Andric                                       kmp_hier_private_bdata_t *tdata) {
3130b57cec5SDimitry Andric   volatile kmp_int64 *val;
3140b57cec5SDimitry Andric   kmp_uint64 current_index = tdata->index;
3150b57cec5SDimitry Andric   kmp_uint64 next_index = 1 - current_index;
3160b57cec5SDimitry Andric   kmp_uint64 current_wait_value = tdata->wait_val[current_index];
3170b57cec5SDimitry Andric   kmp_uint64 next_wait_value = current_wait_value + tdata->num_active;
3180b57cec5SDimitry Andric 
3190b57cec5SDimitry Andric   KD_TRACE(10, ("counter_barrier_impl::barrier(): T#%d current_index:%llu "
3200b57cec5SDimitry Andric                 "next_index:%llu curr_wait:%llu next_wait:%llu\n",
3210b57cec5SDimitry Andric                 __kmp_get_gtid(), current_index, next_index, current_wait_value,
3220b57cec5SDimitry Andric                 next_wait_value));
3230b57cec5SDimitry Andric   val = RCAST(volatile kmp_int64 *, &(bdata->val[current_index]));
3240b57cec5SDimitry Andric   KMP_TEST_THEN_INC64(val);
3250b57cec5SDimitry Andric   __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
3260b57cec5SDimitry Andric                          __kmp_ge<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
3270b57cec5SDimitry Andric   tdata->wait_val[current_index] = next_wait_value;
3280b57cec5SDimitry Andric   tdata->index = next_index;
3290b57cec5SDimitry Andric }
3300b57cec5SDimitry Andric 
3310b57cec5SDimitry Andric // Data associated with topology unit within a layer
3320b57cec5SDimitry Andric // For example, one kmp_hier_top_unit_t corresponds to one L1 cache
3330b57cec5SDimitry Andric template <typename T> struct kmp_hier_top_unit_t {
3340b57cec5SDimitry Andric   typedef typename traits_t<T>::signed_t ST;
3350b57cec5SDimitry Andric   typedef typename traits_t<T>::unsigned_t UT;
3360b57cec5SDimitry Andric   kmp_int32 active; // number of topology units that communicate with this unit
3370b57cec5SDimitry Andric   // chunk information (lower/upper bound, stride, etc.)
3380b57cec5SDimitry Andric   dispatch_private_info_template<T> hier_pr;
3390b57cec5SDimitry Andric   kmp_hier_top_unit_t<T> *hier_parent; // pointer to parent unit
3400b57cec5SDimitry Andric   kmp_hier_shared_bdata_t<T> hier_barrier; // shared barrier data for this unit
3410b57cec5SDimitry Andric 
get_hier_idkmp_hier_top_unit_t3420b57cec5SDimitry Andric   kmp_int32 get_hier_id() const { return hier_pr.hier_id; }
reset_shared_barrierkmp_hier_top_unit_t3430b57cec5SDimitry Andric   void reset_shared_barrier() {
3440b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(active > 0);
3450b57cec5SDimitry Andric     if (active == 1)
3460b57cec5SDimitry Andric       return;
3470b57cec5SDimitry Andric     hier_barrier.zero();
3480b57cec5SDimitry Andric     if (active >= 2 && active <= 8) {
3490b57cec5SDimitry Andric       core_barrier_impl<T>::reset_shared(active, &hier_barrier);
3500b57cec5SDimitry Andric     } else {
3510b57cec5SDimitry Andric       counter_barrier_impl<T>::reset_shared(active, &hier_barrier);
3520b57cec5SDimitry Andric     }
3530b57cec5SDimitry Andric   }
reset_private_barrierkmp_hier_top_unit_t3540b57cec5SDimitry Andric   void reset_private_barrier(kmp_hier_private_bdata_t *tdata) {
3550b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(tdata);
3560b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(active > 0);
3570b57cec5SDimitry Andric     if (active == 1)
3580b57cec5SDimitry Andric       return;
3590b57cec5SDimitry Andric     if (active >= 2 && active <= 8) {
3600b57cec5SDimitry Andric       core_barrier_impl<T>::reset_private(active, tdata);
3610b57cec5SDimitry Andric     } else {
3620b57cec5SDimitry Andric       counter_barrier_impl<T>::reset_private(active, tdata);
3630b57cec5SDimitry Andric     }
3640b57cec5SDimitry Andric   }
barrierkmp_hier_top_unit_t3650b57cec5SDimitry Andric   void barrier(kmp_int32 id, kmp_hier_private_bdata_t *tdata) {
3660b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(tdata);
3670b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(active > 0);
3680b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(id >= 0 && id < active);
3690b57cec5SDimitry Andric     if (active == 1) {
3700b57cec5SDimitry Andric       tdata->index = 1 - tdata->index;
3710b57cec5SDimitry Andric       return;
3720b57cec5SDimitry Andric     }
3730b57cec5SDimitry Andric     if (active >= 2 && active <= 8) {
3740b57cec5SDimitry Andric       core_barrier_impl<T>::barrier(id, &hier_barrier, tdata);
3750b57cec5SDimitry Andric     } else {
3760b57cec5SDimitry Andric       counter_barrier_impl<T>::barrier(id, &hier_barrier, tdata);
3770b57cec5SDimitry Andric     }
3780b57cec5SDimitry Andric   }
3790b57cec5SDimitry Andric 
get_next_statuskmp_hier_top_unit_t3800b57cec5SDimitry Andric   kmp_int32 get_next_status(kmp_uint64 index) const {
3810b57cec5SDimitry Andric     return hier_barrier.get_next_status(index);
3820b57cec5SDimitry Andric   }
get_next_lbkmp_hier_top_unit_t3830b57cec5SDimitry Andric   T get_next_lb(kmp_uint64 index) const {
3840b57cec5SDimitry Andric     return hier_barrier.get_next_lb(index);
3850b57cec5SDimitry Andric   }
get_next_ubkmp_hier_top_unit_t3860b57cec5SDimitry Andric   T get_next_ub(kmp_uint64 index) const {
3870b57cec5SDimitry Andric     return hier_barrier.get_next_ub(index);
3880b57cec5SDimitry Andric   }
get_next_stkmp_hier_top_unit_t3890b57cec5SDimitry Andric   ST get_next_st(kmp_uint64 index) const {
3900b57cec5SDimitry Andric     return hier_barrier.get_next_st(index);
3910b57cec5SDimitry Andric   }
get_next_shkmp_hier_top_unit_t3920b57cec5SDimitry Andric   dispatch_shared_info_template<T> volatile *get_next_sh(kmp_uint64 index) {
3930b57cec5SDimitry Andric     return hier_barrier.get_next_sh(index);
3940b57cec5SDimitry Andric   }
3950b57cec5SDimitry Andric 
get_curr_statuskmp_hier_top_unit_t3960b57cec5SDimitry Andric   kmp_int32 get_curr_status(kmp_uint64 index) const {
3970b57cec5SDimitry Andric     return hier_barrier.get_curr_status(index);
3980b57cec5SDimitry Andric   }
get_curr_lbkmp_hier_top_unit_t3990b57cec5SDimitry Andric   T get_curr_lb(kmp_uint64 index) const {
4000b57cec5SDimitry Andric     return hier_barrier.get_curr_lb(index);
4010b57cec5SDimitry Andric   }
get_curr_ubkmp_hier_top_unit_t4020b57cec5SDimitry Andric   T get_curr_ub(kmp_uint64 index) const {
4030b57cec5SDimitry Andric     return hier_barrier.get_curr_ub(index);
4040b57cec5SDimitry Andric   }
get_curr_stkmp_hier_top_unit_t4050b57cec5SDimitry Andric   ST get_curr_st(kmp_uint64 index) const {
4060b57cec5SDimitry Andric     return hier_barrier.get_curr_st(index);
4070b57cec5SDimitry Andric   }
get_curr_shkmp_hier_top_unit_t4080b57cec5SDimitry Andric   dispatch_shared_info_template<T> volatile *get_curr_sh(kmp_uint64 index) {
4090b57cec5SDimitry Andric     return hier_barrier.get_curr_sh(index);
4100b57cec5SDimitry Andric   }
4110b57cec5SDimitry Andric 
set_next_hand_threadkmp_hier_top_unit_t4120b57cec5SDimitry Andric   void set_next_hand_thread(T lb, T ub, ST st, kmp_int32 status,
4130b57cec5SDimitry Andric                             kmp_uint64 index) {
4140b57cec5SDimitry Andric     hier_barrier.set_next_hand_thread(lb, ub, st, status, index);
4150b57cec5SDimitry Andric   }
set_nextkmp_hier_top_unit_t4160b57cec5SDimitry Andric   void set_next(T lb, T ub, ST st, kmp_int32 status, kmp_uint64 index) {
4170b57cec5SDimitry Andric     hier_barrier.set_next(lb, ub, st, status, index);
4180b57cec5SDimitry Andric   }
get_my_prkmp_hier_top_unit_t4190b57cec5SDimitry Andric   dispatch_private_info_template<T> *get_my_pr() { return &hier_pr; }
get_parentkmp_hier_top_unit_t4200b57cec5SDimitry Andric   kmp_hier_top_unit_t<T> *get_parent() { return hier_parent; }
get_parent_prkmp_hier_top_unit_t4210b57cec5SDimitry Andric   dispatch_private_info_template<T> *get_parent_pr() {
4220b57cec5SDimitry Andric     return &(hier_parent->hier_pr);
4230b57cec5SDimitry Andric   }
4240b57cec5SDimitry Andric 
is_activekmp_hier_top_unit_t4250b57cec5SDimitry Andric   kmp_int32 is_active() const { return active; }
get_num_activekmp_hier_top_unit_t4260b57cec5SDimitry Andric   kmp_int32 get_num_active() const { return active; }
4270b57cec5SDimitry Andric #ifdef KMP_DEBUG
printkmp_hier_top_unit_t4280b57cec5SDimitry Andric   void print() {
4290b57cec5SDimitry Andric     KD_TRACE(
4300b57cec5SDimitry Andric         10,
4310b57cec5SDimitry Andric         ("    kmp_hier_top_unit_t: active:%d pr:%p lb:%d ub:%d st:%d tc:%d\n",
4320b57cec5SDimitry Andric          active, &hier_pr, hier_pr.u.p.lb, hier_pr.u.p.ub, hier_pr.u.p.st,
4330b57cec5SDimitry Andric          hier_pr.u.p.tc));
4340b57cec5SDimitry Andric   }
4350b57cec5SDimitry Andric #endif
4360b57cec5SDimitry Andric };
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric // Information regarding a single layer within the scheduling hierarchy
4390b57cec5SDimitry Andric template <typename T> struct kmp_hier_layer_info_t {
4400b57cec5SDimitry Andric   int num_active; // number of threads active in this level
4410b57cec5SDimitry Andric   kmp_hier_layer_e type; // LAYER_L1, LAYER_L2, etc.
4420b57cec5SDimitry Andric   enum sched_type sched; // static, dynamic, guided, etc.
4430b57cec5SDimitry Andric   typename traits_t<T>::signed_t chunk; // chunk size associated with schedule
4440b57cec5SDimitry Andric   int length; // length of the kmp_hier_top_unit_t array
4450b57cec5SDimitry Andric 
4460b57cec5SDimitry Andric #ifdef KMP_DEBUG
4470b57cec5SDimitry Andric   // Print this layer's information
printkmp_hier_layer_info_t4480b57cec5SDimitry Andric   void print() {
4490b57cec5SDimitry Andric     const char *t = __kmp_get_hier_str(type);
4500b57cec5SDimitry Andric     KD_TRACE(
4510b57cec5SDimitry Andric         10,
4520b57cec5SDimitry Andric         ("    kmp_hier_layer_info_t: num_active:%d type:%s sched:%d chunk:%d "
4530b57cec5SDimitry Andric          "length:%d\n",
4540b57cec5SDimitry Andric          num_active, t, sched, chunk, length));
4550b57cec5SDimitry Andric   }
4560b57cec5SDimitry Andric #endif
4570b57cec5SDimitry Andric };
4580b57cec5SDimitry Andric 
/*
 * Structure to implement entire hierarchy
 *
 * The hierarchy is kept as an array of arrays to represent the different
 * layers.  Layer 0 is the lowest layer and layer num_layers - 1 is the
 * highest layer.
 * Example:
 * [ 2 ] -> [ L3 | L3 ]
 * [ 1 ] -> [ L2 | L2 | L2 | L2 ]
 * [ 0 ] -> [ L1 | L1 | L1 | L1 | L1 | L1 | L1 | L1 ]
 * There is also an array of layer_info_t which has information regarding
 * each layer
 */
4720b57cec5SDimitry Andric template <typename T> struct kmp_hier_t {
4730b57cec5SDimitry Andric public:
4740b57cec5SDimitry Andric   typedef typename traits_t<T>::unsigned_t UT;
4750b57cec5SDimitry Andric   typedef typename traits_t<T>::signed_t ST;
4760b57cec5SDimitry Andric 
4770b57cec5SDimitry Andric private:
// Fetch the next iteration range for the hierarchy unit `current` at layer
// `hier_level`, recursing towards the top of the hierarchy when the unit has
// run out of iterations.
//
// loc, gtid   - source location and global thread id, forwarded to the
//               dispatch init/next routines
// current     - unit at layer hier_level on whose behalf the call is made
// p_last      - (out, may be NULL) written only on the previous_id == 0 path:
//               whether the range handed out contains the last iteration
// p_lb, p_ub, p_st - not written by this function; the range is handed to
//               the unit through current->set_next() instead
// previous_id - hier id of the caller (child unit or thread) inside
//               `current`; only the caller with id 0 performs dispatch work
// hier_level  - index of the layer `current` lives in (0 .. num_layers - 1)
//
// Returns the unit's status: the value read back with
// current->get_curr_status() after the barrier, or the local status when
// hier_level == 0 and __kmp_dispatch_hand_threading is set.  Status 2 is
// produced when the parent supplied a range but re-dispatching it on this
// unit yielded nothing; the callers in next() react to it by trying again.
next_recursekmp_hier_t4780b57cec5SDimitry Andric   int next_recurse(ident_t *loc, int gtid, kmp_hier_top_unit_t<T> *current,
4790b57cec5SDimitry Andric                    kmp_int32 *p_last, T *p_lb, T *p_ub, ST *p_st,
4800b57cec5SDimitry Andric                    kmp_int32 previous_id, int hier_level) {
4810b57cec5SDimitry Andric     int status;
4820b57cec5SDimitry Andric     kmp_info_t *th = __kmp_threads[gtid];
4830b57cec5SDimitry Andric     auto parent = current->get_parent();
4840b57cec5SDimitry Andric     bool last_layer = (hier_level == get_num_layers() - 1);
4850b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(th);
    // Per-thread barrier bookkeeping for this layer; tdata->index selects
    // which of the unit's buffers is used below.
4860b57cec5SDimitry Andric     kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[hier_level]);
4870b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(current);
4880b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(hier_level >= 0);
4890b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(hier_level < get_num_layers());
4900b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(tdata);
    // Only the last layer may lack a parent unit.
4910b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(parent || last_layer);
4920b57cec5SDimitry Andric 
4930b57cec5SDimitry Andric     KD_TRACE(
4940b57cec5SDimitry Andric         1, ("kmp_hier_t.next_recurse(): T#%d (%d) called\n", gtid, hier_level));
4950b57cec5SDimitry Andric 
4960b57cec5SDimitry Andric     T hier_id = (T)current->get_hier_id();
4970b57cec5SDimitry Andric     // Attempt to grab next iteration range for this level
    // Callers with a non-zero id skip this whole branch and go straight to
    // the barrier / status read at the bottom.
4980b57cec5SDimitry Andric     if (previous_id == 0) {
499*fe6060f1SDimitry Andric       KD_TRACE(1, ("kmp_hier_t.next_recurse(): T#%d (%d) is primary of unit\n",
5000b57cec5SDimitry Andric                    gtid, hier_level));
5010b57cec5SDimitry Andric       kmp_int32 contains_last;
5020b57cec5SDimitry Andric       T my_lb, my_ub;
5030b57cec5SDimitry Andric       ST my_st;
5040b57cec5SDimitry Andric       T nproc;
5050b57cec5SDimitry Andric       dispatch_shared_info_template<T> volatile *my_sh;
5060b57cec5SDimitry Andric       dispatch_private_info_template<T> *my_pr;
5070b57cec5SDimitry Andric       if (last_layer) {
5080b57cec5SDimitry Andric         // last layer below the very top uses the single shared buffer
5090b57cec5SDimitry Andric         // from the team struct.
5100b57cec5SDimitry Andric         KD_TRACE(10,
5110b57cec5SDimitry Andric                  ("kmp_hier_t.next_recurse(): T#%d (%d) using top level sh\n",
5120b57cec5SDimitry Andric                   gtid, hier_level));
5130b57cec5SDimitry Andric         my_sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
5140b57cec5SDimitry Andric             th->th.th_dispatch->th_dispatch_sh_current);
5150b57cec5SDimitry Andric         nproc = (T)get_top_level_nproc();
5160b57cec5SDimitry Andric       } else {
5170b57cec5SDimitry Andric         // middle layers use the shared buffer inside the kmp_hier_top_unit_t
5180b57cec5SDimitry Andric         // structure
5190b57cec5SDimitry Andric         KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) using hier sh\n",
5200b57cec5SDimitry Andric                       gtid, hier_level));
5210b57cec5SDimitry Andric         my_sh =
5220b57cec5SDimitry Andric             parent->get_curr_sh(th->th.th_hier_bar_data[hier_level + 1].index);
5230b57cec5SDimitry Andric         nproc = (T)parent->get_num_active();
5240b57cec5SDimitry Andric       }
5250b57cec5SDimitry Andric       my_pr = current->get_my_pr();
5260b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(my_sh);
5270b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(my_pr);
5280b57cec5SDimitry Andric       enum sched_type schedule = get_sched(hier_level);
5290b57cec5SDimitry Andric       ST chunk = (ST)get_chunk(hier_level);
      // First attempt: dispatch from what this unit already holds.
5300b57cec5SDimitry Andric       status = __kmp_dispatch_next_algorithm<T>(gtid, my_pr, my_sh,
5310b57cec5SDimitry Andric                                                 &contains_last, &my_lb, &my_ub,
5320b57cec5SDimitry Andric                                                 &my_st, nproc, hier_id);
5330b57cec5SDimitry Andric       KD_TRACE(
5340b57cec5SDimitry Andric           10,
5350b57cec5SDimitry Andric           ("kmp_hier_t.next_recurse(): T#%d (%d) next_pr_sh() returned %d\n",
5360b57cec5SDimitry Andric            gtid, hier_level, status));
5370b57cec5SDimitry Andric       // When no iterations are found (status == 0) and this is not the last
5380b57cec5SDimitry Andric       // layer, attempt to go up the hierarchy for more iterations
5390b57cec5SDimitry Andric       if (status == 0 && !last_layer) {
540e8d8bef9SDimitry Andric         kmp_int32 hid;
541e8d8bef9SDimitry Andric         __kmp_type_convert(hier_id, &hid);
5420b57cec5SDimitry Andric         status = next_recurse(loc, gtid, parent, &contains_last, &my_lb, &my_ub,
543e8d8bef9SDimitry Andric                               &my_st, hid, hier_level + 1);
5440b57cec5SDimitry Andric         KD_TRACE(
5450b57cec5SDimitry Andric             10,
5460b57cec5SDimitry Andric             ("kmp_hier_t.next_recurse(): T#%d (%d) hier_next() returned %d\n",
5470b57cec5SDimitry Andric              gtid, hier_level, status));
5480b57cec5SDimitry Andric         if (status == 1) {
          // The parent has a fresh range: re-initialize this unit's private
          // buffer with the parent's current lb/ub/st and dispatch again.
5490b57cec5SDimitry Andric           kmp_hier_private_bdata_t *upper_tdata =
5500b57cec5SDimitry Andric               &(th->th.th_hier_bar_data[hier_level + 1]);
5510b57cec5SDimitry Andric           my_sh = parent->get_curr_sh(upper_tdata->index);
5520b57cec5SDimitry Andric           KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) about to init\n",
5530b57cec5SDimitry Andric                         gtid, hier_level));
5540b57cec5SDimitry Andric           __kmp_dispatch_init_algorithm(loc, gtid, my_pr, schedule,
5550b57cec5SDimitry Andric                                         parent->get_curr_lb(upper_tdata->index),
5560b57cec5SDimitry Andric                                         parent->get_curr_ub(upper_tdata->index),
5570b57cec5SDimitry Andric                                         parent->get_curr_st(upper_tdata->index),
5580b57cec5SDimitry Andric #if USE_ITT_BUILD
5590b57cec5SDimitry Andric                                         NULL,
5600b57cec5SDimitry Andric #endif
5610b57cec5SDimitry Andric                                         chunk, nproc, hier_id);
5620b57cec5SDimitry Andric           status = __kmp_dispatch_next_algorithm<T>(
5630b57cec5SDimitry Andric               gtid, my_pr, my_sh, &contains_last, &my_lb, &my_ub, &my_st, nproc,
5640b57cec5SDimitry Andric               hier_id);
5650b57cec5SDimitry Andric           if (!status) {
5660b57cec5SDimitry Andric             KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) status not 1 "
5670b57cec5SDimitry Andric                           "setting to 2!\n",
5680b57cec5SDimitry Andric                           gtid, hier_level));
5690b57cec5SDimitry Andric             status = 2;
5700b57cec5SDimitry Andric           }
5710b57cec5SDimitry Andric         }
5720b57cec5SDimitry Andric       }
      // Hand the resulting range and status to the unit; this is how the
      // result leaves the function (p_lb/p_ub/p_st are never written).
5730b57cec5SDimitry Andric       current->set_next(my_lb, my_ub, my_st, status, tdata->index);
5740b57cec5SDimitry Andric       // Propagate whether a unit holds the actual global last iteration
5750b57cec5SDimitry Andric       // The contains_last attribute is sent downwards from the top to the
5760b57cec5SDimitry Andric       // bottom of the hierarchy via the contains_last flag inside the
5770b57cec5SDimitry Andric       // private dispatch buffers in the hierarchy's middle layers
5780b57cec5SDimitry Andric       if (contains_last) {
5790b57cec5SDimitry Andric         // If the next_algorithm() method returns 1 for p_last and it is the
5800b57cec5SDimitry Andric         // last layer or our parent contains the last serial chunk, then the
5810b57cec5SDimitry Andric         // chunk must contain the last serial iteration.
5820b57cec5SDimitry Andric         if (last_layer || parent->hier_pr.flags.contains_last) {
5830b57cec5SDimitry Andric           KD_TRACE(10, ("kmp_hier_t.next_recurse(): T#%d (%d) Setting this pr "
5840b57cec5SDimitry Andric                         "to contain last.\n",
5850b57cec5SDimitry Andric                         gtid, hier_level));
5860b57cec5SDimitry Andric           current->hier_pr.flags.contains_last = contains_last;
5870b57cec5SDimitry Andric         }
5880b57cec5SDimitry Andric         if (!current->hier_pr.flags.contains_last)
5890b57cec5SDimitry Andric           contains_last = FALSE;
5900b57cec5SDimitry Andric       }
5910b57cec5SDimitry Andric       if (p_last)
5920b57cec5SDimitry Andric         *p_last = contains_last;
593*fe6060f1SDimitry Andric     } // if primary thread of this unit
    // Every caller takes the unit's barrier, except at level 0 when hand
    // threading is enabled: that path returns the local status directly.
5940b57cec5SDimitry Andric     if (hier_level > 0 || !__kmp_dispatch_hand_threading) {
5950b57cec5SDimitry Andric       KD_TRACE(10,
5960b57cec5SDimitry Andric                ("kmp_hier_t.next_recurse(): T#%d (%d) going into barrier.\n",
5970b57cec5SDimitry Andric                 gtid, hier_level));
5980b57cec5SDimitry Andric       current->barrier(previous_id, tdata);
5990b57cec5SDimitry Andric       KD_TRACE(10,
6000b57cec5SDimitry Andric                ("kmp_hier_t.next_recurse(): T#%d (%d) released and exit %d\n",
6010b57cec5SDimitry Andric                 gtid, hier_level, current->get_curr_status(tdata->index)));
6020b57cec5SDimitry Andric     } else {
      // `status` is only assigned on the previous_id == 0 path above, which
      // is why that condition is asserted before returning it.
6030b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(previous_id == 0);
6040b57cec5SDimitry Andric       return status;
6050b57cec5SDimitry Andric     }
6060b57cec5SDimitry Andric     return current->get_curr_status(tdata->index);
6070b57cec5SDimitry Andric   }
6080b57cec5SDimitry Andric 
6090b57cec5SDimitry Andric public:
// Thread count reported by get_top_level_nproc(); allocate_hier() resets it
// to 0 on every call.
6100b57cec5SDimitry Andric   int top_level_nproc;
// Number of entries in info[] and layers[]; 0 after deallocate().
6110b57cec5SDimitry Andric   int num_layers;
// Set to true once allocate_hier() has built every layer; cleared by
// deallocate().
6120b57cec5SDimitry Andric   bool valid;
// traits_t<T>::type_size captured at allocation time; need_to_reallocate()
// compares against it.
6130b57cec5SDimitry Andric   int type_size;
// Per-layer description (type, sched, chunk, length, num_active), indexed by
// layer.
6140b57cec5SDimitry Andric   kmp_hier_layer_info_t<T> *info;
// layers[i] is the array of info[i].length units that make up layer i.
6150b57cec5SDimitry Andric   kmp_hier_top_unit_t<T> **layers;
6160b57cec5SDimitry Andric   // Deallocate all memory from this hierarchy
deallocatekmp_hier_t6170b57cec5SDimitry Andric   void deallocate() {
6180b57cec5SDimitry Andric     for (int i = 0; i < num_layers; ++i)
6190b57cec5SDimitry Andric       if (layers[i] != NULL) {
6200b57cec5SDimitry Andric         __kmp_free(layers[i]);
6210b57cec5SDimitry Andric       }
6220b57cec5SDimitry Andric     if (layers != NULL) {
6230b57cec5SDimitry Andric       __kmp_free(layers);
6240b57cec5SDimitry Andric       layers = NULL;
6250b57cec5SDimitry Andric     }
6260b57cec5SDimitry Andric     if (info != NULL) {
6270b57cec5SDimitry Andric       __kmp_free(info);
6280b57cec5SDimitry Andric       info = NULL;
6290b57cec5SDimitry Andric     }
6300b57cec5SDimitry Andric     num_layers = 0;
6310b57cec5SDimitry Andric     valid = false;
6320b57cec5SDimitry Andric   }
6330b57cec5SDimitry Andric   // Returns true if reallocation is needed else false
need_to_reallocatekmp_hier_t6340b57cec5SDimitry Andric   bool need_to_reallocate(int n, const kmp_hier_layer_e *new_layers,
6350b57cec5SDimitry Andric                           const enum sched_type *new_scheds,
6360b57cec5SDimitry Andric                           const ST *new_chunks) const {
6370b57cec5SDimitry Andric     if (!valid || layers == NULL || info == NULL ||
6380b57cec5SDimitry Andric         traits_t<T>::type_size != type_size || n != num_layers)
6390b57cec5SDimitry Andric       return true;
6400b57cec5SDimitry Andric     for (int i = 0; i < n; ++i) {
6410b57cec5SDimitry Andric       if (info[i].type != new_layers[i])
6420b57cec5SDimitry Andric         return true;
6430b57cec5SDimitry Andric       if (info[i].sched != new_scheds[i])
6440b57cec5SDimitry Andric         return true;
6450b57cec5SDimitry Andric       if (info[i].chunk != new_chunks[i])
6460b57cec5SDimitry Andric         return true;
6470b57cec5SDimitry Andric     }
6480b57cec5SDimitry Andric     return false;
6490b57cec5SDimitry Andric   }
6500b57cec5SDimitry Andric   // A single thread should call this function while the other threads wait
6510b57cec5SDimitry Andric   // create a new scheduling hierarchy consisting of new_layers, new_scheds
6520b57cec5SDimitry Andric   // and new_chunks.  These should come pre-sorted according to
6530b57cec5SDimitry Andric   // kmp_hier_layer_e value.  This function will try to avoid reallocation
6540b57cec5SDimitry Andric   // if it can
allocate_hierkmp_hier_t6550b57cec5SDimitry Andric   void allocate_hier(int n, const kmp_hier_layer_e *new_layers,
6560b57cec5SDimitry Andric                      const enum sched_type *new_scheds, const ST *new_chunks) {
6570b57cec5SDimitry Andric     top_level_nproc = 0;
6580b57cec5SDimitry Andric     if (!need_to_reallocate(n, new_layers, new_scheds, new_chunks)) {
6590b57cec5SDimitry Andric       KD_TRACE(
6600b57cec5SDimitry Andric           10,
6610b57cec5SDimitry Andric           ("kmp_hier_t<T>::allocate_hier: T#0 do not need to reallocate\n"));
6620b57cec5SDimitry Andric       for (int i = 0; i < n; ++i) {
6630b57cec5SDimitry Andric         info[i].num_active = 0;
6640b57cec5SDimitry Andric         for (int j = 0; j < get_length(i); ++j)
6650b57cec5SDimitry Andric           layers[i][j].active = 0;
6660b57cec5SDimitry Andric       }
6670b57cec5SDimitry Andric       return;
6680b57cec5SDimitry Andric     }
6690b57cec5SDimitry Andric     KD_TRACE(10, ("kmp_hier_t<T>::allocate_hier: T#0 full alloc\n"));
6700b57cec5SDimitry Andric     deallocate();
6710b57cec5SDimitry Andric     type_size = traits_t<T>::type_size;
6720b57cec5SDimitry Andric     num_layers = n;
6730b57cec5SDimitry Andric     info = (kmp_hier_layer_info_t<T> *)__kmp_allocate(
6740b57cec5SDimitry Andric         sizeof(kmp_hier_layer_info_t<T>) * n);
6750b57cec5SDimitry Andric     layers = (kmp_hier_top_unit_t<T> **)__kmp_allocate(
6760b57cec5SDimitry Andric         sizeof(kmp_hier_top_unit_t<T> *) * n);
6770b57cec5SDimitry Andric     for (int i = 0; i < n; ++i) {
6780b57cec5SDimitry Andric       int max = 0;
6790b57cec5SDimitry Andric       kmp_hier_layer_e layer = new_layers[i];
6800b57cec5SDimitry Andric       info[i].num_active = 0;
6810b57cec5SDimitry Andric       info[i].type = layer;
6820b57cec5SDimitry Andric       info[i].sched = new_scheds[i];
6830b57cec5SDimitry Andric       info[i].chunk = new_chunks[i];
6840b57cec5SDimitry Andric       max = __kmp_hier_max_units[layer + 1];
6850b57cec5SDimitry Andric       if (max == 0) {
6860b57cec5SDimitry Andric         valid = false;
6870b57cec5SDimitry Andric         KMP_WARNING(HierSchedInvalid, __kmp_get_hier_str(layer));
6880b57cec5SDimitry Andric         deallocate();
6890b57cec5SDimitry Andric         return;
6900b57cec5SDimitry Andric       }
6910b57cec5SDimitry Andric       info[i].length = max;
6920b57cec5SDimitry Andric       layers[i] = (kmp_hier_top_unit_t<T> *)__kmp_allocate(
6930b57cec5SDimitry Andric           sizeof(kmp_hier_top_unit_t<T>) * max);
6940b57cec5SDimitry Andric       for (int j = 0; j < max; ++j) {
6950b57cec5SDimitry Andric         layers[i][j].active = 0;
6960b57cec5SDimitry Andric         layers[i][j].hier_pr.flags.use_hier = TRUE;
6970b57cec5SDimitry Andric       }
6980b57cec5SDimitry Andric     }
6990b57cec5SDimitry Andric     valid = true;
7000b57cec5SDimitry Andric   }
7010b57cec5SDimitry Andric   // loc - source file location
7020b57cec5SDimitry Andric   // gtid - global thread identifier
7030b57cec5SDimitry Andric   // pr - this thread's private dispatch buffer (corresponding with gtid)
7040b57cec5SDimitry Andric   // p_last (return value) - pointer to flag indicating this set of iterations
7050b57cec5SDimitry Andric   // contains last
7060b57cec5SDimitry Andric   //          iteration
7070b57cec5SDimitry Andric   // p_lb (return value) - lower bound for this chunk of iterations
7080b57cec5SDimitry Andric   // p_ub (return value) - upper bound for this chunk of iterations
7090b57cec5SDimitry Andric   // p_st (return value) - stride for this chunk of iterations
7100b57cec5SDimitry Andric   //
7110b57cec5SDimitry Andric   // Returns 1 if there are more iterations to perform, 0 otherwise
// Thread-level entry point.  The calling thread first tries to dispatch from
// its lowest-level unit (pr->get_parent()); when that yields nothing it
// pulls more work from the hierarchy with next_recurse() at level 0 and
// re-initializes pr from the range the unit then holds.
// Two paths are selected by __kmp_dispatch_hand_threading:
//  - hand threading: only the thread whose hier id is 0 dispatches; it hands
//    the result over with parent->set_next_hand_thread(), all threads meet
//    in parent->barrier(), and the other threads then copy lb/ub/st/status
//    from the parent's current values.
//  - otherwise: every thread dispatches on its own against
//    parent->get_curr_sh(tdata->index).
// In both paths the refill loop repeats while next_recurse() returns 2 or
// while a refill succeeds but the following dispatch returns 0.
nextkmp_hier_t7120b57cec5SDimitry Andric   int next(ident_t *loc, int gtid, dispatch_private_info_template<T> *pr,
7130b57cec5SDimitry Andric            kmp_int32 *p_last, T *p_lb, T *p_ub, ST *p_st) {
7140b57cec5SDimitry Andric     int status;
7150b57cec5SDimitry Andric     kmp_int32 contains_last = 0;
7160b57cec5SDimitry Andric     kmp_info_t *th = __kmp_threads[gtid];
7170b57cec5SDimitry Andric     kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[0]);
7180b57cec5SDimitry Andric     auto parent = pr->get_parent();
    // NOTE(review): th is already dereferenced above when these asserts run,
    // and parent is asserted twice.
7190b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(parent);
7200b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(th);
7210b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(tdata);
7220b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(parent);
7230b57cec5SDimitry Andric     T nproc = (T)parent->get_num_active();
7240b57cec5SDimitry Andric     T unit_id = (T)pr->get_hier_id();
7250b57cec5SDimitry Andric     KD_TRACE(
7260b57cec5SDimitry Andric         10,
7270b57cec5SDimitry Andric         ("kmp_hier_t.next(): T#%d THREAD LEVEL nproc:%d unit_id:%d called\n",
7280b57cec5SDimitry Andric          gtid, nproc, unit_id));
7290b57cec5SDimitry Andric     // Handthreading implementation
7300b57cec5SDimitry Andric     // Each iteration is performed by all threads on last unit (typically
7310b57cec5SDimitry Andric     // cores/tiles)
7320b57cec5SDimitry Andric     // e.g., threads 0,1,2,3 all execute iteration 0
7330b57cec5SDimitry Andric     //       threads 0,1,2,3 all execute iteration 1
7340b57cec5SDimitry Andric     //       threads 4,5,6,7 all execute iteration 2
7350b57cec5SDimitry Andric     //       threads 4,5,6,7 all execute iteration 3
7360b57cec5SDimitry Andric     //       ... etc.
7370b57cec5SDimitry Andric     if (__kmp_dispatch_hand_threading) {
7380b57cec5SDimitry Andric       KD_TRACE(10,
7390b57cec5SDimitry Andric                ("kmp_hier_t.next(): T#%d THREAD LEVEL using hand threading\n",
7400b57cec5SDimitry Andric                 gtid));
7410b57cec5SDimitry Andric       if (unit_id == 0) {
7420b57cec5SDimitry Andric         // For hand threading, the sh buffer on the lowest level is only ever
743*fe6060f1SDimitry Andric         // modified and read by the primary thread on that level.  Because of
7440b57cec5SDimitry Andric         // this, we can always use the first sh buffer.
7450b57cec5SDimitry Andric         auto sh = &(parent->hier_barrier.sh[0]);
7460b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(sh);
7470b57cec5SDimitry Andric         status = __kmp_dispatch_next_algorithm<T>(
7480b57cec5SDimitry Andric             gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id);
7490b57cec5SDimitry Andric         if (!status) {
          // Out of local iterations: keep asking the hierarchy until a
          // usable chunk is obtained or it reports that none are left.
7500b57cec5SDimitry Andric           bool done = false;
7510b57cec5SDimitry Andric           while (!done) {
7520b57cec5SDimitry Andric             done = true;
753e8d8bef9SDimitry Andric             kmp_int32 uid;
754e8d8bef9SDimitry Andric             __kmp_type_convert(unit_id, &uid);
7550b57cec5SDimitry Andric             status = next_recurse(loc, gtid, parent, &contains_last, p_lb, p_ub,
756e8d8bef9SDimitry Andric                                   p_st, uid, 0);
7570b57cec5SDimitry Andric             if (status == 1) {
              // This path reads the parent's *next* lb/ub/st values; the
              // non-hand-threading path below reads the *current* ones.
7580b57cec5SDimitry Andric               __kmp_dispatch_init_algorithm(loc, gtid, pr, pr->schedule,
7590b57cec5SDimitry Andric                                             parent->get_next_lb(tdata->index),
7600b57cec5SDimitry Andric                                             parent->get_next_ub(tdata->index),
7610b57cec5SDimitry Andric                                             parent->get_next_st(tdata->index),
7620b57cec5SDimitry Andric #if USE_ITT_BUILD
7630b57cec5SDimitry Andric                                             NULL,
7640b57cec5SDimitry Andric #endif
7650b57cec5SDimitry Andric                                             pr->u.p.parm1, nproc, unit_id);
7660b57cec5SDimitry Andric               sh->u.s.iteration = 0;
7670b57cec5SDimitry Andric               status = __kmp_dispatch_next_algorithm<T>(
7680b57cec5SDimitry Andric                   gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc,
7690b57cec5SDimitry Andric                   unit_id);
7700b57cec5SDimitry Andric               if (!status) {
7710b57cec5SDimitry Andric                 KD_TRACE(10,
7720b57cec5SDimitry Andric                          ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 0 "
7730b57cec5SDimitry Andric                           "after next_pr_sh()"
7740b57cec5SDimitry Andric                           "trying again.\n",
7750b57cec5SDimitry Andric                           gtid));
7760b57cec5SDimitry Andric                 done = false;
7770b57cec5SDimitry Andric               }
7780b57cec5SDimitry Andric             } else if (status == 2) {
7790b57cec5SDimitry Andric               KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 2 "
7800b57cec5SDimitry Andric                             "trying again.\n",
7810b57cec5SDimitry Andric                             gtid));
7820b57cec5SDimitry Andric               done = false;
7830b57cec5SDimitry Andric             }
7840b57cec5SDimitry Andric           }
7850b57cec5SDimitry Andric         }
        // Hand the chunk to the unit so the other threads can pick it up
        // after the barrier below.
7860b57cec5SDimitry Andric         parent->set_next_hand_thread(*p_lb, *p_ub, *p_st, status, tdata->index);
787*fe6060f1SDimitry Andric       } // if primary thread of lowest unit level
7880b57cec5SDimitry Andric       parent->barrier(pr->get_hier_id(), tdata);
7890b57cec5SDimitry Andric       if (unit_id != 0) {
        // Non-primary threads take the chunk from the unit.  Their local
        // contains_last stays at its initial 0 on this path.
7900b57cec5SDimitry Andric         *p_lb = parent->get_curr_lb(tdata->index);
7910b57cec5SDimitry Andric         *p_ub = parent->get_curr_ub(tdata->index);
7920b57cec5SDimitry Andric         *p_st = parent->get_curr_st(tdata->index);
7930b57cec5SDimitry Andric         status = parent->get_curr_status(tdata->index);
7940b57cec5SDimitry Andric       }
7950b57cec5SDimitry Andric     } else {
7960b57cec5SDimitry Andric       // Normal implementation
7970b57cec5SDimitry Andric       // Each thread grabs an iteration chunk and executes it (no cooperation)
7980b57cec5SDimitry Andric       auto sh = parent->get_curr_sh(tdata->index);
7990b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(sh);
8000b57cec5SDimitry Andric       status = __kmp_dispatch_next_algorithm<T>(
8010b57cec5SDimitry Andric           gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id);
8020b57cec5SDimitry Andric       KD_TRACE(10,
8030b57cec5SDimitry Andric                ("kmp_hier_t.next(): T#%d THREAD LEVEL next_algorithm status:%d "
8040b57cec5SDimitry Andric                 "contains_last:%d p_lb:%d p_ub:%d p_st:%d\n",
8050b57cec5SDimitry Andric                 gtid, status, contains_last, *p_lb, *p_ub, *p_st));
8060b57cec5SDimitry Andric       if (!status) {
8070b57cec5SDimitry Andric         bool done = false;
8080b57cec5SDimitry Andric         while (!done) {
8090b57cec5SDimitry Andric           done = true;
810e8d8bef9SDimitry Andric           kmp_int32 uid;
811e8d8bef9SDimitry Andric           __kmp_type_convert(unit_id, &uid);
8120b57cec5SDimitry Andric           status = next_recurse(loc, gtid, parent, &contains_last, p_lb, p_ub,
813e8d8bef9SDimitry Andric                                 p_st, uid, 0);
8140b57cec5SDimitry Andric           if (status == 1) {
            // Re-read the shared buffer after the refill, since sh is
            // selected by tdata->index.
8150b57cec5SDimitry Andric             sh = parent->get_curr_sh(tdata->index);
8160b57cec5SDimitry Andric             __kmp_dispatch_init_algorithm(loc, gtid, pr, pr->schedule,
8170b57cec5SDimitry Andric                                           parent->get_curr_lb(tdata->index),
8180b57cec5SDimitry Andric                                           parent->get_curr_ub(tdata->index),
8190b57cec5SDimitry Andric                                           parent->get_curr_st(tdata->index),
8200b57cec5SDimitry Andric #if USE_ITT_BUILD
8210b57cec5SDimitry Andric                                           NULL,
8220b57cec5SDimitry Andric #endif
8230b57cec5SDimitry Andric                                           pr->u.p.parm1, nproc, unit_id);
8240b57cec5SDimitry Andric             status = __kmp_dispatch_next_algorithm<T>(
8250b57cec5SDimitry Andric                 gtid, pr, sh, &contains_last, p_lb, p_ub, p_st, nproc, unit_id);
8260b57cec5SDimitry Andric             if (!status) {
8270b57cec5SDimitry Andric               KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 0 "
8280b57cec5SDimitry Andric                             "after next_pr_sh()"
8290b57cec5SDimitry Andric                             "trying again.\n",
8300b57cec5SDimitry Andric                             gtid));
8310b57cec5SDimitry Andric               done = false;
8320b57cec5SDimitry Andric             }
8330b57cec5SDimitry Andric           } else if (status == 2) {
8340b57cec5SDimitry Andric             KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL status == 2 "
8350b57cec5SDimitry Andric                           "trying again.\n",
8360b57cec5SDimitry Andric                           gtid));
8370b57cec5SDimitry Andric             done = false;
8380b57cec5SDimitry Andric           }
8390b57cec5SDimitry Andric         }
8400b57cec5SDimitry Andric       }
8410b57cec5SDimitry Andric     }
    // A chunk only counts as holding the last iteration when the parent unit
    // is itself flagged as containing it.
8420b57cec5SDimitry Andric     if (contains_last && !parent->hier_pr.flags.contains_last) {
8430b57cec5SDimitry Andric       KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL resetting "
8440b57cec5SDimitry Andric                     "contains_last to FALSE\n",
8450b57cec5SDimitry Andric                     gtid));
8460b57cec5SDimitry Andric       contains_last = FALSE;
8470b57cec5SDimitry Andric     }
8480b57cec5SDimitry Andric     if (p_last)
8490b57cec5SDimitry Andric       *p_last = contains_last;
8500b57cec5SDimitry Andric     KD_TRACE(10, ("kmp_hier_t.next(): T#%d THREAD LEVEL exit status %d\n", gtid,
8510b57cec5SDimitry Andric                   status));
8520b57cec5SDimitry Andric     return status;
8530b57cec5SDimitry Andric   }
8540b57cec5SDimitry Andric   // These functions probe the layer info structure
8550b57cec5SDimitry Andric   // Returns the type of topology unit given level
get_typekmp_hier_t8560b57cec5SDimitry Andric   kmp_hier_layer_e get_type(int level) const {
8570b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0);
8580b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level < num_layers);
8590b57cec5SDimitry Andric     return info[level].type;
8600b57cec5SDimitry Andric   }
8610b57cec5SDimitry Andric   // Returns the schedule type at given level
get_schedkmp_hier_t8620b57cec5SDimitry Andric   enum sched_type get_sched(int level) const {
8630b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0);
8640b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level < num_layers);
8650b57cec5SDimitry Andric     return info[level].sched;
8660b57cec5SDimitry Andric   }
8670b57cec5SDimitry Andric   // Returns the chunk size at given level
get_chunkkmp_hier_t8680b57cec5SDimitry Andric   ST get_chunk(int level) const {
8690b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0);
8700b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level < num_layers);
8710b57cec5SDimitry Andric     return info[level].chunk;
8720b57cec5SDimitry Andric   }
8730b57cec5SDimitry Andric   // Returns the number of active threads at given level
get_num_activekmp_hier_t8740b57cec5SDimitry Andric   int get_num_active(int level) const {
8750b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0);
8760b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level < num_layers);
8770b57cec5SDimitry Andric     return info[level].num_active;
8780b57cec5SDimitry Andric   }
8790b57cec5SDimitry Andric   // Returns the length of topology unit array at given level
get_lengthkmp_hier_t8800b57cec5SDimitry Andric   int get_length(int level) const {
8810b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0);
8820b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level < num_layers);
8830b57cec5SDimitry Andric     return info[level].length;
8840b57cec5SDimitry Andric   }
8850b57cec5SDimitry Andric   // Returns the topology unit given the level and index
get_unitkmp_hier_t8860b57cec5SDimitry Andric   kmp_hier_top_unit_t<T> *get_unit(int level, int index) {
8870b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level >= 0);
8880b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(level < num_layers);
8890b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(index >= 0);
8900b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(index < get_length(level));
8910b57cec5SDimitry Andric     return &(layers[level][index]);
8920b57cec5SDimitry Andric   }
8930b57cec5SDimitry Andric   // Returns the number of layers in the hierarchy
get_num_layerskmp_hier_t8940b57cec5SDimitry Andric   int get_num_layers() const { return num_layers; }
8950b57cec5SDimitry Andric   // Returns the number of threads in the top layer
8960b57cec5SDimitry Andric   // This is necessary because we don't store a topology unit as
8970b57cec5SDimitry Andric   // the very top level and the scheduling algorithms need this information
get_top_level_nprockmp_hier_t8980b57cec5SDimitry Andric   int get_top_level_nproc() const { return top_level_nproc; }
8990b57cec5SDimitry Andric   // Return whether this hierarchy is valid or not
is_validkmp_hier_t9000b57cec5SDimitry Andric   bool is_valid() const { return valid; }
9010b57cec5SDimitry Andric #ifdef KMP_DEBUG
9020b57cec5SDimitry Andric   // Print the hierarchy
printkmp_hier_t9030b57cec5SDimitry Andric   void print() {
9040b57cec5SDimitry Andric     KD_TRACE(10, ("kmp_hier_t:\n"));
9050b57cec5SDimitry Andric     for (int i = num_layers - 1; i >= 0; --i) {
9060b57cec5SDimitry Andric       KD_TRACE(10, ("Info[%d] = ", i));
9070b57cec5SDimitry Andric       info[i].print();
9080b57cec5SDimitry Andric     }
9090b57cec5SDimitry Andric     for (int i = num_layers - 1; i >= 0; --i) {
9100b57cec5SDimitry Andric       KD_TRACE(10, ("Layer[%d] =\n", i));
9110b57cec5SDimitry Andric       for (int j = 0; j < info[i].length; ++j) {
9120b57cec5SDimitry Andric         layers[i][j].print();
9130b57cec5SDimitry Andric       }
9140b57cec5SDimitry Andric     }
9150b57cec5SDimitry Andric   }
9160b57cec5SDimitry Andric #endif
9170b57cec5SDimitry Andric };
9180b57cec5SDimitry Andric 
9190b57cec5SDimitry Andric template <typename T>
__kmp_dispatch_init_hierarchy(ident_t * loc,int n,kmp_hier_layer_e * new_layers,enum sched_type * new_scheds,typename traits_t<T>::signed_t * new_chunks,T lb,T ub,typename traits_t<T>::signed_t st)9200b57cec5SDimitry Andric void __kmp_dispatch_init_hierarchy(ident_t *loc, int n,
9210b57cec5SDimitry Andric                                    kmp_hier_layer_e *new_layers,
9220b57cec5SDimitry Andric                                    enum sched_type *new_scheds,
9230b57cec5SDimitry Andric                                    typename traits_t<T>::signed_t *new_chunks,
9240b57cec5SDimitry Andric                                    T lb, T ub,
9250b57cec5SDimitry Andric                                    typename traits_t<T>::signed_t st) {
9260b57cec5SDimitry Andric   int tid, gtid, num_hw_threads, num_threads_per_layer1, active;
927*fe6060f1SDimitry Andric   unsigned int my_buffer_index;
9280b57cec5SDimitry Andric   kmp_info_t *th;
9290b57cec5SDimitry Andric   kmp_team_t *team;
9300b57cec5SDimitry Andric   dispatch_private_info_template<T> *pr;
9310b57cec5SDimitry Andric   dispatch_shared_info_template<T> volatile *sh;
9320b57cec5SDimitry Andric   gtid = __kmp_entry_gtid();
9330b57cec5SDimitry Andric   tid = __kmp_tid_from_gtid(gtid);
9340b57cec5SDimitry Andric #ifdef KMP_DEBUG
9350b57cec5SDimitry Andric   KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d called: %d layer(s)\n",
9360b57cec5SDimitry Andric                 gtid, n));
9370b57cec5SDimitry Andric   for (int i = 0; i < n; ++i) {
9380b57cec5SDimitry Andric     const char *layer = __kmp_get_hier_str(new_layers[i]);
9390b57cec5SDimitry Andric     KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d: new_layers[%d] = %s, "
9400b57cec5SDimitry Andric                   "new_scheds[%d] = %d, new_chunks[%d] = %u\n",
9410b57cec5SDimitry Andric                   gtid, i, layer, i, (int)new_scheds[i], i, new_chunks[i]));
9420b57cec5SDimitry Andric   }
9430b57cec5SDimitry Andric #endif // KMP_DEBUG
9440b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(n > 0);
9450b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(new_layers);
9460b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(new_scheds);
9470b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(new_chunks);
9480b57cec5SDimitry Andric   if (!TCR_4(__kmp_init_parallel))
9490b57cec5SDimitry Andric     __kmp_parallel_initialize();
9500b57cec5SDimitry Andric   __kmp_resume_if_soft_paused();
9510b57cec5SDimitry Andric 
9520b57cec5SDimitry Andric   th = __kmp_threads[gtid];
9530b57cec5SDimitry Andric   team = th->th.th_team;
9540b57cec5SDimitry Andric   active = !team->t.t_serialized;
9550b57cec5SDimitry Andric   th->th.th_ident = loc;
9560b57cec5SDimitry Andric   num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
9570b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(th->th.th_dispatch ==
9580b57cec5SDimitry Andric                    &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
9590b57cec5SDimitry Andric   my_buffer_index = th->th.th_dispatch->th_disp_index;
9600b57cec5SDimitry Andric   pr = reinterpret_cast<dispatch_private_info_template<T> *>(
9610b57cec5SDimitry Andric       &th->th.th_dispatch
9620b57cec5SDimitry Andric            ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
9630b57cec5SDimitry Andric   sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
9640b57cec5SDimitry Andric       &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
9650b57cec5SDimitry Andric   if (!active) {
9660b57cec5SDimitry Andric     KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d not active parallel. "
9670b57cec5SDimitry Andric                   "Using normal dispatch functions.\n",
9680b57cec5SDimitry Andric                   gtid));
9690b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(pr);
9700b57cec5SDimitry Andric     pr->flags.use_hier = FALSE;
9710b57cec5SDimitry Andric     pr->flags.contains_last = FALSE;
9720b57cec5SDimitry Andric     return;
9730b57cec5SDimitry Andric   }
9740b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(pr);
9750b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(sh);
9760b57cec5SDimitry Andric   pr->flags.use_hier = TRUE;
9770b57cec5SDimitry Andric   pr->u.p.tc = 0;
978*fe6060f1SDimitry Andric   // Have primary thread allocate the hierarchy
9790b57cec5SDimitry Andric   if (__kmp_tid_from_gtid(gtid) == 0) {
9800b57cec5SDimitry Andric     KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d pr:%p sh:%p allocating "
9810b57cec5SDimitry Andric                   "hierarchy\n",
9820b57cec5SDimitry Andric                   gtid, pr, sh));
9830b57cec5SDimitry Andric     if (sh->hier == NULL) {
9840b57cec5SDimitry Andric       sh->hier = (kmp_hier_t<T> *)__kmp_allocate(sizeof(kmp_hier_t<T>));
9850b57cec5SDimitry Andric     }
9860b57cec5SDimitry Andric     sh->hier->allocate_hier(n, new_layers, new_scheds, new_chunks);
9870b57cec5SDimitry Andric     sh->u.s.iteration = 0;
9880b57cec5SDimitry Andric   }
9890b57cec5SDimitry Andric   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
9900b57cec5SDimitry Andric   // Check to make sure the hierarchy is valid
9910b57cec5SDimitry Andric   kmp_hier_t<T> *hier = sh->hier;
9920b57cec5SDimitry Andric   if (!sh->hier->is_valid()) {
9930b57cec5SDimitry Andric     pr->flags.use_hier = FALSE;
9940b57cec5SDimitry Andric     return;
9950b57cec5SDimitry Andric   }
9960b57cec5SDimitry Andric   // Have threads allocate their thread-private barrier data if it hasn't
9970b57cec5SDimitry Andric   // already been allocated
9980b57cec5SDimitry Andric   if (th->th.th_hier_bar_data == NULL) {
9990b57cec5SDimitry Andric     th->th.th_hier_bar_data = (kmp_hier_private_bdata_t *)__kmp_allocate(
10000b57cec5SDimitry Andric         sizeof(kmp_hier_private_bdata_t) * kmp_hier_layer_e::LAYER_LAST);
10010b57cec5SDimitry Andric   }
10025ffd83dbSDimitry Andric   // Have threads "register" themselves by modifying the active count for each
10030b57cec5SDimitry Andric   // level they are involved in. The active count will act as nthreads for that
10040b57cec5SDimitry Andric   // level regarding the scheduling algorithms
10050b57cec5SDimitry Andric   for (int i = 0; i < n; ++i) {
10060b57cec5SDimitry Andric     int index = __kmp_dispatch_get_index(tid, hier->get_type(i));
10070b57cec5SDimitry Andric     kmp_hier_top_unit_t<T> *my_unit = hier->get_unit(i, index);
10080b57cec5SDimitry Andric     // Setup the thread's private dispatch buffer's hierarchy pointers
10090b57cec5SDimitry Andric     if (i == 0)
10100b57cec5SDimitry Andric       pr->hier_parent = my_unit;
10110b57cec5SDimitry Andric     // If this unit is already active, then increment active count and wait
10120b57cec5SDimitry Andric     if (my_unit->is_active()) {
10130b57cec5SDimitry Andric       KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d my_unit (%p) "
10140b57cec5SDimitry Andric                     "is already active (%d)\n",
10150b57cec5SDimitry Andric                     gtid, my_unit, my_unit->active));
10160b57cec5SDimitry Andric       KMP_TEST_THEN_INC32(&(my_unit->active));
10170b57cec5SDimitry Andric       break;
10180b57cec5SDimitry Andric     }
10190b57cec5SDimitry Andric     // Flag that this unit is active
10200b57cec5SDimitry Andric     if (KMP_COMPARE_AND_STORE_ACQ32(&(my_unit->active), 0, 1)) {
10210b57cec5SDimitry Andric       // Do not setup parent pointer for top level unit since it has no parent
10220b57cec5SDimitry Andric       if (i < n - 1) {
10230b57cec5SDimitry Andric         // Setup middle layer pointers to parents
10240b57cec5SDimitry Andric         my_unit->get_my_pr()->hier_id =
10250b57cec5SDimitry Andric             index % __kmp_dispatch_get_t1_per_t2(hier->get_type(i),
10260b57cec5SDimitry Andric                                                  hier->get_type(i + 1));
10270b57cec5SDimitry Andric         int parent_index = __kmp_dispatch_get_index(tid, hier->get_type(i + 1));
10280b57cec5SDimitry Andric         my_unit->hier_parent = hier->get_unit(i + 1, parent_index);
10290b57cec5SDimitry Andric       } else {
10300b57cec5SDimitry Andric         // Setup top layer information (no parent pointers are set)
10310b57cec5SDimitry Andric         my_unit->get_my_pr()->hier_id =
10320b57cec5SDimitry Andric             index % __kmp_dispatch_get_t1_per_t2(hier->get_type(i),
10330b57cec5SDimitry Andric                                                  kmp_hier_layer_e::LAYER_LOOP);
10340b57cec5SDimitry Andric         KMP_TEST_THEN_INC32(&(hier->top_level_nproc));
10350b57cec5SDimitry Andric         my_unit->hier_parent = nullptr;
10360b57cec5SDimitry Andric       }
10370b57cec5SDimitry Andric       // Set trip count to 0 so that next() operation will initially climb up
10380b57cec5SDimitry Andric       // the hierarchy to get more iterations (early exit in next() for tc == 0)
10390b57cec5SDimitry Andric       my_unit->get_my_pr()->u.p.tc = 0;
10400b57cec5SDimitry Andric       // Increment this layer's number of active units
10410b57cec5SDimitry Andric       KMP_TEST_THEN_INC32(&(hier->info[i].num_active));
10420b57cec5SDimitry Andric       KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d my_unit (%p) "
10430b57cec5SDimitry Andric                     "incrementing num_active\n",
10440b57cec5SDimitry Andric                     gtid, my_unit));
10450b57cec5SDimitry Andric     } else {
10460b57cec5SDimitry Andric       KMP_TEST_THEN_INC32(&(my_unit->active));
10470b57cec5SDimitry Andric       break;
10480b57cec5SDimitry Andric     }
10490b57cec5SDimitry Andric   }
10500b57cec5SDimitry Andric   // Set this thread's id
10510b57cec5SDimitry Andric   num_threads_per_layer1 = __kmp_dispatch_get_t1_per_t2(
10520b57cec5SDimitry Andric       kmp_hier_layer_e::LAYER_THREAD, hier->get_type(0));
10530b57cec5SDimitry Andric   pr->hier_id = tid % num_threads_per_layer1;
10540b57cec5SDimitry Andric   // For oversubscribed threads, increment their index within the lowest unit
10550b57cec5SDimitry Andric   // This is done to prevent having two or more threads with id 0, id 1, etc.
10560b57cec5SDimitry Andric   if (tid >= num_hw_threads)
10570b57cec5SDimitry Andric     pr->hier_id += ((tid / num_hw_threads) * num_threads_per_layer1);
10580b57cec5SDimitry Andric   KD_TRACE(
10590b57cec5SDimitry Andric       10, ("__kmp_dispatch_init_hierarchy: T#%d setting lowest hier_id to %d\n",
10600b57cec5SDimitry Andric            gtid, pr->hier_id));
10610b57cec5SDimitry Andric 
10620b57cec5SDimitry Andric   pr->flags.contains_last = FALSE;
10630b57cec5SDimitry Andric   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
10640b57cec5SDimitry Andric 
10650b57cec5SDimitry Andric   // Now that the number of active threads at each level is determined,
10660b57cec5SDimitry Andric   // the barrier data for each unit can be initialized and the last layer's
10670b57cec5SDimitry Andric   // loop information can be initialized.
10680b57cec5SDimitry Andric   int prev_id = pr->get_hier_id();
10690b57cec5SDimitry Andric   for (int i = 0; i < n; ++i) {
10700b57cec5SDimitry Andric     if (prev_id != 0)
10710b57cec5SDimitry Andric       break;
10720b57cec5SDimitry Andric     int index = __kmp_dispatch_get_index(tid, hier->get_type(i));
10730b57cec5SDimitry Andric     kmp_hier_top_unit_t<T> *my_unit = hier->get_unit(i, index);
1074*fe6060f1SDimitry Andric     // Only primary threads of this unit within the hierarchy do initialization
10750b57cec5SDimitry Andric     KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d (%d) prev_id is 0\n",
10760b57cec5SDimitry Andric                   gtid, i));
10770b57cec5SDimitry Andric     my_unit->reset_shared_barrier();
10780b57cec5SDimitry Andric     my_unit->hier_pr.flags.contains_last = FALSE;
10790b57cec5SDimitry Andric     // Last layer, initialize the private buffers with entire loop information
1080480093f4SDimitry Andric     // Now the next next_algorithm() call will get the first chunk of
10810b57cec5SDimitry Andric     // iterations properly
10820b57cec5SDimitry Andric     if (i == n - 1) {
10830b57cec5SDimitry Andric       __kmp_dispatch_init_algorithm<T>(
10840b57cec5SDimitry Andric           loc, gtid, my_unit->get_my_pr(), hier->get_sched(i), lb, ub, st,
10850b57cec5SDimitry Andric #if USE_ITT_BUILD
10860b57cec5SDimitry Andric           NULL,
10870b57cec5SDimitry Andric #endif
10880b57cec5SDimitry Andric           hier->get_chunk(i), hier->get_num_active(i), my_unit->get_hier_id());
10890b57cec5SDimitry Andric     }
10900b57cec5SDimitry Andric     prev_id = my_unit->get_hier_id();
10910b57cec5SDimitry Andric   }
10920b57cec5SDimitry Andric   // Initialize each layer of the thread's private barrier data
10930b57cec5SDimitry Andric   kmp_hier_top_unit_t<T> *unit = pr->hier_parent;
10940b57cec5SDimitry Andric   for (int i = 0; i < n && unit; ++i, unit = unit->get_parent()) {
10950b57cec5SDimitry Andric     kmp_hier_private_bdata_t *tdata = &(th->th.th_hier_bar_data[i]);
10960b57cec5SDimitry Andric     unit->reset_private_barrier(tdata);
10970b57cec5SDimitry Andric   }
10980b57cec5SDimitry Andric   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
10990b57cec5SDimitry Andric 
11000b57cec5SDimitry Andric #ifdef KMP_DEBUG
11010b57cec5SDimitry Andric   if (__kmp_tid_from_gtid(gtid) == 0) {
11020b57cec5SDimitry Andric     for (int i = 0; i < n; ++i) {
11030b57cec5SDimitry Andric       KD_TRACE(10,
11040b57cec5SDimitry Andric                ("__kmp_dispatch_init_hierarchy: T#%d active count[%d] = %d\n",
11050b57cec5SDimitry Andric                 gtid, i, hier->get_num_active(i)));
11060b57cec5SDimitry Andric     }
11070b57cec5SDimitry Andric     hier->print();
11080b57cec5SDimitry Andric   }
11090b57cec5SDimitry Andric   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
11100b57cec5SDimitry Andric #endif // KMP_DEBUG
11110b57cec5SDimitry Andric }
11120b57cec5SDimitry Andric #endif
1113