1 /* 2 * kmp_barrier.h 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8 // See https://llvm.org/LICENSE.txt for license information. 9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef KMP_BARRIER_H 14 #define KMP_BARRIER_H 15 16 #include "kmp.h" 17 #include "kmp_i18n.h" 18 19 #if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC 20 #include <xmmintrin.h> 21 #define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment) 22 #define KMP_ALIGNED_FREE(ptr) _mm_free(ptr) 23 #elif KMP_HAVE_ALIGNED_ALLOC 24 #define KMP_ALIGNED_ALLOCATE(size, alignment) aligned_alloc(alignment, size) 25 #define KMP_ALIGNED_FREE(ptr) free(ptr) 26 #elif KMP_HAVE_POSIX_MEMALIGN 27 static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) { 28 void *ptr; 29 int n = posix_memalign(&ptr, alignment, size); 30 if (n != 0) { 31 if (ptr) 32 free(ptr); 33 return nullptr; 34 } 35 return ptr; 36 } 37 #define KMP_ALIGNED_FREE(ptr) free(ptr) 38 #elif KMP_HAVE__ALIGNED_MALLOC 39 #include <malloc.h> 40 #define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment) 41 #define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr) 42 #else 43 #define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size) 44 #define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr) 45 #endif 46 47 // Use four cache lines: MLC tends to prefetch the next or previous cache line 48 // creating a possible fake conflict between cores, so this is the only way to 49 // guarantee that no such prefetch can happen. 
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif

#define KMP_OPTIMIZE_FOR_REDUCTIONS 0

// Shared data for the distributed barrier. Each per-thread element below is
// padded/aligned to four cache lines (see KMP_FOURLINE_ALIGN_CACHE above) so
// that adjacent-line hardware prefetch cannot create false sharing.
class distributedBarrier {
  // Per-thread arrival flag; presumably counts outstanding arrivals for one
  // barrier iteration — semantics live in the .cpp, confirm there.
  struct flags_s {
    kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
  };

  // A "go" (release) signal; per the comment on num_gos below, each go
  // signal requires one write per iteration.
  struct go_s {
    std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
  };

  // Per-thread barrier iteration counter.
  struct iter_s {
    kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
  };

  // Per-thread sleep state flag (likely "thread may be sleeping and needs a
  // wake-up" — TODO confirm against kmp_barrier.cpp).
  struct sleep_s {
    std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
  };

  void init(size_t nthr); // (re)initialize per-thread data for nthr threads
  void resize(size_t nthr); // grow the arrays to hold at least nthr threads
  void computeGo(size_t n); // compute go-signal/group layout from base size n
  void computeVarsForN(size_t n); // derive the per-team-size variables

public:
  enum {
    MAX_ITERS = 3, // size of the flags[] ring (max concurrent iterations)
    MAX_GOS = 8, // upper bound on go signals
    IDEAL_GOS = 4, // preferred number of go signals
    IDEAL_CONTENTION = 16, // target threads contending per signal
  };

  flags_s *flags[MAX_ITERS];
  go_s *go;
  iter_s *iter;
  sleep_s *sleep;

  size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
  size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
  // number of go signals each requiring one write per iteration
  size_t KMP_ALIGN_CACHE num_gos;
  // number of groups of gos
  size_t KMP_ALIGN_CACHE num_groups;
  // threads per go signal
  size_t KMP_ALIGN_CACHE threads_per_go;
  bool KMP_ALIGN_CACHE fix_threads_per_go;
  // threads per group
  size_t KMP_ALIGN_CACHE threads_per_group;
  // number of go signals in a group
  size_t KMP_ALIGN_CACHE gos_per_group;
  void *team_icvs;

  // Ordinary construction/destruction are deleted: instances are created only
  // through allocate()/deallocate() so the object itself can be placed on a
  // 4-cache-line boundary. Note allocate() never runs a constructor, so all
  // members must be (and are) explicitly initialized there.
  distributedBarrier() = delete;
  ~distributedBarrier() = delete;

  // Used instead of constructor to create aligned data
  static distributedBarrier *allocate(int nThreads) {
    distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
        sizeof(distributedBarrier), 4 * CACHE_LINE);
    if (!d) {
      KMP_FATAL(MemoryAllocFailed); // fatal: does not return
    }
    // Zero all pointer/size members before init() so resize logic starts
    // from a clean state.
    d->num_threads = 0;
    d->max_threads = 0;
    for (int i = 0; i < MAX_ITERS; ++i)
      d->flags[i] = NULL;
    d->go = NULL;
    d->iter = NULL;
    d->sleep = NULL;
    d->team_icvs = NULL;
    d->fix_threads_per_go = false;
    // calculate gos and groups ONCE on base size
    d->computeGo(nThreads);
    d->init(nThreads);
    return d;
  }

  // Frees memory from allocate(); no destructor runs (it is deleted above).
  static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }

  // Re-run init() for a new team size; callers should check need_resize()
  // semantics in the .cpp — init() takes the new count directly.
  void update_num_threads(size_t nthr) { init(nthr); }

  // True when the current arrays cannot hold new_nthr threads.
  bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
  size_t get_num_threads() { return num_threads; }
  kmp_uint64 go_release(); // release threads waiting on go signals (see .cpp)
  void go_reset(); // reset go signals for the next barrier use (see .cpp)
};

#endif // KMP_BARRIER_H