/*
 * kmp_barrier.h
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_BARRIER_H
#define KMP_BARRIER_H

#include "kmp.h"
#include "kmp_i18n.h"

#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
#include <xmmintrin.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
#elif KMP_HAVE_ALIGNED_ALLOC
// aligned_alloc() requires the size to be a multiple of the alignment.
#define KMP_ALIGN_UP(val, alignment)                                           \
  (((val) + (alignment)-1) / (alignment) * (alignment))
#define KMP_ALIGNED_ALLOCATE(size, alignment)                                  \
  aligned_alloc(alignment, KMP_ALIGN_UP(size, alignment))
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE_POSIX_MEMALIGN
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  // posix_memalign() may leave ptr unmodified on failure, so initialize it to
  // avoid freeing an indeterminate pointer below.
  void *ptr = nullptr;
  int n = posix_memalign(&ptr, alignment, size);
  if (n != 0) {
    if (ptr)
      free(ptr);
    return nullptr;
  }
  return ptr;
}
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE__ALIGNED_MALLOC
#include <malloc.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else
// Fall back to plain allocation; the requested alignment is not guaranteed.
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
#endif
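
// Illustrative usage sketch (not part of the runtime API): whichever branch
// above is selected, KMP_ALIGNED_ALLOCATE and KMP_ALIGNED_FREE must be used
// as a pair, since the fallback maps to KMP_INTERNAL_MALLOC/KMP_INTERNAL_FREE
// and mixing allocators is undefined behavior. The size and buffer name here
// are hypothetical.
//
//   void *buf = KMP_ALIGNED_ALLOCATE(/*size=*/1024,
//                                    /*alignment=*/4 * CACHE_LINE);
//   if (buf) {
//     // ... use the (ideally) 4-cache-line-aligned storage ...
//     KMP_ALIGNED_FREE(buf); // never plain free()/delete
//   }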
// Use four cache lines: the mid-level cache (MLC) tends to prefetch the next
// or previous cache line, creating a possible false conflict between cores, so
// this is the only way to guarantee that no such prefetch can happen.
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif

#define KMP_OPTIMIZE_FOR_REDUCTIONS 0

class distributedBarrier {
  struct flags_s {
    kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
  };

  struct go_s {
    std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
  };

  struct iter_s {
    kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
  };

  struct sleep_s {
    std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
  };

  void init(size_t nthr);
  void resize(size_t nthr);
  void computeGo(size_t n);
  void computeVarsForN(size_t n);

public:
  enum {
    MAX_ITERS = 3,
    MAX_GOS = 8,
    IDEAL_GOS = 4,
    IDEAL_CONTENTION = 16,
  };

  flags_s *flags[MAX_ITERS];
  go_s *go;
  iter_s *iter;
  sleep_s *sleep;

  size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
  size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
  // number of go signals, each requiring one write per iteration
  size_t KMP_ALIGN_CACHE num_gos;
  // number of groups of gos
  size_t KMP_ALIGN_CACHE num_groups;
  // threads per go signal
  size_t KMP_ALIGN_CACHE threads_per_go;
  bool KMP_ALIGN_CACHE fix_threads_per_go;
  // threads per group
  size_t KMP_ALIGN_CACHE threads_per_group;
  // number of go signals in a group
  size_t KMP_ALIGN_CACHE gos_per_group;
  void *team_icvs;

  distributedBarrier() = delete;
  ~distributedBarrier() = delete;

  // Used instead of a constructor to create suitably aligned data
  static distributedBarrier *allocate(int nThreads) {
    distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
        sizeof(distributedBarrier), 4 * CACHE_LINE);
    if (!d) {
      KMP_FATAL(MemoryAllocFailed);
    }
    d->num_threads = 0;
    d->max_threads = 0;
    for (int i = 0; i < MAX_ITERS; ++i)
      d->flags[i] = NULL;
    d->go = NULL;
    d->iter = NULL;
    d->sleep = NULL;
    d->team_icvs = NULL;
    d->fix_threads_per_go = false;
    // calculate gos and groups ONCE on base size
    d->computeGo(nThreads);
    d->init(nThreads);
    return d;
  }

  static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }

  void update_num_threads(size_t nthr) { init(nthr); }

  bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
  size_t get_num_threads() { return num_threads; }
  kmp_uint64 go_release();
  void go_reset();
};

#endif // KMP_BARRIER_H
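
// Lifecycle sketch (illustrative only; the real call sites live in the
// barrier implementation, e.g. kmp_barrier.cpp): the constructor and
// destructor are deleted, so instances must be created with allocate() and
// released with deallocate(), which keeps the object on a 4-cache-line
// boundary. `nproc` and `new_nproc` below are hypothetical thread counts.
//
//   distributedBarrier *b = distributedBarrier::allocate(nproc);
//   if (b->need_resize(new_nproc))
//     ; // internal arrays are sized for at most max_threads threads
//   b->update_num_threads(new_nproc); // re-runs init() for the new count
//   distributedBarrier::deallocate(b);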