/*
 * kmp_collapse.h -- header for loop collapse feature
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_COLLAPSE_H
#define KMP_COLLAPSE_H

#include <type_traits>

// Type of the index into the loop nest structures
// (with values from 0 up to, but not including, the n from collapse(n)):
typedef kmp_int32 kmp_index_t;

// Type for the combined loop nest space IV:
typedef kmp_uint64 kmp_loop_nest_iv_t;

// Loop has <, <=, etc. as its comparison:
enum comparison_t : kmp_int32 {
  comp_less_or_eq = 0,
  comp_greater_or_eq = 1,
  comp_not_eq = 2,
  comp_less = 3,
  comp_greater = 4
};

// Type of the loop IV.
// Types of the bounds and step, after the usual promotions,
// are a subset of these types (32- and 64-bit only):
enum loop_type_t : kmp_int32 {
  loop_type_uint8 = 0,
  loop_type_int8 = 1,
  loop_type_uint16 = 2,
  loop_type_int16 = 3,
  loop_type_uint32 = 4,
  loop_type_int32 = 5,
  loop_type_uint64 = 6,
  loop_type_int64 = 7
};

/*!
@ingroup WORK_SHARING
 * Describes the structure for rectangular nested loops.
 */
template <typename T> struct bounds_infoXX_template {

  // typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;

  loop_type_t loop_type; // The differentiator
  loop_type_t loop_iv_type;
  comparison_t comparison;
  // outer_iv should be 0 (or any other value less than the number of
  // dimensions) if the loop doesn't depend on it (lb1 and ub1 will be 0).
  // This way we can do multiplication without a check.
  kmp_index_t outer_iv;

  // unions to keep the size constant:
  union {
    T lb0;
    kmp_uint64 lb0_u64; // real type can be signed
  };

  union {
    T lb1;
    kmp_uint64 lb1_u64; // real type can be signed
  };

  union {
    T ub0;
    kmp_uint64 ub0_u64; // real type can be signed
  };

  union {
    T ub1;
    kmp_uint64 ub1_u64; // real type can be signed
  };

  union {
    ST step; // signed even if the bounds type is unsigned
    kmp_int64 step_64; // signed
  };

  kmp_loop_nest_iv_t trip_count;
};

/*!
@ingroup WORK_SHARING
 * Interface struct for rectangular nested loops.
 * Same size as bounds_infoXX_template.
 */
struct bounds_info_t {

  loop_type_t loop_type; // The differentiator
  loop_type_t loop_iv_type;
  comparison_t comparison;
  // outer_iv should be 0 (or any other value less than the number of
  // dimensions) if the loop doesn't depend on it (lb1 and ub1 will be 0).
  // This way we can do multiplication without a check.
  kmp_index_t outer_iv;

  kmp_uint64 lb0_u64; // real type can be signed
  kmp_uint64 lb1_u64; // real type can be signed
  kmp_uint64 ub0_u64; // real type can be signed
  kmp_uint64 ub1_u64; // real type can be signed
  kmp_int64 step_64; // signed

  // This is internal, but it's the only internal thing we need
  // in the rectangular case, so let's expose it here:
  kmp_loop_nest_iv_t trip_count;
};

//-------------------------------------------------------------------------
// Additional types for internal representation:

// Array for a point in the loop space, in the original space.
// It's represented in kmp_uint64, but each dimension is calculated in
// that loop's IV type. Also, dimensions have to be converted to those types
// when used in generated code.
typedef kmp_uint64* kmp_point_t;

// Array: the number of loop iterations on each nesting level needed to reach
// some point, in the expanded space or in the original space.
// OMPTODO: move from using iterations to using offsets (iterations multiplied
// by steps). For those we need to be careful with the types, as the step can
// be negative, but it'll remove multiplications and divisions in several
// places.
typedef kmp_loop_nest_iv_t* kmp_iterations_t;

// Internal struct with additional info:
template <typename T> struct bounds_info_internalXX_template {

  // OMPTODO: should span have type T or should it rather be
  // kmp_uint64/kmp_int64 depending on the sign of T? (If kmp_uint64/kmp_int64,
  // then the updated bounds should probably also be kmp_uint64/kmp_int64.)
  // I'd like to use big_span_t, if it can be resolved at compile time.
  typedef
      typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>
          big_span_t;

  // typedef typename big_span_t span_t;
  typedef T span_t;

  bounds_infoXX_template<T> b; // possibly adjusted bounds

  // Leaving this as a union in case we'll switch to span_t with different
  // sizes (depending on T)
  union {
    // Smallest possible value of the IV (may be smaller than actually possible)
    span_t span_smallest;
    kmp_uint64 span_smallest_u64;
  };

  // Leaving this as a union in case we'll switch to span_t with different
  // sizes (depending on T)
  union {
    // Biggest possible value of the IV (may be bigger than actually possible)
    span_t span_biggest;
    kmp_uint64 span_biggest_u64;
  };

  // Did we adjust the loop bounds (not counting canonicalization)?
  bool loop_bounds_adjusted;
};

// Internal struct with additional info:
struct bounds_info_internal_t {

  bounds_info_t b; // possibly adjusted bounds

  // Smallest possible value of the IV (may be smaller than actually possible)
  kmp_uint64 span_smallest_u64;

  // Biggest possible value of the IV (may be bigger than actually possible)
  kmp_uint64 span_biggest_u64;

  // Did we adjust the loop bounds (not counting canonicalization)?
  bool loop_bounds_adjusted;
};

//----------APIs for rectangular loop nests--------------------------------

// Canonicalize the loop nest and calculate the overall trip count.
// "bounds_nest" has to be allocated per thread.
// The API will modify the original bounds_nest array to bring it to a
// canonical form (only <= and >=, no !=, <, >). If the original loop nest was
// already in a canonical form there will be no changes to the bounds in the
// bounds_nest array (only trip counts will be calculated).
// Returns the trip count of the overall space.
extern "C" kmp_loop_nest_iv_t
__kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
                                 /*in/out*/ bounds_info_t *original_bounds_nest,
                                 kmp_index_t n);

// Calculate the original induction variables corresponding to the overall
// new_iv.
// Note: the original IVs will be returned as if they had kmp_uint64 type,
// and will have to be converted to the original types in user code.
// Note: the trip counts should already have been calculated by
// __kmpc_process_loop_nest_rectang.
// OMPTODO: special-case 2 and 3 nested loops, if it'll be possible to inline
// that into user code.
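//
// A minimal usage sketch for this pair of entry points (illustrative only:
// MAX_LOOPS, the driver loop and the local variable names are assumptions
// made for this example, not part of the interface). For a simple loop like
// "for (kmp_int32 j = 0; j < M; ++j)" the corresponding, already
// canonicalized, bounds_nest entry would look roughly like:
// loop_type = loop_type_int32, loop_iv_type = loop_type_int32,
// comparison = comp_less_or_eq, outer_iv = 0, lb0_u64 = 0, lb1_u64 = 0,
// ub0_u64 = M - 1, ub1_u64 = 0, step_64 = 1.
//
//   bounds_info_t bounds_nest[MAX_LOOPS]; // one entry per collapsed loop
//   kmp_loop_nest_iv_t total =
//       __kmpc_process_loop_nest_rectang(loc, gtid, bounds_nest, n);
//   kmp_uint64 original_ivs[MAX_LOOPS];
//   for (kmp_loop_nest_iv_t new_iv = 0; new_iv < total; ++new_iv) {
//     __kmpc_calc_original_ivs_rectang(loc, new_iv, bounds_nest, original_ivs,
//                                      n);
//     // original_ivs[i] still has to be converted back to the type recorded
//     // in bounds_nest[i].loop_iv_type before it is used in the loop body.
//   }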
208 extern "C" void 209 __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv, 210 const bounds_info_t *original_bounds_nest, 211 /*out*/ kmp_uint64 *original_ivs, 212 kmp_index_t n); 213 214 //----------Init API for non-rectangular loops-------------------------------- 215 216 // Init API for collapsed loops (static, no chunks defined). 217 // "bounds_nest" has to be allocated per thread. 218 // API will modify original bounds_nest array to bring it to a canonical form 219 // (only <= and >=, no !=, <, >). If the original loop nest was already in a 220 // canonical form there will be no changes to bounds in bounds_nest array 221 // (only trip counts will be calculated). Internally API will expand the space 222 // to parallelogram/parallelepiped, calculate total, calculate bounds for the 223 // chunks in terms of the new IV, re-calc them in terms of old IVs (especially 224 // important on the left side, to hit the lower bounds and not step over), and 225 // pick the correct chunk for this thread (so it will calculate chunks up to the 226 // needed one). It could be optimized to calculate just this chunk, potentially 227 // a bit less well distributed among threads. It is designed to make sure that 228 // threads will receive predictable chunks, deterministically (so that next nest 229 // of loops with similar characteristics will get exactly same chunks on same 230 // threads). 231 // Current contract: chunk_bounds_nest has only lb0 and ub0, 232 // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future). 233 extern "C" kmp_int32 234 __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid, 235 /*in/out*/ bounds_info_t *original_bounds_nest, 236 /*out*/ bounds_info_t *chunk_bounds_nest, 237 kmp_index_t n, 238 /*out*/ kmp_int32 *plastiter); 239 240 #endif // KMP_COLLAPSE_H 241