1 /* 2 * kmp_collapse.h -- header for loop collapse feature 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8 // See https://llvm.org/LICENSE.txt for license information. 9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef KMP_COLLAPSE_H 14 #define KMP_COLLAPSE_H 15 16 #include <type_traits> 17 18 // Type of the index into the loop nest structures 19 // (with values from 0 to less than n from collapse(n)) 20 typedef kmp_int32 kmp_index_t; 21 22 // Type for combined loop nest space IV: 23 typedef kmp_uint64 kmp_loop_nest_iv_t; 24 25 // Loop has <, <=, etc. as a comparison: 26 enum comparison_t : kmp_int32 { 27 comp_less_or_eq = 0, 28 comp_greater_or_eq = 1, 29 comp_not_eq = 2, 30 comp_less = 3, 31 comp_greater = 4 32 }; 33 34 // Type of loop IV. 35 // Type of bounds and step, after usual promotions 36 // are a subset of these types (32 & 64 only): 37 enum loop_type_t : kmp_int32 { 38 loop_type_uint8 = 0, 39 loop_type_int8 = 1, 40 loop_type_uint16 = 2, 41 loop_type_int16 = 3, 42 loop_type_uint32 = 4, 43 loop_type_int32 = 5, 44 loop_type_uint64 = 6, 45 loop_type_int64 = 7 46 }; 47 48 // Defining loop types to handle special cases 49 enum nested_loop_type_t : kmp_int32 { 50 nested_loop_type_unkown = 0, 51 nested_loop_type_lower_triangular_matrix = 1, 52 nested_loop_type_upper_triangular_matrix = 2 53 }; 54 55 /*! 56 @ingroup WORK_SHARING 57 * Describes the structure for rectangular nested loops. 58 */ 59 template <typename T> struct bounds_infoXX_template { 60 61 // typedef typename traits_t<T>::unsigned_t UT; 62 typedef typename traits_t<T>::signed_t ST; 63 64 loop_type_t loop_type; // The differentiator 65 loop_type_t loop_iv_type; 66 comparison_t comparison; 67 // outer_iv should be 0 (or any other less then number of dimentions) 68 // if loop doesn't depend on it (lb1 and ub1 will be 0). 69 // This way we can do multiplication without a check. 70 kmp_index_t outer_iv; 71 72 // unions to keep the size constant: 73 union { 74 T lb0; 75 kmp_uint64 lb0_u64; // real type can be signed 76 }; 77 78 union { 79 T lb1; 80 kmp_uint64 lb1_u64; // real type can be signed 81 }; 82 83 union { 84 T ub0; 85 kmp_uint64 ub0_u64; // real type can be signed 86 }; 87 88 union { 89 T ub1; 90 kmp_uint64 ub1_u64; // real type can be signed 91 }; 92 93 union { 94 ST step; // signed even if bounds type is unsigned 95 kmp_int64 step_64; // signed 96 }; 97 98 kmp_loop_nest_iv_t trip_count; 99 }; 100 101 /*! 102 @ingroup WORK_SHARING 103 * Interface struct for rectangular nested loops. 104 * Same size as bounds_infoXX_template. 105 */ 106 struct bounds_info_t { 107 108 loop_type_t loop_type; // The differentiator 109 loop_type_t loop_iv_type; 110 comparison_t comparison; 111 // outer_iv should be 0 (or any other less then number of dimentions) 112 // if loop doesn't depend on it (lb1 and ub1 will be 0). 113 // This way we can do multiplication without a check. 114 kmp_index_t outer_iv; 115 116 kmp_uint64 lb0_u64; // real type can be signed 117 kmp_uint64 lb1_u64; // real type can be signed 118 kmp_uint64 ub0_u64; // real type can be signed 119 kmp_uint64 ub1_u64; // real type can be signed 120 kmp_int64 step_64; // signed 121 122 // This is internal, but it's the only internal thing we need 123 // in rectangular case, so let's expose it here: 124 kmp_loop_nest_iv_t trip_count; 125 }; 126 127 //------------------------------------------------------------------------- 128 // Additional types for internal representation: 129 130 // Array for a point in the loop space, in the original space. 131 // It's represented in kmp_uint64, but each dimention is calculated in 132 // that loop IV type. Also dimentions have to be converted to those types 133 // when used in generated code. 134 typedef kmp_uint64 *kmp_point_t; 135 136 // Array: Number of loop iterations on each nesting level to achieve some point, 137 // in expanded space or in original space. 138 // OMPTODO: move from using iterations to using offsets (iterations multiplied 139 // by steps). For those we need to be careful with the types, as step can be 140 // negative, but it'll remove multiplications and divisions in several places. 141 typedef kmp_loop_nest_iv_t *kmp_iterations_t; 142 143 // Internal struct with additional info: 144 template <typename T> struct bounds_info_internalXX_template { 145 146 // OMPTODO: should span have type T or should it better be 147 // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than 148 // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to 149 // use big_span_t, if it can be resolved at compile time. 150 typedef 151 typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64> 152 big_span_t; 153 154 // typedef typename big_span_t span_t; 155 typedef T span_t; 156 157 bounds_infoXX_template<T> b; // possibly adjusted bounds 158 159 // Leaving this as a union in case we'll switch to span_t with different sizes 160 // (depending on T) 161 union { 162 // Smallest possible value of iv (may be smaller than actually possible) 163 span_t span_smallest; 164 kmp_uint64 span_smallest_u64; 165 }; 166 167 // Leaving this as a union in case we'll switch to span_t with different sizes 168 // (depending on T) 169 union { 170 // Biggest possible value of iv (may be bigger than actually possible) 171 span_t span_biggest; 172 kmp_uint64 span_biggest_u64; 173 }; 174 175 // Did we adjust loop bounds (not counting canonicalization)? 176 bool loop_bounds_adjusted; 177 }; 178 179 // Internal struct with additional info: 180 struct bounds_info_internal_t { 181 182 bounds_info_t b; // possibly adjusted bounds 183 184 // Smallest possible value of iv (may be smaller than actually possible) 185 kmp_uint64 span_smallest_u64; 186 187 // Biggest possible value of iv (may be bigger than actually possible) 188 kmp_uint64 span_biggest_u64; 189 190 // Did we adjust loop bounds (not counting canonicalization)? 191 bool loop_bounds_adjusted; 192 }; 193 194 //----------APIs for rectangular loop nests-------------------------------- 195 196 // Canonicalize loop nest and calculate overall trip count. 197 // "bounds_nest" has to be allocated per thread. 198 // API will modify original bounds_nest array to bring it to a canonical form 199 // (only <= and >=, no !=, <, >). If the original loop nest was already in a 200 // canonical form there will be no changes to bounds in bounds_nest array 201 // (only trip counts will be calculated). 202 // Returns trip count of overall space. 203 extern "C" kmp_loop_nest_iv_t 204 __kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid, 205 /*in/out*/ bounds_info_t *original_bounds_nest, 206 kmp_index_t n); 207 208 // Calculate old induction variables corresponding to overall new_iv. 209 // Note: original IV will be returned as if it had kmp_uint64 type, 210 // will have to be converted to original type in user code. 211 // Note: trip counts should be already calculated by 212 // __kmpc_process_loop_nest_rectang. 213 // OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline 214 // that into user code. 215 extern "C" void 216 __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv, 217 const bounds_info_t *original_bounds_nest, 218 /*out*/ kmp_uint64 *original_ivs, 219 kmp_index_t n); 220 221 //----------Init API for non-rectangular loops-------------------------------- 222 223 // Init API for collapsed loops (static, no chunks defined). 224 // "bounds_nest" has to be allocated per thread. 225 // API will modify original bounds_nest array to bring it to a canonical form 226 // (only <= and >=, no !=, <, >). If the original loop nest was already in a 227 // canonical form there will be no changes to bounds in bounds_nest array 228 // (only trip counts will be calculated). Internally API will expand the space 229 // to parallelogram/parallelepiped, calculate total, calculate bounds for the 230 // chunks in terms of the new IV, re-calc them in terms of old IVs (especially 231 // important on the left side, to hit the lower bounds and not step over), and 232 // pick the correct chunk for this thread (so it will calculate chunks up to the 233 // needed one). It could be optimized to calculate just this chunk, potentially 234 // a bit less well distributed among threads. It is designed to make sure that 235 // threads will receive predictable chunks, deterministically (so that next nest 236 // of loops with similar characteristics will get exactly same chunks on same 237 // threads). 238 // Current contract: chunk_bounds_nest has only lb0 and ub0, 239 // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future). 240 extern "C" kmp_int32 241 __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid, 242 /*in/out*/ bounds_info_t *original_bounds_nest, 243 /*out*/ bounds_info_t *chunk_bounds_nest, 244 kmp_index_t n, 245 /*out*/ kmp_int32 *plastiter); 246 247 #endif // KMP_COLLAPSE_H 248