/*
 * kmp_collapse.h -- header for loop collapse feature
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_COLLAPSE_H
#define KMP_COLLAPSE_H

#include <type_traits> // for std::conditional, std::is_signed below

// Type of the index into the loop nest structures
// (with values from 0 to less than n from collapse(n)):
typedef kmp_int32 kmp_index_t;

// Type for the combined loop nest space IV:
typedef kmp_uint64 kmp_loop_nest_iv_t;

// Loop has <, <=, etc. as a comparison:
enum comparison_t : kmp_int32 {
  comp_less_or_eq = 0,
  comp_greater_or_eq = 1,
  comp_not_eq = 2,
  comp_less = 3,
  comp_greater = 4
};

// Type of the loop IV.
// The types of bounds and step, after the usual promotions,
// are a subset of these types (32- and 64-bit only):
enum loop_type_t : kmp_int32 {
  loop_type_uint8 = 0,
  loop_type_int8 = 1,
  loop_type_uint16 = 2,
  loop_type_int16 = 3,
  loop_type_uint32 = 4,
  loop_type_int32 = 5,
  loop_type_uint64 = 6,
  loop_type_int64 = 7
};

/*!
 @ingroup WORK_SHARING
 * Describes the structure for rectangular nested loops.
 */
template <typename T> struct bounds_infoXX_template {

  // typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;

  loop_type_t loop_type; // The differentiator
  loop_type_t loop_iv_type;
  comparison_t comparison;
  // outer_iv should be 0 (or any other value less than the number of
  // dimensions) if the loop doesn't depend on it (lb1 and ub1 will be 0).
  // This way we can do multiplication without a check.
  kmp_index_t outer_iv;

  // unions to keep the size constant:
  union {
    T lb0;
    kmp_uint64 lb0_u64; // real type can be signed
  };

  union {
    T lb1;
    kmp_uint64 lb1_u64; // real type can be signed
  };

  union {
    T ub0;
    kmp_uint64 ub0_u64; // real type can be signed
  };

  union {
    T ub1;
    kmp_uint64 ub1_u64; // real type can be signed
  };

  union {
    ST step; // signed even if bounds type is unsigned
    kmp_int64 step_64; // signed
  };

  kmp_loop_nest_iv_t trip_count;
};

/*!
 @ingroup WORK_SHARING
 * Interface struct for rectangular nested loops.
 * Same size as bounds_infoXX_template.
 */
struct bounds_info_t {

  loop_type_t loop_type; // The differentiator
  loop_type_t loop_iv_type;
  comparison_t comparison;
  // outer_iv should be 0 (or any other value less than the number of
  // dimensions) if the loop doesn't depend on it (lb1 and ub1 will be 0).
  // This way we can do multiplication without a check.
  kmp_index_t outer_iv;

  kmp_uint64 lb0_u64; // real type can be signed
  kmp_uint64 lb1_u64; // real type can be signed
  kmp_uint64 ub0_u64; // real type can be signed
  kmp_uint64 ub1_u64; // real type can be signed
  kmp_int64 step_64; // signed

  // This is internal, but it's the only internal thing we need
  // in the rectangular case, so let's expose it here:
  kmp_loop_nest_iv_t trip_count;
};
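// Illustrative sketch (not part of this header's API): one way a compiler
// could encode the rectangular nest
//   for (kmp_int32 i = 0; i < n; ++i)
//     for (kmp_int32 j = 0; j < m; ++j)
// as two bounds_info_t entries, outermost loop first. The variables "n" and
// "m" and the exact field values are assumptions for illustration. All bounds
// go into the u64 fields regardless of the real type; lb1/ub1 stay 0 because
// the inner bounds don't depend on the outer IV here:
//
//   bounds_info_t nest[2];
//   // Outer loop: i from 0 while i < n, step 1.
//   nest[0].loop_type = loop_type_int32;
//   nest[0].loop_iv_type = loop_type_int32;
//   nest[0].comparison = comp_less; // canonicalized to comp_less_or_eq later
//   nest[0].outer_iv = 0; // no outer dependence, so lb1/ub1 are 0
//   nest[0].lb0_u64 = 0;
//   nest[0].lb1_u64 = 0;
//   nest[0].ub0_u64 = (kmp_uint64)n;
//   nest[0].ub1_u64 = 0;
//   nest[0].step_64 = 1;
//   // Inner loop: j from 0 while j < m, step 1 (same shape).
//   nest[1] = nest[0];
//   nest[1].ub0_u64 = (kmp_uint64)m;
//
// trip_count is left to the runtime: the APIs below calculate it during
// canonicalization.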
//-------------------------------------------------------------------------
// Additional types for internal representation:

// Array for a point in the loop space, in the original space.
// It's represented in kmp_uint64, but each dimension is calculated in
// that loop's IV type. Also, dimensions have to be converted to those types
// when used in generated code.
typedef kmp_uint64 *kmp_point_t;

// Array: number of loop iterations on each nesting level needed to reach some
// point, in expanded space or in original space.
// OMPTODO: move from using iterations to using offsets (iterations multiplied
// by steps). For those we need to be careful with the types, as the step can
// be negative, but it'll remove multiplications and divisions in several
// places.
typedef kmp_loop_nest_iv_t *kmp_iterations_t;

// Internal struct with additional info:
template <typename T> struct bounds_info_internalXX_template {

  // OMPTODO: should span have type T or should it better be
  // kmp_uint64/kmp_int64 depending on T's sign? (If kmp_uint64/kmp_int64, then
  // updated bounds should probably also be kmp_uint64/kmp_int64.) I'd like to
  // use big_span_t, if it can be resolved at compile time.
  typedef typename std::conditional<std::is_signed<T>::value, kmp_int64,
                                    kmp_uint64>::type big_span_t;

  // typedef big_span_t span_t;
  typedef T span_t;

  bounds_infoXX_template<T> b; // possibly adjusted bounds

  // Leaving this as a union in case we'll switch to span_t with different
  // sizes (depending on T):
  union {
    // Smallest possible value of iv (may be smaller than actually possible)
    span_t span_smallest;
    kmp_uint64 span_smallest_u64;
  };

  // Leaving this as a union in case we'll switch to span_t with different
  // sizes (depending on T):
  union {
    // Biggest possible value of iv (may be bigger than actually possible)
    span_t span_biggest;
    kmp_uint64 span_biggest_u64;
  };

  // Did we adjust loop bounds (not counting canonicalization)?
  bool loop_bounds_adjusted;
};

// Internal struct with additional info:
struct bounds_info_internal_t {

  bounds_info_t b; // possibly adjusted bounds

  // Smallest possible value of iv (may be smaller than actually possible)
  kmp_uint64 span_smallest_u64;

  // Biggest possible value of iv (may be bigger than actually possible)
  kmp_uint64 span_biggest_u64;

  // Did we adjust loop bounds (not counting canonicalization)?
  bool loop_bounds_adjusted;
};

//----------APIs for rectangular loop nests--------------------------------

// Canonicalize the loop nest and calculate the overall trip count.
// "bounds_nest" has to be allocated per thread.
// The API will modify the original bounds_nest array to bring it to a
// canonical form (only <= and >=; no !=, <, >). If the original loop nest was
// already in a canonical form, there will be no changes to the bounds in the
// bounds_nest array (only trip counts will be calculated).
// Returns the trip count of the overall space.
extern "C" kmp_loop_nest_iv_t
__kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
                                 /*in/out*/ bounds_info_t *original_bounds_nest,
                                 kmp_index_t n);

// Calculate the old induction variables corresponding to the overall new_iv.
// Note: the original IVs will be returned as if they had kmp_uint64 type;
// they will have to be converted to the original type in user code.
// Note: trip counts should already have been calculated by
// __kmpc_process_loop_nest_rectang.
// OMPTODO: special-case 2 and 3 nested loops, if it's possible to inline
// that into user code.
extern "C" void
__kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
                                 const bounds_info_t *original_bounds_nest,
                                 /*out*/ kmp_uint64 *original_ivs,
                                 kmp_index_t n);
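// Usage sketch (illustrative only) for the two rectangular APIs above,
// continuing the "nest" example from earlier. "loc" and "gtid" are assumed to
// come from the usual outlined-function arguments. A real schedule would walk
// only this thread's part of [0, total); the full walk here just shows the
// documented semantics:
//
//   kmp_loop_nest_iv_t total =
//       __kmpc_process_loop_nest_rectang(loc, gtid, nest, 2);
//   kmp_uint64 ivs[2];
//   for (kmp_loop_nest_iv_t k = 0; k < total; ++k) {
//     __kmpc_calc_original_ivs_rectang(loc, k, nest, ivs, 2);
//     kmp_int32 i = (kmp_int32)ivs[0]; // convert back to the original types
//     kmp_int32 j = (kmp_int32)ivs[1];
//     // ... original loop body using i and j ...
//   }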
//----------Init API for non-rectangular loops--------------------------------

// Init API for collapsed loops (static, no chunks defined).
// "bounds_nest" has to be allocated per thread.
// The API will modify the original bounds_nest array to bring it to a
// canonical form (only <= and >=; no !=, <, >). If the original loop nest was
// already in a canonical form, there will be no changes to the bounds in the
// bounds_nest array (only trip counts will be calculated). Internally, the API
// will expand the space to a parallelogram/parallelepiped, calculate the
// total, calculate the bounds for the chunks in terms of the new IV,
// re-calculate them in terms of the old IVs (especially important on the left
// side, to hit the lower bounds and not step over), and pick the correct chunk
// for this thread (so it will calculate chunks up to the needed one). It could
// be optimized to calculate just this chunk, potentially at the cost of a
// slightly less even distribution among threads. It is designed to make sure
// that threads will receive predictable chunks, deterministically (so that the
// next nest of loops with similar characteristics will get exactly the same
// chunks on the same threads).
// Current contract: chunk_bounds_nest has only lb0 and ub0;
// lb1 and ub1 are set to 0 and can be ignored. (This may change in the
// future.)
extern "C" kmp_int32
__kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
                          /*in/out*/ bounds_info_t *original_bounds_nest,
                          /*out*/ bounds_info_t *chunk_bounds_nest,
                          kmp_index_t n,
                          /*out*/ kmp_int32 *plastiter);
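// Usage sketch (illustrative only). It assumes the return value is nonzero
// when the calling thread has iterations to execute, that lb0/ub0 of each
// dimension describe this thread's chunk per the contract above, and that
// "plastiter" follows the usual kmp lastprivate convention; the exact
// iteration scheme over the chunk is up to the generated code. "nest", "loc"
// and "gtid" are the assumed names from the earlier examples:
//
//   bounds_info_t chunk[2];
//   kmp_int32 plastiter = 0;
//   if (__kmpc_for_collapsed_init(loc, gtid, nest, chunk, 2, &plastiter)) {
//     // chunk[k].lb0_u64 / chunk[k].ub0_u64 now bound dimension k of this
//     // thread's chunk (lb1/ub1 are 0 per the contract), still stored as
//     // kmp_uint64 and to be converted back to the original IV types.
//     // ... iterate the chunk and run the original loop body ...
//   }

#endif // KMP_COLLAPSE_H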