/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
         it may change values between parallel regions. __kmp_max_nth
         is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
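    // team_info and task_info supply the parallel_data and task_data that
    // the OMPT work/dispatch callbacks further down report to the tool.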
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Schedule enumerations above kmp_ord_upper that are not distribute
  // schedules do exist, but they are all dynamic, so they can never reach
  // this code path, which handles only static schedules.
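  // The conversion below assumes the sched_type enum lists the distribute
  // constants in the same relative order as the kmp_sch_* ones, so that, for
  // example, kmp_distribute_static_chunked maps to kmp_sch_static_chunked
  // and kmp_distribute_static to kmp_sch_static.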
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
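    // e.g. for a 32-bit type, lower = INT_MIN, upper = INT_MAX and incr = 1
    // give a true count of 2^32, which wraps to 0 in the unsigned 32-bit UT.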
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
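        // lower is now one step past upper, so this thread sees an empty
        // range and its loop body never executes.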
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
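      // dispatch_chunk is a stack local; a tool receiving instance.ptr must
      // copy whatever it needs during the callback.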
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are caught by run-time checks, others by the
      // compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0 - compile-time check
      //   for(i=10;i<0;--i) // incr < 0 - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
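      // greedy split: every team is offered ceil(trip_count / nteams)
      // iterations; the trailing team's bound is clipped back to the loop's
      // real upper bound below.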
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
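          // e.g. trip_count = 10, nth = 4, incr = 1: chunk_inc_count = 3, so
          // threads 0..2 take 3 iterations each and thread 3 keeps the one
          // remaining iteration after the clipping below.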
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks.
  // The last-iteration flag is set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
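  // For example, with dist_schedule(static, 4), nteams = 3, incr = 1 and
  // iterations 0..19, team 1 first receives [4, 7] with *p_st = 12, so its
  // next chunk starts at 16; since (20 - 1) / 4 % 3 == 1, team 1 also gets
  // the last-iteration flag.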
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are caught by run-time checks, others by the
      // compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0 - compile-time check
      //   for(i=10;i<0;--i) // incr < 0 - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
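// As a rough orientation for the entry points below: a compiler lowering
// #pragma omp for schedule(static) over for (i = 0; i < n; ++i) might emit
// something like the following (a sketch only; my_loop_body and the exact
// bounds handling are illustrative, not generated code):
//
//   kmp_int32 last = 0, lb = 0, ub = n - 1, st = 1;
//   __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lb, &ub,
//                            &st, 1, 0);
//   for (kmp_int32 i = lb; i <= ub; ++i)
//     my_loop_body(i);
//   __kmpc_for_static_fini(loc, gtid);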
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for the parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of the composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"