/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

#if USE_ITT_BUILD || defined KMP_DEBUG
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}
#endif
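
// Overview (illustrative note, not from the original sources): given the loop
// bounds, increment and number of threads, __kmp_for_static_init() rewrites
// *plower / *pupper in place so that the calling thread sees only its own
// sub-range. E.g. schedule(static) with lower=0, upper=15, incr=1 and nth=4
// gives thread 2 lower=8, upper=11, and *pstride = 16 (the full trip count),
// under either of the default partitioning strategies used below (balanced
// or greedy).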
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear monotonic/nonmonotonic bits (ignore them)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
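  // Example of the zero-trip test below: for (i = 1; i <= 0; ++i) arrives
  // here with *plower = 1, *pupper = 0 and incr = 1, so incr > 0 and
  // *pupper < *plower, and the construct finishes without iterating.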
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0, stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
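  // Worked example of the computation above: lower = 0, upper = 9, incr = 3
  // iterates over {0, 3, 6, 9}, and (9 - 0) / 3 + 1 == 4. The subtraction is
  // done in the unsigned type UT because upper - lower may not fit in the
  // signed type when the bounds span more than half of its range.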

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
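      // trip_count >= nth here, so every thread gets at least one iteration.
      // Illustration for trip_count = 10, nth = 4:
      //   balanced: chunk sizes 3, 3, 2, 2 (the first trip_count % nth
      //             threads get one extra iteration); thread nth-1 is last.
      //   greedy:   chunk sizes ceil(10/4) = 3, i.e. 3, 3, 3, 1; the thread
      //             whose raw chunk straddles the old upper bound is last,
      //             and its upper bound is clipped back to old_upper.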
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
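  // Illustration of the chunked case below (chunk = 2, incr = 1, nth = 3):
  // thread tid starts at lower + 2 * tid and, advancing by *pstride = 6 per
  // pass, owns chunks tid, tid + 3, tid + 6, ...; the thread holding chunk
  // (nchunks - 1) % nth reports the last iteration.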
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

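// Overview (illustrative): __kmp_dist_for_static_init() performs the
// two-level split used by "distribute parallel for". The iteration space is
// first divided among the teams (the team's upper bound is returned through
// *pupperDist), then the team's piece is divided among its nth threads much
// like in __kmp_for_static_init(). E.g. 100 iterations, 4 teams of 5 threads
// with the balanced strategy: team 1 gets iterations 25..49, and thread 2 of
// team 1 gets 35..39.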
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
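    // Second level: the trip count is recomputed over the team's chunk
    // [*plower, *pupperDist] and that sub-range is split among the team's
    // nth threads. Continuing the example above, team 1 re-enters this point
    // with lower = 25 and upperDist = 49, i.e. a per-team trip count of 25.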
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

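// Overview (illustrative): unlike the routines above, which give each team at
// most one contiguous chunk, __kmp_team_static_init() deals chunks to teams
// round-robin; this is the dist_schedule(static, chunk) case. E.g. 100
// iterations, 4 teams, chunk = 10: team 1 starts at iterations 10..19 and
// strides by *p_st = 40 to 50..59 and 90..99, and team 1 also owns the chunk
// containing the last iteration.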
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
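  // Example of the *p_last test above: trip_count = 25, chunk = 10,
  // nteams = 4 gives chunks 0..9, 10..19, 20..24; the last chunk has index
  // (25 - 1) / 10 == 2, and 2 % 4 == 2, so team 2 reports the last iteration.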
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
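/* A sketch of how a compiler might lower a simple statically scheduled loop
   onto this entry point (illustrative only; real codegen differs in detail):

     // #pragma omp for schedule(static)
     // for (i = 0; i < n; ++i) body(i);
     kmp_int32 last = 0, lb = 0, ub = n - 1, st = 1;
     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
                              &st, 1, 0);
     for (kmp_int32 i = lb; i <= ub; ++i)
       body(i);
     __kmpc_for_static_fini(&loc, gtid);
*/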
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

@{
*/
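/* Usage note (illustrative): for a composite "distribute parallel for" loop
   the compiler emits one call per thread; on return, [*plower, *pupper]
   bounds the calling thread's share, while *pupperD holds the upper bound of
   the whole team's chunk at the distribute level. */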
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last Pointer to the "last iteration" flag
@param p_lb Pointer to the lower bound
@param p_ub Pointer to the upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of a composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/
1042 */ 1043 1044 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 1045 kmp_int32 *p_lb, kmp_int32 *p_ub, 1046 kmp_int32 *p_st, kmp_int32 incr, 1047 kmp_int32 chunk) { 1048 KMP_DEBUG_ASSERT(__kmp_init_serial); 1049 __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 1050 chunk); 1051 } 1052 1053 /*! 1054 See @ref __kmpc_team_static_init_4 1055 */ 1056 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 1057 kmp_uint32 *p_lb, kmp_uint32 *p_ub, 1058 kmp_int32 *p_st, kmp_int32 incr, 1059 kmp_int32 chunk) { 1060 KMP_DEBUG_ASSERT(__kmp_init_serial); 1061 __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 1062 chunk); 1063 } 1064 1065 /*! 1066 See @ref __kmpc_team_static_init_4 1067 */ 1068 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 1069 kmp_int64 *p_lb, kmp_int64 *p_ub, 1070 kmp_int64 *p_st, kmp_int64 incr, 1071 kmp_int64 chunk) { 1072 KMP_DEBUG_ASSERT(__kmp_init_serial); 1073 __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 1074 chunk); 1075 } 1076 1077 /*! 1078 See @ref __kmpc_team_static_init_4 1079 */ 1080 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 1081 kmp_uint64 *p_lb, kmp_uint64 *p_ub, 1082 kmp_int64 *p_st, kmp_int64 incr, 1083 kmp_int64 chunk) { 1084 KMP_DEBUG_ASSERT(__kmp_init_serial); 1085 __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 1086 chunk); 1087 } 1088 /*! 1089 @} 1090 */ 1091 1092 } // extern "C" 1093