/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is constant inside any dispatch loop; however, it may
         change between parallel regions. __kmp_max_nth is the largest value
         __kmp_nth may take, and 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      KMP_DEBUG_ASSERT(i != 0);                                                \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
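// Worked example of the trip-count formula above (illustrative): for
// lower = 0, upper = 9, incr = 3 the loop visits i = 0, 3, 6, 9 and the
// formula gives t = (9 - 0) / 3 + 1 = 4; for the decreasing loop lower = 9,
// upper = 0, incr = -3 it likewise gives t = (9 - 0) / 3 + 1 = 4.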
#if USE_ITT_BUILD || defined KMP_DEBUG
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear the monotonic/nonmonotonic bits (ignore them)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop_static;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop_static;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
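  // Illustrative zero-trip example: for (i = 1; i <= 0; ++i) arrives here
  // with *plower = 1, *pupper = 0 and incr = 1, so the test below fires and
  // the thread is told to execute no iterations.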
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper that are not
  // "distribute" schedules, the only useful ones are dynamic, and those cannot
  // be seen here, since this code path is only executed for static schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }
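  // Note (illustrative): the distribute schedule enumerators sit at a fixed
  // offset from their static counterparts, so the single addition above maps,
  // e.g., kmp_distribute_static to kmp_sch_static and
  // kmp_distribute_static_chunked to kmp_sch_static_chunked.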
  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
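  // Why the (UT) cast above matters (worked example, illustrative): with
  // T = kmp_int32, lower = -2000000000, upper = 2000000000 and incr = 4, the
  // distance upper - lower is 4000000000, which does not fit in kmp_int32;
  // on the two's-complement targets the runtime supports, the wrapped result
  // reinterpreted as kmp_uint32 is exactly 4000000000, so the unsigned
  // division still yields the correct trip count of 1000000001.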
#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      KMP_DEBUG_ASSERT(nth != 0);
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
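  // Worked example for the balanced split above (illustrative): trip_count =
  // 10 and nth = 4 give small_chunk = 2 and extras = 2, so threads 0..3
  // receive 3, 3, 2 and 2 iterations respectively; the greedy split would
  // instead hand out ceil(10/4) = 3 iterations per thread until the range is
  // exhausted (3, 3, 3, 1).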
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    KMP_DEBUG_ASSERT(chunk != 0);
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    KMP_DEBUG_ASSERT(nth != 0);
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // round span up to a multiple of chunk (the bit trick below assumes a
    // power-of-two chunk, e.g. a simd width)
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL) {
      KMP_DEBUG_ASSERT(chunk != 0);
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    }
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }
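  // Worked example for kmp_sch_static_chunked above (illustrative): with
  // trip_count = 10, chunk = 3, nth = 2 and incr = 1, nchunks = 4 and
  // span = 3, so *pstride = 6; thread 0 gets chunks [l, l+2] and [l+6, l+8],
  // thread 1 gets [l+3, l+5] and [l+9, ...] (the generated code advances by
  // *pstride and clamps to the original upper bound), and *plastiter is set
  // for tid == (4 - 1) % 2 == 1.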
#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      KMP_DEBUG_ASSERT(nth != 0);
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}
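// Note on *pstride for unchunked kmp_sch_static (illustrative): it is set to
// the whole trip count above, so in the canonical chunked lowering pattern
// (lower += stride, upper += stride, retest against the original bounds) the
// outer loop terminates after a single pass -- each thread executes exactly
// one contiguous block.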
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are kept by the compiler, e.g.:
      // for(i=10;i<0;++i) // lower >= upper - run-time check
      // for(i=0;i>10;--i) // lower <= upper - run-time check
      // for(i=0;i>10;++i) // incr > 0 - compile-time check
      // for(i=10;i<0;--i) // incr < 0 - compile-time check
      // The compiler does not check the following illegal loops:
      // for(i=0;i<10;i+=incr) // where incr<0
      // for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
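  // Worked example of the team-level split below (illustrative): trip_count =
  // 100 and nteams = 3 under the balanced schedule give chunkD = 33 and
  // extras = 1, so teams 0..2 receive 34, 33 and 33 iterations respectively.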
  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only the primary threads of some teams get a single iteration; the
    // other threads get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    KMP_DEBUG_ASSERT(nteams != 0);
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      KMP_DEBUG_ASSERT(incr != 0);
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        KMP_DEBUG_ASSERT(nth != 0);
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL) {
        KMP_DEBUG_ASSERT(chunk != 0);
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      }
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
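// End-to-end example of the two-level split above (illustrative): 16
// iterations, 2 teams, 2 threads per team, balanced schedule: team 0 is
// assigned the distribute chunk [0, 7] and team 1 gets [8, 15]; within
// team 0, thread 0 then gets [0, 3] and thread 1 gets [4, 7].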
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for calculating the next chunks. The last-iteration flag is set
  // for the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are kept by the compiler, e.g.:
      // for(i=10;i<0;++i) // lower >= upper - run-time check
      // for(i=0;i>10;--i) // lower <= upper - run-time check
      // for(i=0;i>10;++i) // incr > 0 - compile-time check
      // for(i=10;i<0;--i) // incr < 0 - compile-time check
      // The compiler does not check the following illegal loops:
      // for(i=0;i<10;i+=incr) // where incr<0
      // for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL) {
    KMP_DEBUG_ASSERT(chunk != 0);
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  }
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
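// Worked example for the cyclic team distribution above (illustrative): with
// lower = 0, upper = 15, incr = 1, chunk = 4 and nteams = 2, span = 4 and
// *p_st = 8; team 0 starts with [0, 3] (then [8, 11] via the stride) and
// team 1 with [4, 7] (then [12, 15]); *p_last is set for team
// (16 - 1) / 4 % 2 == 1.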
//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
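/* Illustrative sketch (not part of the runtime): for a loop such as
     #pragma omp for schedule(static)
     for (int i = 0; i < n; ++i) body(i);
   a compiler typically emits code along these lines (schematic; loc and gtid
   are supplied by the surrounding outlined function):
     kmp_int32 last = 0, lb = 0, ub = n - 1, st = 1;
     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
                              &st, 1, 1);
     for (kmp_int32 i = lb; i <= ub; ++i) body(i);
     __kmpc_for_static_fini(&loc, gtid);
   The exact lowering is compiler-specific. */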
/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of the loop chunk
@param pupperD Pointer to the upper bound of the dist_chunk
@param pstride Pointer to the stride for the parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for the parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}
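/* Illustrative sketch (not part of the runtime): for a composite
     #pragma omp distribute parallel for dist_schedule(static) schedule(static)
   a compiler may emit, inside the outlined body executed by each team's
   threads, something like
     __kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb,
                                   &ub, &ubD, &st, 1, 1);
   where [lb, ub] become this thread's bounds and ubD the upper bound of the
   enclosing team's distribute chunk. The exact lowering is
   compiler-specific. */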
/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last Pointer to the "last iteration" flag
@param p_lb Pointer to the lower bound
@param p_ub Pointer to the upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of the composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"