/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
         it may change values between parallel regions. __kmp_max_nth
         is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
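// Worked example of the trip-count formula in the macro above (illustrative
// only, not used by the runtime): for lower=0, upper=9, incr=3 the loop
// visits {0, 3, 6, 9}, and t = (u - l) / i + 1 = (9 - 0) / 3 + 1 = 4 matches
// that count. The incr==1 and incr==-1 cases are split out only so the
// common strides avoid the division.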
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are
  // not schedules for "distribute", the only useful ones are dynamic, and
  // those cannot be seen here, since this code path is only executed for
  // static schedules.
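  // Illustrative note (an assumption about the enum layout in kmp.h, not
  // asserted here): the conversion below relies on the distribute schedule
  // constants being laid out parallel to the plain static ones, so that,
  // e.g.,
  //   kmp_distribute_static_chunked + (kmp_sch_static - kmp_distribute_static)
  //       == kmp_sch_static_chunked
  // which lets the rest of this routine handle a DISTRIBUTE chunk as an
  // ordinary static loop over the parent (teams) context.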
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
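      // Worked example (illustrative only): trip_count==3, nth==8, incr==1
      // leaves threads 0..2 with exactly one iteration each (lower==upper),
      // threads 3..7 with an empty range (lower > upper), and sets
      // *plastiter only for tid==2.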
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
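  // Worked example for the kmp_sch_static case above (illustrative only):
  // with trip_count==10, nth==4, incr==1,
  //   - balanced: small_chunk==2, extras==2, so the threads get 3,3,2,2
  //     iterations (threads 0..1 each absorb one extra);
  //   - greedy: big_chunk_inc_count==ceil(10/4)==3, so the threads get
  //     3,3,3,1 iterations, the last chunk being trimmed by the bound
  //     clipping against old_upper.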
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
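  // Worked example for the kmp_sch_static_chunked case above (illustrative
  // only): trip_count==10, chunk==2, nth==3, incr==1 gives nchunks==5,
  // span==2, *pstride==6. Thread 0 owns chunks {0,3}, thread 1 chunks {1,4},
  // thread 2 chunk {2}; the compiler-generated loop advances by *pstride to
  // reach each thread's later chunks. (nchunks-1)%nth==1, so *plastiter is
  // set for thread 1, which owns the final chunk.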
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment: round span up to a multiple of chunk
    // (this bit trick assumes chunk is a power of two, as for simd widths)
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
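  // Worked example of the team-level split below (illustrative only): with
  // trip_count==10, nteams==4 and balanced scheduling, chunkD==2 and
  // extras==2, so teams 0..3 receive 3,3,2,2 iterations respectively; each
  // team then subdivides its [*plower, *pupperDist] slice among its own
  // threads in the switch further down.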
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get a single iteration; other
    // threads get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
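    // The switch below partitions the team's sub-range among that team's own
    // threads, mirroring __kmp_for_static_init. E.g. (illustrative only), a
    // team that received 3 iterations with nth==2 under balanced scheduling
    // splits them 2,1 between its two threads.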
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for calculating the next chunks. The last-iteration flag is set
  // for the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are handled by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
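
/*
Illustrative sketch (not part of the runtime): roughly the code a compiler
might emit for "#pragma omp for schedule(static)" over i = 0..99. Variable
names are hypothetical; only the runtime entry points are real.

  kmp_int32 lower = 0, upper = 99, stride = 1, last = 0;
  __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower, &upper,
                           &stride, 1, 1); // chunk is ignored for kmp_sch_static
  for (kmp_int32 i = lower; i <= upper; ++i)
    body(i);
  __kmpc_for_static_fini(&loc, gtid);
*/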
/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for the parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
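
/*
Illustrative sketch (not part of the runtime): a compiler lowering
"#pragma omp distribute parallel for dist_schedule(static)" might call the
routine above once per thread; names other than the entry point are
hypothetical.

  kmp_int32 lower = 0, upper = 99, upperD = 99, stride = 1, last = 0;
  __kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
                                &upper, &upperD, &stride, 1, 1);
  // The thread loops over [lower, upper]; [team lower, upperD] is the team's
  // slice of the distribute iteration space.
  for (kmp_int32 i = lower; i <= upper; ++i)
    body(i);
*/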
/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of the composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
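
/*
Illustrative sketch (not part of the runtime): for
"dist_schedule(static, chunk)" the compiler can obtain the team's first chunk
and the stride used to step to its next chunks; names other than the entry
point are hypothetical.

  kmp_int32 lb = 0, ub = 99, st = 1, last = 0;
  __kmpc_team_static_init_4(&loc, gtid, &last, &lb, &ub, &st, 1, 8);
  // The team's chunks are [lb, ub], [lb + st, ub + st], ... until the
  // emitted loop passes the original upper bound.
*/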
/*!
@}
*/

} // extern "C"