/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop; however,
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}
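/* For reference (illustrative numbers, not from the source): the trip-count
   formula used in KMP_STATS_LOOP_END above, and repeated throughout this
   file, counts the iterations of a loop over [lower, upper] with increment
   incr. For lower = 0, upper = 9, incr = 2 it gives (9 - 0) / 2 + 1 = 5
   iterations (0, 2, 4, 6, 8); for incr = 1 it reduces to
   upper - lower + 1 = 10. */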
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
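  /* An illustrative instance of the zero-trip case handled above (numbers
     are hypothetical): a loop for (i = 1; i <= 0; ++i) arrives with
     *plower = 1, *pupper = 0, incr = 1. Since incr > 0 and *pupper < *plower,
     the routine returns immediately with *plastiter = FALSE and the bounds
     untouched, so the compiler's own loop test (lower <= upper) skips the
     body. */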
  // Although there are schedule enumerations above kmp_ord_upper which are
  // not schedules for "distribute", the only useful ones are dynamic, so they
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
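      /* Illustrative example (hypothetical numbers): trip_count = 3, nth = 8,
         *plower = 0, incr = 1. Threads 0..2 each get exactly the one
         iteration equal to their tid (*plower == *pupper), threads 3..7 get
         an empty range (*plower = *pupper + incr), and thread 2 reports the
         last iteration. */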
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }
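  /* Illustrative outcomes of the partitioning above (hypothetical numbers:
     *plower = 0, incr = 1, trip_count = 10, nth = 4):
     - kmp_sch_static, balanced: small_chunk = 2, extras = 2, so the threads
       get [0..2], [3..5], [6..7], [8..9]; tid 3 reports the last iteration.
     - kmp_sch_static_chunked, chunk = 2: span = 2 and *pstride = 8, so tid 1
       starts with [2..3] and would continue at [10..11] (past this loop);
       the last chunk has index (10 - 1) / 2 = 4, owned by tid 4 % 4 == 0. */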
#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
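      /* Illustrative greedy split across teams (hypothetical numbers):
         trip_count = 10, nteams = 4, *plower = 0, incr = 1 give
         chunk_inc_count = 3 and raw team ranges [0..2], [3..5], [6..8],
         [9..11]; the checks below clamp the last team's upper bound to 9,
         and that team reports the last iteration. */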
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
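/* An illustrative end-to-end result of __kmp_dist_for_static_init (all
   numbers hypothetical, assuming __kmp_static == kmp_sch_static_balanced):
   for a loop over 0..9 with incr = 1, nteams = 2, nth = 2, the teams first
   get [0..4] and [5..9] (*pupperDist); within team 0 the threads then get
   [0..2] and [3..4] (*pupper), and within team 1 they get [5..7] and
   [8..9]; thread 1 of team 1 reports the last iteration. */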
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
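/* A sketch (illustrative, not compiler-verbatim; `loc`, `gtid`, `N` and
   `body` are assumed to exist) of how a compiler might lower
   `#pragma omp for schedule(static)` over 0..N-1 with the routine above:

     kmp_int32 lower = 0, upper = N - 1, stride = 1, lastiter = 0;
     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &lastiter, &lower,
                              &upper, &stride, 1, 1);
     for (kmp_int32 i = lower; i <= upper; ++i)
       body(i);
     __kmpc_for_static_fini(&loc, gtid);
*/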
/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
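/* A sketch (illustrative, not compiler-verbatim; `loc`, `gtid`, `N` and
   `body` are assumed) of using the 4-byte variant for a
   `distribute parallel for` loop over 0..N-1: the routine returns the
   thread's chunk in [lower, upper] and the enclosing team's chunk bound in
   upperD.

     kmp_int32 lower = 0, upper = N - 1, upperD = N - 1;
     kmp_int32 stride = 1, lastiter = 0;
     __kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &lastiter,
                                   &lower, &upper, &upperD, &stride, 1, 1);
     for (kmp_int32 i = lower; i <= upper; ++i)
       body(i);
*/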
/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last Pointer to the last iteration flag
@param p_lb Pointer to the lower bound
@param p_ub Pointer to the upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of a composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
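/* A sketch (illustrative; `loc`, `gtid`, `N`, `chunk` and `body` are
   assumed) of how a team might walk its chunks for
   dist_schedule(static, chunk): the routine returns the team's first chunk
   and the stride to its next one.

     kmp_int32 lb = 0, ub = N - 1, st = 0, last = 0;
     const kmp_int32 loop_ub = N - 1;
     __kmpc_team_static_init_4(&loc, gtid, &last, &lb, &ub, &st, 1, chunk);
     for (; lb <= loop_ub; lb += st, ub += st) {
       const kmp_int32 chunk_ub = ub < loop_ub ? ub : loop_ub;
       for (kmp_int32 i = lb; i <= chunk_ub; ++i)
         body(i);
     }
*/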
/*!
@}
*/

} // extern "C"