/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
         it may change values between parallel regions. __kmp_max_nth
         is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
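
// KMP_STATS_LOOP_END records how many iterations the returned bounds cover
// (the current thread's share). The branches below count the iterations of
// the closed interval [l, u] with step i; for example (illustrative values
// only), l = 0, u = 9, i = 2 gives t = (9 - 0) / 2 + 1 = 5 iterations.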
#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // they cannot be seen here, since this codepath is only executed for static
  // schedules.
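  // For a DISTRIBUTE construct the compiler passes a kmp_distribute_* value
  // (above kmp_ord_upper); the offset below remaps it to the corresponding
  // worksharing schedule, e.g. kmp_distribute_static -> kmp_sch_static and
  // kmp_distribute_static_chunked -> kmp_sch_static_chunked, and the bounds
  // are then computed against the parent (teams) team.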
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
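      // Fewer iterations than threads; for example (illustrative values
      // only), trip_count = 3 and nth = 8: threads 0..2 each execute one
      // iteration, while threads 3..7 receive an empty range below
      // (*plower = *pupper + incr).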
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
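  // Chunks are assigned to threads round-robin; for example (illustrative
  // values only), chunk = 2, incr = 1, nth = 3 over 12 iterations: thread tid
  // starts at *plower + 2 * tid, owns two iterations, and advances by
  // *pstride = 6 to its next chunk.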
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}
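
// __kmp_dist_for_static_init computes bounds for a composite
// "distribute parallel for" loop: the iteration space is first divided among
// the teams (upper bound returned in *pupperDist), and the team's portion is
// then divided among the threads of the team (upper bound returned in
// *pupper). For example (illustrative values only), 8 iterations over 2 teams
// of 2 threads: team 0 covers [0, 3] and team 1 covers [4, 7]; within team 0,
// thread 0 gets [0, 1] and thread 1 gets [2, 3] (the balanced and greedy
// methods agree here because the counts divide evenly).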
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
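  // When there are no more iterations than teams, only the master thread of
  // the first trip_count teams executes anything; for example (illustrative
  // values only), trip_count = 3 and nteams = 4: masters of teams 0..2 run
  // one iteration each, and every other thread gets an empty range.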
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
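    // The thread-level split below mirrors __kmp_for_static_init. In the
    // greedy path the provisional upper bound can overshoot the team's chunk;
    // for example (illustrative values only), a team chunk [0, 9] over 4
    // threads gives greedy chunks of 3, so thread 3 computes [9, 11], which
    // is clamped back to [9, 9], and thread 3 owns the last iteration.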
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
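
// Illustrative example (values assumed): for dist_schedule(static, 4) over
// iterations [0, 15] with nteams = 2 and incr = 1, __kmp_team_static_init
// returns *p_st = 8; team 0 starts with chunk [0, 3] and team 1 with [4, 7],
// each advancing by 8 to its next chunk. Team 1 owns the final chunk
// [12, 15], so its last-iteration flag is set.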
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks. The last iteration flag is set
  // for the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
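
/* Illustrative (assumed) call sequence, roughly what a compiler might emit
   for "#pragma omp for schedule(static)" over for (i = 0; i < n; ++i):

     kmp_int32 last = 0, lb = 0, ub = n - 1, st = 1;
     __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lb, &ub,
                              &st, 1, 1);
     for (kmp_int32 i = lb; i <= ub; ++i)
       ; // loop body
     __kmpc_for_static_fini(loc, gtid);

   The exact arguments (in particular the chunk value for an unchunked
   schedule) vary by compiler. */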

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for the parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last Pointer to the last iteration flag
@param p_lb Pointer to the lower bound
@param p_ub Pointer to the upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of the composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"