/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
         it may change values between parallel regions. __kmp_max_nth
         is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear monotonic/nonmonotonic bits (ignore it)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
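    // (If neither callback is registered, team_info/task_info stay NULL; the
    // OMPT calls further down in this routine sit behind the same
    // ompt_enabled checks, so the pointers are never dereferenced in that
    // case.)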
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
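  // A note on the conversion below (a sketch, assuming the usual relative
  // layout of the sched_type enumeration): adding (kmp_sch_static -
  // kmp_distribute_static) maps kmp_distribute_static onto kmp_sch_static and
  // kmp_distribute_static_chunked onto kmp_sch_static_chunked, so the rest of
  // this routine can treat a distribute chunk like an ordinary static loop.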
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      // for(i=10;i<0;++i) // lower >= upper - run-time check
      // for(i=0;i>10;--i) // lower <= upper - run-time check
      // for(i=0;i>10;++i) // incr > 0 - compile-time check
      // for(i=10;i<0;--i) // incr < 0 - compile-time check
      // Compiler does not check the following illegal loops:
      // for(i=0;i<10;i+=incr) // where incr<0
      // for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
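      // Greedy team partitioning gives every team the same ceil-sized chunk
      // and clips the overshooting team's upper bound below. Illustrative
      // example (not from the source): trip_count = 10, nteams = 3, incr = 1
      // => chunk_inc_count = (10/3 + 1) * 1 = 4, so team 0 covers
      // [lower, lower+3], team 1 [lower+4, lower+7], and team 2 starts at
      // lower+8 and is clipped back to the original upper bound.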
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
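          // Same greedy split as the team-level partitioning above, now
          // dividing this team's [*plower, *pupperDist] sub-range among the
          // nth threads of the team; the bound checks below again guard
          // against overflow and clip the last thread to *pupperDist.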
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
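  // Illustrative example (not from the source): lower = 0, upper = 99,
  // incr = 1, chunk = 10, nteams = 4. Then span = 10, team 0 gets the first
  // chunk [0, 9], team 1 gets [10, 19], and every team advances by
  // *p_st = span * nteams = 40 to reach its next chunk (team 0 continues
  // with [40, 49], and so on); the team owning the chunk that contains
  // iteration 99 gets *p_last = 1.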
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      // for(i=10;i<0;++i) // lower >= upper - run-time check
      // for(i=0;i>10;--i) // lower <= upper - run-time check
      // for(i=0;i>10;++i) // incr > 0 - compile-time check
      // for(i=10;i<0;--i) // incr < 0 - compile-time check
      // Compiler does not check the following illegal loops:
      // for(i=0;i<10;i+=incr) // where incr<0
      // for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of a composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"