xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_sched.cpp (revision e2eeea75eb8b6dd50c1298067a0655880d186734)
1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 /* Static scheduling initialization.
14 
15   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16         it may change values between parallel regions.  __kmp_max_nth
17         is the largest value __kmp_nth may take, 1 is the smallest. */
18 
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25 
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29 
30 #ifdef KMP_DEBUG
31 //-------------------------------------------------------------------------
32 // template for debug prints specification ( d, u, lld, llu, ld )
// Each spec is the printf-style conversion suffix for one integer type. The
// debug traces in this file splice traits_t<T>::spec into "%%%s" placeholders
// via __kmp_str_format so the resulting format matches the template type.
33 char const *traits_t<int>::spec = "d";
34 char const *traits_t<unsigned int>::spec = "u";
35 char const *traits_t<long long>::spec = "lld";
36 char const *traits_t<unsigned long long>::spec = "llu";
37 char const *traits_t<long>::spec = "ld";
38 //-------------------------------------------------------------------------
39 #endif
40 
// KMP_STATS_LOOP_END(stat): statistics epilogue for the loop-init routines.
// With KMP_STATS_ENABLED it counts the iterations described by the bounds and
// increment found at the expansion site, records that count under `stat` with
// KMP_COUNT_VALUE, and then calls KMP_POP_PARTITIONED_TIMER. Without stats it
// expands to nothing.
// The macro does not take the bounds as arguments: it reads the identifiers
// `pupper`, `plower` (both dereferenced) and `incr` from the enclosing scope,
// so it can only be used where those names are visible. All arithmetic is
// done after widening to kmp_int64.
41 #if KMP_STATS_ENABLED
42 #define KMP_STATS_LOOP_END(stat)                                               \
43   {                                                                            \
44     kmp_int64 t;                                                               \
45     kmp_int64 u = (kmp_int64)(*pupper);                                        \
46     kmp_int64 l = (kmp_int64)(*plower);                                        \
47     kmp_int64 i = (kmp_int64)incr;                                             \
48     if (i == 1) {                                                              \
49       t = u - l + 1;                                                           \
50     } else if (i == -1) {                                                      \
51       t = l - u + 1;                                                           \
52     } else if (i > 0) {                                                        \
53       t = (u - l) / i + 1;                                                     \
54     } else {                                                                   \
55       t = (l - u) / (-i) + 1;                                                  \
56     }                                                                          \
57     KMP_COUNT_VALUE(stat, t);                                                  \
58     KMP_POP_PARTITIONED_TIMER();                                               \
59   }
60 #else
61 #define KMP_STATS_LOOP_END(stat) /* Nothing */
62 #endif
63 
// Computes the part of a statically scheduled loop that the calling thread
// executes and writes it back through the pointer arguments.
//
// Template parameter T is the loop variable type; ST and UT below are its
// signed and unsigned counterparts taken from traits_t<T>.
//
// loc        Source location. Used for consistency-check error reports, for
//            picking the OMPT workshare type from loc->flags (only when loc
//            is non-NULL), for the zero-trip debug trace (loc->psource) and
//            for ITT loop metadata.
// global_tid Global thread id; indexes __kmp_threads.
// schedtype  Schedule kind. Values above kmp_ord_upper are treated as
//            "distribute" schedules and shifted by
//            (kmp_sch_static - kmp_distribute_static). After that the switch
//            handles kmp_sch_static, kmp_sch_static_chunked and
//            kmp_sch_static_balanced_chunked; any other value reaches
//            KMP_ASSERT2.
// plastiter  Out: nonzero if this thread owns the last iteration.
// plower     In/out: on entry the lower bound of the whole loop, on exit the
//            lower bound of this thread's (first) chunk.
// pupper     In/out: same as plower, for the upper bound.
// pstride    Out: stride; what is stored differs per path, see the notes at
//            each assignment below.
// incr       Loop increment. Zero is reported as an error only when
//            __kmp_env_consistency_check is set.
// chunk      Chunk size, used by the chunked schedules.
// codeptr    (OMPT builds only) forwarded to the ompt_callback_work callback.
//
// All four pointers are asserted non-NULL with KMP_DEBUG_ASSERT; plastiter is
// additionally NULL-checked before the non-debug writes.
64 template <typename T>
65 static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
66                                   kmp_int32 schedtype, kmp_int32 *plastiter,
67                                   T *plower, T *pupper,
68                                   typename traits_t<T>::signed_t *pstride,
69                                   typename traits_t<T>::signed_t incr,
70                                   typename traits_t<T>::signed_t chunk
71 #if OMPT_SUPPORT && OMPT_OPTIONAL
72                                   ,
73                                   void *codeptr
74 #endif
75                                   ) {
76   KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
77   KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
78   KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);
79 
80   typedef typename traits_t<T>::unsigned_t UT;
81   typedef typename traits_t<T>::signed_t ST;
82   /*  this all has to be changed back to TID and such.. */
83   kmp_int32 gtid = global_tid;
84   kmp_uint32 tid;
85   kmp_uint32 nth;
86   UT trip_count;
87   kmp_team_t *team;
88   kmp_info_t *th = __kmp_threads[gtid];
89 
90 #if OMPT_SUPPORT && OMPT_OPTIONAL
91   ompt_team_info_t *team_info = NULL;
92   ompt_task_info_t *task_info = NULL;
93   ompt_work_t ompt_work_type = ompt_work_loop;
94 
  // Function-local static shared by all callers of this instantiation. It is
  // switched from 0 to 1 by the compare-and-store below and the warning is
  // printed only when that store reports success.
95   static kmp_int8 warn = 0;
96 
97   if (ompt_enabled.ompt_callback_work) {
98     // Only fully initialize variables needed by OMPT if OMPT is enabled.
99     team_info = __ompt_get_teaminfo(0, NULL);
100     task_info = __ompt_get_task_info_object(0);
101     // Determine workshare type
102     if (loc != NULL) {
103       if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
104         ompt_work_type = ompt_work_loop;
105       } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
106         ompt_work_type = ompt_work_sections;
107       } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
108         ompt_work_type = ompt_work_distribute;
109       } else {
        // None of the three work flags is set: keep the ompt_work_loop
        // default and warn (OmptOutdatedWorkshare).
110         kmp_int8 bool_res =
111             KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
112         if (bool_res)
113           KMP_WARNING(OmptOutdatedWorkshare);
114       }
115       KMP_DEBUG_ASSERT(ompt_work_type);
116     }
117   }
118 #endif
119 
120   KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
121   KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
122 #ifdef KMP_DEBUG
123   {
124     char *buff;
125     // create format specifiers before the debug output
126     buff = __kmp_str_format(
127         "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
128         " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
129         traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
130         traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
131     KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
132                    *pstride, incr, chunk));
133     __kmp_str_free(&buff);
134   }
135 #endif
136 
137   if (__kmp_env_consistency_check) {
138     __kmp_push_workshare(global_tid, ct_pdo, loc);
139     if (incr == 0) {
140       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
141                             loc);
142     }
143   }
144   /* special handling for zero-trip loops */
  // Early return 1: the bounds are inconsistent with the sign of incr, so no
  // iteration runs. The bounds are left untouched, *plastiter is cleared and
  // the OMPT work callback is invoked with a count of 0.
145   if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
146     if (plastiter != NULL)
147       *plastiter = FALSE;
148     /* leave pupper and plower set to entire iteration space */
149     *pstride = incr; /* value should never be used */
150 // *plower = *pupper - incr;
151 // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
152 // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
153 // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
154 #ifdef KMP_DEBUG
155     {
156       char *buff;
157       // create format specifiers before the debug output
158       buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
159                               "lower=%%%s upper=%%%s stride = %%%s "
160                               "signed?<%s>, loc = %%s\n",
161                               traits_t<T>::spec, traits_t<T>::spec,
162                               traits_t<ST>::spec, traits_t<T>::spec);
      // NOTE(review): loc is dereferenced here without a NULL check, while
      // the OMPT block above does guard against loc == NULL - confirm that
      // callers never pass NULL in debug builds.
163       KD_TRACE(100,
164                (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
165       __kmp_str_free(&buff);
166     }
167 #endif
168     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
169 
170 #if OMPT_SUPPORT && OMPT_OPTIONAL
171     if (ompt_enabled.ompt_callback_work) {
172       ompt_callbacks.ompt_callback(ompt_callback_work)(
173           ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
174           &(task_info->task_data), 0, codeptr);
175     }
176 #endif
177     KMP_STATS_LOOP_END(OMP_loop_static_iterations);
178     return;
179   }
180 
181   // Although there are schedule enumerations above kmp_ord_upper which are not
182   // schedules for "distribute", the only ones which are useful are dynamic, so
183   // cannot be seen here, since this codepath is only executed for static
184   // schedules.
  // For a distribute schedule the work is split over the parent team, and the
  // position used for the split is the t_master_tid of the current team.
185   if (schedtype > kmp_ord_upper) {
186     // we are in DISTRIBUTE construct
187     schedtype += kmp_sch_static -
188                  kmp_distribute_static; // AC: convert to usual schedule type
189     tid = th->th.th_team->t.t_master_tid;
190     team = th->th.th_team->t.t_parent;
191   } else {
192     tid = __kmp_tid_from_gtid(global_tid);
193     team = th->th.th_team;
194   }
195 
196   /* determine if "for" loop is an active worksharing construct */
  // Early return 2: serialized team. The bounds stay as passed in, the thread
  // is marked as owning the last iteration, and *pstride receives the signed
  // extent of the whole range (negative when incr is not positive).
197   if (team->t.t_serialized) {
198     /* serialized parallel, each thread executes whole iteration space */
199     if (plastiter != NULL)
200       *plastiter = TRUE;
201     /* leave pupper and plower set to entire iteration space */
202     *pstride =
203         (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
204 
205 #ifdef KMP_DEBUG
206     {
207       char *buff;
208       // create format specifiers before the debug output
209       buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
210                               "lower=%%%s upper=%%%s stride = %%%s\n",
211                               traits_t<T>::spec, traits_t<T>::spec,
212                               traits_t<ST>::spec);
213       KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
214       __kmp_str_free(&buff);
215     }
216 #endif
217     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
218 
219 #if OMPT_SUPPORT && OMPT_OPTIONAL
220     if (ompt_enabled.ompt_callback_work) {
221       ompt_callbacks.ompt_callback(ompt_callback_work)(
222           ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
223           &(task_info->task_data), *pstride, codeptr);
224     }
225 #endif
226     KMP_STATS_LOOP_END(OMP_loop_static_iterations);
227     return;
228   }
  // Early return 3: a team of exactly one thread gets the same result as the
  // serialized case above.
229   nth = team->t.t_nproc;
230   if (nth == 1) {
231     if (plastiter != NULL)
232       *plastiter = TRUE;
233     *pstride =
234         (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
235 #ifdef KMP_DEBUG
236     {
237       char *buff;
238       // create format specifiers before the debug output
239       buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
240                               "lower=%%%s upper=%%%s stride = %%%s\n",
241                               traits_t<T>::spec, traits_t<T>::spec,
242                               traits_t<ST>::spec);
243       KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
244       __kmp_str_free(&buff);
245     }
246 #endif
247     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
248 
249 #if OMPT_SUPPORT && OMPT_OPTIONAL
250     if (ompt_enabled.ompt_callback_work) {
251       ompt_callbacks.ompt_callback(ompt_callback_work)(
252           ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
253           &(task_info->task_data), *pstride, codeptr);
254     }
255 #endif
256     KMP_STATS_LOOP_END(OMP_loop_static_iterations);
257     return;
258   }
259 
260   /* compute trip count */
  // The result is stored in the unsigned type UT. The +1/-1 increments are
  // special-cased so that no division is needed for them.
261   if (incr == 1) {
262     trip_count = *pupper - *plower + 1;
263   } else if (incr == -1) {
264     trip_count = *plower - *pupper + 1;
265   } else if (incr > 0) {
266     // upper-lower can exceed the limit of signed type
267     trip_count = (UT)(*pupper - *plower) / incr + 1;
268   } else {
269     trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
270   }
271 
272 #if KMP_STATS_ENABLED
273   if (KMP_MASTER_GTID(gtid)) {
274     KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
275   }
276 #endif
277 
278   if (__kmp_env_consistency_check) {
279     /* tripcount overflow? */
    // A zero count with distinct bounds means the +1 above wrapped around.
280     if (trip_count == 0 && *pupper != *plower) {
281       __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
282                             loc);
283     }
284   }
285 
286   /* compute remaining parameters */
287   switch (schedtype) {
  // Unchunked static schedule: each thread receives at most one contiguous
  // block. *pstride is set to trip_count for every sub-case.
288   case kmp_sch_static: {
289     if (trip_count < nth) {
      // Fewer iterations than threads: threads with tid < trip_count take
      // exactly one iteration, the rest get an empty range
      // (lower = upper + incr).
290       KMP_DEBUG_ASSERT(
291           __kmp_static == kmp_sch_static_greedy ||
292           __kmp_static ==
293               kmp_sch_static_balanced); // Unknown static scheduling type.
294       if (tid < trip_count) {
295         *pupper = *plower = *plower + tid * incr;
296       } else {
297         *plower = *pupper + incr;
298       }
299       if (plastiter != NULL)
300         *plastiter = (tid == trip_count - 1);
301     } else {
302       if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced: every thread gets small_chunk iterations and the first
        // `extras` threads get one more. The last thread owns the last
        // iteration.
303         UT small_chunk = trip_count / nth;
304         UT extras = trip_count % nth;
305         *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
306         *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
307         if (plastiter != NULL)
308           *plastiter = (tid == nth - 1);
309       } else {
        // Greedy: every thread is offered ceil(trip_count / nth) iterations
        // (big_chunk_inc_count is that count multiplied by incr). The block
        // is then clipped to the original upper bound.
310         T big_chunk_inc_count =
311             (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
312         T old_upper = *pupper;
313 
314         KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
315         // Unknown static scheduling type.
316 
317         *plower += tid * big_chunk_inc_count;
318         *pupper = *plower + big_chunk_inc_count - incr;
        // If the computed upper bound ended up on the wrong side of the lower
        // bound it is replaced by the extreme value of T before clipping.
        // *plastiter is evaluated on the unclipped upper bound.
319         if (incr > 0) {
320           if (*pupper < *plower)
321             *pupper = traits_t<T>::max_value;
322           if (plastiter != NULL)
323             *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
324           if (*pupper > old_upper)
325             *pupper = old_upper; // tracker C73258
326         } else {
327           if (*pupper > *plower)
328             *pupper = traits_t<T>::min_value;
329           if (plastiter != NULL)
330             *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
331           if (*pupper < old_upper)
332             *pupper = old_upper; // tracker C73258
333         }
334       }
335     }
336     *pstride = trip_count;
337     break;
338   }
  // Chunked static schedule: chunks of `chunk` iterations are assigned in
  // thread order. The bounds describe the first chunk of this thread and
  // *pstride (span * nth) is the distance to its next chunk. The upper bound
  // is not clipped to the original upper bound here.
339   case kmp_sch_static_chunked: {
340     ST span;
341     if (chunk < 1) {
342       chunk = 1;
343     }
344     span = chunk * incr;
345     *pstride = span * nth;
346     *plower = *plower + (span * tid);
347     *pupper = *plower + span - incr;
    // (trip_count - 1) / chunk is the index of the chunk holding the final
    // iteration; its owner is that index modulo nth.
348     if (plastiter != NULL)
349       *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
350     break;
351   }
  // Balanced chunked schedule: one block per thread whose size is derived
  // from ceil(trip_count / nth) and the requested chunk. This case does not
  // write *pstride.
352   case kmp_sch_static_balanced_chunked: {
353     T old_upper = *pupper;
354     // round up to make sure the chunk is enough to cover all iterations
355     UT span = (trip_count + nth - 1) / nth;
356 
357     // perform chunk adjustment
    // NOTE(review): the mask form only rounds span up to a multiple of chunk
    // when chunk is a power of two - presumably guaranteed by the caller,
    // confirm.
358     chunk = (span + chunk - 1) & ~(chunk - 1);
359 
360     span = chunk * incr;
361     *plower = *plower + (span * tid);
362     *pupper = *plower + span - incr;
363     if (incr > 0) {
364       if (*pupper > old_upper)
365         *pupper = old_upper;
366     } else if (*pupper < old_upper)
367       *pupper = old_upper;
368 
369     if (plastiter != NULL)
370       *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
371     break;
372   }
373   default:
374     KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
375     break;
376   }
377 
378 #if USE_ITT_BUILD
379   // Report loop metadata
380   if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
381       __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
382       team->t.t_active_level == 1) {
383     kmp_uint64 cur_chunk = chunk;
384     // Calculate chunk in case it was not specified; it is specified for
385     // kmp_sch_static_chunked
386     if (schedtype == kmp_sch_static) {
387       cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
388     }
389     // 0 - "static" schedule
390     __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
391   }
392 #endif
393 #ifdef KMP_DEBUG
394   {
395     char *buff;
396     // create format specifiers before the debug output
397     buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
398                             "upper=%%%s stride = %%%s signed?<%s>\n",
399                             traits_t<T>::spec, traits_t<T>::spec,
400                             traits_t<ST>::spec, traits_t<T>::spec);
401     KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
402     __kmp_str_free(&buff);
403   }
404 #endif
405   KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
406 
407 #if OMPT_SUPPORT && OMPT_OPTIONAL
  // On this path the count reported to the tool is the trip count of the
  // whole loop, not the size of this thread's share.
408   if (ompt_enabled.ompt_callback_work) {
409     ompt_callbacks.ompt_callback(ompt_callback_work)(
410         ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
411         &(task_info->task_data), trip_count, codeptr);
412   }
413 #endif
414 
415   KMP_STATS_LOOP_END(OMP_loop_static_iterations);
416   return;
417 }
418 
// Two-level static partitioning for a loop that is distributed over teams and
// then shared among the threads of each team.
//
// Step 1 splits the whole iteration space across the nteams teams; the upper
// bound of the calling team's share is stored in *pupperDist.
// Step 2 splits that share across the nth threads of the team; the calling
// thread's bounds are stored in *plower / *pupper.
//
// loc        Source location, used for consistency-check error reports.
// gtid       Global thread id; indexes __kmp_threads.
// schedule   Schedule of the inner (per-thread) split: kmp_sch_static or
//            kmp_sch_static_chunked. Any other value reaches KMP_ASSERT2.
// plastiter  Out: nonzero only if this thread of this team owns the last
//            iteration. It is first set at team level and then cleared at
//            thread level when the thread does not own the end of the share.
// plower     In/out: loop lower bound on entry, thread lower bound on exit.
// pupper     In/out: loop upper bound on entry, thread upper bound on exit.
// pupperDist Out: upper bound of the team's share.
// pstride    Out: initialised to *pupper - *plower; overwritten with
//            span * nth only for kmp_sch_static_chunked.
// incr       Loop increment (zero is reported only under consistency checks).
// chunk      Chunk size for kmp_sch_static_chunked; values below 1 become 1.
419 template <typename T>
420 static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
421                                        kmp_int32 schedule, kmp_int32 *plastiter,
422                                        T *plower, T *pupper, T *pupperDist,
423                                        typename traits_t<T>::signed_t *pstride,
424                                        typename traits_t<T>::signed_t incr,
425                                        typename traits_t<T>::signed_t chunk) {
426   KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
427   KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
428   KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
429   typedef typename traits_t<T>::unsigned_t UT;
430   typedef typename traits_t<T>::signed_t ST;
431   kmp_uint32 tid;
432   kmp_uint32 nth;
433   kmp_uint32 team_id;
434   kmp_uint32 nteams;
435   UT trip_count;
436   kmp_team_t *team;
437   kmp_info_t *th;
438 
439   KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
440   KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
441 #ifdef KMP_DEBUG
442   {
443     char *buff;
444     // create format specifiers before the debug output
445     buff = __kmp_str_format(
446         "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
447         "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
448         traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
449         traits_t<ST>::spec, traits_t<T>::spec);
450     KD_TRACE(100,
451              (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
452     __kmp_str_free(&buff);
453   }
454 #endif
455 
  // Unlike __kmp_for_static_init there is no zero-trip early return in this
  // routine; bounds that contradict the sign of incr are only reported, and
  // only when consistency checking is enabled.
456   if (__kmp_env_consistency_check) {
457     __kmp_push_workshare(gtid, ct_pdo, loc);
458     if (incr == 0) {
459       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
460                             loc);
461     }
462     if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
463       // The loop is illegal.
464       // Some zero-trip loops maintained by compiler, e.g.:
465       //   for(i=10;i<0;++i) // lower >= upper - run-time check
466       //   for(i=0;i>10;--i) // lower <= upper - run-time check
467       //   for(i=0;i>10;++i) // incr > 0       - compile-time check
468       //   for(i=10;i<0;--i) // incr < 0       - compile-time check
469       // Compiler does not check the following illegal loops:
470       //   for(i=0;i<10;i+=incr) // where incr<0
471       //   for(i=10;i>0;i-=incr) // where incr<0
472       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
473     }
474   }
  // tid/nth describe the thread inside its team; team_id/nteams describe the
  // team inside the league (team_id is taken from the team's t_master_tid).
475   tid = __kmp_tid_from_gtid(gtid);
476   th = __kmp_threads[gtid];
477   nth = th->th.th_team_nproc;
478   team = th->th.th_team;
479   KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
480   nteams = th->th.th_teams_size.nteams;
481   team_id = team->t.t_master_tid;
482   KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
483 
484   // compute global trip count
485   if (incr == 1) {
486     trip_count = *pupper - *plower + 1;
487   } else if (incr == -1) {
488     trip_count = *plower - *pupper + 1;
489   } else if (incr > 0) {
490     // upper-lower can exceed the limit of signed type
491     trip_count = (UT)(*pupper - *plower) / incr + 1;
492   } else {
493     trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
494   }
495 
496   *pstride = *pupper - *plower; // just in case (can be unused)
497   if (trip_count <= nteams) {
498     KMP_DEBUG_ASSERT(
499         __kmp_static == kmp_sch_static_greedy ||
500         __kmp_static ==
501             kmp_sch_static_balanced); // Unknown static scheduling type.
502     // only masters of some teams get single iteration, other threads get
503     // nothing
504     if (team_id < trip_count && tid == 0) {
505       *pupper = *pupperDist = *plower = *plower + team_id * incr;
506     } else {
507       *pupperDist = *pupper;
508       *plower = *pupper + incr; // compiler should skip loop body
509     }
510     if (plastiter != NULL)
511       *plastiter = (tid == 0 && team_id == trip_count - 1);
512   } else {
513     // Get the team's chunk first (each team gets at most one chunk)
514     if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: chunkD iterations per team, one extra for the first
      // `extras` teams.
515       UT chunkD = trip_count / nteams;
516       UT extras = trip_count % nteams;
517       *plower +=
518           incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
519       *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
520       if (plastiter != NULL)
521         *plastiter = (team_id == nteams - 1);
522     } else {
      // Greedy: ceil(trip_count / nteams) iterations per team, clipped to
      // the original upper bound. A team whose share is empty after clipping
      // jumps straight to `end` with *pupper set to *pupperDist.
523       T chunk_inc_count =
524           (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
525       T upper = *pupper;
526       KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
527       // Unknown static scheduling type.
528       *plower += team_id * chunk_inc_count;
529       *pupperDist = *plower + chunk_inc_count - incr;
530       // Check/correct bounds if needed
531       if (incr > 0) {
532         if (*pupperDist < *plower)
533           *pupperDist = traits_t<T>::max_value;
534         if (plastiter != NULL)
535           *plastiter = *plower <= upper && *pupperDist > upper - incr;
536         if (*pupperDist > upper)
537           *pupperDist = upper; // tracker C73258
538         if (*plower > *pupperDist) {
539           *pupper = *pupperDist; // no iterations available for the team
540           goto end;
541         }
542       } else {
543         if (*pupperDist > *plower)
544           *pupperDist = traits_t<T>::min_value;
545         if (plastiter != NULL)
546           *plastiter = *plower >= upper && *pupperDist < upper - incr;
547         if (*pupperDist < upper)
548           *pupperDist = upper; // tracker C73258
549         if (*plower < *pupperDist) {
550           *pupper = *pupperDist; // no iterations available for the team
551           goto end;
552         }
553       }
554     }
555     // Get the parallel loop chunk now (for thread)
556     // compute trip count for team's chunk
    // trip_count is reused: from here on it counts the iterations of the
    // team's share [*plower, *pupperDist]. The `incr > 1` test is equivalent
    // to `incr > 0` because incr == 1 was handled by the first branch.
557     if (incr == 1) {
558       trip_count = *pupperDist - *plower + 1;
559     } else if (incr == -1) {
560       trip_count = *plower - *pupperDist + 1;
561     } else if (incr > 1) {
562       // upper-lower can exceed the limit of signed type
563       trip_count = (UT)(*pupperDist - *plower) / incr + 1;
564     } else {
565       trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
566     }
567     KMP_DEBUG_ASSERT(trip_count);
568     switch (schedule) {
569     case kmp_sch_static: {
570       if (trip_count <= nth) {
        // At most one iteration per thread; threads beyond the count get an
        // empty range.
571         KMP_DEBUG_ASSERT(
572             __kmp_static == kmp_sch_static_greedy ||
573             __kmp_static ==
574                 kmp_sch_static_balanced); // Unknown static scheduling type.
575         if (tid < trip_count)
576           *pupper = *plower = *plower + tid * incr;
577         else
578           *plower = *pupper + incr; // no iterations available
        // The team-level flag is only ever cleared here, never set.
579         if (plastiter != NULL)
580           if (*plastiter != 0 && !(tid == trip_count - 1))
581             *plastiter = 0;
582       } else {
583         if (__kmp_static == kmp_sch_static_balanced) {
584           UT chunkL = trip_count / nth;
585           UT extras = trip_count % nth;
586           *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
587           *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
588           if (plastiter != NULL)
589             if (*plastiter != 0 && !(tid == nth - 1))
590               *plastiter = 0;
591         } else {
          // Greedy split inside the team, clipped to the team upper bound.
592           T chunk_inc_count =
593               (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
594           T upper = *pupperDist;
595           KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
596           // Unknown static scheduling type.
597           *plower += tid * chunk_inc_count;
598           *pupper = *plower + chunk_inc_count - incr;
599           if (incr > 0) {
600             if (*pupper < *plower)
601               *pupper = traits_t<T>::max_value;
602             if (plastiter != NULL)
603               if (*plastiter != 0 &&
604                   !(*plower <= upper && *pupper > upper - incr))
605                 *plastiter = 0;
606             if (*pupper > upper)
607               *pupper = upper; // tracker C73258
608           } else {
609             if (*pupper > *plower)
610               *pupper = traits_t<T>::min_value;
611             if (plastiter != NULL)
612               if (*plastiter != 0 &&
613                   !(*plower >= upper && *pupper < upper - incr))
614                 *plastiter = 0;
615             if (*pupper < upper)
616               *pupper = upper; // tracker C73258
617           }
618         }
619       }
620       break;
621     }
622     case kmp_sch_static_chunked: {
      // Chunks are assigned in thread order inside the team share. The
      // bounds describe the first chunk of this thread and *pstride is the
      // distance to its next one. The upper bound is not clipped here.
623       ST span;
624       if (chunk < 1)
625         chunk = 1;
626       span = chunk * incr;
627       *pstride = span * nth;
628       *plower = *plower + (span * tid);
629       *pupper = *plower + span - incr;
630       if (plastiter != NULL)
631         if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
632           *plastiter = 0;
633       break;
634     }
635     default:
636       KMP_ASSERT2(0,
637                   "__kmpc_dist_for_static_init: unknown loop scheduling type");
638       break;
639     }
640   }
// Common exit: also the target of the two `goto end` statements taken when
// the team has no iterations.
641 end:;
642 #ifdef KMP_DEBUG
643   {
644     char *buff;
645     // create format specifiers before the debug output
646     buff = __kmp_str_format(
647         "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
648         "stride=%%%s signed?<%s>\n",
649         traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
650         traits_t<ST>::spec, traits_t<T>::spec);
651     KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
652     __kmp_str_free(&buff);
653   }
654 #endif
655   KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
656   KMP_STATS_LOOP_END(OMP_distribute_iterations);
657   return;
658 }
659 
// Computes the first chunk of a chunked static distribution for the calling
// team, plus the stride that leads to the team's following chunks.
//
// loc     Source location, used for consistency-check error reports.
// gtid    Global thread id; indexes __kmp_threads.
// p_last  Out: nonzero if this team executes the last iteration of the loop.
// p_lb    In/out: loop lower bound on entry, first-chunk lower bound on exit.
// p_ub    In/out: loop upper bound on entry, first-chunk upper bound on exit,
//         clipped to the original upper bound.
// p_st    Out: chunk * incr * nteams.
// incr    Loop increment (zero is reported only under consistency checks).
// chunk   Chunk size; values below 1 are replaced by 1.
//
// There is no zero-trip early return: bounds that contradict the sign of
// incr are only reported, and only when consistency checking is enabled.
660 template <typename T>
661 static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
662                                    kmp_int32 *p_last, T *p_lb, T *p_ub,
663                                    typename traits_t<T>::signed_t *p_st,
664                                    typename traits_t<T>::signed_t incr,
665                                    typename traits_t<T>::signed_t chunk) {
666   // The routine returns the first chunk distributed to the team and
667   // stride for next chunks calculation.
668   // Last iteration flag set for the team that will execute
669   // the last iteration of the loop.
670   // The routine is called for dist_schedule(static,chunk) only.
671   typedef typename traits_t<T>::unsigned_t UT;
672   typedef typename traits_t<T>::signed_t ST;
673   kmp_uint32 team_id;
674   kmp_uint32 nteams;
675   UT trip_count;
676   T lower;
677   T upper;
678   ST span;
679   kmp_team_t *team;
680   kmp_info_t *th;
681 
682   KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
683   KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
684 #ifdef KMP_DEBUG
685   {
686     char *buff;
687     // create format specifiers before the debug output
688     buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
689                             "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
690                             traits_t<T>::spec, traits_t<T>::spec,
691                             traits_t<ST>::spec, traits_t<ST>::spec,
692                             traits_t<T>::spec);
693     KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
694     __kmp_str_free(&buff);
695   }
696 #endif
697 
  // Keep the original bounds in locals; *p_lb and *p_ub are overwritten
  // below and `upper` is still needed for the final clipping.
698   lower = *p_lb;
699   upper = *p_ub;
700   if (__kmp_env_consistency_check) {
701     if (incr == 0) {
702       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
703                             loc);
704     }
705     if (incr > 0 ? (upper < lower) : (lower < upper)) {
706       // The loop is illegal.
707       // Some zero-trip loops maintained by compiler, e.g.:
708       //   for(i=10;i<0;++i) // lower >= upper - run-time check
709       //   for(i=0;i>10;--i) // lower <= upper - run-time check
710       //   for(i=0;i>10;++i) // incr > 0       - compile-time check
711       //   for(i=10;i<0;--i) // incr < 0       - compile-time check
712       // Compiler does not check the following illegal loops:
713       //   for(i=0;i<10;i+=incr) // where incr<0
714       //   for(i=10;i>0;i-=incr) // where incr<0
715       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
716     }
717   }
718   th = __kmp_threads[gtid];
719   team = th->th.th_team;
720   KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
721   nteams = th->th.th_teams_size.nteams;
722   team_id = team->t.t_master_tid;
723   KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
724 
725   // compute trip count
726   if (incr == 1) {
727     trip_count = upper - lower + 1;
728   } else if (incr == -1) {
729     trip_count = lower - upper + 1;
730   } else if (incr > 0) {
731     // upper-lower can exceed the limit of signed type
732     trip_count = (UT)(upper - lower) / incr + 1;
733   } else {
734     trip_count = (UT)(lower - upper) / (-incr) + 1;
735   }
736   if (chunk < 1)
737     chunk = 1;
  // span is the distance covered by one chunk. Chunks are assigned to teams
  // in team_id order, so the team's next chunk lies span * nteams further.
738   span = chunk * incr;
739   *p_st = span * nteams;
740   *p_lb = lower + (span * team_id);
741   *p_ub = *p_lb + span - incr;
  // (trip_count - 1) / chunk is the index of the chunk holding the final
  // iteration; its owner is that index modulo nteams.
742   if (p_last != NULL)
743     *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
744   // Correct upper bound if needed
745   if (incr > 0) {
746     if (*p_ub < *p_lb) // overflow?
747       *p_ub = traits_t<T>::max_value;
748     if (*p_ub > upper)
749       *p_ub = upper; // tracker C73258
750   } else { // incr < 0
751     if (*p_ub > *p_lb)
752       *p_ub = traits_t<T>::min_value;
753     if (*p_ub < upper)
754       *p_ub = upper; // tracker C73258
755   }
756 #ifdef KMP_DEBUG
757   {
758     char *buff;
759     // create format specifiers before the debug output
760     buff =
761         __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
762                          "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
763                          traits_t<T>::spec, traits_t<T>::spec,
764                          traits_t<ST>::spec, traits_t<ST>::spec);
765     KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
766     __kmp_str_free(&buff);
767   }
768 #endif
769 }
770 
771 //------------------------------------------------------------------------------
772 extern "C" {
773 /*!
774 @ingroup WORK_SHARING
775 @param    loc       Source code location
776 @param    gtid      Global thread id of this thread
777 @param    schedtype  Scheduling type
778 @param    plastiter Pointer to the "last iteration" flag
779 @param    plower    Pointer to the lower bound
780 @param    pupper    Pointer to the upper bound
781 @param    pstride   Pointer to the stride
782 @param    incr      Loop increment
783 @param    chunk     The chunk size
784 
Each of the four functions here is identical apart from the argument types.
786 
787 The functions compute the upper and lower bounds and stride to be used for the
788 set of iterations to be executed by the current thread from the statically
789 scheduled loop that is described by the initial values of the bounds, stride,
790 increment and chunk size.
791 
792 @{
793 */
794 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
795                               kmp_int32 *plastiter, kmp_int32 *plower,
796                               kmp_int32 *pupper, kmp_int32 *pstride,
797                               kmp_int32 incr, kmp_int32 chunk) {
798   __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
799                                    pupper, pstride, incr, chunk
800 #if OMPT_SUPPORT && OMPT_OPTIONAL
801                                    ,
802                                    OMPT_GET_RETURN_ADDRESS(0)
803 #endif
804                                        );
805 }
806 
807 /*!
808  See @ref __kmpc_for_static_init_4
809  */
810 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
811                                kmp_int32 schedtype, kmp_int32 *plastiter,
812                                kmp_uint32 *plower, kmp_uint32 *pupper,
813                                kmp_int32 *pstride, kmp_int32 incr,
814                                kmp_int32 chunk) {
815   __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
816                                     pupper, pstride, incr, chunk
817 #if OMPT_SUPPORT && OMPT_OPTIONAL
818                                     ,
819                                     OMPT_GET_RETURN_ADDRESS(0)
820 #endif
821                                         );
822 }
823 
824 /*!
825  See @ref __kmpc_for_static_init_4
826  */
827 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
828                               kmp_int32 *plastiter, kmp_int64 *plower,
829                               kmp_int64 *pupper, kmp_int64 *pstride,
830                               kmp_int64 incr, kmp_int64 chunk) {
831   __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
832                                    pupper, pstride, incr, chunk
833 #if OMPT_SUPPORT && OMPT_OPTIONAL
834                                    ,
835                                    OMPT_GET_RETURN_ADDRESS(0)
836 #endif
837                                        );
838 }
839 
840 /*!
841  See @ref __kmpc_for_static_init_4
842  */
843 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
844                                kmp_int32 schedtype, kmp_int32 *plastiter,
845                                kmp_uint64 *plower, kmp_uint64 *pupper,
846                                kmp_int64 *pstride, kmp_int64 incr,
847                                kmp_int64 chunk) {
848   __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
849                                     pupper, pstride, incr, chunk
850 #if OMPT_SUPPORT && OMPT_OPTIONAL
851                                     ,
852                                     OMPT_GET_RETURN_ADDRESS(0)
853 #endif
854                                         );
855 }
856 /*!
857 @}
858 */
859 
860 /*!
861 @ingroup WORK_SHARING
862 @param    loc       Source code location
863 @param    gtid      Global thread id of this thread
864 @param    schedule  Scheduling type for the parallel loop
865 @param    plastiter Pointer to the "last iteration" flag
866 @param    plower    Pointer to the lower bound
867 @param    pupper    Pointer to the upper bound of loop chunk
868 @param    pupperD   Pointer to the upper bound of dist_chunk
869 @param    pstride   Pointer to the stride for parallel loop
870 @param    incr      Loop increment
871 @param    chunk     The chunk size for the parallel loop
872 
Each of the four functions here is identical apart from the argument types.
874 
875 The functions compute the upper and lower bounds and strides to be used for the
876 set of iterations to be executed by the current thread from the statically
877 scheduled loop that is described by the initial values of the bounds, strides,
878 increment and chunks for parallel loop and distribute constructs.
879 
880 @{
881 */
882 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
883                                    kmp_int32 schedule, kmp_int32 *plastiter,
884                                    kmp_int32 *plower, kmp_int32 *pupper,
885                                    kmp_int32 *pupperD, kmp_int32 *pstride,
886                                    kmp_int32 incr, kmp_int32 chunk) {
887   __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
888                                         pupper, pupperD, pstride, incr, chunk);
889 }
890 
891 /*!
892  See @ref __kmpc_dist_for_static_init_4
893  */
894 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
895                                     kmp_int32 schedule, kmp_int32 *plastiter,
896                                     kmp_uint32 *plower, kmp_uint32 *pupper,
897                                     kmp_uint32 *pupperD, kmp_int32 *pstride,
898                                     kmp_int32 incr, kmp_int32 chunk) {
899   __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
900                                          pupper, pupperD, pstride, incr, chunk);
901 }
902 
903 /*!
904  See @ref __kmpc_dist_for_static_init_4
905  */
906 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
907                                    kmp_int32 schedule, kmp_int32 *plastiter,
908                                    kmp_int64 *plower, kmp_int64 *pupper,
909                                    kmp_int64 *pupperD, kmp_int64 *pstride,
910                                    kmp_int64 incr, kmp_int64 chunk) {
911   __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
912                                         pupper, pupperD, pstride, incr, chunk);
913 }
914 
915 /*!
916  See @ref __kmpc_dist_for_static_init_4
917  */
918 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
919                                     kmp_int32 schedule, kmp_int32 *plastiter,
920                                     kmp_uint64 *plower, kmp_uint64 *pupper,
921                                     kmp_uint64 *pupperD, kmp_int64 *pstride,
922                                     kmp_int64 incr, kmp_int64 chunk) {
923   __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
924                                          pupper, pupperD, pstride, incr, chunk);
925 }
926 /*!
927 @}
928 */
929 
930 //------------------------------------------------------------------------------
931 // Auxiliary routines for Distribute Parallel Loop construct implementation
932 //    Transfer call to template< type T >
933 //    __kmp_team_static_init( ident_t *loc, int gtid,
934 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
935 
936 /*!
937 @ingroup WORK_SHARING
938 @{
939 @param loc Source location
940 @param gtid Global thread id
941 @param p_last pointer to last iteration flag
942 @param p_lb  pointer to Lower bound
943 @param p_ub  pointer to Upper bound
@param p_st  pointer to Stride
945 @param incr  Loop increment
946 @param chunk The chunk size to block with
947 
948 The functions compute the upper and lower bounds and stride to be used for the
949 set of iterations to be executed by the current team from the statically
950 scheduled loop that is described by the initial values of the bounds, stride,
951 increment and chunk for the distribute construct as part of composite distribute
952 parallel loop construct. These functions are all identical apart from the types
953 of the arguments.
954 */
955 
956 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
957                                kmp_int32 *p_lb, kmp_int32 *p_ub,
958                                kmp_int32 *p_st, kmp_int32 incr,
959                                kmp_int32 chunk) {
960   KMP_DEBUG_ASSERT(__kmp_init_serial);
961   __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
962                                     chunk);
963 }
964 
965 /*!
966  See @ref __kmpc_team_static_init_4
967  */
968 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
969                                 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
970                                 kmp_int32 *p_st, kmp_int32 incr,
971                                 kmp_int32 chunk) {
972   KMP_DEBUG_ASSERT(__kmp_init_serial);
973   __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
974                                      chunk);
975 }
976 
977 /*!
978  See @ref __kmpc_team_static_init_4
979  */
980 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
981                                kmp_int64 *p_lb, kmp_int64 *p_ub,
982                                kmp_int64 *p_st, kmp_int64 incr,
983                                kmp_int64 chunk) {
984   KMP_DEBUG_ASSERT(__kmp_init_serial);
985   __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
986                                     chunk);
987 }
988 
989 /*!
990  See @ref __kmpc_team_static_init_4
991  */
992 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
993                                 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
994                                 kmp_int64 *p_st, kmp_int64 incr,
995                                 kmp_int64 chunk) {
996   KMP_DEBUG_ASSERT(__kmp_init_serial);
997   __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
998                                      chunk);
999 }
1000 /*!
1001 @}
1002 */
1003 
1004 } // extern "C"
1005