xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_sched.cpp (revision a2464ee12761660f50d0b6f59f233949ebcacc87)
1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 /* Static scheduling initialization.
14 
15   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16         it may change values between parallel regions.  __kmp_max_nth
17         is the largest value __kmp_nth may take, 1 is the smallest. */
18 
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25 
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29 
#if defined(KMP_DEBUG)
// printf conversion specifiers keyed by integer type ("d", "u", "ld", "lld",
// "llu"). The debug traces in this file splice traits_t<T>::spec into the
// format strings they build with __kmp_str_format().
const char *traits_t<int>::spec = "d";
const char *traits_t<unsigned>::spec = "u";
const char *traits_t<long long>::spec = "lld";
const char *traits_t<unsigned long long>::spec = "llu";
const char *traits_t<long>::spec = "ld";
#endif // KMP_DEBUG
40 
#if KMP_STATS_ENABLED
// Records, under statistic `stat`, the number of iterations described by the
// enclosing function's *plower, *pupper and incr, then pops the innermost
// partitioned timer. Must be expanded where pupper, plower and incr are in
// scope. A unit step divides exactly, so the positive-step and negative-step
// formulas below also cover incr == 1 and incr == -1.
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 stats_hi = (kmp_int64)(*pupper);                                 \
    kmp_int64 stats_lo = (kmp_int64)(*plower);                                 \
    kmp_int64 stats_step = (kmp_int64)incr;                                    \
    kmp_int64 stats_iters;                                                     \
    if (stats_step > 0)                                                        \
      stats_iters = (stats_hi - stats_lo) / stats_step + 1;                    \
    else                                                                       \
      stats_iters = (stats_lo - stats_hi) / (-stats_step) + 1;                 \
    KMP_COUNT_VALUE(stat, stats_iters);                                        \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
// Statistics are compiled out: the macro expands to nothing.
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
63 
64 static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
65 static inline void check_loc(ident_t *&loc) {
66   if (loc == NULL)
67     loc = &loc_stub; // may need to report location info to ittnotify
68 }
69 
70 template <typename T>
71 static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
72                                   kmp_int32 schedtype, kmp_int32 *plastiter,
73                                   T *plower, T *pupper,
74                                   typename traits_t<T>::signed_t *pstride,
75                                   typename traits_t<T>::signed_t incr,
76                                   typename traits_t<T>::signed_t chunk
77 #if OMPT_SUPPORT && OMPT_OPTIONAL
78                                   ,
79                                   void *codeptr
80 #endif
81 ) {
82   KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
83   KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
84   KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);
85 
86   typedef typename traits_t<T>::unsigned_t UT;
87   typedef typename traits_t<T>::signed_t ST;
88   /*  this all has to be changed back to TID and such.. */
89   kmp_int32 gtid = global_tid;
90   kmp_uint32 tid;
91   kmp_uint32 nth;
92   UT trip_count;
93   kmp_team_t *team;
94   __kmp_assert_valid_gtid(gtid);
95   kmp_info_t *th = __kmp_threads[gtid];
96 
97 #if OMPT_SUPPORT && OMPT_OPTIONAL
98   ompt_team_info_t *team_info = NULL;
99   ompt_task_info_t *task_info = NULL;
100   ompt_work_t ompt_work_type = ompt_work_loop;
101 
102   static kmp_int8 warn = 0;
103 
104   if (ompt_enabled.ompt_callback_work) {
105     // Only fully initialize variables needed by OMPT if OMPT is enabled.
106     team_info = __ompt_get_teaminfo(0, NULL);
107     task_info = __ompt_get_task_info_object(0);
108     // Determine workshare type
109     if (loc != NULL) {
110       if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
111         ompt_work_type = ompt_work_loop;
112       } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
113         ompt_work_type = ompt_work_sections;
114       } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
115         ompt_work_type = ompt_work_distribute;
116       } else {
117         kmp_int8 bool_res =
118             KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
119         if (bool_res)
120           KMP_WARNING(OmptOutdatedWorkshare);
121       }
122       KMP_DEBUG_ASSERT(ompt_work_type);
123     }
124   }
125 #endif
126 
127   KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
128   KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
129 #ifdef KMP_DEBUG
130   {
131     char *buff;
132     // create format specifiers before the debug output
133     buff = __kmp_str_format(
134         "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
135         " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
136         traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
137         traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
138     KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
139                    *pstride, incr, chunk));
140     __kmp_str_free(&buff);
141   }
142 #endif
143 
144   if (__kmp_env_consistency_check) {
145     __kmp_push_workshare(global_tid, ct_pdo, loc);
146     if (incr == 0) {
147       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
148                             loc);
149     }
150   }
151   /* special handling for zero-trip loops */
152   if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
153     if (plastiter != NULL)
154       *plastiter = FALSE;
155     /* leave pupper and plower set to entire iteration space */
156     *pstride = incr; /* value should never be used */
157 // *plower = *pupper - incr;
158 // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
159 // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
160 // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
161 #ifdef KMP_DEBUG
162     {
163       char *buff;
164       // create format specifiers before the debug output
165       buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
166                               "lower=%%%s upper=%%%s stride = %%%s "
167                               "signed?<%s>, loc = %%s\n",
168                               traits_t<T>::spec, traits_t<T>::spec,
169                               traits_t<ST>::spec, traits_t<T>::spec);
170       check_loc(loc);
171       KD_TRACE(100,
172                (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
173       __kmp_str_free(&buff);
174     }
175 #endif
176     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
177 
178 #if OMPT_SUPPORT && OMPT_OPTIONAL
179     if (ompt_enabled.ompt_callback_work) {
180       ompt_callbacks.ompt_callback(ompt_callback_work)(
181           ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
182           &(task_info->task_data), 0, codeptr);
183     }
184 #endif
185     KMP_STATS_LOOP_END(OMP_loop_static_iterations);
186     return;
187   }
188 
189   // Although there are schedule enumerations above kmp_ord_upper which are not
190   // schedules for "distribute", the only ones which are useful are dynamic, so
191   // cannot be seen here, since this codepath is only executed for static
192   // schedules.
193   if (schedtype > kmp_ord_upper) {
194     // we are in DISTRIBUTE construct
195     schedtype += kmp_sch_static -
196                  kmp_distribute_static; // AC: convert to usual schedule type
197     tid = th->th.th_team->t.t_master_tid;
198     team = th->th.th_team->t.t_parent;
199   } else {
200     tid = __kmp_tid_from_gtid(global_tid);
201     team = th->th.th_team;
202   }
203 
204   /* determine if "for" loop is an active worksharing construct */
205   if (team->t.t_serialized) {
206     /* serialized parallel, each thread executes whole iteration space */
207     if (plastiter != NULL)
208       *plastiter = TRUE;
209     /* leave pupper and plower set to entire iteration space */
210     *pstride =
211         (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
212 
213 #ifdef KMP_DEBUG
214     {
215       char *buff;
216       // create format specifiers before the debug output
217       buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
218                               "lower=%%%s upper=%%%s stride = %%%s\n",
219                               traits_t<T>::spec, traits_t<T>::spec,
220                               traits_t<ST>::spec);
221       KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
222       __kmp_str_free(&buff);
223     }
224 #endif
225     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
226 
227 #if OMPT_SUPPORT && OMPT_OPTIONAL
228     if (ompt_enabled.ompt_callback_work) {
229       ompt_callbacks.ompt_callback(ompt_callback_work)(
230           ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
231           &(task_info->task_data), *pstride, codeptr);
232     }
233 #endif
234     KMP_STATS_LOOP_END(OMP_loop_static_iterations);
235     return;
236   }
237   nth = team->t.t_nproc;
238   if (nth == 1) {
239     if (plastiter != NULL)
240       *plastiter = TRUE;
241     *pstride =
242         (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
243 #ifdef KMP_DEBUG
244     {
245       char *buff;
246       // create format specifiers before the debug output
247       buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
248                               "lower=%%%s upper=%%%s stride = %%%s\n",
249                               traits_t<T>::spec, traits_t<T>::spec,
250                               traits_t<ST>::spec);
251       KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
252       __kmp_str_free(&buff);
253     }
254 #endif
255     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
256 
257 #if OMPT_SUPPORT && OMPT_OPTIONAL
258     if (ompt_enabled.ompt_callback_work) {
259       ompt_callbacks.ompt_callback(ompt_callback_work)(
260           ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
261           &(task_info->task_data), *pstride, codeptr);
262     }
263 #endif
264     KMP_STATS_LOOP_END(OMP_loop_static_iterations);
265     return;
266   }
267 
268   /* compute trip count */
269   if (incr == 1) {
270     trip_count = *pupper - *plower + 1;
271   } else if (incr == -1) {
272     trip_count = *plower - *pupper + 1;
273   } else if (incr > 0) {
274     // upper-lower can exceed the limit of signed type
275     trip_count = (UT)(*pupper - *plower) / incr + 1;
276   } else {
277     trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
278   }
279 
280 #if KMP_STATS_ENABLED
281   if (KMP_MASTER_GTID(gtid)) {
282     KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
283   }
284 #endif
285 
286   if (__kmp_env_consistency_check) {
287     /* tripcount overflow? */
288     if (trip_count == 0 && *pupper != *plower) {
289       __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
290                             loc);
291     }
292   }
293 
294   /* compute remaining parameters */
295   switch (schedtype) {
296   case kmp_sch_static: {
297     if (trip_count < nth) {
298       KMP_DEBUG_ASSERT(
299           __kmp_static == kmp_sch_static_greedy ||
300           __kmp_static ==
301               kmp_sch_static_balanced); // Unknown static scheduling type.
302       if (tid < trip_count) {
303         *pupper = *plower = *plower + tid * incr;
304       } else {
305         // set bounds so non-active threads execute no iterations
306         *plower = *pupper + (incr > 0 ? 1 : -1);
307       }
308       if (plastiter != NULL)
309         *plastiter = (tid == trip_count - 1);
310     } else {
311       if (__kmp_static == kmp_sch_static_balanced) {
312         UT small_chunk = trip_count / nth;
313         UT extras = trip_count % nth;
314         *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
315         *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
316         if (plastiter != NULL)
317           *plastiter = (tid == nth - 1);
318       } else {
319         T big_chunk_inc_count =
320             (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
321         T old_upper = *pupper;
322 
323         KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
324         // Unknown static scheduling type.
325 
326         *plower += tid * big_chunk_inc_count;
327         *pupper = *plower + big_chunk_inc_count - incr;
328         if (incr > 0) {
329           if (*pupper < *plower)
330             *pupper = traits_t<T>::max_value;
331           if (plastiter != NULL)
332             *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
333           if (*pupper > old_upper)
334             *pupper = old_upper; // tracker C73258
335         } else {
336           if (*pupper > *plower)
337             *pupper = traits_t<T>::min_value;
338           if (plastiter != NULL)
339             *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
340           if (*pupper < old_upper)
341             *pupper = old_upper; // tracker C73258
342         }
343       }
344     }
345     *pstride = trip_count;
346     break;
347   }
348   case kmp_sch_static_chunked: {
349     ST span;
350     UT nchunks;
351     if (chunk < 1)
352       chunk = 1;
353     else if ((UT)chunk > trip_count)
354       chunk = trip_count;
355     nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
356     span = chunk * incr;
357     if (nchunks < nth) {
358       *pstride = span * nchunks;
359       if (tid < nchunks) {
360         *plower = *plower + (span * tid);
361         *pupper = *plower + span - incr;
362       } else {
363         *plower = *pupper + (incr > 0 ? 1 : -1);
364       }
365     } else {
366       *pstride = span * nth;
367       *plower = *plower + (span * tid);
368       *pupper = *plower + span - incr;
369     }
370     if (plastiter != NULL)
371       *plastiter = (tid == (nchunks - 1) % nth);
372     break;
373   }
374   case kmp_sch_static_balanced_chunked: {
375     T old_upper = *pupper;
376     // round up to make sure the chunk is enough to cover all iterations
377     UT span = (trip_count + nth - 1) / nth;
378 
379     // perform chunk adjustment
380     chunk = (span + chunk - 1) & ~(chunk - 1);
381 
382     span = chunk * incr;
383     *plower = *plower + (span * tid);
384     *pupper = *plower + span - incr;
385     if (incr > 0) {
386       if (*pupper > old_upper)
387         *pupper = old_upper;
388     } else if (*pupper < old_upper)
389       *pupper = old_upper;
390 
391     if (plastiter != NULL)
392       *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
393     break;
394   }
395   default:
396     KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
397     break;
398   }
399 
400 #if USE_ITT_BUILD
401   // Report loop metadata
402   if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
403       __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
404       team->t.t_active_level == 1) {
405     kmp_uint64 cur_chunk = chunk;
406     check_loc(loc);
407     // Calculate chunk in case it was not specified; it is specified for
408     // kmp_sch_static_chunked
409     if (schedtype == kmp_sch_static) {
410       cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
411     }
412     // 0 - "static" schedule
413     __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
414   }
415 #endif
416 #ifdef KMP_DEBUG
417   {
418     char *buff;
419     // create format specifiers before the debug output
420     buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
421                             "upper=%%%s stride = %%%s signed?<%s>\n",
422                             traits_t<T>::spec, traits_t<T>::spec,
423                             traits_t<ST>::spec, traits_t<T>::spec);
424     KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
425     __kmp_str_free(&buff);
426   }
427 #endif
428   KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
429 
430 #if OMPT_SUPPORT && OMPT_OPTIONAL
431   if (ompt_enabled.ompt_callback_work) {
432     ompt_callbacks.ompt_callback(ompt_callback_work)(
433         ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
434         &(task_info->task_data), trip_count, codeptr);
435   }
436 #endif
437 
438   KMP_STATS_LOOP_END(OMP_loop_static_iterations);
439   return;
440 }
441 
442 template <typename T>
443 static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
444                                        kmp_int32 schedule, kmp_int32 *plastiter,
445                                        T *plower, T *pupper, T *pupperDist,
446                                        typename traits_t<T>::signed_t *pstride,
447                                        typename traits_t<T>::signed_t incr,
448                                        typename traits_t<T>::signed_t chunk) {
449   KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
450   KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
451   KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
452   typedef typename traits_t<T>::unsigned_t UT;
453   typedef typename traits_t<T>::signed_t ST;
454   kmp_uint32 tid;
455   kmp_uint32 nth;
456   kmp_uint32 team_id;
457   kmp_uint32 nteams;
458   UT trip_count;
459   kmp_team_t *team;
460   kmp_info_t *th;
461 
462   KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
463   KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
464   __kmp_assert_valid_gtid(gtid);
465 #ifdef KMP_DEBUG
466   {
467     char *buff;
468     // create format specifiers before the debug output
469     buff = __kmp_str_format(
470         "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
471         "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
472         traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
473         traits_t<ST>::spec, traits_t<T>::spec);
474     KD_TRACE(100,
475              (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
476     __kmp_str_free(&buff);
477   }
478 #endif
479 
480   if (__kmp_env_consistency_check) {
481     __kmp_push_workshare(gtid, ct_pdo, loc);
482     if (incr == 0) {
483       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
484                             loc);
485     }
486     if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
487       // The loop is illegal.
488       // Some zero-trip loops maintained by compiler, e.g.:
489       //   for(i=10;i<0;++i) // lower >= upper - run-time check
490       //   for(i=0;i>10;--i) // lower <= upper - run-time check
491       //   for(i=0;i>10;++i) // incr > 0       - compile-time check
492       //   for(i=10;i<0;--i) // incr < 0       - compile-time check
493       // Compiler does not check the following illegal loops:
494       //   for(i=0;i<10;i+=incr) // where incr<0
495       //   for(i=10;i>0;i-=incr) // where incr<0
496       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
497     }
498   }
499   tid = __kmp_tid_from_gtid(gtid);
500   th = __kmp_threads[gtid];
501   nth = th->th.th_team_nproc;
502   team = th->th.th_team;
503   KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
504   nteams = th->th.th_teams_size.nteams;
505   team_id = team->t.t_master_tid;
506   KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
507 
508   // compute global trip count
509   if (incr == 1) {
510     trip_count = *pupper - *plower + 1;
511   } else if (incr == -1) {
512     trip_count = *plower - *pupper + 1;
513   } else if (incr > 0) {
514     // upper-lower can exceed the limit of signed type
515     trip_count = (UT)(*pupper - *plower) / incr + 1;
516   } else {
517     trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
518   }
519 
520   *pstride = *pupper - *plower; // just in case (can be unused)
521   if (trip_count <= nteams) {
522     KMP_DEBUG_ASSERT(
523         __kmp_static == kmp_sch_static_greedy ||
524         __kmp_static ==
525             kmp_sch_static_balanced); // Unknown static scheduling type.
526     // only primary threads of some teams get single iteration, other threads
527     // get nothing
528     if (team_id < trip_count && tid == 0) {
529       *pupper = *pupperDist = *plower = *plower + team_id * incr;
530     } else {
531       *pupperDist = *pupper;
532       *plower = *pupper + incr; // compiler should skip loop body
533     }
534     if (plastiter != NULL)
535       *plastiter = (tid == 0 && team_id == trip_count - 1);
536   } else {
537     // Get the team's chunk first (each team gets at most one chunk)
538     if (__kmp_static == kmp_sch_static_balanced) {
539       UT chunkD = trip_count / nteams;
540       UT extras = trip_count % nteams;
541       *plower +=
542           incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
543       *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
544       if (plastiter != NULL)
545         *plastiter = (team_id == nteams - 1);
546     } else {
547       T chunk_inc_count =
548           (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
549       T upper = *pupper;
550       KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
551       // Unknown static scheduling type.
552       *plower += team_id * chunk_inc_count;
553       *pupperDist = *plower + chunk_inc_count - incr;
554       // Check/correct bounds if needed
555       if (incr > 0) {
556         if (*pupperDist < *plower)
557           *pupperDist = traits_t<T>::max_value;
558         if (plastiter != NULL)
559           *plastiter = *plower <= upper && *pupperDist > upper - incr;
560         if (*pupperDist > upper)
561           *pupperDist = upper; // tracker C73258
562         if (*plower > *pupperDist) {
563           *pupper = *pupperDist; // no iterations available for the team
564           goto end;
565         }
566       } else {
567         if (*pupperDist > *plower)
568           *pupperDist = traits_t<T>::min_value;
569         if (plastiter != NULL)
570           *plastiter = *plower >= upper && *pupperDist < upper - incr;
571         if (*pupperDist < upper)
572           *pupperDist = upper; // tracker C73258
573         if (*plower < *pupperDist) {
574           *pupper = *pupperDist; // no iterations available for the team
575           goto end;
576         }
577       }
578     }
579     // Get the parallel loop chunk now (for thread)
580     // compute trip count for team's chunk
581     if (incr == 1) {
582       trip_count = *pupperDist - *plower + 1;
583     } else if (incr == -1) {
584       trip_count = *plower - *pupperDist + 1;
585     } else if (incr > 1) {
586       // upper-lower can exceed the limit of signed type
587       trip_count = (UT)(*pupperDist - *plower) / incr + 1;
588     } else {
589       trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
590     }
591     KMP_DEBUG_ASSERT(trip_count);
592     switch (schedule) {
593     case kmp_sch_static: {
594       if (trip_count <= nth) {
595         KMP_DEBUG_ASSERT(
596             __kmp_static == kmp_sch_static_greedy ||
597             __kmp_static ==
598                 kmp_sch_static_balanced); // Unknown static scheduling type.
599         if (tid < trip_count)
600           *pupper = *plower = *plower + tid * incr;
601         else
602           *plower = *pupper + incr; // no iterations available
603         if (plastiter != NULL)
604           if (*plastiter != 0 && !(tid == trip_count - 1))
605             *plastiter = 0;
606       } else {
607         if (__kmp_static == kmp_sch_static_balanced) {
608           UT chunkL = trip_count / nth;
609           UT extras = trip_count % nth;
610           *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
611           *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
612           if (plastiter != NULL)
613             if (*plastiter != 0 && !(tid == nth - 1))
614               *plastiter = 0;
615         } else {
616           T chunk_inc_count =
617               (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
618           T upper = *pupperDist;
619           KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
620           // Unknown static scheduling type.
621           *plower += tid * chunk_inc_count;
622           *pupper = *plower + chunk_inc_count - incr;
623           if (incr > 0) {
624             if (*pupper < *plower)
625               *pupper = traits_t<T>::max_value;
626             if (plastiter != NULL)
627               if (*plastiter != 0 &&
628                   !(*plower <= upper && *pupper > upper - incr))
629                 *plastiter = 0;
630             if (*pupper > upper)
631               *pupper = upper; // tracker C73258
632           } else {
633             if (*pupper > *plower)
634               *pupper = traits_t<T>::min_value;
635             if (plastiter != NULL)
636               if (*plastiter != 0 &&
637                   !(*plower >= upper && *pupper < upper - incr))
638                 *plastiter = 0;
639             if (*pupper < upper)
640               *pupper = upper; // tracker C73258
641           }
642         }
643       }
644       break;
645     }
646     case kmp_sch_static_chunked: {
647       ST span;
648       if (chunk < 1)
649         chunk = 1;
650       span = chunk * incr;
651       *pstride = span * nth;
652       *plower = *plower + (span * tid);
653       *pupper = *plower + span - incr;
654       if (plastiter != NULL)
655         if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
656           *plastiter = 0;
657       break;
658     }
659     default:
660       KMP_ASSERT2(0,
661                   "__kmpc_dist_for_static_init: unknown loop scheduling type");
662       break;
663     }
664   }
665 end:;
666 #ifdef KMP_DEBUG
667   {
668     char *buff;
669     // create format specifiers before the debug output
670     buff = __kmp_str_format(
671         "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
672         "stride=%%%s signed?<%s>\n",
673         traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
674         traits_t<ST>::spec, traits_t<T>::spec);
675     KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
676     __kmp_str_free(&buff);
677   }
678 #endif
679   KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
680   KMP_STATS_LOOP_END(OMP_distribute_iterations);
681   return;
682 }
683 
684 template <typename T>
685 static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
686                                    kmp_int32 *p_last, T *p_lb, T *p_ub,
687                                    typename traits_t<T>::signed_t *p_st,
688                                    typename traits_t<T>::signed_t incr,
689                                    typename traits_t<T>::signed_t chunk) {
690   // The routine returns the first chunk distributed to the team and
691   // stride for next chunks calculation.
692   // Last iteration flag set for the team that will execute
693   // the last iteration of the loop.
694   // The routine is called for dist_schedule(static,chunk) only.
695   typedef typename traits_t<T>::unsigned_t UT;
696   typedef typename traits_t<T>::signed_t ST;
697   kmp_uint32 team_id;
698   kmp_uint32 nteams;
699   UT trip_count;
700   T lower;
701   T upper;
702   ST span;
703   kmp_team_t *team;
704   kmp_info_t *th;
705 
706   KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
707   KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
708   __kmp_assert_valid_gtid(gtid);
709 #ifdef KMP_DEBUG
710   {
711     char *buff;
712     // create format specifiers before the debug output
713     buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
714                             "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
715                             traits_t<T>::spec, traits_t<T>::spec,
716                             traits_t<ST>::spec, traits_t<ST>::spec,
717                             traits_t<T>::spec);
718     KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
719     __kmp_str_free(&buff);
720   }
721 #endif
722 
723   lower = *p_lb;
724   upper = *p_ub;
725   if (__kmp_env_consistency_check) {
726     if (incr == 0) {
727       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
728                             loc);
729     }
730     if (incr > 0 ? (upper < lower) : (lower < upper)) {
731       // The loop is illegal.
732       // Some zero-trip loops maintained by compiler, e.g.:
733       //   for(i=10;i<0;++i) // lower >= upper - run-time check
734       //   for(i=0;i>10;--i) // lower <= upper - run-time check
735       //   for(i=0;i>10;++i) // incr > 0       - compile-time check
736       //   for(i=10;i<0;--i) // incr < 0       - compile-time check
737       // Compiler does not check the following illegal loops:
738       //   for(i=0;i<10;i+=incr) // where incr<0
739       //   for(i=10;i>0;i-=incr) // where incr<0
740       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
741     }
742   }
743   th = __kmp_threads[gtid];
744   team = th->th.th_team;
745   KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
746   nteams = th->th.th_teams_size.nteams;
747   team_id = team->t.t_master_tid;
748   KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
749 
750   // compute trip count
751   if (incr == 1) {
752     trip_count = upper - lower + 1;
753   } else if (incr == -1) {
754     trip_count = lower - upper + 1;
755   } else if (incr > 0) {
756     // upper-lower can exceed the limit of signed type
757     trip_count = (UT)(upper - lower) / incr + 1;
758   } else {
759     trip_count = (UT)(lower - upper) / (-incr) + 1;
760   }
761   if (chunk < 1)
762     chunk = 1;
763   span = chunk * incr;
764   *p_st = span * nteams;
765   *p_lb = lower + (span * team_id);
766   *p_ub = *p_lb + span - incr;
767   if (p_last != NULL)
768     *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
769   // Correct upper bound if needed
770   if (incr > 0) {
771     if (*p_ub < *p_lb) // overflow?
772       *p_ub = traits_t<T>::max_value;
773     if (*p_ub > upper)
774       *p_ub = upper; // tracker C73258
775   } else { // incr < 0
776     if (*p_ub > *p_lb)
777       *p_ub = traits_t<T>::min_value;
778     if (*p_ub < upper)
779       *p_ub = upper; // tracker C73258
780   }
781 #ifdef KMP_DEBUG
782   {
783     char *buff;
784     // create format specifiers before the debug output
785     buff =
786         __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
787                          "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
788                          traits_t<T>::spec, traits_t<T>::spec,
789                          traits_t<ST>::spec, traits_t<ST>::spec);
790     KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
791     __kmp_str_free(&buff);
792   }
793 #endif
794 }
795 
796 //------------------------------------------------------------------------------
797 extern "C" {
798 /*!
799 @ingroup WORK_SHARING
800 @param    loc       Source code location
801 @param    gtid      Global thread id of this thread
802 @param    schedtype  Scheduling type
803 @param    plastiter Pointer to the "last iteration" flag
804 @param    plower    Pointer to the lower bound
805 @param    pupper    Pointer to the upper bound
806 @param    pstride   Pointer to the stride
807 @param    incr      Loop increment
808 @param    chunk     The chunk size
809 
810 The four functions here are identical apart from their argument types.
811 
812 The functions compute the upper and lower bounds and stride to be used for the
813 set of iterations to be executed by the current thread from the statically
814 scheduled loop that is described by the initial values of the bounds, stride,
815 increment and chunk size.
816 
817 @{
818 */
819 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
820                               kmp_int32 *plastiter, kmp_int32 *plower,
821                               kmp_int32 *pupper, kmp_int32 *pstride,
822                               kmp_int32 incr, kmp_int32 chunk) {
823   __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
824                                    pupper, pstride, incr, chunk
825 #if OMPT_SUPPORT && OMPT_OPTIONAL
826                                    ,
827                                    OMPT_GET_RETURN_ADDRESS(0)
828 #endif
829   );
830 }
831 
832 /*!
833  See @ref __kmpc_for_static_init_4
834  */
835 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
836                                kmp_int32 schedtype, kmp_int32 *plastiter,
837                                kmp_uint32 *plower, kmp_uint32 *pupper,
838                                kmp_int32 *pstride, kmp_int32 incr,
839                                kmp_int32 chunk) {
840   __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
841                                     pupper, pstride, incr, chunk
842 #if OMPT_SUPPORT && OMPT_OPTIONAL
843                                     ,
844                                     OMPT_GET_RETURN_ADDRESS(0)
845 #endif
846   );
847 }
848 
849 /*!
850  See @ref __kmpc_for_static_init_4
851  */
852 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
853                               kmp_int32 *plastiter, kmp_int64 *plower,
854                               kmp_int64 *pupper, kmp_int64 *pstride,
855                               kmp_int64 incr, kmp_int64 chunk) {
856   __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
857                                    pupper, pstride, incr, chunk
858 #if OMPT_SUPPORT && OMPT_OPTIONAL
859                                    ,
860                                    OMPT_GET_RETURN_ADDRESS(0)
861 #endif
862   );
863 }
864 
865 /*!
866  See @ref __kmpc_for_static_init_4
867  */
868 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
869                                kmp_int32 schedtype, kmp_int32 *plastiter,
870                                kmp_uint64 *plower, kmp_uint64 *pupper,
871                                kmp_int64 *pstride, kmp_int64 incr,
872                                kmp_int64 chunk) {
873   __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
874                                     pupper, pstride, incr, chunk
875 #if OMPT_SUPPORT && OMPT_OPTIONAL
876                                     ,
877                                     OMPT_GET_RETURN_ADDRESS(0)
878 #endif
879   );
880 }
881 /*!
882 @}
883 */
884 
885 /*!
886 @ingroup WORK_SHARING
887 @param    loc       Source code location
888 @param    gtid      Global thread id of this thread
889 @param    schedule  Scheduling type for the parallel loop
890 @param    plastiter Pointer to the "last iteration" flag
891 @param    plower    Pointer to the lower bound
892 @param    pupper    Pointer to the upper bound of loop chunk
893 @param    pupperD   Pointer to the upper bound of dist_chunk
894 @param    pstride   Pointer to the stride for parallel loop
895 @param    incr      Loop increment
896 @param    chunk     The chunk size for the parallel loop
897 
898 The four functions here are identical apart from their argument types.
899 
900 The functions compute the upper and lower bounds and strides to be used for the
901 set of iterations to be executed by the current thread from the statically
902 scheduled loop that is described by the initial values of the bounds, strides,
903 increment and chunks for parallel loop and distribute constructs.
904 
905 @{
906 */
907 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
908                                    kmp_int32 schedule, kmp_int32 *plastiter,
909                                    kmp_int32 *plower, kmp_int32 *pupper,
910                                    kmp_int32 *pupperD, kmp_int32 *pstride,
911                                    kmp_int32 incr, kmp_int32 chunk) {
912   __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
913                                         pupper, pupperD, pstride, incr, chunk);
914 }
915 
916 /*!
917  See @ref __kmpc_dist_for_static_init_4
918  */
919 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
920                                     kmp_int32 schedule, kmp_int32 *plastiter,
921                                     kmp_uint32 *plower, kmp_uint32 *pupper,
922                                     kmp_uint32 *pupperD, kmp_int32 *pstride,
923                                     kmp_int32 incr, kmp_int32 chunk) {
924   __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
925                                          pupper, pupperD, pstride, incr, chunk);
926 }
927 
928 /*!
929  See @ref __kmpc_dist_for_static_init_4
930  */
931 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
932                                    kmp_int32 schedule, kmp_int32 *plastiter,
933                                    kmp_int64 *plower, kmp_int64 *pupper,
934                                    kmp_int64 *pupperD, kmp_int64 *pstride,
935                                    kmp_int64 incr, kmp_int64 chunk) {
936   __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
937                                         pupper, pupperD, pstride, incr, chunk);
938 }
939 
940 /*!
941  See @ref __kmpc_dist_for_static_init_4
942  */
943 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
944                                     kmp_int32 schedule, kmp_int32 *plastiter,
945                                     kmp_uint64 *plower, kmp_uint64 *pupper,
946                                     kmp_uint64 *pupperD, kmp_int64 *pstride,
947                                     kmp_int64 incr, kmp_int64 chunk) {
948   __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
949                                          pupper, pupperD, pstride, incr, chunk);
950 }
951 /*!
952 @}
953 */
954 
955 //------------------------------------------------------------------------------
956 // Auxiliary routines for Distribute Parallel Loop construct implementation
957 //    Transfer call to template< type T >
958 //    __kmp_team_static_init( ident_t *loc, int gtid,
959 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
960 
961 /*!
962 @ingroup WORK_SHARING
963 @{
964 @param loc Source location
965 @param gtid Global thread id
966 @param p_last pointer to last iteration flag
967 @param p_lb  pointer to Lower bound
968 @param p_ub  pointer to Upper bound
969 @param p_st  pointer to the stride (receives the step between this team's successive chunks)
970 @param incr  Loop increment
971 @param chunk The chunk size to block with
972 
973 The functions compute the upper and lower bounds and stride to be used for the
974 set of iterations to be executed by the current team from the statically
975 scheduled loop that is described by the initial values of the bounds, stride,
976 increment and chunk for the distribute construct as part of composite distribute
977 parallel loop construct. These functions are all identical apart from the types
978 of the arguments.
979 */
980 
981 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
982                                kmp_int32 *p_lb, kmp_int32 *p_ub,
983                                kmp_int32 *p_st, kmp_int32 incr,
984                                kmp_int32 chunk) {
985   KMP_DEBUG_ASSERT(__kmp_init_serial);
986   __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
987                                     chunk);
988 }
989 
990 /*!
991  See @ref __kmpc_team_static_init_4
992  */
993 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
994                                 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
995                                 kmp_int32 *p_st, kmp_int32 incr,
996                                 kmp_int32 chunk) {
997   KMP_DEBUG_ASSERT(__kmp_init_serial);
998   __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
999                                      chunk);
1000 }
1001 
1002 /*!
1003  See @ref __kmpc_team_static_init_4
1004  */
1005 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1006                                kmp_int64 *p_lb, kmp_int64 *p_ub,
1007                                kmp_int64 *p_st, kmp_int64 incr,
1008                                kmp_int64 chunk) {
1009   KMP_DEBUG_ASSERT(__kmp_init_serial);
1010   __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1011                                     chunk);
1012 }
1013 
1014 /*!
1015  See @ref __kmpc_team_static_init_4
1016  */
1017 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1018                                 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
1019                                 kmp_int64 *p_st, kmp_int64 incr,
1020                                 kmp_int64 chunk) {
1021   KMP_DEBUG_ASSERT(__kmp_init_serial);
1022   __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1023                                      chunk);
1024 }
1025 /*!
1026 @}
1027 */
1028 
1029 } // extern "C"
1030