xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_itt.inl (revision e2eeea75eb8b6dd50c1298067a0655880d186734)
1#if USE_ITT_BUILD
2/*
3 * kmp_itt.inl -- Inline functions of ITT Notify.
4 */
5
6//===----------------------------------------------------------------------===//
7//
8// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9// See https://llvm.org/LICENSE.txt for license information.
10// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11//
12//===----------------------------------------------------------------------===//
13
14// Inline function definitions. This file should be included into kmp_itt.h file
15// for production build (to let compiler inline functions) or into kmp_itt.c
16// file for debug build (to reduce the number of files to recompile and save
17// build time).
18
19#include "kmp.h"
20#include "kmp_str.h"
21
#if KMP_ITT_DEBUG
// Debug tracing helpers. KMP_ITT_DEBUG_LOCK() acquires __kmp_itt_debug_lock
// and KMP_ITT_DEBUG_PRINT() releases it after printing, so the two macros must
// always be used as a pair (lock, traced call, print).
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
// Both macros are wrapped in do { ... } while (0) so that each use followed by
// a semicolon is exactly one statement, even in an unbraced if/else body.
#define KMP_ITT_DEBUG_LOCK()                                                   \
  do {                                                                         \
    __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  } while (0)
// Prints "#<gtid>: " followed by the printf-style message to stderr, flushes,
// and releases the lock taken by KMP_ITT_DEBUG_LOCK().
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  do {                                                                         \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  } while (0)
#else
// Tracing disabled: both macros expand to nothing.
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG
37
38// Ensure that the functions are static if they're supposed to be being inlined.
39// Otherwise they cannot be used in more than one file, since there will be
40// multiple definitions.
// Release builds include this file from a header, so every function needs
// internal linkage; debug builds compile it once and use the default linkage.
#if !KMP_DEBUG
#define LINKAGE static inline
#else
#define LINKAGE
#endif // !KMP_DEBUG
46
47// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
48// this API to support user-defined synchronization primitives, but does not use
49// ZCA; it would be safe to turn this off until wider support becomes available.
// When ZCA is enabled and the compiler reports __INTEL_COMPILER >= 1200,
// replace the acquired/releasing notifications with the compiler's notify
// intrinsics.
#if USE_ITT_ZCA && defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1200)
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif // USE_ITT_ZCA && __INTEL_COMPILER >= 1200
62
63static kmp_bootstrap_lock_t metadata_lock =
64    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
65
66/* Parallel region reporting.
67 * __kmp_itt_region_forking should be called by master thread of a team.
68   Exact moment of call does not matter, but it should be completed before any
69   thread of this team calls __kmp_itt_region_starting.
70 * __kmp_itt_region_starting should be called by each thread of a team just
71   before entering parallel region body.
72 * __kmp_itt_region_finished should be called by each thread of a team right
73   after returning from parallel region body.
74 * __kmp_itt_region_joined should be called by master thread of a team, after
75   all threads called __kmp_itt_region_finished.
76
77 Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
78 execute some more user code -- such a thread can execute tasks.
79
80 Note: The overhead of logging region_starting and region_finished in each
81 thread is too large, so these calls are not used. */
82
83LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
84#if USE_ITT_NOTIFY
85  kmp_team_t *team = __kmp_team_from_gtid(gtid);
86  if (team->t.t_active_level > 1) {
87    // The frame notifications are only supported for the outermost teams.
88    return;
89  }
90  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
91  if (loc) {
92    // Use the reserved_2 field to store the index to the region domain.
93    // Assume that reserved_2 contains zero initially.  Since zero is special
94    // value here, store the index into domain array increased by 1.
95    if (loc->reserved_2 == 0) {
96      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
97        int frm =
98            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
99        if (frm >= KMP_MAX_FRAME_DOMAINS) {
100          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
101          return; // loc->reserved_2 is still 0
102        }
103        // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
104        //    frm = loc->reserved_2 - 1;   // get value saved by other thread
105        //    for same loc
106        //} // AC: this block is to replace next unsynchronized line
107
108        // We need to save indexes for both region and barrier frames. We'll use
109        // loc->reserved_2 field but put region index to the low two bytes and
110        // barrier indexes to the high two bytes. It is OK because
111        // KMP_MAX_FRAME_DOMAINS = 512.
112        loc->reserved_2 |= (frm + 1); // save "new" value
113
114        // Transform compiler-generated region location into the format
115        // that the tools more or less standardized on:
116        //   "<func>$omp$parallel@[file:]<line>[:<col>]"
117        char *buff = NULL;
118        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
119        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
120                                team_size, str_loc.file, str_loc.line,
121                                str_loc.col);
122
123        __itt_suppress_push(__itt_suppress_memory_errors);
124        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
125        __itt_suppress_pop();
126
127        __kmp_str_free(&buff);
128        if (barriers) {
129          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
130            int frm = KMP_TEST_THEN_INC32(
131                &__kmp_barrier_domain_count); // get "old" value
132            if (frm >= KMP_MAX_FRAME_DOMAINS) {
133              KMP_TEST_THEN_DEC32(
134                  &__kmp_barrier_domain_count); // revert the count
135              return; // loc->reserved_2 is still 0
136            }
137            char *buff = NULL;
138            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
139                                    str_loc.file, str_loc.col);
140            __itt_suppress_push(__itt_suppress_memory_errors);
141            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
142            __itt_suppress_pop();
143            __kmp_str_free(&buff);
144            // Save the barrier frame index to the high two bytes.
145            loc->reserved_2 |= (frm + 1) << 16;
146          }
147        }
148        __kmp_str_loc_free(&str_loc);
149        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
150      }
151    } else { // Region domain exists for this location
152      // Check if team size was changed. Then create new region domain for this
153      // location
154      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
155      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
156          (__kmp_itt_region_team_size[frm] != team_size)) {
157        char *buff = NULL;
158        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
159        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
160                                team_size, str_loc.file, str_loc.line,
161                                str_loc.col);
162
163        __itt_suppress_push(__itt_suppress_memory_errors);
164        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
165        __itt_suppress_pop();
166
167        __kmp_str_free(&buff);
168        __kmp_str_loc_free(&str_loc);
169        __kmp_itt_region_team_size[frm] = team_size;
170        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
171      } else { // Team size was not changed. Use existing domain.
172        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
173      }
174    }
175    KMP_ITT_DEBUG_LOCK();
176    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
177                        loc->reserved_2, loc);
178  }
179#endif
180} // __kmp_itt_region_forking
181
182// -----------------------------------------------------------------------------
183LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
184                                    __itt_timestamp end, int imbalance,
185                                    ident_t *loc, int team_size, int region) {
186#if USE_ITT_NOTIFY
187  if (region) {
188    kmp_team_t *team = __kmp_team_from_gtid(gtid);
189    int serialized = (region == 2 ? 1 : 0);
190    if (team->t.t_active_level + serialized > 1) {
191      // The frame notifications are only supported for the outermost teams.
192      return;
193    }
194    // Check region domain has not been created before. It's index is saved in
195    // the low two bytes.
196    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
197      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
198        int frm =
199            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
200        if (frm >= KMP_MAX_FRAME_DOMAINS) {
201          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
202          return; // loc->reserved_2 is still 0
203        }
204
205        // We need to save indexes for both region and barrier frames. We'll use
206        // loc->reserved_2 field but put region index to the low two bytes and
207        // barrier indexes to the high two bytes. It is OK because
208        // KMP_MAX_FRAME_DOMAINS = 512.
209        loc->reserved_2 |= (frm + 1); // save "new" value
210
211        // Transform compiler-generated region location into the format
212        // that the tools more or less standardized on:
213        //   "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
214        char *buff = NULL;
215        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
216        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
217                                team_size, str_loc.file, str_loc.line,
218                                str_loc.col);
219
220        __itt_suppress_push(__itt_suppress_memory_errors);
221        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
222        __itt_suppress_pop();
223
224        __kmp_str_free(&buff);
225        __kmp_str_loc_free(&str_loc);
226        __kmp_itt_region_team_size[frm] = team_size;
227        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
228      }
229    } else { // Region domain exists for this location
230      // Check if team size was changed. Then create new region domain for this
231      // location
232      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
233      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
234          (__kmp_itt_region_team_size[frm] != team_size)) {
235        char *buff = NULL;
236        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
237        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
238                                team_size, str_loc.file, str_loc.line,
239                                str_loc.col);
240
241        __itt_suppress_push(__itt_suppress_memory_errors);
242        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
243        __itt_suppress_pop();
244
245        __kmp_str_free(&buff);
246        __kmp_str_loc_free(&str_loc);
247        __kmp_itt_region_team_size[frm] = team_size;
248        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
249      } else { // Team size was not changed. Use existing domain.
250        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
251      }
252    }
253    KMP_ITT_DEBUG_LOCK();
254    KMP_ITT_DEBUG_PRINT(
255        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
256        gtid, loc->reserved_2, region, loc, begin, end);
257    return;
258  } else { // called for barrier reporting
259    if (loc) {
260      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
261        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
262          int frm = KMP_TEST_THEN_INC32(
263              &__kmp_barrier_domain_count); // get "old" value
264          if (frm >= KMP_MAX_FRAME_DOMAINS) {
265            KMP_TEST_THEN_DEC32(
266                &__kmp_barrier_domain_count); // revert the count
267            return; // loc->reserved_2 is still 0
268          }
269          // Save the barrier frame index to the high two bytes.
270          loc->reserved_2 |= (frm + 1) << 16; // save "new" value
271
272          // Transform compiler-generated region location into the format
273          // that the tools more or less standardized on:
274          //   "<func>$omp$frame@[file:]<line>[:<col>]"
275          kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
276          if (imbalance) {
277            char *buff_imb = NULL;
278            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
279                                        str_loc.func, team_size, str_loc.file,
280                                        str_loc.col);
281            __itt_suppress_push(__itt_suppress_memory_errors);
282            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
283            __itt_suppress_pop();
284            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
285                                  end);
286            __kmp_str_free(&buff_imb);
287          } else {
288            char *buff = NULL;
289            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
290                                    str_loc.file, str_loc.col);
291            __itt_suppress_push(__itt_suppress_memory_errors);
292            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
293            __itt_suppress_pop();
294            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
295                                  end);
296            __kmp_str_free(&buff);
297          }
298          __kmp_str_loc_free(&str_loc);
299        }
300      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
301        if (imbalance) {
302          __itt_frame_submit_v3(
303              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
304              begin, end);
305        } else {
306          __itt_frame_submit_v3(
307              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
308              begin, end);
309        }
310      }
311      KMP_ITT_DEBUG_LOCK();
312      KMP_ITT_DEBUG_PRINT(
313          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
314          loc->reserved_2, loc, begin, end);
315    }
316  }
317#endif
318} // __kmp_itt_frame_submit
319
320// -----------------------------------------------------------------------------
321LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
322                                          kmp_uint64 end, kmp_uint64 imbalance,
323                                          kmp_uint64 reduction) {
324#if USE_ITT_NOTIFY
325  if (metadata_domain == NULL) {
326    __kmp_acquire_bootstrap_lock(&metadata_lock);
327    if (metadata_domain == NULL) {
328      __itt_suppress_push(__itt_suppress_memory_errors);
329      metadata_domain = __itt_domain_create("OMP Metadata");
330      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
331      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
332      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
333      __itt_suppress_pop();
334    }
335    __kmp_release_bootstrap_lock(&metadata_lock);
336  }
337
338  kmp_uint64 imbalance_data[4];
339  imbalance_data[0] = begin;
340  imbalance_data[1] = end;
341  imbalance_data[2] = imbalance;
342  imbalance_data[3] = reduction;
343
344  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
345                     __itt_metadata_u64, 4, imbalance_data);
346#endif
347} // __kmp_itt_metadata_imbalance
348
349// -----------------------------------------------------------------------------
350LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
351                                     kmp_uint64 iterations, kmp_uint64 chunk) {
352#if USE_ITT_NOTIFY
353  if (metadata_domain == NULL) {
354    __kmp_acquire_bootstrap_lock(&metadata_lock);
355    if (metadata_domain == NULL) {
356      __itt_suppress_push(__itt_suppress_memory_errors);
357      metadata_domain = __itt_domain_create("OMP Metadata");
358      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
359      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
360      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
361      __itt_suppress_pop();
362    }
363    __kmp_release_bootstrap_lock(&metadata_lock);
364  }
365
366  // Parse line and column from psource string: ";file;func;line;col;;"
367  char *s_line;
368  char *s_col;
369  KMP_DEBUG_ASSERT(loc->psource);
370#ifdef __cplusplus
371  s_line = strchr(CCAST(char *, loc->psource), ';');
372#else
373  s_line = strchr(loc->psource, ';');
374#endif
375  KMP_DEBUG_ASSERT(s_line);
376  s_line = strchr(s_line + 1, ';'); // 2-nd semicolon
377  KMP_DEBUG_ASSERT(s_line);
378  s_line = strchr(s_line + 1, ';'); // 3-rd semicolon
379  KMP_DEBUG_ASSERT(s_line);
380  s_col = strchr(s_line + 1, ';'); // 4-th semicolon
381  KMP_DEBUG_ASSERT(s_col);
382
383  kmp_uint64 loop_data[5];
384  loop_data[0] = atoi(s_line + 1); // read line
385  loop_data[1] = atoi(s_col + 1); // read column
386  loop_data[2] = sched_type;
387  loop_data[3] = iterations;
388  loop_data[4] = chunk;
389
390  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
391                     __itt_metadata_u64, 5, loop_data);
392#endif
393} // __kmp_itt_metadata_loop
394
395// -----------------------------------------------------------------------------
396LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
397#if USE_ITT_NOTIFY
398  if (metadata_domain == NULL) {
399    __kmp_acquire_bootstrap_lock(&metadata_lock);
400    if (metadata_domain == NULL) {
401      __itt_suppress_push(__itt_suppress_memory_errors);
402      metadata_domain = __itt_domain_create("OMP Metadata");
403      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
404      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
405      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
406      __itt_suppress_pop();
407    }
408    __kmp_release_bootstrap_lock(&metadata_lock);
409  }
410
411  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
412  kmp_uint64 single_data[2];
413  single_data[0] = str_loc.line;
414  single_data[1] = str_loc.col;
415
416  __kmp_str_loc_free(&str_loc);
417
418  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
419                     __itt_metadata_u64, 2, single_data);
420#endif
421} // __kmp_itt_metadata_single
422
423// -----------------------------------------------------------------------------
424LINKAGE void __kmp_itt_region_starting(int gtid) {
425#if USE_ITT_NOTIFY
426#endif
427} // __kmp_itt_region_starting
428
429// -----------------------------------------------------------------------------
430LINKAGE void __kmp_itt_region_finished(int gtid) {
431#if USE_ITT_NOTIFY
432#endif
433} // __kmp_itt_region_finished
434
435// ----------------------------------------------------------------------------
436LINKAGE void __kmp_itt_region_joined(int gtid) {
437#if USE_ITT_NOTIFY
438  kmp_team_t *team = __kmp_team_from_gtid(gtid);
439  if (team->t.t_active_level > 1) {
440    // The frame notifications are only supported for the outermost teams.
441    return;
442  }
443  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
444  if (loc && loc->reserved_2) {
445    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
446    if (frm < KMP_MAX_FRAME_DOMAINS) {
447      KMP_ITT_DEBUG_LOCK();
448      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
449      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
450                          loc->reserved_2, loc);
451    }
452  }
453#endif
454} // __kmp_itt_region_joined
455
456/* Barriers reporting.
457
458   A barrier consists of two phases:
459   1. Gather -- master waits for arriving of all the worker threads; each
460      worker thread registers arrival and goes further.
461   2. Release -- each worker threads waits until master lets it go; master lets
462      worker threads go.
463
464   Function should be called by each thread:
465   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
466   * __kmp_itt_barrier_middle()   -- between gather and release phases.
467   * __kmp_itt_barrier_finished() -- after release phase.
468
469   Note: Call __kmp_itt_barrier_object() before call to
470   __kmp_itt_barrier_starting() and save result in local variable.
471   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
472   would return itt sync object for the next barrier!
473
474   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
475   does not have barrier object or barrier data structure. Barrier is just a
476   counter in team and thread structures. We could use an address of team
477   structure as a barrier sync object, but ITT wants different objects for
478   different barriers (even within the same team). So let us use team address
479   as barrier sync object for the first barrier, then increase it by one for the
480   next barrier, and so on (but wrap it not to use addresses outside of team
481   structure). */
482
483void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
484                               int delta // 0 (current barrier) is default
485                               // value; specify -1 to get previous
486                               // barrier.
487                               ) {
488  void *object = NULL;
489#if USE_ITT_NOTIFY
490  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
491  kmp_team_t *team = thr->th.th_team;
492
493  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
494  // be NULL. This "if" helps to avoid crash. However, this is not complete
495  // solution, and reporting fork/join barriers to ITT should be revisited.
496
497  if (team != NULL) {
498    // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
499    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
500    kmp_uint64 counter =
501        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
502    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
503    // barriers of different types do not have the same ids.
504    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
505    // This condition is a must (we would have zero divide otherwise).
506    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
507    // More strong condition: make sure we have room at least for for two
508    // different ids (for each barrier type).
509    object = reinterpret_cast<void *>(
510        kmp_uintptr_t(team) +
511        counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier +
512        bt);
513    KMP_ITT_DEBUG_LOCK();
514    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
515                        counter, object);
516
517    if (set_name) {
518      ident_t const *loc = NULL;
519      char const *src = NULL;
520      char const *type = "OMP Barrier";
521      switch (bt) {
522      case bs_plain_barrier: {
523        // For plain barrier compiler calls __kmpc_barrier() function, which
524        // saves location in thr->th.th_ident.
525        loc = thr->th.th_ident;
526        // Get the barrier type from flags provided by compiler.
527        kmp_int32 expl = 0;
528        kmp_uint32 impl = 0;
529        if (loc != NULL) {
530          src = loc->psource;
531          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
532          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
533        }
534        if (impl) {
535          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
536          case KMP_IDENT_BARRIER_IMPL_FOR: {
537            type = "OMP For Barrier";
538          } break;
539          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
540            type = "OMP Sections Barrier";
541          } break;
542          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
543            type = "OMP Single Barrier";
544          } break;
545          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
546            type = "OMP Workshare Barrier";
547          } break;
548          default: {
549            type = "OMP Implicit Barrier";
550            KMP_DEBUG_ASSERT(0);
551          }
552          }
553        } else if (expl) {
554          type = "OMP Explicit Barrier";
555        }
556      } break;
557      case bs_forkjoin_barrier: {
558        // In case of fork/join barrier we can read thr->th.th_ident, because it
559        // contains location of last passed construct (while join barrier is not
560        // such one). Use th_ident of master thread instead -- __kmp_join_call()
561        // called by the master thread saves location.
562        //
563        // AC: cannot read from master because __kmp_join_call may be not called
564        //    yet, so we read the location from team. This is the same location.
565        //    And team is valid at the enter to join barrier where this happens.
566        loc = team->t.t_ident;
567        if (loc != NULL) {
568          src = loc->psource;
569        }
570        type = "OMP Join Barrier";
571      } break;
572      }
573      KMP_ITT_DEBUG_LOCK();
574      __itt_sync_create(object, type, src, __itt_attr_barrier);
575      KMP_ITT_DEBUG_PRINT(
576          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
577          type, src);
578    }
579  }
580#endif
581  return object;
582} // __kmp_itt_barrier_object
583
584// -----------------------------------------------------------------------------
// Reports a thread arriving at the gather phase of a barrier: non-master
// threads first signal "releasing", then every thread signals "prepare".
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  const bool is_master = KMP_MASTER_GTID(gtid);
  if (!is_master) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting
597
598// -----------------------------------------------------------------------------
// Reports the point between the gather and release phases of a barrier. Only
// the master thread reports: "acquired" followed by "releasing".
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    return; // nothing to report for non-master threads
  }

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_releasing(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
#endif
} // __kmp_itt_barrier_middle
612
613// -----------------------------------------------------------------------------
// Reports the end of the release phase of a barrier. Only non-master threads
// report: they signal "acquired".
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
    return; // nothing to report for the master thread
  }

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
#endif
} // __kmp_itt_barrier_finished
624
625/* Taskwait reporting.
626   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
627   does not have taskwait structure, so we need to construct something. */
628
629void *__kmp_itt_taskwait_object(int gtid) {
630  void *object = NULL;
631#if USE_ITT_NOTIFY
632  if (__itt_sync_create_ptr) {
633    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
634    kmp_taskdata_t *taskdata = thread->th.th_current_task;
635    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
636                                      taskdata->td_taskwait_counter %
637                                          sizeof(kmp_taskdata_t));
638  }
639#endif
640  return object;
641} // __kmp_itt_taskwait_object
642
// Reports the start of a taskwait: registers `object` as an "OMP Taskwait"
// sync object, using the source string of the current task's taskwait
// location when one is recorded, and then signals "prepare" on it.
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_taskdata_t *td = __kmp_thread_from_gtid(gtid)->th.th_current_task;
  ident_t const *where = td->td_taskwait_ident;
  char const *src = NULL;
  if (where != NULL) {
    src = where->psource;
  }

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting
658
// Reports the end of a taskwait: signals "acquired" on the sync object and
// then destroys it. `gtid` is not used.
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  // The wait is over.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);

  // The sync object is not needed any more.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished
669
670/* Task reporting.
671   Only those tasks are reported which are executed by a thread spinning at
672   barrier (or taskwait). Synch object passed to the function must be the
673   barrier or taskwait the thread is waiting at. */
674
// Reports that a waiting thread starts executing a task: signals "cancel" on
// the ITT sync object (barrier or taskwait). A NULL object is ignored.
void __kmp_itt_task_starting(void *object) {
#if USE_ITT_NOTIFY
  if (object == NULL) {
    return;
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_cancel(object);
  KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
#endif
} // __kmp_itt_task_starting
686
687// -----------------------------------------------------------------------------
// Reports that a waiting thread has finished executing a task: signals
// "prepare" on the ITT sync object (barrier or taskwait) again.
void __kmp_itt_task_finished(void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished
697
698/* Lock reporting.
699 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
700   operation (set/unset). It is not a real event shown to the user but just
701   setting a name for synchronization object. `lock' is an address of sync
702   object, the same address should be used in all subsequent calls.
703 * __kmp_itt_lock_acquiring() should be called before setting the lock.
704 * __kmp_itt_lock_acquired() should be called after setting the lock.
705 * __kmp_itt_lock_releasing() should be called before unsetting the lock.
706 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
707   for the lock.
708 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
709   operation. After __kmp_itt_lock_destroyed() all the references to the same
710   address will be considered as another sync object, not related with the
711   original one.  */
712
713#if KMP_USE_DYNAMIC_LOCK
714// Takes location information directly
715__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
716                                       const ident_t *loc) {
717#if USE_ITT_NOTIFY
718  if (__itt_sync_create_ptr) {
719    char const *src = (loc == NULL ? NULL : loc->psource);
720    KMP_ITT_DEBUG_LOCK();
721    __itt_sync_create(lock, type, src, 0);
722    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
723                        src);
724  }
725#endif
726}
727#else // KMP_USE_DYNAMIC_LOCK
728// Internal guts -- common code for locks and critical sections, do not call
729// directly.
730__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
731#if USE_ITT_NOTIFY
732  if (__itt_sync_create_ptr) {
733    ident_t const *loc = NULL;
734    if (__kmp_get_user_lock_location_ != NULL)
735      loc = __kmp_get_user_lock_location_((lock));
736    char const *src = (loc == NULL ? NULL : loc->psource);
737    KMP_ITT_DEBUG_LOCK();
738    __itt_sync_create(lock, type, src, 0);
739    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
740                        src);
741  }
742#endif
743} // ___kmp_itt_lock_init
744#endif // KMP_USE_DYNAMIC_LOCK
745
746// Internal guts -- common code for locks and critical sections, do not call
747// directly.
748__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
749#if USE_ITT_NOTIFY
750  KMP_ITT_DEBUG_LOCK();
751  __itt_sync_destroy(lock);
752  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
753#endif
754} // ___kmp_itt_lock_fini
755
756// -----------------------------------------------------------------------------
757#if KMP_USE_DYNAMIC_LOCK
758void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
759  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
760}
761#else
762void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
763  ___kmp_itt_lock_init(lock, "OMP Lock");
764} // __kmp_itt_lock_creating
765#endif
766
767void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
768#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
769  // postpone lock object access
770  if (__itt_sync_prepare_ptr) {
771    if (KMP_EXTRACT_D_TAG(lock) == 0) {
772      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
773      __itt_sync_prepare(ilk->lock);
774    } else {
775      __itt_sync_prepare(lock);
776    }
777  }
778#else
779  __itt_sync_prepare(lock);
780#endif
781} // __kmp_itt_lock_acquiring
782
783void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
784#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
785  // postpone lock object access
786  if (__itt_sync_acquired_ptr) {
787    if (KMP_EXTRACT_D_TAG(lock) == 0) {
788      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
789      __itt_sync_acquired(ilk->lock);
790    } else {
791      __itt_sync_acquired(lock);
792    }
793  }
794#else
795  __itt_sync_acquired(lock);
796#endif
797} // __kmp_itt_lock_acquired
798
799void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
800#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
801  if (__itt_sync_releasing_ptr) {
802    if (KMP_EXTRACT_D_TAG(lock) == 0) {
803      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
804      __itt_sync_releasing(ilk->lock);
805    } else {
806      __itt_sync_releasing(lock);
807    }
808  }
809#else
810  __itt_sync_releasing(lock);
811#endif
812} // __kmp_itt_lock_releasing
813
814void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
815#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
816  if (__itt_sync_cancel_ptr) {
817    if (KMP_EXTRACT_D_TAG(lock) == 0) {
818      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
819      __itt_sync_cancel(ilk->lock);
820    } else {
821      __itt_sync_cancel(lock);
822    }
823  }
824#else
825  __itt_sync_cancel(lock);
826#endif
827} // __kmp_itt_lock_cancelled
828
829void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
830  ___kmp_itt_lock_fini(lock, "OMP Lock");
831} // __kmp_itt_lock_destroyed
832
833/* Critical reporting.
834   Critical sections are treated exactly as locks (but have different object
835   type). */
836#if KMP_USE_DYNAMIC_LOCK
837void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
838  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
839}
840#else
841void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
842  ___kmp_itt_lock_init(lock, "OMP Critical");
843} // __kmp_itt_critical_creating
844#endif
845
846void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
847  __itt_sync_prepare(lock);
848} // __kmp_itt_critical_acquiring
849
850void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
851  __itt_sync_acquired(lock);
852} // __kmp_itt_critical_acquired
853
854void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
855  __itt_sync_releasing(lock);
856} // __kmp_itt_critical_releasing
857
858void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
859  ___kmp_itt_lock_fini(lock, "OMP Critical");
860} // __kmp_itt_critical_destroyed
861
862/* Single reporting. */
863
// Marks the start of a `single` construct for thread `gtid`.
// Creates an ITT mark named "OMP Single-<source>", stores it in the thread's
// th_itt_mark_single field (read back by __kmp_itt_single_end), and then sets
// the mark. Runs only when the ITT mark_create entry point is available or
// KMP_ITT_DEBUG is enabled.
void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
    ident_t *loc = thr->th.th_ident;
    // The source string may be unavailable (no ident, or ident without a
    // psource). Handing a null pointer to "%s" is undefined behavior, so
    // substitute the "(null)" placeholder that common C libraries print.
    char const *src = "(null)";
    if (loc != NULL && loc->psource != NULL) {
      src = loc->psource;
    }
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    // The name buffer is no longer needed once the mark has been created.
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start
885
// Marks the end of a `single` construct for thread `gtid`: switches off the
// ITT mark stored in the thread's th_itt_mark_single field.
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  __itt_mark_type single_mark = thread->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(single_mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", single_mark);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_single_end
894
895/* Ordered reporting.
896 * __kmp_itt_ordered_init is called by each thread *before* first using sync
897   object. ITT team would like it to be called once, but it requires extra
898   synchronization.
899 * __kmp_itt_ordered_prep is called when thread is going to enter ordered
900   section (before synchronization).
901 * __kmp_itt_ordered_start is called just before entering user code (after
902   synchronization).
903 * __kmp_itt_ordered_end is called after returning from user code.
904
905 Sync object is th->th.th_dispatch->th_dispatch_sh_current.
906 Events are not generated in case of serialized team. */
907
// Registers the ordered-section sync object of thread `gtid` with ITT under
// the type name "OMP Ordered". The sync object is the thread's
// th_dispatch->th_dispatch_sh_current; the source string comes from the
// thread's current ident when one is set.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr) {
    return;
  }
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  ident_t const *where = thread->th.th_ident;
  char const *src = NULL;
  if (where != NULL) {
    src = where->psource;
  }
  __itt_sync_create(thread->th.th_dispatch->th_dispatch_sh_current,
                    "OMP Ordered", src, 0);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_ordered_init
919
// Reports that thread `gtid` is about to enter an ordered section: sends the
// ITT sync "prepare" notification for the thread's current dispatch sync
// object. No event is produced for a serialized team.
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  // Gated on the sync_create entry point, like the other ordered hooks.
  if (!__itt_sync_create_ptr) {
    return;
  }
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized) {
    return;
  }
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  __itt_sync_prepare(thread->th.th_dispatch->th_dispatch_sh_current);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_ordered_prep
931
// Reports that thread `gtid` has entered an ordered section: sends the ITT
// sync "acquired" notification for the thread's current dispatch sync object.
// No event is produced for a serialized team.
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  // Gated on the sync_create entry point, like the other ordered hooks.
  if (!__itt_sync_create_ptr) {
    return;
  }
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized) {
    return;
  }
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  __itt_sync_acquired(thread->th.th_dispatch->th_dispatch_sh_current);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_ordered_start
943
// Reports that thread `gtid` is leaving an ordered section: sends the ITT
// sync "releasing" notification for the thread's current dispatch sync
// object. No event is produced for a serialized team.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  // Gated on the sync_create entry point, like the other ordered hooks.
  if (!__itt_sync_create_ptr) {
    return;
  }
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized) {
    return;
  }
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  __itt_sync_releasing(thread->th.th_dispatch->th_dispatch_sh_current);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_ordered_end
955
956/* Threads reporting. */
957
// Thin wrapper that forwards to the ITT "thread ignore" call.
void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore
961
// Gives the calling thread a descriptive name in ITT: "OMP Master Thread #N"
// when KMP_MASTER_GTID(gtid) holds, "OMP Worker Thread #N" otherwise, where N
// is `gtid`. Does nothing when the ITT thr_name_set entry point is missing.
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_thr_name_set_ptr) {
    return;
  }
  kmp_str_buf_t label;
  __kmp_str_buf_init(&label);
  if (KMP_MASTER_GTID(gtid)) {
    __kmp_str_buf_print(&label, "OMP Master Thread #%d", gtid);
  } else {
    __kmp_str_buf_print(&label, "OMP Worker Thread #%d", gtid);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_thr_name_set(label.str, label.used);
  KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", label.str);
  // Release the temporary buffer after the name has been handed to ITT.
  __kmp_str_buf_free(&label);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_thread_name
979
980/* System object reporting.
981   ITT catches operations with system sync objects (like Windows* OS on IA-32
982   architecture API critical sections and events). We only need to specify
983   name ("OMP Scheduler") for the object to let ITT know it is an object used
984   by OpenMP RTL for internal purposes. */
985
// Registers a system sync object with ITT under the type name
// "OMP Scheduler".
//   object -- address of the sync object.
//   name   -- name string passed to ITT for this object.
void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  // Debug LOCK/PRINT bracket the call; both are empty unless KMP_ITT_DEBUG.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_system_object_created
994
995/* Stack stitching api.
   Master calls "create" and puts the stitching id into the team structure.
997   Workers read the stitching id and call "enter" / "leave" api.
998   Master calls "destroy" at the end of the parallel region. */
999
1000__itt_caller __kmp_itt_stack_caller_create() {
1001#if USE_ITT_NOTIFY
1002  if (!__itt_stack_caller_create_ptr)
1003    return NULL;
1004  KMP_ITT_DEBUG_LOCK();
1005  __itt_caller id = __itt_stack_caller_create();
1006  KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
1007  return id;
1008#endif
1009  return NULL;
1010}
1011
1012void __kmp_itt_stack_caller_destroy(__itt_caller id) {
1013#if USE_ITT_NOTIFY
1014  if (__itt_stack_caller_destroy_ptr) {
1015    KMP_ITT_DEBUG_LOCK();
1016    __itt_stack_caller_destroy(id);
1017    KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
1018  }
1019#endif
1020}
1021
1022void __kmp_itt_stack_callee_enter(__itt_caller id) {
1023#if USE_ITT_NOTIFY
1024  if (__itt_stack_callee_enter_ptr) {
1025    KMP_ITT_DEBUG_LOCK();
1026    __itt_stack_callee_enter(id);
1027    KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
1028  }
1029#endif
1030}
1031
1032void __kmp_itt_stack_callee_leave(__itt_caller id) {
1033#if USE_ITT_NOTIFY
1034  if (__itt_stack_callee_leave_ptr) {
1035    KMP_ITT_DEBUG_LOCK();
1036    __itt_stack_callee_leave(id);
1037    KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
1038  }
1039#endif
1040}
1041
1042#endif /* USE_ITT_BUILD */
1043