xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_itt.inl (revision 9729f076e4d93c5a37e78d427bfe0f1ab99bbcc6)
1#if USE_ITT_BUILD
2/*
3 * kmp_itt.inl -- Inline functions of ITT Notify.
4 */
5
6//===----------------------------------------------------------------------===//
7//
8// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9// See https://llvm.org/LICENSE.txt for license information.
10// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11//
12//===----------------------------------------------------------------------===//
13
14// Inline function definitions. This file should be included into kmp_itt.h file
15// for production build (to let compiler inline functions) or into kmp_itt.c
16// file for debug build (to reduce the number of files to recompile and save
17// build time).
18
19#include "kmp.h"
20#include "kmp_str.h"
21
// Debug tracing helpers. KMP_ITT_DEBUG_LOCK() acquires __kmp_itt_debug_lock and
// KMP_ITT_DEBUG_PRINT() releases it after printing, so every PRINT has to be
// preceded by a matching LOCK. Both expand to nothing unless KMP_ITT_DEBUG is
// non-zero.
#if !KMP_ITT_DEBUG
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#else
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
// Acquire __kmp_itt_debug_lock; it is released by KMP_ITT_DEBUG_PRINT().
#define KMP_ITT_DEBUG_LOCK()                                                   \
  { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
// Write "#<gtid>: " followed by the printf-style message to stderr, flush the
// stream and release __kmp_itt_debug_lock.
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  {                                                                            \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  }
#endif // KMP_ITT_DEBUG
37
38// Ensure that the functions are static if they're supposed to be being inlined.
39// Otherwise they cannot be used in more than one file, since there will be
40// multiple definitions.
#if !KMP_DEBUG
// Non-debug build: the definitions below are static inline.
#define LINKAGE static inline
#else
// Debug build: the definitions below have ordinary external linkage.
#define LINKAGE
#endif // KMP_DEBUG
46
47// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
48// this API to support user-defined synchronization primitives, but does not use
49// ZCA; it would be safe to turn this off until wider support becomes available.
#if USE_ITT_ZCA && defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200
// Redirect the two ITT sync notifications to __notify_zc_intrinsic /
// __notify_intrinsic, replacing whatever definitions were in effect.
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif // USE_ITT_ZCA && __INTEL_COMPILER >= 1200
62
63static kmp_bootstrap_lock_t metadata_lock =
64    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
65
66#if USE_ITT_NOTIFY
67LINKAGE size_t __kmp_itthash_hash(kmp_intptr_t addr, size_t hsize) {
68  return ((addr >> 6) ^ (addr >> 2)) % hsize;
69}
70LINKAGE kmp_itthash_entry *__kmp_itthash_find(kmp_info_t *thread,
71                                              kmp_itthash_t *h, ident_t *loc,
72                                              int team_size) {
73  kmp_itthash_entry_t *entry;
74  size_t bucket = __kmp_itthash_hash((kmp_intptr_t)loc, KMP_MAX_FRAME_DOMAINS);
75  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
76    if (entry->loc == loc && entry->team_size == team_size)
77      break;
78
79  if (entry == NULL) {
80    // two foreign threads could report frames concurrently
81    int cnt = KMP_TEST_THEN_INC32(&h->count);
82    if (cnt >= KMP_MAX_FRAME_DOMAINS) {
83      KMP_TEST_THEN_DEC32(&h->count); // revert the count
84      return entry; // too many entries
85    }
86    // create new entry
87    entry = (kmp_itthash_entry_t *)__kmp_thread_malloc(
88        thread, sizeof(kmp_itthash_entry_t));
89    entry->loc = loc;
90    entry->team_size = team_size;
91    entry->d = NULL;
92    entry->next_in_bucket = h->buckets[bucket];
93    while (!KMP_COMPARE_AND_STORE_PTR(&h->buckets[bucket],
94                                      entry->next_in_bucket, entry)) {
95      KMP_CPU_PAUSE();
96      entry->next_in_bucket = h->buckets[bucket];
97    }
98  }
99#if KMP_DEBUG
100  else {
101    // check the contents of the location info is unique
102    KMP_DEBUG_ASSERT(loc->psource == entry->loc->psource);
103  }
104#endif
105  return entry;
106}
107#endif
108
109/* Parallel region reporting.
110 * __kmp_itt_region_forking should be called by primary thread of a team.
111   Exact moment of call does not matter, but it should be completed before any
112   thread of this team calls __kmp_itt_region_starting.
113 * __kmp_itt_region_starting should be called by each thread of a team just
114   before entering parallel region body.
115 * __kmp_itt_region_finished should be called by each thread of a team right
116   after returning from parallel region body.
117 * __kmp_itt_region_joined should be called by primary thread of a team, after
118   all threads called __kmp_itt_region_finished.
119
120 Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
121 execute some more user code -- such a thread can execute tasks.
122
123 Note: The overhead of logging region_starting and region_finished in each
124 thread is too large, so these calls are not used. */
125
126LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
127#if USE_ITT_NOTIFY
128  kmp_team_t *team = __kmp_team_from_gtid(gtid);
129  if (team->t.t_active_level > 1) {
130    // The frame notifications are only supported for the outermost teams.
131    return;
132  }
133  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
134  ident_t *loc = th->th.th_ident;
135  if (!loc) {
136    // no sense to report a region without location info
137    return;
138  }
139  kmp_itthash_entry *e;
140  e = __kmp_itthash_find(th, &__kmp_itt_region_domains, loc, team_size);
141  if (e == NULL)
142    return; // too many entries in the hash
143  if (e->d == NULL) {
144    // Transform compiler-generated region location into the format
145    // that the tools more or less standardized on:
146    //   "<func>$omp$parallel@[file:]<line>[:<col>]"
147    char *buff = NULL;
148    kmp_str_loc_t str_loc =
149        __kmp_str_loc_init(loc->psource, /* init_fname */ false);
150    buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
151                            team_size, str_loc.file, str_loc.line, str_loc.col);
152
153    __itt_suppress_push(__itt_suppress_memory_errors);
154    e->d = __itt_domain_create(buff);
155    KMP_ASSERT(e->d != NULL);
156    __itt_suppress_pop();
157
158    __kmp_str_free(&buff);
159    if (barriers) {
160      kmp_itthash_entry *e;
161      e = __kmp_itthash_find(th, &__kmp_itt_barrier_domains, loc, 0);
162      if (e != NULL) {
163        KMP_DEBUG_ASSERT(e->d == NULL);
164        char *buff = NULL;
165        buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
166                                str_loc.file, str_loc.line);
167        __itt_suppress_push(__itt_suppress_memory_errors);
168        e->d = __itt_domain_create(buff);
169        KMP_ASSERT(e->d != NULL);
170        __itt_suppress_pop();
171        __kmp_str_free(&buff);
172      }
173    }
174    __kmp_str_loc_free(&str_loc);
175  }
176  __itt_frame_begin_v3(e->d, NULL);
177  KMP_ITT_DEBUG_LOCK();
178  KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, domain=%p, loc:%p\n", gtid, e->d,
179                      loc);
180#endif
181} // __kmp_itt_region_forking
182
183// -----------------------------------------------------------------------------
184LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
185                                    __itt_timestamp end, int imbalance,
186                                    ident_t *loc, int team_size, int region) {
187#if USE_ITT_NOTIFY
188  if (!loc) {
189    // no sense to report a region without location info
190    return;
191  }
192  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
193  if (region) {
194    kmp_team_t *team = __kmp_team_from_gtid(gtid);
195    int serialized = (region == 2 ? 1 : 0);
196    if (team->t.t_active_level + serialized > 1) {
197      // The frame notifications are only supported for the outermost teams.
198      return;
199    }
200    // Check region domain has not been created before.
201    kmp_itthash_entry *e;
202    e = __kmp_itthash_find(th, &__kmp_itt_region_domains, loc, team_size);
203    if (e == NULL)
204      return; // too many entries in the hash
205    if (e->d == NULL) { // new entry, need to calculate domain
206      // Transform compiler-generated region location into the format
207      // that the tools more or less standardized on:
208      //   "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
209      char *buff = NULL;
210      kmp_str_loc_t str_loc =
211          __kmp_str_loc_init(loc->psource, /* init_fname */ false);
212      buff =
213          __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
214                           team_size, str_loc.file, str_loc.line, str_loc.col);
215      __itt_suppress_push(__itt_suppress_memory_errors);
216      e->d = __itt_domain_create(buff);
217      KMP_ASSERT(e->d != NULL);
218      __itt_suppress_pop();
219
220      __kmp_str_free(&buff);
221      __kmp_str_loc_free(&str_loc);
222    }
223    __itt_frame_submit_v3(e->d, NULL, begin, end);
224    KMP_ITT_DEBUG_LOCK();
225    KMP_ITT_DEBUG_PRINT(
226        "[reg sub] gtid=%d, domain=%p, region:%d, loc:%p, beg:%llu, end:%llu\n",
227        gtid, e->d, region, loc, begin, end);
228    return;
229  } else { // called for barrier reporting
230    kmp_itthash_entry *e;
231    e = __kmp_itthash_find(th, &__kmp_itt_barrier_domains, loc, 0);
232    if (e == NULL)
233      return; // too many entries in the hash
234    if (e->d == NULL) { // new entry, need to calculate domain
235      // Transform compiler-generated region location into the format
236      // that the tools more or less standardized on:
237      //   "<func>$omp$frame@[file:]<line>[:<col>]"
238      kmp_str_loc_t str_loc =
239          __kmp_str_loc_init(loc->psource, /* init_fname */ false);
240      char *buff = NULL;
241      if (imbalance) {
242        buff =
243            __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", str_loc.func,
244                             team_size, str_loc.file, str_loc.line);
245      } else {
246        buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
247                                str_loc.file, str_loc.line);
248      }
249      __itt_suppress_push(__itt_suppress_memory_errors);
250      e->d = __itt_domain_create(buff);
251      KMP_ASSERT(e->d != NULL);
252      __itt_suppress_pop();
253      __kmp_str_free(&buff);
254      __kmp_str_loc_free(&str_loc);
255    }
256    __itt_frame_submit_v3(e->d, NULL, begin, end);
257    KMP_ITT_DEBUG_LOCK();
258    KMP_ITT_DEBUG_PRINT(
259        "[frm sub] gtid=%d, domain=%p, loc:%p, beg:%llu, end:%llu\n", gtid,
260        e->d, loc, begin, end);
261  }
262#endif
263} // __kmp_itt_frame_submit
264
265// -----------------------------------------------------------------------------
266LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
267                                          kmp_uint64 end, kmp_uint64 imbalance,
268                                          kmp_uint64 reduction) {
269#if USE_ITT_NOTIFY
270  if (metadata_domain == NULL) {
271    __kmp_acquire_bootstrap_lock(&metadata_lock);
272    if (metadata_domain == NULL) {
273      __itt_suppress_push(__itt_suppress_memory_errors);
274      metadata_domain = __itt_domain_create("OMP Metadata");
275      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
276      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
277      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
278      __itt_suppress_pop();
279    }
280    __kmp_release_bootstrap_lock(&metadata_lock);
281  }
282
283  kmp_uint64 imbalance_data[4];
284  imbalance_data[0] = begin;
285  imbalance_data[1] = end;
286  imbalance_data[2] = imbalance;
287  imbalance_data[3] = reduction;
288
289  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
290                     __itt_metadata_u64, 4, imbalance_data);
291#endif
292} // __kmp_itt_metadata_imbalance
293
294// -----------------------------------------------------------------------------
295LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
296                                     kmp_uint64 iterations, kmp_uint64 chunk) {
297#if USE_ITT_NOTIFY
298  if (metadata_domain == NULL) {
299    __kmp_acquire_bootstrap_lock(&metadata_lock);
300    if (metadata_domain == NULL) {
301      __itt_suppress_push(__itt_suppress_memory_errors);
302      metadata_domain = __itt_domain_create("OMP Metadata");
303      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
304      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
305      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
306      __itt_suppress_pop();
307    }
308    __kmp_release_bootstrap_lock(&metadata_lock);
309  }
310
311  // Parse line and column from psource string: ";file;func;line;col;;"
312  KMP_DEBUG_ASSERT(loc->psource);
313  kmp_uint64 loop_data[5];
314  int line, col;
315  __kmp_str_loc_numbers(loc->psource, &line, &col);
316  loop_data[0] = line;
317  loop_data[1] = col;
318  loop_data[2] = sched_type;
319  loop_data[3] = iterations;
320  loop_data[4] = chunk;
321
322  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
323                     __itt_metadata_u64, 5, loop_data);
324#endif
325} // __kmp_itt_metadata_loop
326
327// -----------------------------------------------------------------------------
328LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
329#if USE_ITT_NOTIFY
330  if (metadata_domain == NULL) {
331    __kmp_acquire_bootstrap_lock(&metadata_lock);
332    if (metadata_domain == NULL) {
333      __itt_suppress_push(__itt_suppress_memory_errors);
334      metadata_domain = __itt_domain_create("OMP Metadata");
335      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
336      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
337      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
338      __itt_suppress_pop();
339    }
340    __kmp_release_bootstrap_lock(&metadata_lock);
341  }
342
343  int line, col;
344  __kmp_str_loc_numbers(loc->psource, &line, &col);
345  kmp_uint64 single_data[2];
346  single_data[0] = line;
347  single_data[1] = col;
348
349  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
350                     __itt_metadata_u64, 2, single_data);
351#endif
352} // __kmp_itt_metadata_single
353
354// -----------------------------------------------------------------------------
355LINKAGE void __kmp_itt_region_starting(int gtid) {
356#if USE_ITT_NOTIFY
357#endif
358} // __kmp_itt_region_starting
359
360// -----------------------------------------------------------------------------
361LINKAGE void __kmp_itt_region_finished(int gtid) {
362#if USE_ITT_NOTIFY
363#endif
364} // __kmp_itt_region_finished
365
366// ----------------------------------------------------------------------------
367LINKAGE void __kmp_itt_region_joined(int gtid) {
368#if USE_ITT_NOTIFY
369  kmp_team_t *team = __kmp_team_from_gtid(gtid);
370  if (team->t.t_active_level > 1) {
371    // The frame notifications are only supported for the outermost teams.
372    return;
373  }
374  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
375  ident_t *loc = th->th.th_ident;
376  if (loc) {
377    kmp_itthash_entry *e = __kmp_itthash_find(th, &__kmp_itt_region_domains,
378                                              loc, th->th.th_team_nproc);
379    if (e == NULL)
380      return; // too many entries in the hash
381    KMP_DEBUG_ASSERT(e->d);
382    KMP_ITT_DEBUG_LOCK();
383    __itt_frame_end_v3(e->d, NULL);
384    KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, domain=%p, loc:%p\n", gtid, e->d,
385                        loc);
386  }
387#endif
388} // __kmp_itt_region_joined
389
390/* Barriers reporting.
391
392   A barrier consists of two phases:
393   1. Gather -- primary thread waits for all worker threads to arrive; each
394      worker thread registers arrival and goes further.
395   2. Release -- each worker thread waits until primary thread lets it go;
396      primary thread lets worker threads go.
397
398   Function should be called by each thread:
399   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
400   * __kmp_itt_barrier_middle()   -- between gather and release phases.
401   * __kmp_itt_barrier_finished() -- after release phase.
402
403   Note: Call __kmp_itt_barrier_object() before call to
404   __kmp_itt_barrier_starting() and save result in local variable.
405   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
406   would return itt sync object for the next barrier!
407
   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
   does not have barrier object or barrier data structure. Barrier is just a
   counter in team and thread structures. We could use an address of team
   structure as a barrier sync object, but ITT wants different objects for
   different barriers (even within the same team). So let us use team address
   as barrier sync object for the first barrier, then increase it by one for the
   next barrier, and so on (but wrap it not to use addresses outside of team
   structure). */
416
// Builds the ITT sync object address for a barrier of type `bt` of the team the
// thread `gtid` belongs to.
//   gtid     - global thread id of the caller.
//   bt       - barrier type (bs_plain_barrier, bs_forkjoin_barrier, ...).
//   set_name - when non-zero, the object is also registered with ITT via
//              __itt_sync_create under a descriptive type name.
//   delta    - added to the barrier counter; 0 (current barrier) is the default
//              value, specify -1 to get the previous barrier.
// Returns NULL when ITT notify is compiled out or the thread has no team.
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name, int delta) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
  // be NULL. This check helps to avoid a crash. However, this is not a complete
  // solution, and reporting fork/join barriers to ITT should be revisited.
  if (team == NULL)
    return object;

  // Primary thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
  // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
  kmp_uint64 counter =
      team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;

  // This condition is a must (we would have zero divide otherwise).
  KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
  // Stronger condition: make sure we have room for at least two different ids
  // (for each barrier type).
  KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);

  // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
  // barriers of different types do not have the same ids.
  const size_t ids_per_type = sizeof(kmp_team_t) / bs_last_barrier;
  object = reinterpret_cast<void *>(
      (kmp_uintptr_t)(team) +
      (kmp_uintptr_t)counter % ids_per_type * bs_last_barrier + bt);
  KMP_ITT_DEBUG_LOCK();
  KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                      counter, object);

  if (set_name) {
    ident_t const *loc = NULL;
    char const *src = NULL;
    char const *type = "OMP Barrier";

    if (bt == bs_plain_barrier) {
      // For plain barrier compiler calls __kmpc_barrier() function, which
      // saves location in thr->th.th_ident.
      loc = thr->th.th_ident;
      if (loc != NULL) {
        src = loc->psource;
        // Get the barrier type from flags provided by compiler; the implicit
        // flag takes precedence over the explicit one.
        if ((loc->flags & KMP_IDENT_BARRIER_IMPL) != 0) {
          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
          case KMP_IDENT_BARRIER_IMPL_FOR:
            type = "OMP For Barrier";
            break;
          case KMP_IDENT_BARRIER_IMPL_SECTIONS:
            type = "OMP Sections Barrier";
            break;
          case KMP_IDENT_BARRIER_IMPL_SINGLE:
            type = "OMP Single Barrier";
            break;
          case KMP_IDENT_BARRIER_IMPL_WORKSHARE:
            type = "OMP Workshare Barrier";
            break;
          default:
            type = "OMP Implicit Barrier";
            KMP_DEBUG_ASSERT(0);
            break;
          }
        } else if ((loc->flags & KMP_IDENT_BARRIER_EXPL) != 0) {
          type = "OMP Explicit Barrier";
        }
      }
    } else if (bt == bs_forkjoin_barrier) {
      // In case of fork/join barrier we cannot read thr->th.th_ident, because
      // it contains location of last passed construct (while join barrier is
      // not such one). Nor can we read it from the primary thread, because
      // __kmp_join_call may not be called yet, so we read the location from
      // team. This is the same location. Team is valid on entry to join
      // barrier where this happens.
      loc = team->t.t_ident;
      if (loc != NULL)
        src = loc->psource;
      type = "OMP Join Barrier";
    }

    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(object, type, src, __itt_attr_barrier);
    KMP_ITT_DEBUG_PRINT(
        "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
        type, src);
  }
#endif
  return object;
} // __kmp_itt_barrier_object
519
520// -----------------------------------------------------------------------------
// Called before a thread arrives at the gather phase of a barrier. Threads for
// which KMP_MASTER_GTID(gtid) is false first notify "releasing"; every thread
// then notifies "prepare" on the barrier sync object.
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  const bool is_worker = !KMP_MASTER_GTID(gtid);
  if (is_worker) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting
533
534// -----------------------------------------------------------------------------
// Called between the gather and release phases of a barrier. Only a thread for
// which KMP_MASTER_GTID(gtid) is true reports anything: "acquired" followed by
// "releasing" on the barrier sync object.
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid))
    return; // nothing to report for the other threads

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_releasing(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
#endif
} // __kmp_itt_barrier_middle
548
549// -----------------------------------------------------------------------------
// Called after the release phase of a barrier. Threads for which
// KMP_MASTER_GTID(gtid) is false notify "acquired" on the barrier sync object;
// the remaining thread reports nothing here.
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
  }
#endif
} // __kmp_itt_barrier_finished
560
/* Taskwait reporting.
   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
   does not have taskwait structure, so we need to construct something. */
564
// Constructs the ITT sync object address for the current taskwait of thread
// `gtid`: the address of the thread's current task data plus
// td_taskwait_counter modulo sizeof(kmp_taskdata_t). Returns NULL when ITT
// notify is compiled out or __itt_sync_create_ptr is not set.
void *__kmp_itt_taskwait_object(int gtid) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  if (UNLIKELY(__itt_sync_create_ptr)) {
    kmp_taskdata_t *taskdata = __kmp_thread_from_gtid(gtid)->th.th_current_task;
    kmp_uintptr_t base = kmp_uintptr_t(taskdata);
    object = reinterpret_cast<void *>(
        base + taskdata->td_taskwait_counter % sizeof(kmp_taskdata_t));
  }
#endif
  return object;
} // __kmp_itt_taskwait_object
578
// Registers `object` with ITT as an "OMP Taskwait" sync object, using the
// source string of the current task's td_taskwait_ident when there is one, and
// then notifies "prepare" on it.
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_taskdata_t *taskdata = __kmp_thread_from_gtid(gtid)->th.th_current_task;
  ident_t const *loc = taskdata->td_taskwait_ident;
  char const *src = NULL;
  if (loc != NULL)
    src = loc->psource;

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting
594
// Notifies "acquired" on the taskwait sync object and then destroys it.
// `gtid` is not used.
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  // The wait is over.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);

  // Drop the sync object.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished
605
/* Task reporting.
   Only those tasks are reported which are executed by a thread spinning at
   barrier (or taskwait). Sync object passed to the function must be the
   barrier or taskwait the thread is waiting at. */
610
// Notifies "cancel" on `object` (the ITT sync object of the barrier or
// taskwait) before a task is started. Does nothing when `object` is NULL.
void __kmp_itt_task_starting(void *object) {
#if USE_ITT_NOTIFY
  if (UNLIKELY(object != NULL)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting
622
623// -----------------------------------------------------------------------------
// Notifies "prepare" on `object` (the ITT sync object of the barrier or
// taskwait) after a task has finished.
void __kmp_itt_task_finished(void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished
633
634/* Lock reporting.
635 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
636   operation (set/unset). It is not a real event shown to the user but just
637   setting a name for synchronization object. `lock' is an address of sync
638   object, the same address should be used in all subsequent calls.
639 * __kmp_itt_lock_acquiring() should be called before setting the lock.
640 * __kmp_itt_lock_acquired() should be called after setting the lock.
 * __kmp_itt_lock_releasing() should be called before unsetting the lock.
642 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
643   for the lock.
644 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
645   operation. After __kmp_itt_lock_destroyed() all the references to the same
646   address will be considered as another sync object, not related with the
647   original one.  */
648
649#if KMP_USE_DYNAMIC_LOCK
650// Takes location information directly
651__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
652                                       const ident_t *loc) {
653#if USE_ITT_NOTIFY
654  if (__itt_sync_create_ptr) {
655    char const *src = (loc == NULL ? NULL : loc->psource);
656    KMP_ITT_DEBUG_LOCK();
657    __itt_sync_create(lock, type, src, 0);
658    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
659                        src);
660  }
661#endif
662}
663#else // KMP_USE_DYNAMIC_LOCK
664// Internal guts -- common code for locks and critical sections, do not call
665// directly.
666__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
667#if USE_ITT_NOTIFY
668  if (__itt_sync_create_ptr) {
669    ident_t const *loc = NULL;
670    if (__kmp_get_user_lock_location_ != NULL)
671      loc = __kmp_get_user_lock_location_((lock));
672    char const *src = (loc == NULL ? NULL : loc->psource);
673    KMP_ITT_DEBUG_LOCK();
674    __itt_sync_create(lock, type, src, 0);
675    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
676                        src);
677  }
678#endif
679} // ___kmp_itt_lock_init
680#endif // KMP_USE_DYNAMIC_LOCK
681
682// Internal guts -- common code for locks and critical sections, do not call
683// directly.
684__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
685#if USE_ITT_NOTIFY
686  KMP_ITT_DEBUG_LOCK();
687  __itt_sync_destroy(lock);
688  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
689#endif
690} // ___kmp_itt_lock_fini
691
692// -----------------------------------------------------------------------------
693#if KMP_USE_DYNAMIC_LOCK
694void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
695  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
696}
697#else
698void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
699  ___kmp_itt_lock_init(lock, "OMP Lock");
700} // __kmp_itt_lock_creating
701#endif
702
703void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
704#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
705  // postpone lock object access
706  if (__itt_sync_prepare_ptr) {
707    if (KMP_EXTRACT_D_TAG(lock) == 0) {
708      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
709      __itt_sync_prepare(ilk->lock);
710    } else {
711      __itt_sync_prepare(lock);
712    }
713  }
714#else
715  __itt_sync_prepare(lock);
716#endif
717} // __kmp_itt_lock_acquiring
718
719void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
720#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
721  // postpone lock object access
722  if (__itt_sync_acquired_ptr) {
723    if (KMP_EXTRACT_D_TAG(lock) == 0) {
724      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
725      __itt_sync_acquired(ilk->lock);
726    } else {
727      __itt_sync_acquired(lock);
728    }
729  }
730#else
731  __itt_sync_acquired(lock);
732#endif
733} // __kmp_itt_lock_acquired
734
735void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
736#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
737  if (__itt_sync_releasing_ptr) {
738    if (KMP_EXTRACT_D_TAG(lock) == 0) {
739      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
740      __itt_sync_releasing(ilk->lock);
741    } else {
742      __itt_sync_releasing(lock);
743    }
744  }
745#else
746  __itt_sync_releasing(lock);
747#endif
748} // __kmp_itt_lock_releasing
749
750void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
751#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
752  if (__itt_sync_cancel_ptr) {
753    if (KMP_EXTRACT_D_TAG(lock) == 0) {
754      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
755      __itt_sync_cancel(ilk->lock);
756    } else {
757      __itt_sync_cancel(lock);
758    }
759  }
760#else
761  __itt_sync_cancel(lock);
762#endif
763} // __kmp_itt_lock_cancelled
764
765void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
766  ___kmp_itt_lock_fini(lock, "OMP Lock");
767} // __kmp_itt_lock_destroyed
768
769/* Critical reporting.
770   Critical sections are treated exactly as locks (but have different object
771   type). */
772#if KMP_USE_DYNAMIC_LOCK
773void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
774  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
775}
776#else
777void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
778  ___kmp_itt_lock_init(lock, "OMP Critical");
779} // __kmp_itt_critical_creating
780#endif
781
782void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
783  __itt_sync_prepare(lock);
784} // __kmp_itt_critical_acquiring
785
786void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
787  __itt_sync_acquired(lock);
788} // __kmp_itt_critical_acquired
789
790void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
791  __itt_sync_releasing(lock);
792} // __kmp_itt_critical_releasing
793
794void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
795  ___kmp_itt_lock_fini(lock, "OMP Critical");
796} // __kmp_itt_critical_destroyed
797
798/* Single reporting. */
799
// Creates an ITT mark named "OMP Single-<source>" for the thread identified
// by `gtid`, stores it in the thread's th_itt_mark_single field and then
// fires it with __itt_mark.
//
// <source> is the psource string of the thread's current ident_t. The work is
// done only when the __itt_mark_create_ptr hook is set or KMP_ITT_DEBUG is
// enabled, and only in builds with USE_ITT_NOTIFY.
void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
    ident_t *loc = thr->th.th_ident;
    // Passing a null pointer to a "%s" conversion is undefined behavior, so
    // fall back to a literal placeholder when no source string is available.
    // NOTE(review): "(null)" is what C libraries that tolerate NULL commonly
    // print, so mark names should be unchanged there -- confirm if a
    // different placeholder is preferred.
    char const *src =
        (loc != NULL && loc->psource != NULL) ? loc->psource : "(null)";
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    // The buffer is released once the mark has been created from it.
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start
821
// Switches off, via __itt_mark_off, the mark stored in the th_itt_mark_single
// field of the thread identified by `gtid`. Compiled to an empty function
// when USE_ITT_NOTIFY is off.
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_mark_type single_mark = thr->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(single_mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", single_mark);
#endif
} // __kmp_itt_single_end
830
/* Ordered reporting.
 * __kmp_itt_ordered_init is called by each thread *before* it first uses the
   sync object. The ITT team would like it to be called only once, but that
   would require extra synchronization.
 * __kmp_itt_ordered_prep is called when a thread is about to enter an ordered
   section (before synchronization).
 * __kmp_itt_ordered_start is called just before entering user code (after
   synchronization).
 * __kmp_itt_ordered_end is called after returning from user code.

 The sync object is th->th.th_dispatch->th_dispatch_sh_current.
 Events are not generated in the case of a serialized team. */
843
// Calls __itt_sync_create for the calling thread's
// th_dispatch->th_dispatch_sh_current object, using the object type
// "OMP Ordered" and, as the name, the psource string of the thread's current
// ident_t (NULL when the thread has no ident_t). Does nothing unless the
// __itt_sync_create_ptr hook is set.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_info_t *self = __kmp_thread_from_gtid(gtid);
  ident_t const *where = self->th.th_ident;
  char const *source = NULL;
  if (where != NULL)
    source = where->psource;
  __itt_sync_create(self->th.th_dispatch->th_dispatch_sh_current,
                    "OMP Ordered", source, 0);
#endif
} // __kmp_itt_ordered_init
855
// Passes the calling thread's th_dispatch->th_dispatch_sh_current object to
// __itt_sync_prepare. Skipped when the __itt_sync_create_ptr hook is not set
// or when the thread's team is serialized.
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return; // No events for a serialized team.
  kmp_info_t *self = __kmp_thread_from_gtid(gtid);
  __itt_sync_prepare(self->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_prep
867
// Passes the calling thread's th_dispatch->th_dispatch_sh_current object to
// __itt_sync_acquired. Skipped when the __itt_sync_create_ptr hook is not set
// or when the thread's team is serialized.
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return; // No events for a serialized team.
  kmp_info_t *self = __kmp_thread_from_gtid(gtid);
  __itt_sync_acquired(self->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_start
879
// Passes the calling thread's th_dispatch->th_dispatch_sh_current object to
// __itt_sync_releasing. Skipped when the __itt_sync_create_ptr hook is not
// set or when the thread's team is serialized.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return; // No events for a serialized team.
  kmp_info_t *self = __kmp_thread_from_gtid(gtid);
  __itt_sync_releasing(self->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_end
891
892/* Threads reporting. */
893
// Thin wrapper: invokes __itt_thr_ignore with no arguments.
void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore
897
// Builds a thread name from `gtid` and passes it, with its length, to
// __itt_thr_name_set. The name is "OMP Primary Thread #<gtid>" when
// KMP_MASTER_GTID(gtid) holds and "OMP Worker Thread #<gtid>" otherwise.
// Does nothing unless the __itt_thr_name_set_ptr hook is set.
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_thr_name_set_ptr)
    return;
  kmp_str_buf_t label;
  __kmp_str_buf_init(&label);
  if (!KMP_MASTER_GTID(gtid)) {
    __kmp_str_buf_print(&label, "OMP Worker Thread #%d", gtid);
  } else {
    __kmp_str_buf_print(&label, "OMP Primary Thread #%d", gtid);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_thr_name_set(label.str, label.used);
  KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", label.str);
  // Release the temporary buffer after the name has been handed over.
  __kmp_str_buf_free(&label);
#endif
} // __kmp_itt_thread_name
915
/* System object reporting.
   ITT catches operations on system sync objects (such as the critical
   sections and events of the Windows* OS API on IA-32 architecture). We only
   need to specify a name ("OMP Scheduler") for the object to let ITT know
   that it is an object used by the OpenMP RTL for internal purposes. */
921
// Calls __itt_sync_create for `object` with the object type "OMP Scheduler"
// and the caller-supplied `name`; in ITT debug builds the call is also
// logged. Compiled to an empty function when USE_ITT_NOTIFY is off.
void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif
} // __kmp_itt_system_object_created
930
/* Stack stitching API.
   The primary thread calls "create" and puts the stitching id into the team
   structure. Workers read the stitching id and call the "enter" / "leave"
   API. The primary thread calls "destroy" at the end of the parallel
   region. */
935
936__itt_caller __kmp_itt_stack_caller_create() {
937#if USE_ITT_NOTIFY
938  if (!__itt_stack_caller_create_ptr)
939    return NULL;
940  KMP_ITT_DEBUG_LOCK();
941  __itt_caller id = __itt_stack_caller_create();
942  KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
943  return id;
944#endif
945  return NULL;
946}
947
948void __kmp_itt_stack_caller_destroy(__itt_caller id) {
949#if USE_ITT_NOTIFY
950  if (__itt_stack_caller_destroy_ptr) {
951    KMP_ITT_DEBUG_LOCK();
952    __itt_stack_caller_destroy(id);
953    KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
954  }
955#endif
956}
957
958void __kmp_itt_stack_callee_enter(__itt_caller id) {
959#if USE_ITT_NOTIFY
960  if (__itt_stack_callee_enter_ptr) {
961    KMP_ITT_DEBUG_LOCK();
962    __itt_stack_callee_enter(id);
963    KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
964  }
965#endif
966}
967
968void __kmp_itt_stack_callee_leave(__itt_caller id) {
969#if USE_ITT_NOTIFY
970  if (__itt_stack_callee_leave_ptr) {
971    KMP_ITT_DEBUG_LOCK();
972    __itt_stack_callee_leave(id);
973    KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
974  }
975#endif
976}
977
978#endif /* USE_ITT_BUILD */
979