xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_itt.inl (revision 994297b01b98816bea1abf45ae4bac1bc69ee7a0)
1#if USE_ITT_BUILD
2/*
3 * kmp_itt.inl -- Inline functions of ITT Notify.
4 */
5
6//===----------------------------------------------------------------------===//
7//
8// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9// See https://llvm.org/LICENSE.txt for license information.
10// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11//
12//===----------------------------------------------------------------------===//
13
14// Inline function definitions. This file should be included into kmp_itt.h file
15// for production build (to let compiler inline functions) or into kmp_itt.c
16// file for debug build (to reduce the number of files to recompile and save
17// build time).
18
19#include "kmp.h"
20#include "kmp_str.h"
21
// Debug tracing helpers, active only when KMP_ITT_DEBUG is set.
// KMP_ITT_DEBUG_LOCK() acquires __kmp_itt_debug_lock; KMP_ITT_DEBUG_PRINT()
// writes a gtid-prefixed message to stderr and then releases that same lock,
// so the two are meant to be used as a pair (LOCK first, PRINT second).
// The multi-statement bodies are wrapped in do { } while (0) so that each macro
// expands to exactly one statement and stays correct inside an unbraced
// if/else. Both macros must be followed by a semicolon at the call site.
#if KMP_ITT_DEBUG
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
#define KMP_ITT_DEBUG_LOCK()                                                   \
  do {                                                                         \
    __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  } while (0)
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  do {                                                                         \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  } while (0)
#else
// Tracing disabled: both macros expand to nothing.
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG
37
// Linkage of the functions defined with LINKAGE below. When the functions are
// meant to be inlined they must be static; otherwise they could not be used in
// more than one file, because that would produce multiple definitions.
// With KMP_DEBUG set they get ordinary (external) linkage instead.
#if KMP_DEBUG
#define LINKAGE /* external linkage */
#else
#define LINKAGE static inline
#endif // KMP_DEBUG
46
// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
// this API to support user-defined synchronization primitives, but does not use
// ZCA; it would be safe to turn this off until wider support becomes available.
//
// When enabled on Intel compilers of version 1200 or newer, the regular
// __itt_sync_acquired / __itt_sync_releasing notifications are replaced by
// compiler notify intrinsics.
#if USE_ITT_ZCA && defined(__INTEL_COMPILER)
#if __INTEL_COMPILER >= 1200
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif // __INTEL_COMPILER >= 1200
#endif // USE_ITT_ZCA && defined(__INTEL_COMPILER)
62
63static kmp_bootstrap_lock_t metadata_lock =
64    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
65
66/* Parallel region reporting.
67 * __kmp_itt_region_forking should be called by primary thread of a team.
68   Exact moment of call does not matter, but it should be completed before any
69   thread of this team calls __kmp_itt_region_starting.
70 * __kmp_itt_region_starting should be called by each thread of a team just
71   before entering parallel region body.
72 * __kmp_itt_region_finished should be called by each thread of a team right
73   after returning from parallel region body.
74 * __kmp_itt_region_joined should be called by primary thread of a team, after
75   all threads called __kmp_itt_region_finished.
76
77 Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
78 execute some more user code -- such a thread can execute tasks.
79
80 Note: The overhead of logging region_starting and region_finished in each
81 thread is too large, so these calls are not used. */
82
83LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
84#if USE_ITT_NOTIFY
85  kmp_team_t *team = __kmp_team_from_gtid(gtid);
86  if (team->t.t_active_level > 1) {
87    // The frame notifications are only supported for the outermost teams.
88    return;
89  }
90  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
91  if (loc) {
92    // Use the reserved_2 field to store the index to the region domain.
93    // Assume that reserved_2 contains zero initially.  Since zero is special
94    // value here, store the index into domain array increased by 1.
95    if (loc->reserved_2 == 0) {
96      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
97        int frm =
98            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
99        if (frm >= KMP_MAX_FRAME_DOMAINS) {
100          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
101          return; // loc->reserved_2 is still 0
102        }
103        // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
104        //    frm = loc->reserved_2 - 1;   // get value saved by other thread
105        //    for same loc
106        //} // AC: this block is to replace next unsynchronized line
107
108        // We need to save indexes for both region and barrier frames. We'll use
109        // loc->reserved_2 field but put region index to the low two bytes and
110        // barrier indexes to the high two bytes. It is OK because
111        // KMP_MAX_FRAME_DOMAINS = 512.
112        loc->reserved_2 |= (frm + 1); // save "new" value
113
114        // Transform compiler-generated region location into the format
115        // that the tools more or less standardized on:
116        //   "<func>$omp$parallel@[file:]<line>[:<col>]"
117        char *buff = NULL;
118        kmp_str_loc_t str_loc =
119            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
120        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
121                                team_size, str_loc.file, str_loc.line,
122                                str_loc.col);
123
124        __itt_suppress_push(__itt_suppress_memory_errors);
125        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
126        __itt_suppress_pop();
127
128        __kmp_str_free(&buff);
129        if (barriers) {
130          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
131            int frm = KMP_TEST_THEN_INC32(
132                &__kmp_barrier_domain_count); // get "old" value
133            if (frm >= KMP_MAX_FRAME_DOMAINS) {
134              KMP_TEST_THEN_DEC32(
135                  &__kmp_barrier_domain_count); // revert the count
136              return; // loc->reserved_2 is still 0
137            }
138            char *buff = NULL;
139            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
140                                    str_loc.file, str_loc.col);
141            __itt_suppress_push(__itt_suppress_memory_errors);
142            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
143            __itt_suppress_pop();
144            __kmp_str_free(&buff);
145            // Save the barrier frame index to the high two bytes.
146            loc->reserved_2 |= (frm + 1) << 16;
147          }
148        }
149        __kmp_str_loc_free(&str_loc);
150        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
151      }
152    } else { // Region domain exists for this location
153      // Check if team size was changed. Then create new region domain for this
154      // location
155      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
156      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
157          (__kmp_itt_region_team_size[frm] != team_size)) {
158        char *buff = NULL;
159        kmp_str_loc_t str_loc =
160            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
161        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
162                                team_size, str_loc.file, str_loc.line,
163                                str_loc.col);
164
165        __itt_suppress_push(__itt_suppress_memory_errors);
166        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
167        __itt_suppress_pop();
168
169        __kmp_str_free(&buff);
170        __kmp_str_loc_free(&str_loc);
171        __kmp_itt_region_team_size[frm] = team_size;
172        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
173      } else { // Team size was not changed. Use existing domain.
174        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
175      }
176    }
177    KMP_ITT_DEBUG_LOCK();
178    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
179                        loc->reserved_2, loc);
180  }
181#endif
182} // __kmp_itt_region_forking
183
184// -----------------------------------------------------------------------------
185LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
186                                    __itt_timestamp end, int imbalance,
187                                    ident_t *loc, int team_size, int region) {
188#if USE_ITT_NOTIFY
189  if (region) {
190    kmp_team_t *team = __kmp_team_from_gtid(gtid);
191    int serialized = (region == 2 ? 1 : 0);
192    if (team->t.t_active_level + serialized > 1) {
193      // The frame notifications are only supported for the outermost teams.
194      return;
195    }
196    // Check region domain has not been created before. It's index is saved in
197    // the low two bytes.
198    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
199      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
200        int frm =
201            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
202        if (frm >= KMP_MAX_FRAME_DOMAINS) {
203          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
204          return; // loc->reserved_2 is still 0
205        }
206
207        // We need to save indexes for both region and barrier frames. We'll use
208        // loc->reserved_2 field but put region index to the low two bytes and
209        // barrier indexes to the high two bytes. It is OK because
210        // KMP_MAX_FRAME_DOMAINS = 512.
211        loc->reserved_2 |= (frm + 1); // save "new" value
212
213        // Transform compiler-generated region location into the format
214        // that the tools more or less standardized on:
215        //   "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
216        char *buff = NULL;
217        kmp_str_loc_t str_loc =
218            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
219        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
220                                team_size, str_loc.file, str_loc.line,
221                                str_loc.col);
222
223        __itt_suppress_push(__itt_suppress_memory_errors);
224        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
225        __itt_suppress_pop();
226
227        __kmp_str_free(&buff);
228        __kmp_str_loc_free(&str_loc);
229        __kmp_itt_region_team_size[frm] = team_size;
230        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
231      }
232    } else { // Region domain exists for this location
233      // Check if team size was changed. Then create new region domain for this
234      // location
235      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
236      if (frm >= KMP_MAX_FRAME_DOMAINS)
237        return; // something's gone wrong, returning
238      if (__kmp_itt_region_team_size[frm] != team_size) {
239        char *buff = NULL;
240        kmp_str_loc_t str_loc =
241            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
242        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
243                                team_size, str_loc.file, str_loc.line,
244                                str_loc.col);
245
246        __itt_suppress_push(__itt_suppress_memory_errors);
247        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
248        __itt_suppress_pop();
249
250        __kmp_str_free(&buff);
251        __kmp_str_loc_free(&str_loc);
252        __kmp_itt_region_team_size[frm] = team_size;
253        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
254      } else { // Team size was not changed. Use existing domain.
255        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
256      }
257    }
258    KMP_ITT_DEBUG_LOCK();
259    KMP_ITT_DEBUG_PRINT(
260        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
261        gtid, loc->reserved_2, region, loc, begin, end);
262    return;
263  } else { // called for barrier reporting
264    if (loc) {
265      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
266        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
267          int frm = KMP_TEST_THEN_INC32(
268              &__kmp_barrier_domain_count); // get "old" value
269          if (frm >= KMP_MAX_FRAME_DOMAINS) {
270            KMP_TEST_THEN_DEC32(
271                &__kmp_barrier_domain_count); // revert the count
272            return; // loc->reserved_2 is still 0
273          }
274          // Save the barrier frame index to the high two bytes.
275          loc->reserved_2 |= (frm + 1) << 16; // save "new" value
276
277          // Transform compiler-generated region location into the format
278          // that the tools more or less standardized on:
279          //   "<func>$omp$frame@[file:]<line>[:<col>]"
280          kmp_str_loc_t str_loc =
281              __kmp_str_loc_init(loc->psource, /* init_fname */ false);
282          if (imbalance) {
283            char *buff_imb = NULL;
284            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
285                                        str_loc.func, team_size, str_loc.file,
286                                        str_loc.col);
287            __itt_suppress_push(__itt_suppress_memory_errors);
288            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
289            __itt_suppress_pop();
290            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
291                                  end);
292            __kmp_str_free(&buff_imb);
293          } else {
294            char *buff = NULL;
295            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
296                                    str_loc.file, str_loc.col);
297            __itt_suppress_push(__itt_suppress_memory_errors);
298            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
299            __itt_suppress_pop();
300            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
301                                  end);
302            __kmp_str_free(&buff);
303          }
304          __kmp_str_loc_free(&str_loc);
305        }
306      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
307        if (imbalance) {
308          __itt_frame_submit_v3(
309              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
310              begin, end);
311        } else {
312          __itt_frame_submit_v3(
313              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
314              begin, end);
315        }
316      }
317      KMP_ITT_DEBUG_LOCK();
318      KMP_ITT_DEBUG_PRINT(
319          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
320          loc->reserved_2, loc, begin, end);
321    }
322  }
323#endif
324} // __kmp_itt_frame_submit
325
326// -----------------------------------------------------------------------------
327LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
328                                          kmp_uint64 end, kmp_uint64 imbalance,
329                                          kmp_uint64 reduction) {
330#if USE_ITT_NOTIFY
331  if (metadata_domain == NULL) {
332    __kmp_acquire_bootstrap_lock(&metadata_lock);
333    if (metadata_domain == NULL) {
334      __itt_suppress_push(__itt_suppress_memory_errors);
335      metadata_domain = __itt_domain_create("OMP Metadata");
336      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
337      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
338      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
339      __itt_suppress_pop();
340    }
341    __kmp_release_bootstrap_lock(&metadata_lock);
342  }
343
344  kmp_uint64 imbalance_data[4];
345  imbalance_data[0] = begin;
346  imbalance_data[1] = end;
347  imbalance_data[2] = imbalance;
348  imbalance_data[3] = reduction;
349
350  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
351                     __itt_metadata_u64, 4, imbalance_data);
352#endif
353} // __kmp_itt_metadata_imbalance
354
355// -----------------------------------------------------------------------------
356LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
357                                     kmp_uint64 iterations, kmp_uint64 chunk) {
358#if USE_ITT_NOTIFY
359  if (metadata_domain == NULL) {
360    __kmp_acquire_bootstrap_lock(&metadata_lock);
361    if (metadata_domain == NULL) {
362      __itt_suppress_push(__itt_suppress_memory_errors);
363      metadata_domain = __itt_domain_create("OMP Metadata");
364      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
365      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
366      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
367      __itt_suppress_pop();
368    }
369    __kmp_release_bootstrap_lock(&metadata_lock);
370  }
371
372  // Parse line and column from psource string: ";file;func;line;col;;"
373  KMP_DEBUG_ASSERT(loc->psource);
374  kmp_uint64 loop_data[5];
375  int line, col;
376  __kmp_str_loc_numbers(loc->psource, &line, &col);
377  loop_data[0] = line;
378  loop_data[1] = col;
379  loop_data[2] = sched_type;
380  loop_data[3] = iterations;
381  loop_data[4] = chunk;
382
383  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
384                     __itt_metadata_u64, 5, loop_data);
385#endif
386} // __kmp_itt_metadata_loop
387
388// -----------------------------------------------------------------------------
389LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
390#if USE_ITT_NOTIFY
391  if (metadata_domain == NULL) {
392    __kmp_acquire_bootstrap_lock(&metadata_lock);
393    if (metadata_domain == NULL) {
394      __itt_suppress_push(__itt_suppress_memory_errors);
395      metadata_domain = __itt_domain_create("OMP Metadata");
396      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
397      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
398      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
399      __itt_suppress_pop();
400    }
401    __kmp_release_bootstrap_lock(&metadata_lock);
402  }
403
404  int line, col;
405  __kmp_str_loc_numbers(loc->psource, &line, &col);
406  kmp_uint64 single_data[2];
407  single_data[0] = line;
408  single_data[1] = col;
409
410  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
411                     __itt_metadata_u64, 2, single_data);
412#endif
413} // __kmp_itt_metadata_single
414
415// -----------------------------------------------------------------------------
416LINKAGE void __kmp_itt_region_starting(int gtid) {
417#if USE_ITT_NOTIFY
418#endif
419} // __kmp_itt_region_starting
420
421// -----------------------------------------------------------------------------
422LINKAGE void __kmp_itt_region_finished(int gtid) {
423#if USE_ITT_NOTIFY
424#endif
425} // __kmp_itt_region_finished
426
427// ----------------------------------------------------------------------------
428LINKAGE void __kmp_itt_region_joined(int gtid) {
429#if USE_ITT_NOTIFY
430  kmp_team_t *team = __kmp_team_from_gtid(gtid);
431  if (team->t.t_active_level > 1) {
432    // The frame notifications are only supported for the outermost teams.
433    return;
434  }
435  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
436  if (loc && loc->reserved_2) {
437    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
438    if (frm < KMP_MAX_FRAME_DOMAINS) {
439      KMP_ITT_DEBUG_LOCK();
440      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
441      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
442                          loc->reserved_2, loc);
443    }
444  }
445#endif
446} // __kmp_itt_region_joined
447
448/* Barriers reporting.
449
450   A barrier consists of two phases:
451   1. Gather -- primary thread waits for all worker threads to arrive; each
452      worker thread registers arrival and goes further.
453   2. Release -- each worker thread waits until primary thread lets it go;
454      primary thread lets worker threads go.
455
456   Function should be called by each thread:
457   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
458   * __kmp_itt_barrier_middle()   -- between gather and release phases.
459   * __kmp_itt_barrier_finished() -- after release phase.
460
461   Note: Call __kmp_itt_barrier_object() before call to
462   __kmp_itt_barrier_starting() and save result in local variable.
463   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
464   would return itt sync object for the next barrier!
465
466   ITT need an address (void *) to be specified as a sync object. OpenMP RTL
467   does not have barrier object or barrier data structure. Barrier is just a
468   counter in team and thread structures. We could use an address of team
469   structure as a barrier sync object, but ITT wants different objects for
   different barriers (even within the same team). So let us use team address
471   as barrier sync object for the first barrier, then increase it by one for the
472   next barrier, and so on (but wrap it not to use addresses outside of team
473   structure). */
474
// Compute the ITT sync object (an address inside the team structure) that
// stands for a barrier of type `bt` of the team of thread `gtid`, and
// optionally register its name with ITT.
//   gtid     - global thread id; the team is taken from the thread's th_team.
//   bt       - barrier type; encoded into the object so barriers of different
//              types do not share ids.
//   set_name - non-zero: also call __itt_sync_create() for the object.
//   delta    - 0 (current barrier) is the default value; specify -1 to get
//              the previous barrier.
// Returns NULL when ITT notification is compiled out or the thread has no
// team.
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name, int delta) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
  // be NULL. This check helps to avoid crash. However, this is not complete
  // solution, and reporting fork/join barriers to ITT should be revisited.
  if (team == NULL)
    return object;

  // Primary thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
  // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
  kmp_uint64 counter =
      team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;

  // This condition is a must (we would have zero divide otherwise).
  KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
  // Stronger condition: make sure we have room for at least two different
  // ids (for each barrier type).
  KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);

  // Form the barrier id: wrap the counter so the address stays inside the
  // team structure, and add the barrier type.
  kmp_uintptr_t slot_count = sizeof(kmp_team_t) / bs_last_barrier;
  kmp_uintptr_t offset =
      (kmp_uintptr_t)counter % slot_count * bs_last_barrier + bt;
  object = reinterpret_cast<void *>((kmp_uintptr_t)(team) + offset);
  KMP_ITT_DEBUG_LOCK();
  KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                      counter, object);

  if (!set_name)
    return object;

  ident_t const *loc = NULL;
  char const *src = NULL;
  char const *type = "OMP Barrier";
  if (bt == bs_plain_barrier) {
    // For plain barrier compiler calls __kmpc_barrier() function, which
    // saves location in thr->th.th_ident.
    loc = thr->th.th_ident;
    if (loc != NULL) {
      src = loc->psource;
      // Get the barrier type from flags provided by compiler.
      if ((loc->flags & KMP_IDENT_BARRIER_IMPL) != 0) {
        switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
        case KMP_IDENT_BARRIER_IMPL_FOR:
          type = "OMP For Barrier";
          break;
        case KMP_IDENT_BARRIER_IMPL_SECTIONS:
          type = "OMP Sections Barrier";
          break;
        case KMP_IDENT_BARRIER_IMPL_SINGLE:
          type = "OMP Single Barrier";
          break;
        case KMP_IDENT_BARRIER_IMPL_WORKSHARE:
          type = "OMP Workshare Barrier";
          break;
        default:
          type = "OMP Implicit Barrier";
          KMP_DEBUG_ASSERT(0);
        }
      } else if ((loc->flags & KMP_IDENT_BARRIER_EXPL) != 0) {
        type = "OMP Explicit Barrier";
      }
    }
  } else if (bt == bs_forkjoin_barrier) {
    // In case of fork/join barrier we cannot rely on thr->th.th_ident,
    // because it contains location of last passed construct (while join
    // barrier is not such one).
    //
    // AC: cannot read from primary thread because __kmp_join_call may not
    //    be called yet, so we read the location from team. This is the
    //    same location. Team is valid on entry to join barrier where this
    //    happens.
    loc = team->t.t_ident;
    if (loc != NULL) {
      src = loc->psource;
    }
    type = "OMP Join Barrier";
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, type, src, __itt_attr_barrier);
  KMP_ITT_DEBUG_PRINT(
      "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
      type, src);
#endif
  return object;
} // __kmp_itt_barrier_object
577
578// -----------------------------------------------------------------------------
// Called by each thread before it arrives at the gather phase of a barrier.
// Threads for which KMP_MASTER_GTID(gtid) is false first report "releasing"
// on the barrier object; every thread then reports "prepare".
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  const int is_worker = !KMP_MASTER_GTID(gtid);
  if (is_worker) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting
591
592// -----------------------------------------------------------------------------
// Called between the gather and release phases of a barrier.
// Only a thread for which KMP_MASTER_GTID(gtid) is true reports anything:
// "acquired" followed by "releasing" on the barrier object.
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid))
    return; // other threads have nothing to report here

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_releasing(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
#endif
} // __kmp_itt_barrier_middle
606
607// -----------------------------------------------------------------------------
// Called after the release phase of a barrier.
// Threads for which KMP_MASTER_GTID(gtid) is false report "acquired" on the
// barrier object; the remaining thread reports nothing.
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid))
    return;

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
#endif
} // __kmp_itt_barrier_finished
618
619/* Taskwait reporting.
   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
621   does not have taskwait structure, so we need to construct something. */
622
// Build the ITT sync object for the current taskwait of thread `gtid`.
// The object is an address inside the thread's current task descriptor: the
// descriptor address plus the taskwait counter wrapped to the descriptor size.
// Returns NULL when ITT notification is compiled out or no sync_create handler
// is set.
void *__kmp_itt_taskwait_object(int gtid) {
#if USE_ITT_NOTIFY
  if (UNLIKELY(__itt_sync_create_ptr)) {
    kmp_taskdata_t *td = __kmp_thread_from_gtid(gtid)->th.th_current_task;
    kmp_uintptr_t base = kmp_uintptr_t(td);
    return reinterpret_cast<void *>(
        base + td->td_taskwait_counter % sizeof(kmp_taskdata_t));
  }
#endif
  return NULL;
} // __kmp_itt_taskwait_object
636
// Report the start of a taskwait: create the sync object named "OMP Taskwait"
// (with the source string of the task's td_taskwait_ident, if any) and then
// report "prepare" on it.
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_taskdata_t *td = __kmp_thread_from_gtid(gtid)->th.th_current_task;
  ident_t const *where = td->td_taskwait_ident;
  char const *src = NULL;
  if (where != NULL) {
    src = where->psource;
  }

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting
652
// Report the end of a taskwait: "acquired" on the sync object, after which the
// object is destroyed. gtid is not used.
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  // The wait is over.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);

  // The object is not needed any more.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished
663
664/* Task reporting.
   Only those tasks are reported which are executed by a thread spinning at a
   barrier (or taskwait). The sync object passed to the function must be the
   barrier or taskwait the thread is waiting at. */
668
// Report that a waiting thread starts executing a task: the wait on `object`
// (ITT sync object: barrier or taskwait) is cancelled. Nothing is reported
// for a NULL object.
void __kmp_itt_task_starting(void *object) {
#if USE_ITT_NOTIFY
  if (UNLIKELY(object != NULL)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting
680
681// -----------------------------------------------------------------------------
// Report that the task has finished and the thread goes back to waiting:
// "prepare" is reported on `object` (ITT sync object: barrier or taskwait).
void __kmp_itt_task_finished(void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished
691
692/* Lock reporting.
693 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
694   operation (set/unset). It is not a real event shown to the user but just
695   setting a name for synchronization object. `lock' is an address of sync
696   object, the same address should be used in all subsequent calls.
697 * __kmp_itt_lock_acquiring() should be called before setting the lock.
698 * __kmp_itt_lock_acquired() should be called after setting the lock.
 * __kmp_itt_lock_releasing() should be called before unsetting the lock.
700 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
701   for the lock.
702 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
703   operation. After __kmp_itt_lock_destroyed() all the references to the same
704   address will be considered as another sync object, not related with the
705   original one.  */
706
707#if KMP_USE_DYNAMIC_LOCK
708// Takes location information directly
709__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
710                                       const ident_t *loc) {
711#if USE_ITT_NOTIFY
712  if (__itt_sync_create_ptr) {
713    char const *src = (loc == NULL ? NULL : loc->psource);
714    KMP_ITT_DEBUG_LOCK();
715    __itt_sync_create(lock, type, src, 0);
716    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
717                        src);
718  }
719#endif
720}
721#else // KMP_USE_DYNAMIC_LOCK
722// Internal guts -- common code for locks and critical sections, do not call
723// directly.
724__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
725#if USE_ITT_NOTIFY
726  if (__itt_sync_create_ptr) {
727    ident_t const *loc = NULL;
728    if (__kmp_get_user_lock_location_ != NULL)
729      loc = __kmp_get_user_lock_location_((lock));
730    char const *src = (loc == NULL ? NULL : loc->psource);
731    KMP_ITT_DEBUG_LOCK();
732    __itt_sync_create(lock, type, src, 0);
733    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
734                        src);
735  }
736#endif
737} // ___kmp_itt_lock_init
738#endif // KMP_USE_DYNAMIC_LOCK
739
740// Internal guts -- common code for locks and critical sections, do not call
741// directly.
742__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
743#if USE_ITT_NOTIFY
744  KMP_ITT_DEBUG_LOCK();
745  __itt_sync_destroy(lock);
746  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
747#endif
748} // ___kmp_itt_lock_fini
749
750// -----------------------------------------------------------------------------
751#if KMP_USE_DYNAMIC_LOCK
752void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
753  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
754}
755#else
756void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
757  ___kmp_itt_lock_init(lock, "OMP Lock");
758} // __kmp_itt_lock_creating
759#endif
760
761void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
762#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
763  // postpone lock object access
764  if (__itt_sync_prepare_ptr) {
765    if (KMP_EXTRACT_D_TAG(lock) == 0) {
766      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
767      __itt_sync_prepare(ilk->lock);
768    } else {
769      __itt_sync_prepare(lock);
770    }
771  }
772#else
773  __itt_sync_prepare(lock);
774#endif
775} // __kmp_itt_lock_acquiring
776
777void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
778#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
779  // postpone lock object access
780  if (__itt_sync_acquired_ptr) {
781    if (KMP_EXTRACT_D_TAG(lock) == 0) {
782      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
783      __itt_sync_acquired(ilk->lock);
784    } else {
785      __itt_sync_acquired(lock);
786    }
787  }
788#else
789  __itt_sync_acquired(lock);
790#endif
791} // __kmp_itt_lock_acquired
792
793void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
794#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
795  if (__itt_sync_releasing_ptr) {
796    if (KMP_EXTRACT_D_TAG(lock) == 0) {
797      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
798      __itt_sync_releasing(ilk->lock);
799    } else {
800      __itt_sync_releasing(lock);
801    }
802  }
803#else
804  __itt_sync_releasing(lock);
805#endif
806} // __kmp_itt_lock_releasing
807
808void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
809#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
810  if (__itt_sync_cancel_ptr) {
811    if (KMP_EXTRACT_D_TAG(lock) == 0) {
812      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
813      __itt_sync_cancel(ilk->lock);
814    } else {
815      __itt_sync_cancel(lock);
816    }
817  }
818#else
819  __itt_sync_cancel(lock);
820#endif
821} // __kmp_itt_lock_cancelled
822
823void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
824  ___kmp_itt_lock_fini(lock, "OMP Lock");
825} // __kmp_itt_lock_destroyed
826
827/* Critical reporting.
828   Critical sections are treated exactly as locks (but have different object
829   type). */
830#if KMP_USE_DYNAMIC_LOCK
831void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
832  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
833}
834#else
835void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
836  ___kmp_itt_lock_init(lock, "OMP Critical");
837} // __kmp_itt_critical_creating
838#endif
839
840void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
841  __itt_sync_prepare(lock);
842} // __kmp_itt_critical_acquiring
843
844void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
845  __itt_sync_acquired(lock);
846} // __kmp_itt_critical_acquired
847
848void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
849  __itt_sync_releasing(lock);
850} // __kmp_itt_critical_releasing
851
852void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
853  ___kmp_itt_lock_fini(lock, "OMP Critical");
854} // __kmp_itt_critical_destroyed
855
856/* Single reporting. */
857
// Reports entry into a single construct for thread `gtid`.
// Creates an ITT mark named "OMP Single-<psource>" from the thread's current
// ident, stores it in th_itt_mark_single (read back by
// __kmp_itt_single_end), and then sets the mark.
// Runs only when __itt_mark_create_ptr is set or KMP_ITT_DEBUG is enabled.
void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
    ident_t *loc = thr->th.th_ident;
    char const *src = (loc == NULL ? NULL : loc->psource);
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    // src is NULL when the thread has no ident; formatting NULL with "%s" is
    // undefined behaviour, so substitute a placeholder in that case.
    __kmp_str_buf_print(&name, "OMP Single-%s", (src == NULL ? "(null)" : src));
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    // The buffer is only needed while the mark is created and traced.
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start
879
// Reports the end of a single construct for thread `gtid`: switches off the
// mark kept in the thread's th_itt_mark_single field.
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_mark_type single_mark = thr->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(single_mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", single_mark);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_single_end
888
889/* Ordered reporting.
890 * __kmp_itt_ordered_init is called by each thread *before* first using sync
891   object. ITT team would like it to be called once, but it requires extra
892   synchronization.
893 * __kmp_itt_ordered_prep is called when thread is going to enter ordered
894   section (before synchronization).
895 * __kmp_itt_ordered_start is called just before entering user code (after
896   synchronization).
897 * __kmp_itt_ordered_end is called after returning from user code.
898
899 Sync object is th->th.th_dispatch->th_dispatch_sh_current.
900 Events are not generated in case of serialized team. */
901
// Registers the thread's current dispatch buffer
// (th_dispatch->th_dispatch_sh_current) with ITT as an "OMP Ordered" sync
// object, named after the thread's current source location when one is set.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  ident_t const *loc = thr->th.th_ident;
  char const *src = NULL;
  if (loc != NULL)
    src = loc->psource;
  __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered",
                    src, 0);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_ordered_init
913
// Sends the ITT "prepare" notification on the thread's ordered sync object
// (th_dispatch->th_dispatch_sh_current). No event is generated when
// __itt_sync_create_ptr is unset or the team is serialized.
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_prepare(thr->th.th_dispatch->th_dispatch_sh_current);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_ordered_prep
925
// Sends the ITT "acquired" notification on the thread's ordered sync object
// (th_dispatch->th_dispatch_sh_current). No event is generated when
// __itt_sync_create_ptr is unset or the team is serialized.
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_acquired(thr->th.th_dispatch->th_dispatch_sh_current);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_ordered_start
937
// Sends the ITT "releasing" notification on the thread's ordered sync object
// (th_dispatch->th_dispatch_sh_current). No event is generated when
// __itt_sync_create_ptr is unset or the team is serialized.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_releasing(thr->th.th_dispatch->th_dispatch_sh_current);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_ordered_end
949
950/* Threads reporting. */
951
952void __kmp_itt_thread_ignore() {
953  __itt_thr_ignore();
954} // __kmp_itt_thread_ignore
955
// Gives the calling thread an ITT name derived from `gtid`:
// "OMP Primary Thread #<gtid>" when KMP_MASTER_GTID(gtid) holds, otherwise
// "OMP Worker Thread #<gtid>". Skipped when __itt_thr_name_set_ptr is unset.
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_thr_name_set_ptr)
    return;
  kmp_str_buf_t label;
  __kmp_str_buf_init(&label);
  if (!KMP_MASTER_GTID(gtid)) {
    __kmp_str_buf_print(&label, "OMP Worker Thread #%d", gtid);
  } else {
    __kmp_str_buf_print(&label, "OMP Primary Thread #%d", gtid);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_thr_name_set(label.str, label.used);
  KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", label.str);
  // Free the buffer only after both the notification and the trace used it.
  __kmp_str_buf_free(&label);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_thread_name
973
974/* System object reporting.
975   ITT catches operations with system sync objects (like Windows* OS on IA-32
976   architecture API critical sections and events). We only need to specify
977   name ("OMP Scheduler") for the object to let ITT know it is an object used
978   by OpenMP RTL for internal purposes. */
979
// Registers a system sync object with ITT under the fixed object type
// "OMP Scheduler".
//   object -- address of the system object.
//   name   -- object name passed through to __itt_sync_create.
void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  // Debug lock is taken here and dropped by KMP_ITT_DEBUG_PRINT.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif // USE_ITT_NOTIFY
} // __kmp_itt_system_object_created
988
989/* Stack stitching api.
   Primary thread calls "create" and puts the stitching id into team structure.
991   Workers read the stitching id and call "enter" / "leave" api.
992   Primary thread calls "destroy" at the end of the parallel region. */
993
994__itt_caller __kmp_itt_stack_caller_create() {
995#if USE_ITT_NOTIFY
996  if (!__itt_stack_caller_create_ptr)
997    return NULL;
998  KMP_ITT_DEBUG_LOCK();
999  __itt_caller id = __itt_stack_caller_create();
1000  KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
1001  return id;
1002#endif
1003  return NULL;
1004}
1005
1006void __kmp_itt_stack_caller_destroy(__itt_caller id) {
1007#if USE_ITT_NOTIFY
1008  if (__itt_stack_caller_destroy_ptr) {
1009    KMP_ITT_DEBUG_LOCK();
1010    __itt_stack_caller_destroy(id);
1011    KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
1012  }
1013#endif
1014}
1015
1016void __kmp_itt_stack_callee_enter(__itt_caller id) {
1017#if USE_ITT_NOTIFY
1018  if (__itt_stack_callee_enter_ptr) {
1019    KMP_ITT_DEBUG_LOCK();
1020    __itt_stack_callee_enter(id);
1021    KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
1022  }
1023#endif
1024}
1025
1026void __kmp_itt_stack_callee_leave(__itt_caller id) {
1027#if USE_ITT_NOTIFY
1028  if (__itt_stack_callee_leave_ptr) {
1029    KMP_ITT_DEBUG_LOCK();
1030    __itt_stack_callee_leave(id);
1031    KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
1032  }
1033#endif
1034}
1035
1036#endif /* USE_ITT_BUILD */
1037