xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_itt.inl (revision 19261079b74319502c6ffa1249920079f0f69a72)
1#if USE_ITT_BUILD
2/*
3 * kmp_itt.inl -- Inline functions of ITT Notify.
4 */
5
6//===----------------------------------------------------------------------===//
7//
8// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9// See https://llvm.org/LICENSE.txt for license information.
10// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11//
12//===----------------------------------------------------------------------===//
13
14// Inline function definitions. This file should be included into kmp_itt.h file
15// for production build (to let compiler inline functions) or into kmp_itt.c
16// file for debug build (to reduce the number of files to recompile and save
17// build time).
18
19#include "kmp.h"
20#include "kmp_str.h"
21
#if KMP_ITT_DEBUG
// Debug tracing helpers. The two macros are meant to be used as a pair:
// KMP_ITT_DEBUG_LOCK() acquires __kmp_itt_debug_lock and KMP_ITT_DEBUG_PRINT()
// releases it after writing, so every LOCK must be followed by exactly one
// PRINT.
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
#define KMP_ITT_DEBUG_LOCK()                                                   \
  { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
// Writes "#<gtid>: " followed by the printf-style message to stderr, flushes
// stderr, and releases the lock taken by KMP_ITT_DEBUG_LOCK().
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  {                                                                            \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  }
#else
// Tracing disabled: both macros expand to nothing.
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG
37
38// Ensure that the functions are static if they're supposed to be being inlined.
39// Otherwise they cannot be used in more than one file, since there will be
40// multiple definitions.
#if KMP_DEBUG
// Debug build: no linkage specifier is added to the function definitions.
#define LINKAGE
#else
// Non-debug build: definitions are "static inline" so that the file can be
// included into several translation units.
#define LINKAGE static inline
#endif
46
47// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
48// this API to support user-defined synchronization primitives, but does not use
49// ZCA; it would be safe to turn this off until wider support becomes available.
#if USE_ITT_ZCA
#ifdef __INTEL_COMPILER
#if __INTEL_COMPILER >= 1200
// Drop any previous definitions of these two macros and route them to the
// compiler's notify intrinsics instead.
// NOTE(review): __itt_sync_acquired uses __notify_zc_intrinsic while
// __itt_sync_releasing uses __notify_intrinsic -- confirm the difference is
// intentional.
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif
#endif
#endif
62
// Serializes the one-time creation of metadata_domain and the string_handle_*
// objects performed by the __kmp_itt_metadata_* functions in this file.
static kmp_bootstrap_lock_t metadata_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
65
66/* Parallel region reporting.
67 * __kmp_itt_region_forking should be called by master thread of a team.
68   Exact moment of call does not matter, but it should be completed before any
69   thread of this team calls __kmp_itt_region_starting.
70 * __kmp_itt_region_starting should be called by each thread of a team just
71   before entering parallel region body.
72 * __kmp_itt_region_finished should be called by each thread of a team right
73   after returning from parallel region body.
74 * __kmp_itt_region_joined should be called by master thread of a team, after
75   all threads called __kmp_itt_region_finished.
76
77 Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
78 execute some more user code -- such a thread can execute tasks.
79
80 Note: The overhead of logging region_starting and region_finished in each
81 thread is too large, so these calls are not used. */
82
83LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
84#if USE_ITT_NOTIFY
85  kmp_team_t *team = __kmp_team_from_gtid(gtid);
86  if (team->t.t_active_level > 1) {
87    // The frame notifications are only supported for the outermost teams.
88    return;
89  }
90  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
91  if (loc) {
92    // Use the reserved_2 field to store the index to the region domain.
93    // Assume that reserved_2 contains zero initially.  Since zero is special
94    // value here, store the index into domain array increased by 1.
95    if (loc->reserved_2 == 0) {
96      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
97        int frm =
98            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
99        if (frm >= KMP_MAX_FRAME_DOMAINS) {
100          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
101          return; // loc->reserved_2 is still 0
102        }
103        // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
104        //    frm = loc->reserved_2 - 1;   // get value saved by other thread
105        //    for same loc
106        //} // AC: this block is to replace next unsynchronized line
107
108        // We need to save indexes for both region and barrier frames. We'll use
109        // loc->reserved_2 field but put region index to the low two bytes and
110        // barrier indexes to the high two bytes. It is OK because
111        // KMP_MAX_FRAME_DOMAINS = 512.
112        loc->reserved_2 |= (frm + 1); // save "new" value
113
114        // Transform compiler-generated region location into the format
115        // that the tools more or less standardized on:
116        //   "<func>$omp$parallel@[file:]<line>[:<col>]"
117        char *buff = NULL;
118        kmp_str_loc_t str_loc =
119            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
120        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
121                                team_size, str_loc.file, str_loc.line,
122                                str_loc.col);
123
124        __itt_suppress_push(__itt_suppress_memory_errors);
125        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
126        __itt_suppress_pop();
127
128        __kmp_str_free(&buff);
129        if (barriers) {
130          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
131            int frm = KMP_TEST_THEN_INC32(
132                &__kmp_barrier_domain_count); // get "old" value
133            if (frm >= KMP_MAX_FRAME_DOMAINS) {
134              KMP_TEST_THEN_DEC32(
135                  &__kmp_barrier_domain_count); // revert the count
136              return; // loc->reserved_2 is still 0
137            }
138            char *buff = NULL;
139            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
140                                    str_loc.file, str_loc.col);
141            __itt_suppress_push(__itt_suppress_memory_errors);
142            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
143            __itt_suppress_pop();
144            __kmp_str_free(&buff);
145            // Save the barrier frame index to the high two bytes.
146            loc->reserved_2 |= (frm + 1) << 16;
147          }
148        }
149        __kmp_str_loc_free(&str_loc);
150        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
151      }
152    } else { // Region domain exists for this location
153      // Check if team size was changed. Then create new region domain for this
154      // location
155      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
156      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
157          (__kmp_itt_region_team_size[frm] != team_size)) {
158        char *buff = NULL;
159        kmp_str_loc_t str_loc =
160            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
161        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
162                                team_size, str_loc.file, str_loc.line,
163                                str_loc.col);
164
165        __itt_suppress_push(__itt_suppress_memory_errors);
166        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
167        __itt_suppress_pop();
168
169        __kmp_str_free(&buff);
170        __kmp_str_loc_free(&str_loc);
171        __kmp_itt_region_team_size[frm] = team_size;
172        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
173      } else { // Team size was not changed. Use existing domain.
174        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
175      }
176    }
177    KMP_ITT_DEBUG_LOCK();
178    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
179                        loc->reserved_2, loc);
180  }
181#endif
182} // __kmp_itt_region_forking
183
184// -----------------------------------------------------------------------------
185LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
186                                    __itt_timestamp end, int imbalance,
187                                    ident_t *loc, int team_size, int region) {
188#if USE_ITT_NOTIFY
189  if (region) {
190    kmp_team_t *team = __kmp_team_from_gtid(gtid);
191    int serialized = (region == 2 ? 1 : 0);
192    if (team->t.t_active_level + serialized > 1) {
193      // The frame notifications are only supported for the outermost teams.
194      return;
195    }
196    // Check region domain has not been created before. It's index is saved in
197    // the low two bytes.
198    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
199      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
200        int frm =
201            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
202        if (frm >= KMP_MAX_FRAME_DOMAINS) {
203          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
204          return; // loc->reserved_2 is still 0
205        }
206
207        // We need to save indexes for both region and barrier frames. We'll use
208        // loc->reserved_2 field but put region index to the low two bytes and
209        // barrier indexes to the high two bytes. It is OK because
210        // KMP_MAX_FRAME_DOMAINS = 512.
211        loc->reserved_2 |= (frm + 1); // save "new" value
212
213        // Transform compiler-generated region location into the format
214        // that the tools more or less standardized on:
215        //   "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
216        char *buff = NULL;
217        kmp_str_loc_t str_loc =
218            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
219        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
220                                team_size, str_loc.file, str_loc.line,
221                                str_loc.col);
222
223        __itt_suppress_push(__itt_suppress_memory_errors);
224        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
225        __itt_suppress_pop();
226
227        __kmp_str_free(&buff);
228        __kmp_str_loc_free(&str_loc);
229        __kmp_itt_region_team_size[frm] = team_size;
230        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
231      }
232    } else { // Region domain exists for this location
233      // Check if team size was changed. Then create new region domain for this
234      // location
235      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
236      if (frm >= KMP_MAX_FRAME_DOMAINS)
237        return; // something's gone wrong, returning
238      if (__kmp_itt_region_team_size[frm] != team_size) {
239        char *buff = NULL;
240        kmp_str_loc_t str_loc =
241            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
242        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
243                                team_size, str_loc.file, str_loc.line,
244                                str_loc.col);
245
246        __itt_suppress_push(__itt_suppress_memory_errors);
247        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
248        __itt_suppress_pop();
249
250        __kmp_str_free(&buff);
251        __kmp_str_loc_free(&str_loc);
252        __kmp_itt_region_team_size[frm] = team_size;
253        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
254      } else { // Team size was not changed. Use existing domain.
255        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
256      }
257    }
258    KMP_ITT_DEBUG_LOCK();
259    KMP_ITT_DEBUG_PRINT(
260        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
261        gtid, loc->reserved_2, region, loc, begin, end);
262    return;
263  } else { // called for barrier reporting
264    if (loc) {
265      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
266        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
267          int frm = KMP_TEST_THEN_INC32(
268              &__kmp_barrier_domain_count); // get "old" value
269          if (frm >= KMP_MAX_FRAME_DOMAINS) {
270            KMP_TEST_THEN_DEC32(
271                &__kmp_barrier_domain_count); // revert the count
272            return; // loc->reserved_2 is still 0
273          }
274          // Save the barrier frame index to the high two bytes.
275          loc->reserved_2 |= (frm + 1) << 16; // save "new" value
276
277          // Transform compiler-generated region location into the format
278          // that the tools more or less standardized on:
279          //   "<func>$omp$frame@[file:]<line>[:<col>]"
280          kmp_str_loc_t str_loc =
281              __kmp_str_loc_init(loc->psource, /* init_fname */ false);
282          if (imbalance) {
283            char *buff_imb = NULL;
284            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
285                                        str_loc.func, team_size, str_loc.file,
286                                        str_loc.col);
287            __itt_suppress_push(__itt_suppress_memory_errors);
288            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
289            __itt_suppress_pop();
290            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
291                                  end);
292            __kmp_str_free(&buff_imb);
293          } else {
294            char *buff = NULL;
295            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
296                                    str_loc.file, str_loc.col);
297            __itt_suppress_push(__itt_suppress_memory_errors);
298            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
299            __itt_suppress_pop();
300            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
301                                  end);
302            __kmp_str_free(&buff);
303          }
304          __kmp_str_loc_free(&str_loc);
305        }
306      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
307        if (imbalance) {
308          __itt_frame_submit_v3(
309              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
310              begin, end);
311        } else {
312          __itt_frame_submit_v3(
313              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
314              begin, end);
315        }
316      }
317      KMP_ITT_DEBUG_LOCK();
318      KMP_ITT_DEBUG_PRINT(
319          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
320          loc->reserved_2, loc, begin, end);
321    }
322  }
323#endif
324} // __kmp_itt_frame_submit
325
326// -----------------------------------------------------------------------------
327LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
328                                          kmp_uint64 end, kmp_uint64 imbalance,
329                                          kmp_uint64 reduction) {
330#if USE_ITT_NOTIFY
331  if (metadata_domain == NULL) {
332    __kmp_acquire_bootstrap_lock(&metadata_lock);
333    if (metadata_domain == NULL) {
334      __itt_suppress_push(__itt_suppress_memory_errors);
335      metadata_domain = __itt_domain_create("OMP Metadata");
336      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
337      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
338      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
339      __itt_suppress_pop();
340    }
341    __kmp_release_bootstrap_lock(&metadata_lock);
342  }
343
344  kmp_uint64 imbalance_data[4];
345  imbalance_data[0] = begin;
346  imbalance_data[1] = end;
347  imbalance_data[2] = imbalance;
348  imbalance_data[3] = reduction;
349
350  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
351                     __itt_metadata_u64, 4, imbalance_data);
352#endif
353} // __kmp_itt_metadata_imbalance
354
355// -----------------------------------------------------------------------------
356LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
357                                     kmp_uint64 iterations, kmp_uint64 chunk) {
358#if USE_ITT_NOTIFY
359  if (metadata_domain == NULL) {
360    __kmp_acquire_bootstrap_lock(&metadata_lock);
361    if (metadata_domain == NULL) {
362      __itt_suppress_push(__itt_suppress_memory_errors);
363      metadata_domain = __itt_domain_create("OMP Metadata");
364      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
365      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
366      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
367      __itt_suppress_pop();
368    }
369    __kmp_release_bootstrap_lock(&metadata_lock);
370  }
371
372  // Parse line and column from psource string: ";file;func;line;col;;"
373  KMP_DEBUG_ASSERT(loc->psource);
374  kmp_uint64 loop_data[5];
375  int line, col;
376  __kmp_str_loc_numbers(loc->psource, &line, &col);
377  loop_data[0] = line;
378  loop_data[1] = col;
379  loop_data[2] = sched_type;
380  loop_data[3] = iterations;
381  loop_data[4] = chunk;
382
383  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
384                     __itt_metadata_u64, 5, loop_data);
385#endif
386} // __kmp_itt_metadata_loop
387
388// -----------------------------------------------------------------------------
389LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
390#if USE_ITT_NOTIFY
391  if (metadata_domain == NULL) {
392    __kmp_acquire_bootstrap_lock(&metadata_lock);
393    if (metadata_domain == NULL) {
394      __itt_suppress_push(__itt_suppress_memory_errors);
395      metadata_domain = __itt_domain_create("OMP Metadata");
396      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
397      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
398      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
399      __itt_suppress_pop();
400    }
401    __kmp_release_bootstrap_lock(&metadata_lock);
402  }
403
404  int line, col;
405  __kmp_str_loc_numbers(loc->psource, &line, &col);
406  kmp_uint64 single_data[2];
407  single_data[0] = line;
408  single_data[1] = col;
409
410  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
411                     __itt_metadata_u64, 2, single_data);
412#endif
413} // __kmp_itt_metadata_single
414
415// -----------------------------------------------------------------------------
// Hook for "a thread is about to enter the parallel region body". The body is
// intentionally empty: per the parallel region reporting notes earlier in this
// file, logging this event in every thread is too expensive.
LINKAGE void __kmp_itt_region_starting(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_starting
420
421// -----------------------------------------------------------------------------
// Hook for "a thread has returned from the parallel region body". The body is
// intentionally empty: per the parallel region reporting notes earlier in this
// file, logging this event in every thread is too expensive.
LINKAGE void __kmp_itt_region_finished(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_finished
426
427// ----------------------------------------------------------------------------
428LINKAGE void __kmp_itt_region_joined(int gtid) {
429#if USE_ITT_NOTIFY
430  kmp_team_t *team = __kmp_team_from_gtid(gtid);
431  if (team->t.t_active_level > 1) {
432    // The frame notifications are only supported for the outermost teams.
433    return;
434  }
435  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
436  if (loc && loc->reserved_2) {
437    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
438    if (frm < KMP_MAX_FRAME_DOMAINS) {
439      KMP_ITT_DEBUG_LOCK();
440      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
441      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
442                          loc->reserved_2, loc);
443    }
444  }
445#endif
446} // __kmp_itt_region_joined
447
448/* Barriers reporting.
449
450   A barrier consists of two phases:
451   1. Gather -- master waits for arriving of all the worker threads; each
452      worker thread registers arrival and goes further.
453   2. Release -- each worker threads waits until master lets it go; master lets
454      worker threads go.
455
456   Function should be called by each thread:
457   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
458   * __kmp_itt_barrier_middle()   -- between gather and release phases.
459   * __kmp_itt_barrier_finished() -- after release phase.
460
461   Note: Call __kmp_itt_barrier_object() before call to
462   __kmp_itt_barrier_starting() and save result in local variable.
463   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
464   would return itt sync object for the next barrier!
465
   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
467   does not have barrier object or barrier data structure. Barrier is just a
468   counter in team and thread structures. We could use an address of team
469   structure as a barrier sync object, but ITT wants different objects for
   different barriers (even within the same team). So let us use team address
471   as barrier sync object for the first barrier, then increase it by one for the
472   next barrier, and so on (but wrap it not to use addresses outside of team
473   structure). */
474
// Compute the ITT sync object (a synthetic address inside the team structure)
// that identifies a barrier, and optionally register its name with ITT.
//
//   gtid     - global thread id of the calling thread.
//   bt       - barrier type; indexes team->t.t_bar and is folded into the
//              returned address.
//   set_name - if non-zero, __itt_sync_create() is called for the object with
//              a type string chosen from bt and the location flags.
//   delta    - offset added to the current barrier counter (see below).
//
// Returns NULL when USE_ITT_NOTIFY is off or the thread's team pointer is
// NULL.
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
                               int delta // 0 (current barrier) is default
                               // value; specify -1 to get previous
                               // barrier.
                               ) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
  // be NULL. This "if" helps to avoid crash. However, this is not complete
  // solution, and reporting fork/join barriers to ITT should be revisited.

  if (team != NULL) {
    // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
    kmp_uint64 counter =
        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
    // barriers of different types do not have the same ids.
    // The first assertion is a must (we would have zero divide otherwise).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
    // The second one is a stronger condition: make sure we have room for at
    // least two different ids (for each barrier type).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
    // object = team address + (counter wrapped to the number of
    // bs_last_barrier-sized slots that fit in kmp_team_t) * bs_last_barrier
    // + bt, so the result stays within the team structure.
    object = reinterpret_cast<void *>(
        (kmp_uintptr_t)(team) +
        (kmp_uintptr_t)counter % (sizeof(kmp_team_t) / bs_last_barrier) *
            bs_last_barrier +
        bt);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                        counter, object);

    if (set_name) {
      ident_t const *loc = NULL;
      char const *src = NULL;
      // Default name; kept for barrier types not handled by the switch below.
      char const *type = "OMP Barrier";
      switch (bt) {
      case bs_plain_barrier: {
        // For plain barrier compiler calls __kmpc_barrier() function, which
        // saves location in thr->th.th_ident.
        loc = thr->th.th_ident;
        // Get the barrier type from flags provided by compiler.
        kmp_int32 expl = 0;
        kmp_uint32 impl = 0;
        if (loc != NULL) {
          src = loc->psource;
          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
        }
        // impl can only be non-zero when loc != NULL, so loc is safe to
        // dereference inside this branch.
        if (impl) {
          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
          case KMP_IDENT_BARRIER_IMPL_FOR: {
            type = "OMP For Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
            type = "OMP Sections Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
            type = "OMP Single Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
            type = "OMP Workshare Barrier";
          } break;
          default: {
            // Unknown implicit barrier kind: use a generic name and trip the
            // debug assertion.
            type = "OMP Implicit Barrier";
            KMP_DEBUG_ASSERT(0);
          }
          }
        } else if (expl) {
          type = "OMP Explicit Barrier";
        }
      } break;
      case bs_forkjoin_barrier: {
        // In case of fork/join barrier we cannot read thr->th.th_ident, because
        // it contains location of last passed construct (while join barrier is
        // not such one). Use th_ident of master thread instead --
        // __kmp_join_call() called by the master thread saves location.
        // NOTE(review): the original comment said "we can read"; "cannot" is
        // presumably what was meant -- confirm.
        //
        // AC: cannot read from master because __kmp_join_call may be not called
        //    yet, so we read the location from team. This is the same location.
        //    And team is valid at the enter to join barrier where this happens.
        loc = team->t.t_ident;
        if (loc != NULL) {
          src = loc->psource;
        }
        type = "OMP Join Barrier";
      } break;
      }
      KMP_ITT_DEBUG_LOCK();
      __itt_sync_create(object, type, src, __itt_attr_barrier);
      KMP_ITT_DEBUG_PRINT(
          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
          type, src);
    }
  }
#endif
  return object;
} // __kmp_itt_barrier_object
576
577// -----------------------------------------------------------------------------
// Called by each thread before it arrives at the gather phase of a barrier.
// Non-master threads first report "releasing" for the barrier object; every
// thread then reports "prepare".
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  const bool is_master = KMP_MASTER_GTID(gtid);
  if (is_master == false) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }

  // Reported by master and workers alike.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting
590
591// -----------------------------------------------------------------------------
// Called between the gather and release phases of a barrier. Only the master
// thread reports anything: "acquired" followed by "releasing" for the barrier
// object.
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  // Workers have nothing to report at this point.
  if (!KMP_MASTER_GTID(gtid))
    return;

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_releasing(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
#endif
} // __kmp_itt_barrier_middle
605
606// -----------------------------------------------------------------------------
// Called after the release phase of a barrier. Only non-master threads report
// anything: "acquired" for the barrier object.
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  // The master has nothing to report at this point.
  if (KMP_MASTER_GTID(gtid))
    return;

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
#endif
} // __kmp_itt_barrier_finished
617
618/* Taskwait reporting.
   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
620   does not have taskwait structure, so we need to construct something. */
621
622void *__kmp_itt_taskwait_object(int gtid) {
623  void *object = NULL;
624#if USE_ITT_NOTIFY
625  if (UNLIKELY(__itt_sync_create_ptr)) {
626    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
627    kmp_taskdata_t *taskdata = thread->th.th_current_task;
628    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
629                                      taskdata->td_taskwait_counter %
630                                          sizeof(kmp_taskdata_t));
631  }
632#endif
633  return object;
634} // __kmp_itt_taskwait_object
635
// Report the start of a taskwait: name the sync object "OMP Taskwait" (with
// the source string of the current task's taskwait location, if any) and then
// report "prepare" for it.
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  ident_t const *loc =
      __kmp_thread_from_gtid(gtid)->th.th_current_task->td_taskwait_ident;
  char const *src = NULL;
  if (loc != NULL) {
    src = loc->psource;
  }

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting
651
// Report the end of a taskwait: "acquired" for the sync object, followed by
// its destruction. gtid is not used.
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  // Each KMP_ITT_DEBUG_LOCK() is paired with the KMP_ITT_DEBUG_PRINT() that
  // follows the ITT call.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished
662
663/* Task reporting.
664   Only those tasks are reported which are executed by a thread spinning at
   barrier (or taskwait). The sync object passed to the function must be the
   barrier or taskwait the thread is waiting at. */
667
// Report that the thread starts executing a task while waiting on `object`:
// the pending wait on the sync object is reported as cancelled. Nothing is
// reported when object is NULL.
void __kmp_itt_task_starting(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  if (UNLIKELY(object != NULL)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting
679
680// -----------------------------------------------------------------------------
// Report that the thread finished a task and goes back to waiting on `object`
// by reporting "prepare" for the sync object.
// NOTE(review): unlike __kmp_itt_task_starting, object is not checked for NULL
// here -- presumably callers guarantee it; confirm.
void __kmp_itt_task_finished(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished
690
691/* Lock reporting.
692 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
693   operation (set/unset). It is not a real event shown to the user but just
694   setting a name for synchronization object. `lock' is an address of sync
695   object, the same address should be used in all subsequent calls.
696 * __kmp_itt_lock_acquiring() should be called before setting the lock.
697 * __kmp_itt_lock_acquired() should be called after setting the lock.
 * __kmp_itt_lock_releasing() should be called before unsetting the lock.
699 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
700   for the lock.
701 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
702   operation. After __kmp_itt_lock_destroyed() all the references to the same
703   address will be considered as another sync object, not related with the
704   original one.  */
705
706#if KMP_USE_DYNAMIC_LOCK
707// Takes location information directly
708__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
709                                       const ident_t *loc) {
710#if USE_ITT_NOTIFY
711  if (__itt_sync_create_ptr) {
712    char const *src = (loc == NULL ? NULL : loc->psource);
713    KMP_ITT_DEBUG_LOCK();
714    __itt_sync_create(lock, type, src, 0);
715    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
716                        src);
717  }
718#endif
719}
720#else // KMP_USE_DYNAMIC_LOCK
721// Internal guts -- common code for locks and critical sections, do not call
722// directly.
723__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
724#if USE_ITT_NOTIFY
725  if (__itt_sync_create_ptr) {
726    ident_t const *loc = NULL;
727    if (__kmp_get_user_lock_location_ != NULL)
728      loc = __kmp_get_user_lock_location_((lock));
729    char const *src = (loc == NULL ? NULL : loc->psource);
730    KMP_ITT_DEBUG_LOCK();
731    __itt_sync_create(lock, type, src, 0);
732    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
733                        src);
734  }
735#endif
736} // ___kmp_itt_lock_init
737#endif // KMP_USE_DYNAMIC_LOCK
738
739// Internal guts -- common code for locks and critical sections, do not call
740// directly.
741__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
742#if USE_ITT_NOTIFY
743  KMP_ITT_DEBUG_LOCK();
744  __itt_sync_destroy(lock);
745  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
746#endif
747} // ___kmp_itt_lock_fini
748
749// -----------------------------------------------------------------------------
750#if KMP_USE_DYNAMIC_LOCK
751void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
752  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
753}
754#else
755void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
756  ___kmp_itt_lock_init(lock, "OMP Lock");
757} // __kmp_itt_lock_creating
758#endif
759
760void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
761#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
762  // postpone lock object access
763  if (__itt_sync_prepare_ptr) {
764    if (KMP_EXTRACT_D_TAG(lock) == 0) {
765      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
766      __itt_sync_prepare(ilk->lock);
767    } else {
768      __itt_sync_prepare(lock);
769    }
770  }
771#else
772  __itt_sync_prepare(lock);
773#endif
774} // __kmp_itt_lock_acquiring
775
776void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
777#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
778  // postpone lock object access
779  if (__itt_sync_acquired_ptr) {
780    if (KMP_EXTRACT_D_TAG(lock) == 0) {
781      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
782      __itt_sync_acquired(ilk->lock);
783    } else {
784      __itt_sync_acquired(lock);
785    }
786  }
787#else
788  __itt_sync_acquired(lock);
789#endif
790} // __kmp_itt_lock_acquired
791
792void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
793#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
794  if (__itt_sync_releasing_ptr) {
795    if (KMP_EXTRACT_D_TAG(lock) == 0) {
796      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
797      __itt_sync_releasing(ilk->lock);
798    } else {
799      __itt_sync_releasing(lock);
800    }
801  }
802#else
803  __itt_sync_releasing(lock);
804#endif
805} // __kmp_itt_lock_releasing
806
807void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
808#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
809  if (__itt_sync_cancel_ptr) {
810    if (KMP_EXTRACT_D_TAG(lock) == 0) {
811      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
812      __itt_sync_cancel(ilk->lock);
813    } else {
814      __itt_sync_cancel(lock);
815    }
816  }
817#else
818  __itt_sync_cancel(lock);
819#endif
820} // __kmp_itt_lock_cancelled
821
822void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
823  ___kmp_itt_lock_fini(lock, "OMP Lock");
824} // __kmp_itt_lock_destroyed
825
826/* Critical reporting.
827   Critical sections are treated exactly as locks (but have different object
828   type). */
829#if KMP_USE_DYNAMIC_LOCK
830void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
831  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
832}
833#else
834void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
835  ___kmp_itt_lock_init(lock, "OMP Critical");
836} // __kmp_itt_critical_creating
837#endif
838
839void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
840  __itt_sync_prepare(lock);
841} // __kmp_itt_critical_acquiring
842
843void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
844  __itt_sync_acquired(lock);
845} // __kmp_itt_critical_acquired
846
847void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
848  __itt_sync_releasing(lock);
849} // __kmp_itt_critical_releasing
850
851void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
852  ___kmp_itt_lock_fini(lock, "OMP Critical");
853} // __kmp_itt_critical_destroyed
854
855/* Single reporting. */
856
// Creates an ITT mark named "OMP Single-<source location>" for the thread
// identified by `gtid`, stores it in the thread's th_itt_mark_single field and
// sets the mark. The stored mark is what __kmp_itt_single_end() turns off.
// Runs only when the ITT mark_create entry point is available or ITT debugging
// is compiled in.
void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
    ident_t *loc = thr->th.th_ident;
    // The thread may have no location recorded. Passing NULL for a "%s"
    // conversion is undefined behavior, so substitute an explicit placeholder
    // (the text glibc happens to print for a NULL string).
    char const *src =
        (loc == NULL || loc->psource == NULL) ? "(null)" : loc->psource;
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    // KMP_ITT_DEBUG_PRINT released the debug lock; take it again for the
    // second traced call.
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start
878
// Turns off the ITT mark stored in the th_itt_mark_single field of the thread
// identified by `gtid`.
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_mark_type single_mark = thr->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(single_mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", single_mark);
#endif
} // __kmp_itt_single_end
887
888/* Ordered reporting.
889 * __kmp_itt_ordered_init is called by each thread *before* first using sync
890   object. ITT team would like it to be called once, but it requires extra
891   synchronization.
892 * __kmp_itt_ordered_prep is called when thread is going to enter ordered
893   section (before synchronization).
894 * __kmp_itt_ordered_start is called just before entering user code (after
895   synchronization).
896 * __kmp_itt_ordered_end is called after returning from user code.
897
898 Sync object is th->th.th_dispatch->th_dispatch_sh_current.
899 Events are not generated in case of serialized team. */
900
// Registers the thread's current shared dispatch buffer
// (th_dispatch->th_dispatch_sh_current) with ITT as an "OMP Ordered" sync
// object, named after the thread's current source location when one is set.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  ident_t const *loc = thr->th.th_ident;
  char const *src = NULL;
  if (loc != NULL)
    src = loc->psource;
  __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered",
                    src, 0);
#endif
} // __kmp_itt_ordered_init
912
// Sends the ITT "prepare" event on the ordered sync object of the thread
// identified by `gtid`. No event is sent for a serialized team.
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_prepare(thr->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_prep
924
// Sends the ITT "acquired" event on the ordered sync object of the thread
// identified by `gtid`. No event is sent for a serialized team.
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_acquired(thr->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_start
936
// Sends the ITT "releasing" event on the ordered sync object of the thread
// identified by `gtid`. No event is sent for a serialized team.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized)
    return;
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_releasing(thr->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_end
948
949/* Threads reporting. */
950
// Forwards to the ITT thread-ignore notification (__itt_thr_ignore).
void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore
954
// Gives the calling thread an ITT name of the form "OMP Master Thread #<gtid>"
// or "OMP Worker Thread #<gtid>", chosen by KMP_MASTER_GTID(gtid).
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_thr_name_set_ptr)
    return;
  kmp_str_buf_t thr_name;
  __kmp_str_buf_init(&thr_name);
  if (!KMP_MASTER_GTID(gtid)) {
    __kmp_str_buf_print(&thr_name, "OMP Worker Thread #%d", gtid);
  } else {
    __kmp_str_buf_print(&thr_name, "OMP Master Thread #%d", gtid);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_thr_name_set(thr_name.str, thr_name.used);
  KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", thr_name.str);
  __kmp_str_buf_free(&thr_name);
#endif
} // __kmp_itt_thread_name
972
973/* System object reporting.
974   ITT catches operations with system sync objects (like Windows* OS on IA-32
975   architecture API critical sections and events). We only need to specify
976   name ("OMP Scheduler") for the object to let ITT know it is an object used
977   by OpenMP RTL for internal purposes. */
978
// Registers a system sync object with ITT under the type "OMP Scheduler".
//   object -- address of the sync object.
//   name   -- object name passed through to ITT.
void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK(); // Released again inside KMP_ITT_DEBUG_PRINT.
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif
} // __kmp_itt_system_object_created
987
988/* Stack stitching api.
   Master calls "create" and puts the stitching id into team structure.
990   Workers read the stitching id and call "enter" / "leave" api.
991   Master calls "destroy" at the end of the parallel region. */
992
993__itt_caller __kmp_itt_stack_caller_create() {
994#if USE_ITT_NOTIFY
995  if (!__itt_stack_caller_create_ptr)
996    return NULL;
997  KMP_ITT_DEBUG_LOCK();
998  __itt_caller id = __itt_stack_caller_create();
999  KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
1000  return id;
1001#endif
1002  return NULL;
1003}
1004
1005void __kmp_itt_stack_caller_destroy(__itt_caller id) {
1006#if USE_ITT_NOTIFY
1007  if (__itt_stack_caller_destroy_ptr) {
1008    KMP_ITT_DEBUG_LOCK();
1009    __itt_stack_caller_destroy(id);
1010    KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
1011  }
1012#endif
1013}
1014
1015void __kmp_itt_stack_callee_enter(__itt_caller id) {
1016#if USE_ITT_NOTIFY
1017  if (__itt_stack_callee_enter_ptr) {
1018    KMP_ITT_DEBUG_LOCK();
1019    __itt_stack_callee_enter(id);
1020    KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
1021  }
1022#endif
1023}
1024
1025void __kmp_itt_stack_callee_leave(__itt_caller id) {
1026#if USE_ITT_NOTIFY
1027  if (__itt_stack_callee_leave_ptr) {
1028    KMP_ITT_DEBUG_LOCK();
1029    __itt_stack_callee_leave(id);
1030    KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
1031  }
1032#endif
1033}
1034
1035#endif /* USE_ITT_BUILD */
1036