#if USE_ITT_BUILD
/*
 * kmp_itt.inl -- Inline functions of ITT Notify.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Inline function definitions. This file should be included into kmp_itt.h file
// for production build (to let compiler inline functions) or into kmp_itt.c
// file for debug build (to reduce the number of files to recompile and save
// build time).

#include "kmp.h"
#include "kmp_str.h"

// Debug tracing helpers. The two macros form a pair: KMP_ITT_DEBUG_LOCK()
// acquires __kmp_itt_debug_lock and KMP_ITT_DEBUG_PRINT() releases it after
// writing to stderr, so every PRINT must be preceded by exactly one LOCK
// (typically with the traced ITT call in between). Both expand to nothing when
// KMP_ITT_DEBUG is off, in which case the PRINT arguments are not evaluated.
#if KMP_ITT_DEBUG
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
#define KMP_ITT_DEBUG_LOCK()                                                   \
  { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
// Prints "#<gtid>: " followed by the printf-style message, flushes stderr and
// releases the lock taken by KMP_ITT_DEBUG_LOCK().
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  {                                                                            \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  }
#else
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG

// Ensure that the functions are static if they're supposed to be being inlined.
// Otherwise they cannot be used in more than one file, since there will be
// multiple definitions.
#if KMP_DEBUG
#define LINKAGE
#else
#define LINKAGE static inline
#endif

// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
// this API to support user-defined synchronization primitives, but does not use
// ZCA; it would be safe to turn this off until wider support becomes available.
50#if USE_ITT_ZCA 51#ifdef __INTEL_COMPILER 52#if __INTEL_COMPILER >= 1200 53#undef __itt_sync_acquired 54#undef __itt_sync_releasing 55#define __itt_sync_acquired(addr) \ 56 __notify_zc_intrinsic((char *)"sync_acquired", addr) 57#define __itt_sync_releasing(addr) \ 58 __notify_intrinsic((char *)"sync_releasing", addr) 59#endif 60#endif 61#endif 62 63static kmp_bootstrap_lock_t metadata_lock = 64 KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock); 65 66#if USE_ITT_NOTIFY 67LINKAGE size_t __kmp_itthash_hash(kmp_intptr_t addr, size_t hsize) { 68 return ((addr >> 6) ^ (addr >> 2)) % hsize; 69} 70LINKAGE kmp_itthash_entry *__kmp_itthash_find(kmp_info_t *thread, 71 kmp_itthash_t *h, ident_t *loc, 72 int team_size) { 73 kmp_itthash_entry_t *entry; 74 size_t bucket = __kmp_itthash_hash((kmp_intptr_t)loc, KMP_MAX_FRAME_DOMAINS); 75 for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket) 76 if (entry->loc == loc && entry->team_size == team_size) 77 break; 78 79 if (entry == NULL) { 80 // two foreign threads could report frames concurrently 81 int cnt = KMP_TEST_THEN_INC32(&h->count); 82 if (cnt >= KMP_MAX_FRAME_DOMAINS) { 83 KMP_TEST_THEN_DEC32(&h->count); // revert the count 84 return entry; // too many entries 85 } 86 // create new entry 87 entry = (kmp_itthash_entry_t *)__kmp_thread_malloc( 88 thread, sizeof(kmp_itthash_entry_t)); 89 entry->loc = loc; 90 entry->team_size = team_size; 91 entry->d = NULL; 92 entry->next_in_bucket = h->buckets[bucket]; 93 while (!KMP_COMPARE_AND_STORE_PTR(&h->buckets[bucket], 94 entry->next_in_bucket, entry)) { 95 KMP_CPU_PAUSE(); 96 entry->next_in_bucket = h->buckets[bucket]; 97 } 98 } 99#if KMP_DEBUG 100 else { 101 // check the contents of the location info is unique 102 KMP_DEBUG_ASSERT(loc->psource == entry->loc->psource); 103 } 104#endif 105 return entry; 106} 107#endif 108 109/* Parallel region reporting. 110 * __kmp_itt_region_forking should be called by primary thread of a team. 
111 Exact moment of call does not matter, but it should be completed before any 112 thread of this team calls __kmp_itt_region_starting. 113 * __kmp_itt_region_starting should be called by each thread of a team just 114 before entering parallel region body. 115 * __kmp_itt_region_finished should be called by each thread of a team right 116 after returning from parallel region body. 117 * __kmp_itt_region_joined should be called by primary thread of a team, after 118 all threads called __kmp_itt_region_finished. 119 120 Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can 121 execute some more user code -- such a thread can execute tasks. 122 123 Note: The overhead of logging region_starting and region_finished in each 124 thread is too large, so these calls are not used. */ 125 126LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) { 127#if USE_ITT_NOTIFY 128 kmp_team_t *team = __kmp_team_from_gtid(gtid); 129 if (team->t.t_active_level > 1) { 130 // The frame notifications are only supported for the outermost teams. 
131 return; 132 } 133 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 134 ident_t *loc = th->th.th_ident; 135 if (!loc) { 136 // no sense to report a region without location info 137 return; 138 } 139 kmp_itthash_entry *e; 140 e = __kmp_itthash_find(th, &__kmp_itt_region_domains, loc, team_size); 141 if (e == NULL) 142 return; // too many entries in the hash 143 if (e->d == NULL) { 144 // Transform compiler-generated region location into the format 145 // that the tools more or less standardized on: 146 // "<func>$omp$parallel@[file:]<line>[:<col>]" 147 char *buff = NULL; 148 kmp_str_loc_t str_loc = 149 __kmp_str_loc_init(loc->psource, /* init_fname */ false); 150 buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, 151 team_size, str_loc.file, str_loc.line, str_loc.col); 152 153 __itt_suppress_push(__itt_suppress_memory_errors); 154 e->d = __itt_domain_create(buff); 155 KMP_ASSERT(e->d != NULL); 156 __itt_suppress_pop(); 157 158 __kmp_str_free(&buff); 159 if (barriers) { 160 kmp_itthash_entry *e; 161 e = __kmp_itthash_find(th, &__kmp_itt_barrier_domains, loc, 0); 162 if (e != NULL) { 163 KMP_DEBUG_ASSERT(e->d == NULL); 164 char *buff = NULL; 165 buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func, 166 str_loc.file, str_loc.line); 167 __itt_suppress_push(__itt_suppress_memory_errors); 168 e->d = __itt_domain_create(buff); 169 KMP_ASSERT(e->d != NULL); 170 __itt_suppress_pop(); 171 __kmp_str_free(&buff); 172 } 173 } 174 __kmp_str_loc_free(&str_loc); 175 } 176 __itt_frame_begin_v3(e->d, NULL); 177 KMP_ITT_DEBUG_LOCK(); 178 KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, domain=%p, loc:%p\n", gtid, e->d, 179 loc); 180#endif 181} // __kmp_itt_region_forking 182 183// ----------------------------------------------------------------------------- 184LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin, 185 __itt_timestamp end, int imbalance, 186 ident_t *loc, int team_size, int region) { 187#if USE_ITT_NOTIFY 188 if (!loc) { 189 // no sense 
to report a region without location info 190 return; 191 } 192 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 193 if (region) { 194 kmp_team_t *team = __kmp_team_from_gtid(gtid); 195 int serialized = (region == 2 ? 1 : 0); 196 if (team->t.t_active_level + serialized > 1) { 197 // The frame notifications are only supported for the outermost teams. 198 return; 199 } 200 // Check region domain has not been created before. 201 kmp_itthash_entry *e; 202 e = __kmp_itthash_find(th, &__kmp_itt_region_domains, loc, team_size); 203 if (e == NULL) 204 return; // too many entries in the hash 205 if (e->d == NULL) { // new entry, need to calculate domain 206 // Transform compiler-generated region location into the format 207 // that the tools more or less standardized on: 208 // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]" 209 char *buff = NULL; 210 kmp_str_loc_t str_loc = 211 __kmp_str_loc_init(loc->psource, /* init_fname */ false); 212 buff = 213 __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, 214 team_size, str_loc.file, str_loc.line, str_loc.col); 215 __itt_suppress_push(__itt_suppress_memory_errors); 216 e->d = __itt_domain_create(buff); 217 KMP_ASSERT(e->d != NULL); 218 __itt_suppress_pop(); 219 220 __kmp_str_free(&buff); 221 __kmp_str_loc_free(&str_loc); 222 } 223 __itt_frame_submit_v3(e->d, NULL, begin, end); 224 KMP_ITT_DEBUG_LOCK(); 225 KMP_ITT_DEBUG_PRINT( 226 "[reg sub] gtid=%d, domain=%p, region:%d, loc:%p, beg:%llu, end:%llu\n", 227 gtid, e->d, region, loc, begin, end); 228 return; 229 } else { // called for barrier reporting 230 kmp_itthash_entry *e; 231 e = __kmp_itthash_find(th, &__kmp_itt_barrier_domains, loc, 0); 232 if (e == NULL) 233 return; // too many entries in the hash 234 if (e->d == NULL) { // new entry, need to calculate domain 235 // Transform compiler-generated region location into the format 236 // that the tools more or less standardized on: 237 // "<func>$omp$frame@[file:]<line>[:<col>]" 238 kmp_str_loc_t str_loc = 239 
__kmp_str_loc_init(loc->psource, /* init_fname */ false); 240 char *buff = NULL; 241 if (imbalance) { 242 buff = 243 __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", str_loc.func, 244 team_size, str_loc.file, str_loc.line); 245 } else { 246 buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func, 247 str_loc.file, str_loc.line); 248 } 249 __itt_suppress_push(__itt_suppress_memory_errors); 250 e->d = __itt_domain_create(buff); 251 KMP_ASSERT(e->d != NULL); 252 __itt_suppress_pop(); 253 __kmp_str_free(&buff); 254 __kmp_str_loc_free(&str_loc); 255 } 256 __itt_frame_submit_v3(e->d, NULL, begin, end); 257 KMP_ITT_DEBUG_LOCK(); 258 KMP_ITT_DEBUG_PRINT( 259 "[frm sub] gtid=%d, domain=%p, loc:%p, beg:%llu, end:%llu\n", gtid, 260 e->d, loc, begin, end); 261 } 262#endif 263} // __kmp_itt_frame_submit 264 265// ----------------------------------------------------------------------------- 266LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin, 267 kmp_uint64 end, kmp_uint64 imbalance, 268 kmp_uint64 reduction) { 269#if USE_ITT_NOTIFY 270 if (metadata_domain == NULL) { 271 __kmp_acquire_bootstrap_lock(&metadata_lock); 272 if (metadata_domain == NULL) { 273 __itt_suppress_push(__itt_suppress_memory_errors); 274 metadata_domain = __itt_domain_create("OMP Metadata"); 275 string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance"); 276 string_handle_loop = __itt_string_handle_create("omp_metadata_loop"); 277 string_handle_sngl = __itt_string_handle_create("omp_metadata_single"); 278 __itt_suppress_pop(); 279 } 280 __kmp_release_bootstrap_lock(&metadata_lock); 281 } 282 283 kmp_uint64 imbalance_data[4]; 284 imbalance_data[0] = begin; 285 imbalance_data[1] = end; 286 imbalance_data[2] = imbalance; 287 imbalance_data[3] = reduction; 288 289 __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl, 290 __itt_metadata_u64, 4, imbalance_data); 291#endif 292} // __kmp_itt_metadata_imbalance 293 294// 
----------------------------------------------------------------------------- 295LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type, 296 kmp_uint64 iterations, kmp_uint64 chunk) { 297#if USE_ITT_NOTIFY 298 if (metadata_domain == NULL) { 299 __kmp_acquire_bootstrap_lock(&metadata_lock); 300 if (metadata_domain == NULL) { 301 __itt_suppress_push(__itt_suppress_memory_errors); 302 metadata_domain = __itt_domain_create("OMP Metadata"); 303 string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance"); 304 string_handle_loop = __itt_string_handle_create("omp_metadata_loop"); 305 string_handle_sngl = __itt_string_handle_create("omp_metadata_single"); 306 __itt_suppress_pop(); 307 } 308 __kmp_release_bootstrap_lock(&metadata_lock); 309 } 310 311 // Parse line and column from psource string: ";file;func;line;col;;" 312 KMP_DEBUG_ASSERT(loc->psource); 313 kmp_uint64 loop_data[5]; 314 int line, col; 315 __kmp_str_loc_numbers(loc->psource, &line, &col); 316 loop_data[0] = line; 317 loop_data[1] = col; 318 loop_data[2] = sched_type; 319 loop_data[3] = iterations; 320 loop_data[4] = chunk; 321 322 __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop, 323 __itt_metadata_u64, 5, loop_data); 324#endif 325} // __kmp_itt_metadata_loop 326 327// ----------------------------------------------------------------------------- 328LINKAGE void __kmp_itt_metadata_single(ident_t *loc) { 329#if USE_ITT_NOTIFY 330 if (metadata_domain == NULL) { 331 __kmp_acquire_bootstrap_lock(&metadata_lock); 332 if (metadata_domain == NULL) { 333 __itt_suppress_push(__itt_suppress_memory_errors); 334 metadata_domain = __itt_domain_create("OMP Metadata"); 335 string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance"); 336 string_handle_loop = __itt_string_handle_create("omp_metadata_loop"); 337 string_handle_sngl = __itt_string_handle_create("omp_metadata_single"); 338 __itt_suppress_pop(); 339 } 340 __kmp_release_bootstrap_lock(&metadata_lock); 
341 } 342 343 int line, col; 344 __kmp_str_loc_numbers(loc->psource, &line, &col); 345 kmp_uint64 single_data[2]; 346 single_data[0] = line; 347 single_data[1] = col; 348 349 __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl, 350 __itt_metadata_u64, 2, single_data); 351#endif 352} // __kmp_itt_metadata_single 353 354// ----------------------------------------------------------------------------- 355LINKAGE void __kmp_itt_region_starting(int gtid) { 356#if USE_ITT_NOTIFY 357#endif 358} // __kmp_itt_region_starting 359 360// ----------------------------------------------------------------------------- 361LINKAGE void __kmp_itt_region_finished(int gtid) { 362#if USE_ITT_NOTIFY 363#endif 364} // __kmp_itt_region_finished 365 366// ---------------------------------------------------------------------------- 367LINKAGE void __kmp_itt_region_joined(int gtid) { 368#if USE_ITT_NOTIFY 369 kmp_team_t *team = __kmp_team_from_gtid(gtid); 370 if (team->t.t_active_level > 1) { 371 // The frame notifications are only supported for the outermost teams. 372 return; 373 } 374 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 375 ident_t *loc = th->th.th_ident; 376 if (loc) { 377 kmp_itthash_entry *e = __kmp_itthash_find(th, &__kmp_itt_region_domains, 378 loc, th->th.th_team_nproc); 379 if (e == NULL) 380 return; // too many entries in the hash 381 KMP_DEBUG_ASSERT(e->d); 382 KMP_ITT_DEBUG_LOCK(); 383 __itt_frame_end_v3(e->d, NULL); 384 KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, domain=%p, loc:%p\n", gtid, e->d, 385 loc); 386 } 387#endif 388} // __kmp_itt_region_joined 389 390/* Barriers reporting. 391 392 A barrier consists of two phases: 393 1. Gather -- primary thread waits for all worker threads to arrive; each 394 worker thread registers arrival and goes further. 395 2. Release -- each worker thread waits until primary thread lets it go; 396 primary thread lets worker threads go. 
397 398 Function should be called by each thread: 399 * __kmp_itt_barrier_starting() -- before arriving to the gather phase. 400 * __kmp_itt_barrier_middle() -- between gather and release phases. 401 * __kmp_itt_barrier_finished() -- after release phase. 402 403 Note: Call __kmp_itt_barrier_object() before call to 404 __kmp_itt_barrier_starting() and save result in local variable. 405 __kmp_itt_barrier_object(), being called too late (e. g. after gather phase) 406 would return itt sync object for the next barrier! 407 408 ITT need an address (void *) to be specified as a sync object. OpenMP RTL 409 does not have barrier object or barrier data structure. Barrier is just a 410 counter in team and thread structures. We could use an address of team 411 structure as a barrier sync object, but ITT wants different objects for 412 different barriers (even whithin the same team). So let us use team address 413 as barrier sync object for the first barrier, then increase it by one for the 414 next barrier, and so on (but wrap it not to use addresses outside of team 415 structure). */ 416 417void *__kmp_itt_barrier_object(int gtid, int bt, int set_name, 418 int delta // 0 (current barrier) is default 419 // value; specify -1 to get previous 420 // barrier. 421 ) { 422 void *object = NULL; 423#if USE_ITT_NOTIFY 424 kmp_info_t *thr = __kmp_thread_from_gtid(gtid); 425 kmp_team_t *team = thr->th.th_team; 426 427 // NOTE: If the function is called from __kmp_fork_barrier, team pointer can 428 // be NULL. This "if" helps to avoid crash. However, this is not complete 429 // solution, and reporting fork/join barriers to ITT should be revisited. 430 431 if (team != NULL) { 432 // Primary thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. 433 // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter. 434 kmp_uint64 counter = 435 team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta; 436 // Now form the barrier id. 
Encode barrier type (bt) in barrier id too, so 437 // barriers of different types do not have the same ids. 438 KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier); 439 // This condition is a must (we would have zero divide otherwise). 440 KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier); 441 // More strong condition: make sure we have room at least for for two 442 // different ids (for each barrier type). 443 object = reinterpret_cast<void *>( 444 (kmp_uintptr_t)(team) + 445 (kmp_uintptr_t)counter % (sizeof(kmp_team_t) / bs_last_barrier) * 446 bs_last_barrier + 447 bt); 448 KMP_ITT_DEBUG_LOCK(); 449 KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt, 450 counter, object); 451 452 if (set_name) { 453 ident_t const *loc = NULL; 454 char const *src = NULL; 455 char const *type = "OMP Barrier"; 456 switch (bt) { 457 case bs_plain_barrier: { 458 // For plain barrier compiler calls __kmpc_barrier() function, which 459 // saves location in thr->th.th_ident. 460 loc = thr->th.th_ident; 461 // Get the barrier type from flags provided by compiler. 
462 kmp_int32 expl = 0; 463 kmp_uint32 impl = 0; 464 if (loc != NULL) { 465 src = loc->psource; 466 expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0; 467 impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0; 468 } 469 if (impl) { 470 switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) { 471 case KMP_IDENT_BARRIER_IMPL_FOR: { 472 type = "OMP For Barrier"; 473 } break; 474 case KMP_IDENT_BARRIER_IMPL_SECTIONS: { 475 type = "OMP Sections Barrier"; 476 } break; 477 case KMP_IDENT_BARRIER_IMPL_SINGLE: { 478 type = "OMP Single Barrier"; 479 } break; 480 case KMP_IDENT_BARRIER_IMPL_WORKSHARE: { 481 type = "OMP Workshare Barrier"; 482 } break; 483 default: { 484 type = "OMP Implicit Barrier"; 485 KMP_DEBUG_ASSERT(0); 486 } 487 } 488 } else if (expl) { 489 type = "OMP Explicit Barrier"; 490 } 491 } break; 492 case bs_forkjoin_barrier: { 493 // In case of fork/join barrier we can read thr->th.th_ident, because it 494 // contains location of last passed construct (while join barrier is not 495 // such one). Use th_ident of primary thread instead -- 496 // __kmp_join_call() called by the primary thread saves location. 497 // 498 // AC: cannot read from primary thread because __kmp_join_call may not 499 // be called yet, so we read the location from team. This is the 500 // same location. Team is valid on entry to join barrier where this 501 // happens. 
502 loc = team->t.t_ident; 503 if (loc != NULL) { 504 src = loc->psource; 505 } 506 type = "OMP Join Barrier"; 507 } break; 508 } 509 KMP_ITT_DEBUG_LOCK(); 510 __itt_sync_create(object, type, src, __itt_attr_barrier); 511 KMP_ITT_DEBUG_PRINT( 512 "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, 513 type, src); 514 } 515 } 516#endif 517 return object; 518} // __kmp_itt_barrier_object 519 520// ----------------------------------------------------------------------------- 521void __kmp_itt_barrier_starting(int gtid, void *object) { 522#if USE_ITT_NOTIFY 523 if (!KMP_MASTER_GTID(gtid)) { 524 KMP_ITT_DEBUG_LOCK(); 525 __itt_sync_releasing(object); 526 KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object); 527 } 528 KMP_ITT_DEBUG_LOCK(); 529 __itt_sync_prepare(object); 530 KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object); 531#endif 532} // __kmp_itt_barrier_starting 533 534// ----------------------------------------------------------------------------- 535void __kmp_itt_barrier_middle(int gtid, void *object) { 536#if USE_ITT_NOTIFY 537 if (KMP_MASTER_GTID(gtid)) { 538 KMP_ITT_DEBUG_LOCK(); 539 __itt_sync_acquired(object); 540 KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object); 541 KMP_ITT_DEBUG_LOCK(); 542 __itt_sync_releasing(object); 543 KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object); 544 } else { 545 } 546#endif 547} // __kmp_itt_barrier_middle 548 549// ----------------------------------------------------------------------------- 550void __kmp_itt_barrier_finished(int gtid, void *object) { 551#if USE_ITT_NOTIFY 552 if (KMP_MASTER_GTID(gtid)) { 553 } else { 554 KMP_ITT_DEBUG_LOCK(); 555 __itt_sync_acquired(object); 556 KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object); 557 } 558#endif 559} // __kmp_itt_barrier_finished 560 561/* Taskwait reporting. 562 ITT need an address (void *) to be specified as a sync object. OpenMP RTL 563 does not have taskwait structure, so we need to construct something. 
*/ 564 565void *__kmp_itt_taskwait_object(int gtid) { 566 void *object = NULL; 567#if USE_ITT_NOTIFY 568 if (UNLIKELY(__itt_sync_create_ptr)) { 569 kmp_info_t *thread = __kmp_thread_from_gtid(gtid); 570 kmp_taskdata_t *taskdata = thread->th.th_current_task; 571 object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) + 572 taskdata->td_taskwait_counter % 573 sizeof(kmp_taskdata_t)); 574 } 575#endif 576 return object; 577} // __kmp_itt_taskwait_object 578 579void __kmp_itt_taskwait_starting(int gtid, void *object) { 580#if USE_ITT_NOTIFY 581 kmp_info_t *thread = __kmp_thread_from_gtid(gtid); 582 kmp_taskdata_t *taskdata = thread->th.th_current_task; 583 ident_t const *loc = taskdata->td_taskwait_ident; 584 char const *src = (loc == NULL ? NULL : loc->psource); 585 KMP_ITT_DEBUG_LOCK(); 586 __itt_sync_create(object, "OMP Taskwait", src, 0); 587 KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", 588 object, src); 589 KMP_ITT_DEBUG_LOCK(); 590 __itt_sync_prepare(object); 591 KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object); 592#endif 593} // __kmp_itt_taskwait_starting 594 595void __kmp_itt_taskwait_finished(int gtid, void *object) { 596#if USE_ITT_NOTIFY 597 KMP_ITT_DEBUG_LOCK(); 598 __itt_sync_acquired(object); 599 KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object); 600 KMP_ITT_DEBUG_LOCK(); 601 __itt_sync_destroy(object); 602 KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object); 603#endif 604} // __kmp_itt_taskwait_finished 605 606/* Task reporting. 607 Only those tasks are reported which are executed by a thread spinning at 608 barrier (or taskwait). Synch object passed to the function must be barrier of 609 taskwait the threads waiting at. */ 610 611void __kmp_itt_task_starting( 612 void *object // ITT sync object: barrier or taskwait. 
613 ) { 614#if USE_ITT_NOTIFY 615 if (UNLIKELY(object != NULL)) { 616 KMP_ITT_DEBUG_LOCK(); 617 __itt_sync_cancel(object); 618 KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object); 619 } 620#endif 621} // __kmp_itt_task_starting 622 623// ----------------------------------------------------------------------------- 624void __kmp_itt_task_finished( 625 void *object // ITT sync object: barrier or taskwait. 626 ) { 627#if USE_ITT_NOTIFY 628 KMP_ITT_DEBUG_LOCK(); 629 __itt_sync_prepare(object); 630 KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object); 631#endif 632} // __kmp_itt_task_finished 633 634/* Lock reporting. 635 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock 636 operation (set/unset). It is not a real event shown to the user but just 637 setting a name for synchronization object. `lock' is an address of sync 638 object, the same address should be used in all subsequent calls. 639 * __kmp_itt_lock_acquiring() should be called before setting the lock. 640 * __kmp_itt_lock_acquired() should be called after setting the lock. 641 * __kmp_itt_lock_realeasing() should be called before unsetting the lock. 642 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting 643 for the lock. 644 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock 645 operation. After __kmp_itt_lock_destroyed() all the references to the same 646 address will be considered as another sync object, not related with the 647 original one. */ 648 649#if KMP_USE_DYNAMIC_LOCK 650// Takes location information directly 651__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type, 652 const ident_t *loc) { 653#if USE_ITT_NOTIFY 654 if (__itt_sync_create_ptr) { 655 char const *src = (loc == NULL ? 
NULL : loc->psource); 656 KMP_ITT_DEBUG_LOCK(); 657 __itt_sync_create(lock, type, src, 0); 658 KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, 659 src); 660 } 661#endif 662} 663#else // KMP_USE_DYNAMIC_LOCK 664// Internal guts -- common code for locks and critical sections, do not call 665// directly. 666__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) { 667#if USE_ITT_NOTIFY 668 if (__itt_sync_create_ptr) { 669 ident_t const *loc = NULL; 670 if (__kmp_get_user_lock_location_ != NULL) 671 loc = __kmp_get_user_lock_location_((lock)); 672 char const *src = (loc == NULL ? NULL : loc->psource); 673 KMP_ITT_DEBUG_LOCK(); 674 __itt_sync_create(lock, type, src, 0); 675 KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, 676 src); 677 } 678#endif 679} // ___kmp_itt_lock_init 680#endif // KMP_USE_DYNAMIC_LOCK 681 682// Internal guts -- common code for locks and critical sections, do not call 683// directly. 684__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) { 685#if USE_ITT_NOTIFY 686 KMP_ITT_DEBUG_LOCK(); 687 __itt_sync_destroy(lock); 688 KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock); 689#endif 690} // ___kmp_itt_lock_fini 691 692// ----------------------------------------------------------------------------- 693#if KMP_USE_DYNAMIC_LOCK 694void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) { 695 ___kmp_itt_lock_init(lock, "OMP Lock", loc); 696} 697#else 698void __kmp_itt_lock_creating(kmp_user_lock_p lock) { 699 ___kmp_itt_lock_init(lock, "OMP Lock"); 700} // __kmp_itt_lock_creating 701#endif 702 703void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) { 704#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY 705 // postpone lock object access 706 if (__itt_sync_prepare_ptr) { 707 if (KMP_EXTRACT_D_TAG(lock) == 0) { 708 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 709 __itt_sync_prepare(ilk->lock); 710 } else { 711 
__itt_sync_prepare(lock); 712 } 713 } 714#else 715 __itt_sync_prepare(lock); 716#endif 717} // __kmp_itt_lock_acquiring 718 719void __kmp_itt_lock_acquired(kmp_user_lock_p lock) { 720#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY 721 // postpone lock object access 722 if (__itt_sync_acquired_ptr) { 723 if (KMP_EXTRACT_D_TAG(lock) == 0) { 724 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 725 __itt_sync_acquired(ilk->lock); 726 } else { 727 __itt_sync_acquired(lock); 728 } 729 } 730#else 731 __itt_sync_acquired(lock); 732#endif 733} // __kmp_itt_lock_acquired 734 735void __kmp_itt_lock_releasing(kmp_user_lock_p lock) { 736#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY 737 if (__itt_sync_releasing_ptr) { 738 if (KMP_EXTRACT_D_TAG(lock) == 0) { 739 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 740 __itt_sync_releasing(ilk->lock); 741 } else { 742 __itt_sync_releasing(lock); 743 } 744 } 745#else 746 __itt_sync_releasing(lock); 747#endif 748} // __kmp_itt_lock_releasing 749 750void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) { 751#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY 752 if (__itt_sync_cancel_ptr) { 753 if (KMP_EXTRACT_D_TAG(lock) == 0) { 754 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 755 __itt_sync_cancel(ilk->lock); 756 } else { 757 __itt_sync_cancel(lock); 758 } 759 } 760#else 761 __itt_sync_cancel(lock); 762#endif 763} // __kmp_itt_lock_cancelled 764 765void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) { 766 ___kmp_itt_lock_fini(lock, "OMP Lock"); 767} // __kmp_itt_lock_destroyed 768 769/* Critical reporting. 770 Critical sections are treated exactly as locks (but have different object 771 type). 
*/ 772#if KMP_USE_DYNAMIC_LOCK 773void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) { 774 ___kmp_itt_lock_init(lock, "OMP Critical", loc); 775} 776#else 777void __kmp_itt_critical_creating(kmp_user_lock_p lock) { 778 ___kmp_itt_lock_init(lock, "OMP Critical"); 779} // __kmp_itt_critical_creating 780#endif 781 782void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) { 783 __itt_sync_prepare(lock); 784} // __kmp_itt_critical_acquiring 785 786void __kmp_itt_critical_acquired(kmp_user_lock_p lock) { 787 __itt_sync_acquired(lock); 788} // __kmp_itt_critical_acquired 789 790void __kmp_itt_critical_releasing(kmp_user_lock_p lock) { 791 __itt_sync_releasing(lock); 792} // __kmp_itt_critical_releasing 793 794void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) { 795 ___kmp_itt_lock_fini(lock, "OMP Critical"); 796} // __kmp_itt_critical_destroyed 797 798/* Single reporting. */ 799 800void __kmp_itt_single_start(int gtid) { 801#if USE_ITT_NOTIFY 802 if (__itt_mark_create_ptr || KMP_ITT_DEBUG) { 803 kmp_info_t *thr = __kmp_thread_from_gtid((gtid)); 804 ident_t *loc = thr->th.th_ident; 805 char const *src = (loc == NULL ? 
NULL : loc->psource); 806 kmp_str_buf_t name; 807 __kmp_str_buf_init(&name); 808 __kmp_str_buf_print(&name, "OMP Single-%s", src); 809 KMP_ITT_DEBUG_LOCK(); 810 thr->th.th_itt_mark_single = __itt_mark_create(name.str); 811 KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str, 812 thr->th.th_itt_mark_single); 813 __kmp_str_buf_free(&name); 814 KMP_ITT_DEBUG_LOCK(); 815 __itt_mark(thr->th.th_itt_mark_single, NULL); 816 KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n", 817 thr->th.th_itt_mark_single); 818 } 819#endif 820} // __kmp_itt_single_start 821 822void __kmp_itt_single_end(int gtid) { 823#if USE_ITT_NOTIFY 824 __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single; 825 KMP_ITT_DEBUG_LOCK(); 826 __itt_mark_off(mark); 827 KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark); 828#endif 829} // __kmp_itt_single_end 830 831/* Ordered reporting. 832 * __kmp_itt_ordered_init is called by each thread *before* first using sync 833 object. ITT team would like it to be called once, but it requires extra 834 synchronization. 835 * __kmp_itt_ordered_prep is called when thread is going to enter ordered 836 section (before synchronization). 837 * __kmp_itt_ordered_start is called just before entering user code (after 838 synchronization). 839 * __kmp_itt_ordered_end is called after returning from user code. 840 841 Sync object is th->th.th_dispatch->th_dispatch_sh_current. 842 Events are not generated in case of serialized team. */ 843 844void __kmp_itt_ordered_init(int gtid) { 845#if USE_ITT_NOTIFY 846 if (__itt_sync_create_ptr) { 847 kmp_info_t *thr = __kmp_thread_from_gtid(gtid); 848 ident_t const *loc = thr->th.th_ident; 849 char const *src = (loc == NULL ? 
NULL : loc->psource); 850 __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current, 851 "OMP Ordered", src, 0); 852 } 853#endif 854} // __kmp_itt_ordered_init 855 856void __kmp_itt_ordered_prep(int gtid) { 857#if USE_ITT_NOTIFY 858 if (__itt_sync_create_ptr) { 859 kmp_team_t *t = __kmp_team_from_gtid(gtid); 860 if (!t->t.t_serialized) { 861 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 862 __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current); 863 } 864 } 865#endif 866} // __kmp_itt_ordered_prep 867 868void __kmp_itt_ordered_start(int gtid) { 869#if USE_ITT_NOTIFY 870 if (__itt_sync_create_ptr) { 871 kmp_team_t *t = __kmp_team_from_gtid(gtid); 872 if (!t->t.t_serialized) { 873 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 874 __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current); 875 } 876 } 877#endif 878} // __kmp_itt_ordered_start 879 880void __kmp_itt_ordered_end(int gtid) { 881#if USE_ITT_NOTIFY 882 if (__itt_sync_create_ptr) { 883 kmp_team_t *t = __kmp_team_from_gtid(gtid); 884 if (!t->t.t_serialized) { 885 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 886 __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current); 887 } 888 } 889#endif 890} // __kmp_itt_ordered_end 891 892/* Threads reporting. */ 893 894void __kmp_itt_thread_ignore() { 895 __itt_thr_ignore(); 896} // __kmp_itt_thread_ignore 897 898void __kmp_itt_thread_name(int gtid) { 899#if USE_ITT_NOTIFY 900 if (__itt_thr_name_set_ptr) { 901 kmp_str_buf_t name; 902 __kmp_str_buf_init(&name); 903 if (KMP_MASTER_GTID(gtid)) { 904 __kmp_str_buf_print(&name, "OMP Primary Thread #%d", gtid); 905 } else { 906 __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid); 907 } 908 KMP_ITT_DEBUG_LOCK(); 909 __itt_thr_name_set(name.str, name.used); 910 KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str); 911 __kmp_str_buf_free(&name); 912 } 913#endif 914} // __kmp_itt_thread_name 915 916/* System object reporting. 
917 ITT catches operations with system sync objects (like Windows* OS on IA-32 918 architecture API critical sections and events). We only need to specify 919 name ("OMP Scheduler") for the object to let ITT know it is an object used 920 by OpenMP RTL for internal purposes. */ 921 922void __kmp_itt_system_object_created(void *object, char const *name) { 923#if USE_ITT_NOTIFY 924 KMP_ITT_DEBUG_LOCK(); 925 __itt_sync_create(object, "OMP Scheduler", name, 0); 926 KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", 927 object, name); 928#endif 929} // __kmp_itt_system_object_created 930 931/* Stack stitching api. 932 Primary thread calls "create" and put the stitching id into team structure. 933 Workers read the stitching id and call "enter" / "leave" api. 934 Primary thread calls "destroy" at the end of the parallel region. */ 935 936__itt_caller __kmp_itt_stack_caller_create() { 937#if USE_ITT_NOTIFY 938 if (!__itt_stack_caller_create_ptr) 939 return NULL; 940 KMP_ITT_DEBUG_LOCK(); 941 __itt_caller id = __itt_stack_caller_create(); 942 KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id); 943 return id; 944#endif 945 return NULL; 946} 947 948void __kmp_itt_stack_caller_destroy(__itt_caller id) { 949#if USE_ITT_NOTIFY 950 if (__itt_stack_caller_destroy_ptr) { 951 KMP_ITT_DEBUG_LOCK(); 952 __itt_stack_caller_destroy(id); 953 KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id); 954 } 955#endif 956} 957 958void __kmp_itt_stack_callee_enter(__itt_caller id) { 959#if USE_ITT_NOTIFY 960 if (__itt_stack_callee_enter_ptr) { 961 KMP_ITT_DEBUG_LOCK(); 962 __itt_stack_callee_enter(id); 963 KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id); 964 } 965#endif 966} 967 968void __kmp_itt_stack_callee_leave(__itt_caller id) { 969#if USE_ITT_NOTIFY 970 if (__itt_stack_callee_leave_ptr) { 971 KMP_ITT_DEBUG_LOCK(); 972 __itt_stack_callee_leave(id); 973 KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id); 974 } 975#endif 976} 977 978#endif /* USE_ITT_BUILD */ 979