1#if USE_ITT_BUILD 2/* 3 * kmp_itt.inl -- Inline functions of ITT Notify. 4 */ 5 6//===----------------------------------------------------------------------===// 7// 8// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 9// See https://llvm.org/LICENSE.txt for license information. 10// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 11// 12//===----------------------------------------------------------------------===// 13 14// Inline function definitions. This file should be included into kmp_itt.h file 15// for production build (to let compiler inline functions) or into kmp_itt.c 16// file for debug build (to reduce the number of files to recompile and save 17// build time). 18 19#include "kmp.h" 20#include "kmp_str.h" 21 22#if KMP_ITT_DEBUG 23extern kmp_bootstrap_lock_t __kmp_itt_debug_lock; 24#define KMP_ITT_DEBUG_LOCK() \ 25 { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); } 26#define KMP_ITT_DEBUG_PRINT(...) \ 27 { \ 28 fprintf(stderr, "#%02d: ", __kmp_get_gtid()); \ 29 fprintf(stderr, __VA_ARGS__); \ 30 fflush(stderr); \ 31 __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock); \ 32 } 33#else 34#define KMP_ITT_DEBUG_LOCK() 35#define KMP_ITT_DEBUG_PRINT(...) 36#endif // KMP_ITT_DEBUG 37 38// Ensure that the functions are static if they're supposed to be being inlined. 39// Otherwise they cannot be used in more than one file, since there will be 40// multiple definitions. 41#if KMP_DEBUG 42#define LINKAGE 43#else 44#define LINKAGE static inline 45#endif 46 47// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses 48// this API to support user-defined synchronization primitives, but does not use 49// ZCA; it would be safe to turn this off until wider support becomes available. 
#if USE_ITT_ZCA
#ifdef __INTEL_COMPILER
#if __INTEL_COMPILER >= 1200
// Route the acquired/releasing sync events through ZCA notification
// intrinsics instead of plain ITT notify.
// NOTE(review): acquired uses __notify_zc_intrinsic while releasing uses
// __notify_intrinsic -- asymmetry preserved from the original; confirm it is
// intentional.
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif
#endif
#endif

// Guards the lazy one-time creation of the metadata domain and string handles
// performed by the __kmp_itt_metadata_* functions below.
static kmp_bootstrap_lock_t metadata_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);

/* Parallel region reporting.
 * __kmp_itt_region_forking should be called by master thread of a team.
   Exact moment of call does not matter, but it should be completed before any
   thread of this team calls __kmp_itt_region_starting.
 * __kmp_itt_region_starting should be called by each thread of a team just
   before entering parallel region body.
 * __kmp_itt_region_finished should be called by each thread of a team right
   after returning from parallel region body.
 * __kmp_itt_region_joined should be called by master thread of a team, after
   all threads called __kmp_itt_region_finished.

   Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
   execute some more user code -- such a thread can execute tasks.

   Note: The overhead of logging region_starting and region_finished in each
   thread is too large, so these calls are not used. */

// Opens an ITT frame for the outermost parallel region forked by thread
// `gtid`. `team_size` is encoded into the domain name; when `barriers` is
// nonzero a companion barrier domain is created as well. Domain indices are
// cached in loc->reserved_2 (region index in the low two bytes, barrier index
// in the high two bytes, both biased by 1 so that 0 means "not created yet").
LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
  if (loc) {
    // Use the reserved_2 field to store the index to the region domain.
    // Assume that reserved_2 contains zero initially. Since zero is special
    // value here, store the index into domain array increased by 1.
    if (loc->reserved_2 == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // loc->reserved_2 is still 0
        }
        // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
        //    frm = loc->reserved_2 - 1; // get value saved by other thread
        //    for same loc
        //} // AC: this block is to replace next unsynchronized line

        // We need to save indexes for both region and barrier frames. We'll use
        // loc->reserved_2 field but put region index to the low two bytes and
        // barrier indexes to the high two bytes. It is OK because
        // KMP_MAX_FRAME_DOMAINS = 512.
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        //   "<func>$omp$parallel@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc =
            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        // Suppress memory-error reports from tools while creating the domain.
        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        if (barriers) {
          // Also create the companion barrier domain for this location.
          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
            int frm = KMP_TEST_THEN_INC32(
                &__kmp_barrier_domain_count); // get "old" value
            if (frm >= KMP_MAX_FRAME_DOMAINS) {
              KMP_TEST_THEN_DEC32(
                  &__kmp_barrier_domain_count); // revert the count
              return; // loc->reserved_2 is still 0
            }
            char *buff = NULL;
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __kmp_str_free(&buff);
            // Save the barrier frame index to the high two bytes.
            loc->reserved_2 |= (frm + 1) << 16;
          }
        }
        __kmp_str_loc_free(&str_loc);
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      }
    } else { // Region domain exists for this location
      // Check if team size was changed. Then create new region domain for this
      // location
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
          (__kmp_itt_region_team_size[frm] != team_size)) {
        char *buff = NULL;
        kmp_str_loc_t str_loc =
            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      } else { // Team size was not changed. Use existing domain.
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
                        loc->reserved_2, loc);
  }
#endif
} // __kmp_itt_region_forking

// -----------------------------------------------------------------------------
// Submits a completed ITT frame [begin, end] for thread `gtid`.
// region != 0 selects region-frame reporting (region == 2 marks a serialized
// region, which counts as one extra nesting level); region == 0 selects
// barrier-frame reporting, where `imbalance` chooses between the
// barrier-imbalance and the plain barrier domain arrays.
LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                    __itt_timestamp end, int imbalance,
                                    ident_t *loc, int team_size, int region) {
#if USE_ITT_NOTIFY
  if (region) {
    kmp_team_t *team = __kmp_team_from_gtid(gtid);
    int serialized = (region == 2 ? 1 : 0);
    if (team->t.t_active_level + serialized > 1) {
      // The frame notifications are only supported for the outermost teams.
      return;
    }
    // Check region domain has not been created before. Its index is saved in
    // the low two bytes.
    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // loc->reserved_2 is still 0
        }

        // We need to save indexes for both region and barrier frames. We'll use
        // loc->reserved_2 field but put region index to the low two bytes and
        // barrier indexes to the high two bytes. It is OK because
        // KMP_MAX_FRAME_DOMAINS = 512.
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        //   "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc =
            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    } else { // Region domain exists for this location
      // Check if team size was changed. Then create new region domain for this
      // location
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if (frm >= KMP_MAX_FRAME_DOMAINS)
        return; // something's gone wrong, returning
      if (__kmp_itt_region_team_size[frm] != team_size) {
        char *buff = NULL;
        kmp_str_loc_t str_loc =
            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      } else { // Team size was not changed. Use existing domain.
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT(
        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
        gtid, loc->reserved_2, region, loc, begin, end);
    return;
  } else { // called for barrier reporting
    if (loc) {
      // Barrier domain index lives in the high two bytes of reserved_2.
      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
          int frm = KMP_TEST_THEN_INC32(
              &__kmp_barrier_domain_count); // get "old" value
          if (frm >= KMP_MAX_FRAME_DOMAINS) {
            KMP_TEST_THEN_DEC32(
                &__kmp_barrier_domain_count); // revert the count
            return; // loc->reserved_2 is still 0
          }
          // Save the barrier frame index to the high two bytes.
          loc->reserved_2 |= (frm + 1) << 16; // save "new" value

          // Transform compiler-generated region location into the format
          // that the tools more or less standardized on:
          //   "<func>$omp$frame@[file:]<line>[:<col>]"
          kmp_str_loc_t str_loc =
              __kmp_str_loc_init(loc->psource, /* init_fname */ false);
          if (imbalance) {
            // Imbalance frames get their own domain array and a name that
            // encodes the team size.
            char *buff_imb = NULL;
            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
                                        str_loc.func, team_size, str_loc.file,
                                        str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff_imb);
          } else {
            char *buff = NULL;
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff);
          }
          __kmp_str_loc_free(&str_loc);
        }
      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
        // Domain already exists; recover its index from the high two bytes
        // (stored biased by 1) and submit directly.
        if (imbalance) {
          __itt_frame_submit_v3(
              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        } else {
          __itt_frame_submit_v3(
              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        }
      }
      KMP_ITT_DEBUG_LOCK();
      KMP_ITT_DEBUG_PRINT(
          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
          loc->reserved_2, loc, begin, end);
    }
  }
#endif
} // __kmp_itt_frame_submit

// -----------------------------------------------------------------------------
// Reports barrier imbalance data (begin/end timestamps, imbalance time and
// reduction flag) as ITT metadata. Lazily creates the shared metadata domain
// and string handles under metadata_lock (double-checked locking).
LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                          kmp_uint64 end, kmp_uint64 imbalance,
                                          kmp_uint64 reduction) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  kmp_uint64 imbalance_data[4];
  imbalance_data[0] = begin;
  imbalance_data[1] = end;
  imbalance_data[2] = imbalance;
  imbalance_data[3] = reduction;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
                     __itt_metadata_u64, 4, imbalance_data);
#endif
} // __kmp_itt_metadata_imbalance

// -----------------------------------------------------------------------------
// Reports loop schedule/iteration/chunk metadata for the loop at `loc`.
// Lazily creates the shared metadata domain exactly as above.
LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                     kmp_uint64 iterations, kmp_uint64 chunk) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  // Parse line and column from psource string: ";file;func;line;col;;"
  KMP_DEBUG_ASSERT(loc->psource);
  kmp_uint64 loop_data[5];
  int line, col;
  __kmp_str_loc_numbers(loc->psource, &line, &col);
  loop_data[0] = line;
  loop_data[1] = col;
  loop_data[2] = sched_type;
  loop_data[3] = iterations;
  loop_data[4] = chunk;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
                     __itt_metadata_u64, 5, loop_data);
#endif
} // __kmp_itt_metadata_loop

// -----------------------------------------------------------------------------
// Reports the source position (line, column) of a single construct at `loc`
// as ITT metadata. Lazily creates the shared metadata domain exactly as above.
LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  int line, col;
  __kmp_str_loc_numbers(loc->psource, &line, &col);
  kmp_uint64 single_data[2];
  single_data[0] = line;
  single_data[1] = col;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
                     __itt_metadata_u64, 2, single_data);
#endif
} // __kmp_itt_metadata_single

// -----------------------------------------------------------------------------
// Intentionally empty: per-thread region start logging is too expensive (see
// the note in the "Parallel region reporting" comment above the forking code).
LINKAGE void __kmp_itt_region_starting(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_starting

// -----------------------------------------------------------------------------
// Intentionally empty for the same reason as __kmp_itt_region_starting.
LINKAGE void __kmp_itt_region_finished(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_finished

// ----------------------------------------------------------------------------
// Closes the ITT frame opened by __kmp_itt_region_forking for the outermost
// region; called by the master thread after all threads finished the region.
LINKAGE void __kmp_itt_region_joined(int gtid) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
  if (loc && loc->reserved_2) {
    // Region domain index was cached in the low two bytes of reserved_2
    // (biased by 1) when the region was forked.
    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
    if (frm < KMP_MAX_FRAME_DOMAINS) {
      KMP_ITT_DEBUG_LOCK();
      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
                          loc->reserved_2, loc);
    }
  }
#endif
} // __kmp_itt_region_joined

/* Barriers reporting.

   A barrier consists of two phases:
   1. Gather -- master waits for arriving of all the worker threads; each
      worker thread registers arrival and goes further.
   2. Release -- each worker threads waits until master lets it go; master lets
      worker threads go.

   Function should be called by each thread:
   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
   * __kmp_itt_barrier_middle() -- between gather and release phases.
   * __kmp_itt_barrier_finished() -- after release phase.

   Note: Call __kmp_itt_barrier_object() before call to
   __kmp_itt_barrier_starting() and save result in local variable.
   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
   would return itt sync object for the next barrier!

   ITT need an address (void *) to be specified as a sync object. OpenMP RTL
   does not have barrier object or barrier data structure. Barrier is just a
   counter in team and thread structures. We could use an address of team
   structure as a barrier sync object, but ITT wants different objects for
   different barriers (even within the same team). So let us use team address
   as barrier sync object for the first barrier, then increase it by one for the
   next barrier, and so on (but wrap it not to use addresses outside of team
   structure).
*/

// Returns the synthetic ITT sync-object address for barrier `bt` of the
// current team (see the scheme described in the comment above). `delta`
// shifts the barrier counter (0 = current barrier, -1 = previous barrier);
// when `set_name` is nonzero the object is also registered with ITT under a
// type string derived from the compiler-provided location flags.
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
                               int delta // 0 (current barrier) is default
                               // value; specify -1 to get previous
                               // barrier.
                               ) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
  // be NULL. This "if" helps to avoid crash. However, this is not complete
  // solution, and reporting fork/join barriers to ITT should be revisited.

  if (team != NULL) {
    // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
    kmp_uint64 counter =
        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
    // barriers of different types do not have the same ids.
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
    // This condition is a must (we would have zero divide otherwise).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
    // More strong condition: make sure we have room at least for two
    // different ids (for each barrier type).
    object = reinterpret_cast<void *>(
        (kmp_uintptr_t)(team) +
        (kmp_uintptr_t)counter % (sizeof(kmp_team_t) / bs_last_barrier) *
            bs_last_barrier +
        bt);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                        counter, object);

    if (set_name) {
      ident_t const *loc = NULL;
      char const *src = NULL;
      char const *type = "OMP Barrier";
      switch (bt) {
      case bs_plain_barrier: {
        // For plain barrier compiler calls __kmpc_barrier() function, which
        // saves location in thr->th.th_ident.
        loc = thr->th.th_ident;
        // Get the barrier type from flags provided by compiler.
        kmp_int32 expl = 0;
        kmp_uint32 impl = 0;
        if (loc != NULL) {
          src = loc->psource;
          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
        }
        if (impl) {
          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
          case KMP_IDENT_BARRIER_IMPL_FOR: {
            type = "OMP For Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
            type = "OMP Sections Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
            type = "OMP Single Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
            type = "OMP Workshare Barrier";
          } break;
          default: {
            type = "OMP Implicit Barrier";
            KMP_DEBUG_ASSERT(0);
          }
          }
        } else if (expl) {
          type = "OMP Explicit Barrier";
        }
      } break;
      case bs_forkjoin_barrier: {
        // In case of fork/join barrier we can read thr->th.th_ident, because it
        // contains location of last passed construct (while join barrier is not
        // such one). Use th_ident of master thread instead -- __kmp_join_call()
        // called by the master thread saves location.
        //
        // AC: cannot read from master because __kmp_join_call may be not called
        // yet, so we read the location from team. This is the same location.
        // And team is valid at the enter to join barrier where this happens.
        loc = team->t.t_ident;
        if (loc != NULL) {
          src = loc->psource;
        }
        type = "OMP Join Barrier";
      } break;
      }
      KMP_ITT_DEBUG_LOCK();
      __itt_sync_create(object, type, src, __itt_attr_barrier);
      KMP_ITT_DEBUG_PRINT(
          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
          type, src);
    }
  }
#endif
  return object;
} // __kmp_itt_barrier_object

// -----------------------------------------------------------------------------
// Called before a thread arrives at the gather phase. Workers additionally
// report "releasing" first (the master observes their arrival).
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting

// -----------------------------------------------------------------------------
// Called between the gather and release phases. Only the master reports here:
// it has acquired the barrier (all workers arrived) and is about to release.
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
  } else {
  }
#endif
} // __kmp_itt_barrier_middle

// -----------------------------------------------------------------------------
// Called after the release phase. Only workers report: each worker has been
// let go by the master, i.e. acquired the barrier.
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
  } else {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
  }
#endif
} // __kmp_itt_barrier_finished

/* Taskwait reporting.
   ITT need an address (void *) to be specified as a sync object. OpenMP RTL
   does not have taskwait structure, so we need to construct something. */

// Builds a synthetic sync-object address for the current taskwait: the
// taskdata address offset by the taskwait counter (wrapped to stay within
// the taskdata structure, same trick as for barriers above).
void *__kmp_itt_taskwait_object(int gtid) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  if (UNLIKELY(__itt_sync_create_ptr)) {
    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
    kmp_taskdata_t *taskdata = thread->th.th_current_task;
    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
                                      taskdata->td_taskwait_counter %
                                          sizeof(kmp_taskdata_t));
  }
#endif
  return object;
} // __kmp_itt_taskwait_object

// Registers the taskwait sync object with ITT (named from the taskwait's
// source location) and reports that the thread is about to wait on it.
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  ident_t const *loc = taskdata->td_taskwait_ident;
  char const *src = (loc == NULL ? NULL : loc->psource);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting

// Reports the taskwait as acquired and destroys the sync object (the counter
// based address will be reused for a future, unrelated taskwait).
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished

/* Task reporting.
   Only those tasks are reported which are executed by a thread spinning at
   barrier (or taskwait). Synch object passed to the function must be barrier
   or taskwait the threads waiting at. */

void __kmp_itt_task_starting(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  if (UNLIKELY(object != NULL)) {
    KMP_ITT_DEBUG_LOCK();
    // Cancel the wait reported by __kmp_itt_barrier_starting /
    // __kmp_itt_taskwait_starting: the thread executes a task instead.
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting

// -----------------------------------------------------------------------------
// The task is done; re-report that the thread prepares to wait on the sync
// object it was spinning at.
void __kmp_itt_task_finished(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished

/* Lock reporting.
   * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
     operation (set/unset). It is not a real event shown to the user but just
     setting a name for synchronization object. `lock' is an address of sync
     object, the same address should be used in all subsequent calls.
   * __kmp_itt_lock_acquiring() should be called before setting the lock.
   * __kmp_itt_lock_acquired() should be called after setting the lock.
   * __kmp_itt_lock_releasing() should be called before unsetting the lock.
   * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
     for the lock.
   * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
     operation. After __kmp_itt_lock_destroyed() all the references to the same
     address will be considered as another sync object, not related with the
     original one. */

#if KMP_USE_DYNAMIC_LOCK
// Takes location information directly
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
                                       const ident_t *loc) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
}
#else // KMP_USE_DYNAMIC_LOCK
// Internal guts -- common code for locks and critical sections, do not call
// directly. The location is looked up via __kmp_get_user_lock_location_.
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    ident_t const *loc = NULL;
    if (__kmp_get_user_lock_location_ != NULL)
      loc = __kmp_get_user_lock_location_((lock));
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
} // ___kmp_itt_lock_init
#endif // KMP_USE_DYNAMIC_LOCK

// Internal guts -- common code for locks and critical sections, do not call
// directly.
__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(lock);
  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
#endif
} // ___kmp_itt_lock_fini

// -----------------------------------------------------------------------------
#if KMP_USE_DYNAMIC_LOCK
void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
}
#else
void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Lock");
} // __kmp_itt_lock_creating
#endif

// With dynamic locks, an indirect lock (direct tag == 0) must be resolved to
// the real lock object before reporting; the lookup is postponed until we
// know ITT actually consumes the event (the *_ptr check).
void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_prepare_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_prepare(ilk->lock);
    } else {
      __itt_sync_prepare(lock);
    }
  }
#else
  __itt_sync_prepare(lock);
#endif
} // __kmp_itt_lock_acquiring

void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_acquired_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_acquired(ilk->lock);
    } else {
      __itt_sync_acquired(lock);
    }
  }
#else
  __itt_sync_acquired(lock);
#endif
} // __kmp_itt_lock_acquired

void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_releasing_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_releasing(ilk->lock);
    } else {
      __itt_sync_releasing(lock);
    }
  }
#else
  __itt_sync_releasing(lock);
#endif
} // __kmp_itt_lock_releasing

void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_cancel_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_cancel(ilk->lock);
    } else {
      __itt_sync_cancel(lock);
    }
  }
#else
  __itt_sync_cancel(lock);
#endif
} // __kmp_itt_lock_cancelled

void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Lock");
} // __kmp_itt_lock_destroyed

/* Critical reporting.
   Critical sections are treated exactly as locks (but have different object
   type).
*/ 829#if KMP_USE_DYNAMIC_LOCK 830void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) { 831 ___kmp_itt_lock_init(lock, "OMP Critical", loc); 832} 833#else 834void __kmp_itt_critical_creating(kmp_user_lock_p lock) { 835 ___kmp_itt_lock_init(lock, "OMP Critical"); 836} // __kmp_itt_critical_creating 837#endif 838 839void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) { 840 __itt_sync_prepare(lock); 841} // __kmp_itt_critical_acquiring 842 843void __kmp_itt_critical_acquired(kmp_user_lock_p lock) { 844 __itt_sync_acquired(lock); 845} // __kmp_itt_critical_acquired 846 847void __kmp_itt_critical_releasing(kmp_user_lock_p lock) { 848 __itt_sync_releasing(lock); 849} // __kmp_itt_critical_releasing 850 851void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) { 852 ___kmp_itt_lock_fini(lock, "OMP Critical"); 853} // __kmp_itt_critical_destroyed 854 855/* Single reporting. */ 856 857void __kmp_itt_single_start(int gtid) { 858#if USE_ITT_NOTIFY 859 if (__itt_mark_create_ptr || KMP_ITT_DEBUG) { 860 kmp_info_t *thr = __kmp_thread_from_gtid((gtid)); 861 ident_t *loc = thr->th.th_ident; 862 char const *src = (loc == NULL ? 
NULL : loc->psource);
    // Compose the mark name from the construct type and its source location.
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    // Raise the mark; it stays on until __kmp_itt_single_end switches it off.
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start

// Switch off the mark this thread created in __kmp_itt_single_start.
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
#endif
} // __kmp_itt_single_end

/* Ordered reporting.
   * __kmp_itt_ordered_init is called by each thread *before* first using sync
   object. ITT team would like it to be called once, but it requires extra
   synchronization.
   * __kmp_itt_ordered_prep is called when thread is going to enter ordered
   section (before synchronization).
   * __kmp_itt_ordered_start is called just before entering user code (after
   synchronization).
   * __kmp_itt_ordered_end is called after returning from user code.

   Sync object is th->th.th_dispatch->th_dispatch_sh_current.
   Events are not generated in case of serialized team. */

// Register the per-thread dispatch buffer as a named sync object so later
// prepare/acquired/releasing events can refer to it.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
    ident_t const *loc = thr->th.th_ident;
    char const *src = (loc == NULL ?
NULL : loc->psource); 907 __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current, 908 "OMP Ordered", src, 0); 909 } 910#endif 911} // __kmp_itt_ordered_init 912 913void __kmp_itt_ordered_prep(int gtid) { 914#if USE_ITT_NOTIFY 915 if (__itt_sync_create_ptr) { 916 kmp_team_t *t = __kmp_team_from_gtid(gtid); 917 if (!t->t.t_serialized) { 918 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 919 __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current); 920 } 921 } 922#endif 923} // __kmp_itt_ordered_prep 924 925void __kmp_itt_ordered_start(int gtid) { 926#if USE_ITT_NOTIFY 927 if (__itt_sync_create_ptr) { 928 kmp_team_t *t = __kmp_team_from_gtid(gtid); 929 if (!t->t.t_serialized) { 930 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 931 __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current); 932 } 933 } 934#endif 935} // __kmp_itt_ordered_start 936 937void __kmp_itt_ordered_end(int gtid) { 938#if USE_ITT_NOTIFY 939 if (__itt_sync_create_ptr) { 940 kmp_team_t *t = __kmp_team_from_gtid(gtid); 941 if (!t->t.t_serialized) { 942 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 943 __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current); 944 } 945 } 946#endif 947} // __kmp_itt_ordered_end 948 949/* Threads reporting. */ 950 951void __kmp_itt_thread_ignore() { 952 __itt_thr_ignore(); 953} // __kmp_itt_thread_ignore 954 955void __kmp_itt_thread_name(int gtid) { 956#if USE_ITT_NOTIFY 957 if (__itt_thr_name_set_ptr) { 958 kmp_str_buf_t name; 959 __kmp_str_buf_init(&name); 960 if (KMP_MASTER_GTID(gtid)) { 961 __kmp_str_buf_print(&name, "OMP Master Thread #%d", gtid); 962 } else { 963 __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid); 964 } 965 KMP_ITT_DEBUG_LOCK(); 966 __itt_thr_name_set(name.str, name.used); 967 KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str); 968 __kmp_str_buf_free(&name); 969 } 970#endif 971} // __kmp_itt_thread_name 972 973/* System object reporting. 
974 ITT catches operations with system sync objects (like Windows* OS on IA-32 975 architecture API critical sections and events). We only need to specify 976 name ("OMP Scheduler") for the object to let ITT know it is an object used 977 by OpenMP RTL for internal purposes. */ 978 979void __kmp_itt_system_object_created(void *object, char const *name) { 980#if USE_ITT_NOTIFY 981 KMP_ITT_DEBUG_LOCK(); 982 __itt_sync_create(object, "OMP Scheduler", name, 0); 983 KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", 984 object, name); 985#endif 986} // __kmp_itt_system_object_created 987 988/* Stack stitching api. 989 Master calls "create" and put the stitching id into team structure. 990 Workers read the stitching id and call "enter" / "leave" api. 991 Master calls "destroy" at the end of the parallel region. */ 992 993__itt_caller __kmp_itt_stack_caller_create() { 994#if USE_ITT_NOTIFY 995 if (!__itt_stack_caller_create_ptr) 996 return NULL; 997 KMP_ITT_DEBUG_LOCK(); 998 __itt_caller id = __itt_stack_caller_create(); 999 KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id); 1000 return id; 1001#endif 1002 return NULL; 1003} 1004 1005void __kmp_itt_stack_caller_destroy(__itt_caller id) { 1006#if USE_ITT_NOTIFY 1007 if (__itt_stack_caller_destroy_ptr) { 1008 KMP_ITT_DEBUG_LOCK(); 1009 __itt_stack_caller_destroy(id); 1010 KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id); 1011 } 1012#endif 1013} 1014 1015void __kmp_itt_stack_callee_enter(__itt_caller id) { 1016#if USE_ITT_NOTIFY 1017 if (__itt_stack_callee_enter_ptr) { 1018 KMP_ITT_DEBUG_LOCK(); 1019 __itt_stack_callee_enter(id); 1020 KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id); 1021 } 1022#endif 1023} 1024 1025void __kmp_itt_stack_callee_leave(__itt_caller id) { 1026#if USE_ITT_NOTIFY 1027 if (__itt_stack_callee_leave_ptr) { 1028 KMP_ITT_DEBUG_LOCK(); 1029 __itt_stack_callee_leave(id); 1030 KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id); 1031 } 1032#endif 1033} 1034 1035#endif /* USE_ITT_BUILD */ 1036