#if USE_ITT_BUILD
/*
 * kmp_itt.inl -- Inline functions of ITT Notify.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Inline function definitions. This file should be included into kmp_itt.h file
// for production build (to let compiler inline functions) or into kmp_itt.c
// file for debug build (to reduce the number of files to recompile and save
// build time).

#include "kmp.h"
#include "kmp_str.h"

#if KMP_ITT_DEBUG
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
// NOTE: KMP_ITT_DEBUG_LOCK() acquires __kmp_itt_debug_lock and
// KMP_ITT_DEBUG_PRINT() releases it — the two macros are asymmetric halves of
// one critical section and must always be used as a LOCK();PRINT() pair.
#define KMP_ITT_DEBUG_LOCK()                                                   \
  { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  {                                                                            \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  }
#else
// Debug tracing disabled: both macros compile away to nothing.
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG

// Ensure that the functions are static if they're supposed to be being inlined.
// Otherwise they cannot be used in more than one file, since there will be
// multiple definitions.
#if KMP_DEBUG
#define LINKAGE
#else
#define LINKAGE static inline
#endif

// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
// this API to support user-defined synchronization primitives, but does not use
// ZCA; it would be safe to turn this off until wider support becomes available.
#if USE_ITT_ZCA
#ifdef __INTEL_COMPILER
#if __INTEL_COMPILER >= 1200
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif
#endif
#endif

// Guards lazy one-time creation of the shared metadata domain / string handles
// used by the __kmp_itt_metadata_* functions below.
static kmp_bootstrap_lock_t metadata_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);

/* Parallel region reporting.
 * __kmp_itt_region_forking should be called by master thread of a team.
   Exact moment of call does not matter, but it should be completed before any
   thread of this team calls __kmp_itt_region_starting.
 * __kmp_itt_region_starting should be called by each thread of a team just
   before entering parallel region body.
 * __kmp_itt_region_finished should be called by each thread of a team right
   after returning from parallel region body.
 * __kmp_itt_region_joined should be called by master thread of a team, after
   all threads called __kmp_itt_region_finished.

   Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
   execute some more user code -- such a thread can execute tasks.

   Note: The overhead of logging region_starting and region_finished in each
   thread is too large, so these calls are not used. */

// Begin an ITT frame for the outermost parallel region, lazily creating (and
// caching via loc->reserved_2) the region's ITT domain, plus optionally the
// barrier domain when `barriers` is nonzero.
LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
  if (loc) {
    // Use the reserved_2 field to store the index to the region domain.
    // Assume that reserved_2 contains zero initially. Since zero is special
    // value here, store the index into domain array increased by 1.
    if (loc->reserved_2 == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // loc->reserved_2 is still 0
        }
        // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
        // frm = loc->reserved_2 - 1; // get value saved by other thread
        // for same loc
        //} // AC: this block is to replace next unsynchronized line

        // We need to save indexes for both region and barrier frames. We'll use
        // loc->reserved_2 field but put region index to the low two bytes and
        // barrier indexes to the high two bytes. It is OK because
        // KMP_MAX_FRAME_DOMAINS = 512.
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        // "<func>$omp$parallel@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        if (barriers) {
          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
            int frm = KMP_TEST_THEN_INC32(
                &__kmp_barrier_domain_count); // get "old" value
            if (frm >= KMP_MAX_FRAME_DOMAINS) {
              KMP_TEST_THEN_DEC32(
                  &__kmp_barrier_domain_count); // revert the count
              // NOTE(review): this early return skips both
              // __kmp_str_loc_free(&str_loc) (leaking the strings it owns) and
              // the __itt_frame_begin_v3 below for an already-created region
              // domain — confirm whether this path should fall through instead.
              return; // loc->reserved_2 is still 0
            }
            char *buff = NULL;
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __kmp_str_free(&buff);
            // Save the barrier frame index to the high two bytes.
            loc->reserved_2 |= (frm + 1) << 16;
          }
        }
        __kmp_str_loc_free(&str_loc);
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      }
    } else { // Region domain exists for this location
      // Check if team size was changed. Then create new region domain for this
      // location
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
          (__kmp_itt_region_team_size[frm] != team_size)) {
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      } else { // Team size was not changed. Use existing domain.
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
                        loc->reserved_2, loc);
  }
#endif
} // __kmp_itt_region_forking

// -----------------------------------------------------------------------------
// Submit a completed [begin, end] ITT frame. When `region` is nonzero this is
// a parallel-region frame (region == 2 marks a serialized region); otherwise
// it is a barrier frame (imbalance selects the imbalance-specific domain).
// Domain indices are cached in loc->reserved_2: region index in the low two
// bytes, barrier index in the high two bytes.
LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                    __itt_timestamp end, int imbalance,
                                    ident_t *loc, int team_size, int region) {
#if USE_ITT_NOTIFY
  if (region) {
    kmp_team_t *team = __kmp_team_from_gtid(gtid);
    int serialized = (region == 2 ? 1 : 0);
    if (team->t.t_active_level + serialized > 1) {
      // The frame notifications are only supported for the outermost teams.
      return;
    }
    // Check region domain has not been created before. It's index is saved in
    // the low two bytes.
    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // loc->reserved_2 is still 0
        }

        // We need to save indexes for both region and barrier frames. We'll use
        // loc->reserved_2 field but put region index to the low two bytes and
        // barrier indexes to the high two bytes. It is OK because
        // KMP_MAX_FRAME_DOMAINS = 512.
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    } else { // Region domain exists for this location
      // Check if team size was changed. Then create new region domain for this
      // location
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
          (__kmp_itt_region_team_size[frm] != team_size)) {
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      } else { // Team size was not changed. Use existing domain.
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT(
        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
        gtid, loc->reserved_2, region, loc, begin, end);
    return;
  } else { // called for barrier reporting
    if (loc) {
      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
          int frm = KMP_TEST_THEN_INC32(
              &__kmp_barrier_domain_count); // get "old" value
          if (frm >= KMP_MAX_FRAME_DOMAINS) {
            KMP_TEST_THEN_DEC32(
                &__kmp_barrier_domain_count); // revert the count
            return; // loc->reserved_2 is still 0
          }
          // Save the barrier frame index to the high two bytes.
          loc->reserved_2 |= (frm + 1) << 16; // save "new" value

          // Transform compiler-generated region location into the format
          // that the tools more or less standardized on:
          // "<func>$omp$frame@[file:]<line>[:<col>]"
          kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
          if (imbalance) {
            char *buff_imb = NULL;
            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
                                        str_loc.func, team_size, str_loc.file,
                                        str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff_imb);
          } else {
            char *buff = NULL;
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff);
          }
          __kmp_str_loc_free(&str_loc);
        }
      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
        if (imbalance) {
          __itt_frame_submit_v3(
              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        } else {
          __itt_frame_submit_v3(
              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        }
      }
      KMP_ITT_DEBUG_LOCK();
      KMP_ITT_DEBUG_PRINT(
          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
          loc->reserved_2, loc, begin, end);
    }
  }
#endif
} // __kmp_itt_frame_submit

// -----------------------------------------------------------------------------
// Attach an imbalance metadata record (begin, end, imbalance, reduction) to
// the shared "OMP Metadata" domain, creating the domain and string handles on
// first use.
// NOTE(review): the metadata_domain check outside the lock is classic
// double-checked locking without an atomic load — presumably acceptable for
// the targeted platforms/compilers, but worth confirming.
LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                          kmp_uint64 end, kmp_uint64 imbalance,
                                          kmp_uint64 reduction) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  kmp_uint64 imbalance_data[4];
  imbalance_data[0] = begin;
  imbalance_data[1] = end;
  imbalance_data[2] = imbalance;
  imbalance_data[3] = reduction;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
                     __itt_metadata_u64, 4, imbalance_data);
#endif
} // __kmp_itt_metadata_imbalance

// -----------------------------------------------------------------------------
// Attach a loop metadata record (line, column, sched_type, iterations, chunk)
// to the shared "OMP Metadata" domain. Line/column are parsed directly out of
// the compiler-generated psource string to avoid a full __kmp_str_loc_init.
LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                     kmp_uint64 iterations, kmp_uint64 chunk) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  // Parse line and column from psource string: ";file;func;line;col;;"
  char *s_line;
  char *s_col;
  KMP_DEBUG_ASSERT(loc->psource);
#ifdef __cplusplus
  s_line = strchr(CCAST(char *, loc->psource), ';');
#else
  s_line = strchr(loc->psource, ';');
#endif
  KMP_DEBUG_ASSERT(s_line);
  s_line = strchr(s_line + 1, ';'); // 2-nd semicolon
  KMP_DEBUG_ASSERT(s_line);
  s_line = strchr(s_line + 1, ';'); // 3-rd semicolon
  KMP_DEBUG_ASSERT(s_line);
  s_col = strchr(s_line + 1, ';'); // 4-th semicolon
  KMP_DEBUG_ASSERT(s_col);

  kmp_uint64 loop_data[5];
  loop_data[0] = atoi(s_line + 1); // read line
  loop_data[1] = atoi(s_col + 1); // read column
  loop_data[2] = sched_type;
  loop_data[3] = iterations;
  loop_data[4] = chunk;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
                     __itt_metadata_u64, 5, loop_data);
#endif
} // __kmp_itt_metadata_loop

// -----------------------------------------------------------------------------
// Attach a single-construct metadata record (line, column) to the shared
// "OMP Metadata" domain, creating it on first use.
LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
  kmp_uint64 single_data[2];
  single_data[0] = str_loc.line;
  single_data[1] = str_loc.col;

  __kmp_str_loc_free(&str_loc);

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
                     __itt_metadata_u64, 2, single_data);
#endif
} // __kmp_itt_metadata_single

// -----------------------------------------------------------------------------
// Intentionally empty: per-thread region-start logging is too expensive (see
// the note at the top of the parallel-region reporting section).
LINKAGE void __kmp_itt_region_starting(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_starting

// -----------------------------------------------------------------------------
// Intentionally empty: per-thread region-finish logging is too expensive (see
// the note at the top of the parallel-region reporting section).
LINKAGE void __kmp_itt_region_finished(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_finished

// ----------------------------------------------------------------------------
// End the ITT frame for the outermost parallel region, using the region
// domain index cached in loc->reserved_2 by __kmp_itt_region_forking.
LINKAGE void __kmp_itt_region_joined(int gtid) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
  if (loc && loc->reserved_2) {
    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
    if (frm < KMP_MAX_FRAME_DOMAINS) {
      KMP_ITT_DEBUG_LOCK();
      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
                          loc->reserved_2, loc);
    }
  }
#endif
} // __kmp_itt_region_joined

/* Barriers reporting.

   A barrier consists of two phases:
   1. Gather -- master waits for arriving of all the worker threads; each
      worker thread registers arrival and goes further.
   2. Release -- each worker threads waits until master lets it go; master lets
      worker threads go.

   Function should be called by each thread:
   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
   * __kmp_itt_barrier_middle() -- between gather and release phases.
   * __kmp_itt_barrier_finished() -- after release phase.

   Note: Call __kmp_itt_barrier_object() before call to
   __kmp_itt_barrier_starting() and save result in local variable.
   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
   would return itt sync object for the next barrier!

   ITT need an address (void *) to be specified as a sync object. OpenMP RTL
   does not have barrier object or barrier data structure. Barrier is just a
   counter in team and thread structures. We could use an address of team
   structure as a barrier sync object, but ITT wants different objects for
   different barriers (even whithin the same team). So let us use team address
   as barrier sync object for the first barrier, then increase it by one for the
   next barrier, and so on (but wrap it not to use addresses outside of team
   structure). */

// Compute the synthetic ITT sync-object address for the current (or, with
// delta = -1, previous) barrier of type `bt`, optionally registering a
// human-readable name for it via __itt_sync_create.
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
                               int delta // 0 (current barrier) is default
                               // value; specify -1 to get previous
                               // barrier.
                               ) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
  // be NULL. This "if" helps to avoid crash. However, this is not complete
  // solution, and reporting fork/join barriers to ITT should be revisited.

  if (team != NULL) {
    // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
    kmp_uint64 counter =
        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
    // barriers of different types do not have the same ids.
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
    // This condition is a must (we would have zero divide otherwise).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
    // More strong condition: make sure we have room at least for for two
    // different ids (for each barrier type).
    object = reinterpret_cast<void *>(
        kmp_uintptr_t(team) +
        counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier +
        bt);
    KMP_ITT_DEBUG_LOCK();
    // NOTE(review): counter is kmp_uint64 but is printed with %lld (signed) —
    // presumably harmless for realistic counts, but %llu would be exact.
    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                        counter, object);

    if (set_name) {
      ident_t const *loc = NULL;
      char const *src = NULL;
      char const *type = "OMP Barrier";
      switch (bt) {
      case bs_plain_barrier: {
        // For plain barrier compiler calls __kmpc_barrier() function, which
        // saves location in thr->th.th_ident.
        loc = thr->th.th_ident;
        // Get the barrier type from flags provided by compiler.
        kmp_int32 expl = 0;
        kmp_uint32 impl = 0;
        if (loc != NULL) {
          src = loc->psource;
          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
        }
        // impl can only be nonzero when loc != NULL, so the dereference below
        // is safe.
        if (impl) {
          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
          case KMP_IDENT_BARRIER_IMPL_FOR: {
            type = "OMP For Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
            type = "OMP Sections Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
            type = "OMP Single Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
            type = "OMP Workshare Barrier";
          } break;
          default: {
            type = "OMP Implicit Barrier";
            KMP_DEBUG_ASSERT(0);
          }
          }
        } else if (expl) {
          type = "OMP Explicit Barrier";
        }
      } break;
      case bs_forkjoin_barrier: {
        // In case of fork/join barrier we can read thr->th.th_ident, because it
        // contains location of last passed construct (while join barrier is not
        // such one). Use th_ident of master thread instead -- __kmp_join_call()
        // called by the master thread saves location.
        //
        // AC: cannot read from master because __kmp_join_call may be not called
        // yet, so we read the location from team. This is the same location.
        // And team is valid at the enter to join barrier where this happens.
        loc = team->t.t_ident;
        if (loc != NULL) {
          src = loc->psource;
        }
        type = "OMP Join Barrier";
      } break;
      }
      KMP_ITT_DEBUG_LOCK();
      __itt_sync_create(object, type, src, __itt_attr_barrier);
      KMP_ITT_DEBUG_PRINT(
          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
          type, src);
    }
  }
#endif
  return object;
} // __kmp_itt_barrier_object

// -----------------------------------------------------------------------------
// Report arrival at the gather phase: workers signal "releasing" (they let the
// master proceed), then every thread signals "prepare" (it starts waiting).
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting

// -----------------------------------------------------------------------------
// Between gather and release: only the master reports — it has acquired the
// barrier (all workers arrived) and is about to release them.
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
  } else {
  }
#endif
} // __kmp_itt_barrier_middle

// -----------------------------------------------------------------------------
// After the release phase: only workers report — each has now acquired the
// barrier (been released by the master).
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
  } else {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
  }
#endif
} // __kmp_itt_barrier_finished

/* Taskwait reporting.
   ITT need an address (void *) to be specified as a sync object. OpenMP RTL
   does not have taskwait structure, so we need to construct something. */
// Build a synthetic sync-object address for the current taskwait from the
// current task descriptor plus its taskwait counter (wrapped so the address
// stays within the kmp_taskdata_t object).
void *__kmp_itt_taskwait_object(int gtid) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
    kmp_taskdata_t *taskdata = thread->th.th_current_task;
    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
                                      taskdata->td_taskwait_counter %
                                          sizeof(kmp_taskdata_t));
  }
#endif
  return object;
} // __kmp_itt_taskwait_object

// Name the taskwait sync object (using the location recorded in
// td_taskwait_ident) and report that this thread starts waiting on it.
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  ident_t const *loc = taskdata->td_taskwait_ident;
  char const *src = (loc == NULL ? NULL : loc->psource);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting

// Report the taskwait as acquired, then destroy its sync object — each
// taskwait instance gets a fresh object (see __kmp_itt_taskwait_object).
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished

/* Task reporting.
   Only those tasks are reported which are executed by a thread spinning at
   barrier (or taskwait). Synch object passed to the function must be barrier of
   taskwait the threads waiting at. */

// A spinning thread picked up a task: cancel the pending wait on the barrier
// or taskwait sync object it was spinning at.
void __kmp_itt_task_starting(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  if (object != NULL) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting

// -----------------------------------------------------------------------------
// The task is done: resume the (previously cancelled) wait on the sync object.
void __kmp_itt_task_finished(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished

/* Lock reporting.
 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
   operation (set/unset). It is not a real event shown to the user but just
   setting a name for synchronization object. `lock' is an address of sync
   object, the same address should be used in all subsequent calls.
 * __kmp_itt_lock_acquiring() should be called before setting the lock.
 * __kmp_itt_lock_acquired() should be called after setting the lock.
 * __kmp_itt_lock_realeasing() should be called before unsetting the lock.
 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
   for the lock.
 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
   operation. After __kmp_itt_lock_destroyed() all the references to the same
   address will be considered as another sync object, not related with the
   original one. */

#if KMP_USE_DYNAMIC_LOCK
// Takes location information directly
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
                                       const ident_t *loc) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
}
#else // KMP_USE_DYNAMIC_LOCK
// Internal guts -- common code for locks and critical sections, do not call
// directly. Location is looked up through __kmp_get_user_lock_location_.
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    ident_t const *loc = NULL;
    if (__kmp_get_user_lock_location_ != NULL)
      loc = __kmp_get_user_lock_location_((lock));
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
} // ___kmp_itt_lock_init
#endif // KMP_USE_DYNAMIC_LOCK

// Internal guts -- common code for locks and critical sections, do not call
// directly.
__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(lock);
  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
#endif
} // ___kmp_itt_lock_fini

// -----------------------------------------------------------------------------
#if KMP_USE_DYNAMIC_LOCK
void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
}
#else
void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Lock");
} // __kmp_itt_lock_creating
#endif

// For dynamic locks, indirect locks (tag 0) must be resolved to the underlying
// lock object before reporting; direct locks are reported as-is.
void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_prepare_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_prepare(ilk->lock);
    } else {
      __itt_sync_prepare(lock);
    }
  }
#else
  __itt_sync_prepare(lock);
#endif
} // __kmp_itt_lock_acquiring

void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_acquired_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_acquired(ilk->lock);
    } else {
      __itt_sync_acquired(lock);
    }
  }
#else
  __itt_sync_acquired(lock);
#endif
} // __kmp_itt_lock_acquired

void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  if (__itt_sync_releasing_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_releasing(ilk->lock);
    } else {
      __itt_sync_releasing(lock);
    }
  }
#else
  __itt_sync_releasing(lock);
#endif
} // __kmp_itt_lock_releasing

void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  if (__itt_sync_cancel_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_cancel(ilk->lock);
    } else {
      __itt_sync_cancel(lock);
    }
  }
#else
  __itt_sync_cancel(lock);
#endif
} // __kmp_itt_lock_cancelled

void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Lock");
} // __kmp_itt_lock_destroyed

/* Critical reporting.
   Critical sections are treated exactly as locks (but have different object
   type). */
*/
#if KMP_USE_DYNAMIC_LOCK
// Register a critical-section lock with ITT under the "OMP Critical" object
// type. The dynamic-lock build also passes the creating source location.
void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
}
#else
// Register a critical-section lock with ITT under "OMP Critical".
void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Critical");
} // __kmp_itt_critical_creating
#endif

// Thread is about to block entering the critical section.
void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
  __itt_sync_prepare(lock);
} // __kmp_itt_critical_acquiring

// Thread has entered the critical section.
void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
  __itt_sync_acquired(lock);
} // __kmp_itt_critical_acquired

// Thread is leaving the critical section.
void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
  __itt_sync_releasing(lock);
} // __kmp_itt_critical_releasing

// Critical-section lock is being destroyed.
void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Critical");
} // __kmp_itt_critical_destroyed

/* Single reporting. */

// Mark the start of an OMP single region on the executing thread.
// Creates an ITT mark named "OMP Single-<source location>", stores its id in
// th_itt_mark_single, and raises the mark.
void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
    ident_t *loc = thr->th.th_ident;
    // NOTE(review): src may be NULL here and is fed to "%s" below --
    // presumably __kmp_str_buf_print tolerates NULL; confirm.
    char const *src = (loc == NULL ? NULL : loc->psource);
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start

// Mark the end of the OMP single region started by __kmp_itt_single_start,
// using the mark id saved in th_itt_mark_single.
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
#endif
} // __kmp_itt_single_end

/* Ordered reporting.
 * __kmp_itt_ordered_init is called by each thread *before* first using sync
   object. ITT team would like it to be called once, but it requires extra
   synchronization.
 * __kmp_itt_ordered_prep is called when thread is going to enter ordered
   section (before synchronization).
 * __kmp_itt_ordered_start is called just before entering user code (after
   synchronization).
 * __kmp_itt_ordered_end is called after returning from user code.

   Sync object is th->th.th_dispatch->th_dispatch_sh_current.
   Events are not generated in case of serialized team. */

// Register the dispatch buffer as an "OMP Ordered" sync object, tagged with
// the current source location when one is available.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
    ident_t const *loc = thr->th.th_ident;
    char const *src = (loc == NULL ? NULL : loc->psource);
    __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current,
                      "OMP Ordered", src, 0);
  }
#endif
} // __kmp_itt_ordered_init

// Thread is about to wait to enter an ordered section. No-op for a
// serialized team.
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_prep

// Thread has entered the ordered section (after synchronization). No-op for
// a serialized team.
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_start

// Thread is leaving the ordered section. No-op for a serialized team.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_end

/* Threads reporting. */

// Exclude the calling thread from ITT thread reporting.
// NOTE(review): empty parameter list is an unprototyped declarator in C;
// (void) would be stricter -- left unchanged here.
void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore

// Name the calling thread for ITT tools: "OMP Master Thread #N" for the
// team master, "OMP Worker Thread #N" otherwise.
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_thr_name_set_ptr) {
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    if (KMP_MASTER_GTID(gtid)) {
      __kmp_str_buf_print(&name, "OMP Master Thread #%d", gtid);
    } else {
      __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid);
    }
    KMP_ITT_DEBUG_LOCK();
    __itt_thr_name_set(name.str, name.used);
    KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str);
    __kmp_str_buf_free(&name);
  }
#endif
} // __kmp_itt_thread_name

/* System object reporting.
981 ITT catches operations with system sync objects (like Windows* OS on IA-32 982 architecture API critical sections and events). We only need to specify 983 name ("OMP Scheduler") for the object to let ITT know it is an object used 984 by OpenMP RTL for internal purposes. */ 985 986void __kmp_itt_system_object_created(void *object, char const *name) { 987#if USE_ITT_NOTIFY 988 KMP_ITT_DEBUG_LOCK(); 989 __itt_sync_create(object, "OMP Scheduler", name, 0); 990 KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", 991 object, name); 992#endif 993} // __kmp_itt_system_object_created 994 995/* Stack stitching api. 996 Master calls "create" and put the stitching id into team structure. 997 Workers read the stitching id and call "enter" / "leave" api. 998 Master calls "destroy" at the end of the parallel region. */ 999 1000__itt_caller __kmp_itt_stack_caller_create() { 1001#if USE_ITT_NOTIFY 1002 if (!__itt_stack_caller_create_ptr) 1003 return NULL; 1004 KMP_ITT_DEBUG_LOCK(); 1005 __itt_caller id = __itt_stack_caller_create(); 1006 KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id); 1007 return id; 1008#endif 1009 return NULL; 1010} 1011 1012void __kmp_itt_stack_caller_destroy(__itt_caller id) { 1013#if USE_ITT_NOTIFY 1014 if (__itt_stack_caller_destroy_ptr) { 1015 KMP_ITT_DEBUG_LOCK(); 1016 __itt_stack_caller_destroy(id); 1017 KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id); 1018 } 1019#endif 1020} 1021 1022void __kmp_itt_stack_callee_enter(__itt_caller id) { 1023#if USE_ITT_NOTIFY 1024 if (__itt_stack_callee_enter_ptr) { 1025 KMP_ITT_DEBUG_LOCK(); 1026 __itt_stack_callee_enter(id); 1027 KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id); 1028 } 1029#endif 1030} 1031 1032void __kmp_itt_stack_callee_leave(__itt_caller id) { 1033#if USE_ITT_NOTIFY 1034 if (__itt_stack_callee_leave_ptr) { 1035 KMP_ITT_DEBUG_LOCK(); 1036 __itt_stack_callee_leave(id); 1037 KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id); 1038 } 1039#endif 1040} 1041 1042#endif /* USE_ITT_BUILD */ 1043