1 /*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "kmp_utils.h"
22 #include "ompt-specific.h"
23
24 #define MAX_MESSAGE 512
25
26 // flags will be used in future, e.g. to implement openmp_strict library
27 // restrictions
28
29 /*!
30 * @ingroup STARTUP_SHUTDOWN
31 * @param loc in source location information
32 * @param flags in for future use (currently ignored)
33 *
34 * Initialize the runtime library. This call is optional; if it is not made then
35 * it will be implicitly called by attempts to use other library functions.
36 */
37 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
38 // By default __kmpc_begin() is no-op.
39 char *env;
40 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
41 __kmp_str_match_true(env)) {
42 __kmp_middle_initialize();
43 __kmp_assign_root_init_mask();
44 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
45 } else if (__kmp_ignore_mppbeg() == FALSE) {
46 // By default __kmp_ignore_mppbeg() returns TRUE.
47 __kmp_internal_begin();
48 KC_TRACE(10, ("__kmpc_begin: called\n"));
49 }
50 }
51
52 /*!
53 * @ingroup STARTUP_SHUTDOWN
54 * @param loc source location information
55 *
56  * Shut down the runtime library. This is also optional, and even if called it
57  * will not do anything unless the `KMP_IGNORE_MPPEND` environment variable is
58  * set to zero.
59 */
60 void __kmpc_end(ident_t *loc) {
61 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
62 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
63 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
64 // returns FALSE and __kmpc_end() will unregister this root (it can cause
65 // library shut down).
66 if (__kmp_ignore_mppend() == FALSE) {
67 KC_TRACE(10, ("__kmpc_end: called\n"));
68 KA_TRACE(30, ("__kmpc_end\n"));
69
70 __kmp_internal_end_thread(-1);
71 }
72 #if KMP_OS_WINDOWS && OMPT_SUPPORT
73 // Normal exit process on Windows does not allow worker threads of the final
74 // parallel region to finish reporting their events, so shutting down the
75 // library here fixes the issue at least for the cases where __kmpc_end() is
76 // placed properly.
77 if (ompt_enabled.enabled)
78 __kmp_internal_end_library(__kmp_gtid_get_specific());
79 #endif
80 }
81
82 /*!
83 @ingroup THREAD_STATES
84 @param loc Source location information.
85 @return The global thread index of the active thread.
86
87 This function can be called in any context.
88
89 If the runtime has only been entered at the outermost level from a
90 single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
91 that which would be returned by omp_get_thread_num() in the outermost
92 active parallel construct. (Or zero if there is no active parallel
93 construct, since the primary thread is necessarily thread zero).
94
95 If multiple non-OpenMP threads all enter an OpenMP construct then this
96 will be a unique thread identifier among all the threads created by
97 the OpenMP runtime (but the value cannot be defined in terms of
98 OpenMP thread ids returned by omp_get_thread_num()).
99 */
100 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
101 kmp_int32 gtid = __kmp_entry_gtid();
102
103 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
104
105 return gtid;
106 }
107
108 /*!
109 @ingroup THREAD_STATES
110 @param loc Source location information.
111 @return The number of threads under control of the OpenMP<sup>*</sup> runtime
112
113 This function can be called in any context.
114 It returns the total number of threads under the control of the OpenMP runtime.
115 That is not a number that can be determined by any OpenMP standard calls, since
116 the library may be called from more than one non-OpenMP thread, and this
117 reflects the total over all such calls. Similarly, the runtime maintains
118 underlying threads even when they are not active (since the cost of creating
119 and destroying OS threads is high), so this call counts all such threads even
120 if they are currently idle and waiting for work.
121 */
122 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
123 KC_TRACE(10,
124 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
125
126 return TCR_4(__kmp_all_nth);
127 }
128
129 /*!
130 @ingroup THREAD_STATES
131 @param loc Source location information.
132 @return The thread number of the calling thread in the innermost active parallel
133 construct.
134 */
135 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
136 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
137 return __kmp_tid_from_gtid(__kmp_entry_gtid());
138 }
139
140 /*!
141 @ingroup THREAD_STATES
142 @param loc Source location information.
143 @return The number of threads in the innermost active parallel construct.
144 */
145 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
146 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
147
148 return __kmp_entry_thread()->th.th_team->t.t_nproc;
149 }
150
151 /*!
152 * @ingroup DEPRECATED
153 * @param loc location description
154 *
155 * This function need not be called. It always returns TRUE.
156 */
157 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
158 #ifndef KMP_DEBUG
159
160 return TRUE;
161
162 #else
163
164 const char *semi2;
165 const char *semi3;
166 int line_no;
167
168 if (__kmp_par_range == 0) {
169 return TRUE;
170 }
171 semi2 = loc->psource;
172 if (semi2 == NULL) {
173 return TRUE;
174 }
175 semi2 = strchr(semi2, ';');
176 if (semi2 == NULL) {
177 return TRUE;
178 }
179 semi2 = strchr(semi2 + 1, ';');
180 if (semi2 == NULL) {
181 return TRUE;
182 }
183 if (__kmp_par_range_filename[0]) {
184 const char *name = semi2 - 1;
185 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
186 name--;
187 }
188 if ((*name == '/') || (*name == ';')) {
189 name++;
190 }
191 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
192 return __kmp_par_range < 0;
193 }
194 }
195 semi3 = strchr(semi2 + 1, ';');
196 if (__kmp_par_range_routine[0]) {
197 if ((semi3 != NULL) && (semi3 > semi2) &&
198 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
199 return __kmp_par_range < 0;
200 }
201 }
202 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
203 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
204 return __kmp_par_range > 0;
205 }
206 return __kmp_par_range < 0;
207 }
208 return TRUE;
209
210 #endif /* KMP_DEBUG */
211 }
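// A minimal sketch of the psource layout the parsing above assumes (the string
// is emitted by the compiler; the example values are hypothetical):
//
//   loc->psource == ";file.c;routine_name;42;7;;"
//
// The second ';' ends the file name, the third ';' ends the routine name, and
// the source line number follows the third ';'.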
212
213 /*!
214 @ingroup THREAD_STATES
215 @param loc Source location information.
216 @return 1 if this thread is executing inside an active parallel region, zero if
217 not.
218 */
219 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
220 return __kmp_entry_thread()->th.th_root->r.r_active;
221 }
222
223 /*!
224 @ingroup PARALLEL
225 @param loc source location information
226 @param global_tid global thread number
227 @param num_threads number of threads requested for this parallel construct
228
229 Set the number of threads to be used by the next fork spawned by this thread.
230 This call is only required if the parallel construct has a `num_threads` clause.
231 */
232 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
233 kmp_int32 num_threads) {
234 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
235 global_tid, num_threads));
236 __kmp_assert_valid_gtid(global_tid);
237 __kmp_push_num_threads(loc, global_tid, num_threads);
238 }
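// A minimal sketch, assuming the usual lowering of
// `#pragma omp parallel num_threads(4)`: the push call precedes the fork and
// only applies to the next fork by this thread. The outlined routine name and
// the `loc` descriptor below are hypothetical:
//
//   extern void outlined_parallel(kmp_int32 *gtid, kmp_int32 *btid);
//   static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;0;0;;"};
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_threads(&loc, gtid, 4);
//   __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_parallel);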
239
240 void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid,
241 kmp_int32 num_threads, int severity,
242 const char *message) {
243 __kmp_push_num_threads(loc, global_tid, num_threads);
244 __kmp_set_strict_num_threads(loc, global_tid, severity, message);
245 }
246
247 /*!
248 @ingroup PARALLEL
249 @param loc source location information
250 @param global_tid global thread number
251 @param list_length number of entries in the num_threads_list array
252 @param num_threads_list array of numbers of threads requested for this parallel
253 construct and subsequent nested parallel constructs
254
255 Set the number of threads to be used by the next fork spawned by this thread,
256 and some nested forks as well.
257 This call is only required if the parallel construct has a `num_threads` clause
258 that has a list of integers as the argument.
259 */
260 void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
261 kmp_uint32 list_length,
262 kmp_int32 *num_threads_list) {
263 KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=",
264 global_tid));
265 KA_TRACE(20, ("%d", num_threads_list[0]));
266 #ifdef KMP_DEBUG
267 for (kmp_uint32 i = 1; i < list_length; ++i)
268 KA_TRACE(20, (", %d", num_threads_list[i]));
269 #endif
270 KA_TRACE(20, ("/n"));
271
272 __kmp_assert_valid_gtid(global_tid);
273 __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
274 }
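// A minimal sketch, assuming a list-valued `num_threads(4, 2)` clause (4
// threads for this parallel region, 2 for the next nested one), reusing the
// hypothetical `loc`, `gtid` and `outlined_parallel` from the sketch above:
//
//   kmp_int32 nths[2] = {4, 2};
//   __kmpc_push_num_threads_list(&loc, gtid, 2, nths);
//   __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_parallel);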
275
276 void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid,
277 kmp_uint32 list_length,
278 kmp_int32 *num_threads_list,
279 int severity, const char *message) {
280 __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
281 __kmp_set_strict_num_threads(loc, global_tid, severity, message);
282 }
283
284 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
285 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
286 /* the num_threads are automatically popped */
287 }
288
289 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
290 kmp_int32 proc_bind) {
291 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
292 proc_bind));
293 __kmp_assert_valid_gtid(global_tid);
294 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
295 }
296
297 /*!
298 @ingroup PARALLEL
299 @param loc source location information
300 @param argc total number of arguments in the ellipsis
301 @param microtask pointer to callback routine consisting of outlined parallel
302 construct
303 @param ... pointers to shared variables that aren't global
304
305 Do the actual fork and call the microtask in the relevant number of threads.
306 */
307 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
308 int gtid = __kmp_entry_gtid();
309
310 #if (KMP_STATS_ENABLED)
311 // If we were in a serial region, then stop the serial timer, record
312 // the event, and start parallel region timer
313 stats_state_e previous_state = KMP_GET_THREAD_STATE();
314 if (previous_state == stats_state_e::SERIAL_REGION) {
315 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
316 } else {
317 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
318 }
319 int inParallel = __kmpc_in_parallel(loc);
320 if (inParallel) {
321 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
322 } else {
323 KMP_COUNT_BLOCK(OMP_PARALLEL);
324 }
325 #endif
326
327 // maybe to save thr_state is enough here
328 {
329 va_list ap;
330 va_start(ap, microtask);
331
332 #if OMPT_SUPPORT
333 ompt_frame_t *ompt_frame;
334 if (ompt_enabled.enabled) {
335 kmp_info_t *master_th = __kmp_threads[gtid];
336 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
337 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
338 }
339 OMPT_STORE_RETURN_ADDRESS(gtid);
340 #endif
341
342 #if INCLUDE_SSC_MARKS
343 SSC_MARK_FORKING();
344 #endif
345 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
346 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
347 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
348 kmp_va_addr_of(ap));
349 #if INCLUDE_SSC_MARKS
350 SSC_MARK_JOINING();
351 #endif
352 __kmp_join_call(loc, gtid
353 #if OMPT_SUPPORT
354 ,
355 fork_context_intel
356 #endif
357 );
358
359 va_end(ap);
360
361 #if OMPT_SUPPORT
362 if (ompt_enabled.enabled) {
363 ompt_frame->enter_frame = ompt_data_none;
364 }
365 #endif
366 }
367
368 #if KMP_STATS_ENABLED
369 if (previous_state == stats_state_e::SERIAL_REGION) {
370 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
371 KMP_SET_THREAD_STATE(previous_state);
372 } else {
373 KMP_POP_PARTITIONED_TIMER();
374 }
375 #endif // KMP_STATS_ENABLED
376 }
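// A minimal sketch of the calling convention this entry point expects (the
// outlined routine and variable names are hypothetical): the compiler outlines
// the parallel body into a microtask taking the global and bound thread ids
// followed by one pointer per shared variable, then emits a single call:
//
//   void outlined_parallel(kmp_int32 *gtid, kmp_int32 *btid, int *x) {
//     // parallel region body, executed by every thread of the new team
//   }
//   ...
//   int x = 0;
//   __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_parallel, &x);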
377
378 /*!
379 @ingroup PARALLEL
380 @param loc source location information
381 @param microtask pointer to callback routine consisting of outlined parallel
382 construct
383 @param cond condition for running in parallel
384 @param args struct of pointers to shared variables that aren't global
385
386 Perform a fork only if the condition is true.
387 */
388 void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
389 kmp_int32 cond, void *args) {
390 int gtid = __kmp_entry_gtid();
391 if (cond) {
392 if (args)
393 __kmpc_fork_call(loc, argc, microtask, args);
394 else
395 __kmpc_fork_call(loc, argc, microtask);
396 } else {
397 __kmpc_serialized_parallel(loc, gtid);
398
399 #if OMPT_SUPPORT
400 void *exit_frame_ptr;
401 #endif
402
403 if (args)
404 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
405 /*npr=*/0,
406 /*argc=*/1, &args
407 #if OMPT_SUPPORT
408 ,
409 &exit_frame_ptr
410 #endif
411 );
412 else
413 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
414 /*npr=*/0,
415 /*argc=*/0,
416 /*args=*/nullptr
417 #if OMPT_SUPPORT
418 ,
419 &exit_frame_ptr
420 #endif
421 );
422
423 __kmpc_end_serialized_parallel(loc, gtid);
424 }
425 }
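// A minimal sketch, assuming `#pragma omp parallel if (cond)` is lowered onto
// this entry point with one packed argument (names other than the __kmpc_*
// calls are hypothetical); when `cond` is zero the microtask runs serially
// inside a serialized parallel region:
//
//   __kmpc_fork_call_if(&loc, 1, (kmpc_micro)outlined_parallel, cond, &x);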
426
427 /*!
428 @ingroup PARALLEL
429 @param loc source location information
430 @param global_tid global thread number
431 @param num_teams number of teams requested for the teams construct
432 @param num_threads number of threads per team requested for the teams construct
433
434 Set the number of teams to be used by the teams construct.
435 This call is only required if the teams construct has a `num_teams` clause
436 or a `thread_limit` clause (or both).
437 */
438 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
439 kmp_int32 num_teams, kmp_int32 num_threads) {
440 KA_TRACE(20,
441 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
442 global_tid, num_teams, num_threads));
443 __kmp_assert_valid_gtid(global_tid);
444 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
445 }
446
447 /*!
448 @ingroup PARALLEL
449 @param loc source location information
450 @param global_tid global thread number
451 @param thread_limit limit on number of threads which can be created within the
452 current task
453
454 Set the thread_limit for the current task.
455 This call is there to support the `thread_limit` clause on the `target` construct.
456 */
457 void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
458 kmp_int32 thread_limit) {
459 __kmp_assert_valid_gtid(global_tid);
460 kmp_info_t *thread = __kmp_threads[global_tid];
461 if (thread_limit > 0)
462 thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
463 }
464
465 /*!
466 @ingroup PARALLEL
467 @param loc source location information
468 @param global_tid global thread number
469 @param num_teams_lb lower bound on number of teams requested for the teams
470 construct
471 @param num_teams_ub upper bound on number of teams requested for the teams
472 construct
473 @param num_threads number of threads per team requested for the teams construct
474
475 Set the number of teams to be used by the teams construct. The number of initial
476 teams created will be greater than or equal to the lower bound and less than or
477 equal to the upper bound.
478 This call is only required if the teams construct has a `num_teams` clause
479 or a `thread_limit` clause (or both).
480 */
481 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
482 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
483 kmp_int32 num_threads) {
484 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
485 " num_teams_ub=%d num_threads=%d\n",
486 global_tid, num_teams_lb, num_teams_ub, num_threads));
487 __kmp_assert_valid_gtid(global_tid);
488 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
489 num_threads);
490 }
491
492 /*!
493 @ingroup PARALLEL
494 @param loc source location information
495 @param argc total number of arguments in the ellipsis
496 @param microtask pointer to callback routine consisting of outlined teams
497 construct
498 @param ... pointers to shared variables that aren't global
499
500 Do the actual fork and call the microtask in the relevant number of threads.
501 */
502 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
503 ...) {
504 int gtid = __kmp_entry_gtid();
505 kmp_info_t *this_thr = __kmp_threads[gtid];
506 va_list ap;
507 va_start(ap, microtask);
508
509 #if KMP_STATS_ENABLED
510 KMP_COUNT_BLOCK(OMP_TEAMS);
511 stats_state_e previous_state = KMP_GET_THREAD_STATE();
512 if (previous_state == stats_state_e::SERIAL_REGION) {
513 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
514 } else {
515 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
516 }
517 #endif
518
519 // remember teams entry point and nesting level
520 this_thr->th.th_teams_microtask = microtask;
521 this_thr->th.th_teams_level =
522 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
523
524 #if OMPT_SUPPORT
525 kmp_team_t *parent_team = this_thr->th.th_team;
526 int tid = __kmp_tid_from_gtid(gtid);
527 if (ompt_enabled.enabled) {
528 parent_team->t.t_implicit_task_taskdata[tid]
529 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
530 }
531 OMPT_STORE_RETURN_ADDRESS(gtid);
532 #endif
533
534 // check if __kmpc_push_num_teams called, set default number of teams
535 // otherwise
536 if (this_thr->th.th_teams_size.nteams == 0) {
537 __kmp_push_num_teams(loc, gtid, 0, 0);
538 }
539 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
540 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
541 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
542
543 __kmp_fork_call(
544 loc, gtid, fork_context_intel, argc,
545 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
546 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
547 __kmp_join_call(loc, gtid
548 #if OMPT_SUPPORT
549 ,
550 fork_context_intel
551 #endif
552 );
553
554 // Pop current CG root off list
555 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
556 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
557 this_thr->th.th_cg_roots = tmp->up;
558 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
559 " to node %p. cg_nthreads was %d\n",
560 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
561 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
562 int i = tmp->cg_nthreads--;
563 if (i == 1) { // check if we are the last thread in CG (not always the case)
564 __kmp_free(tmp);
565 }
566 // Restore current task's thread_limit from CG root
567 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
568 this_thr->th.th_current_task->td_icvs.thread_limit =
569 this_thr->th.th_cg_roots->cg_thread_limit;
570
571 this_thr->th.th_teams_microtask = NULL;
572 this_thr->th.th_teams_level = 0;
573 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
574 va_end(ap);
575 #if KMP_STATS_ENABLED
576 if (previous_state == stats_state_e::SERIAL_REGION) {
577 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
578 KMP_SET_THREAD_STATE(previous_state);
579 } else {
580 KMP_POP_PARTITIONED_TIMER();
581 }
582 #endif // KMP_STATS_ENABLED
583 }
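// A minimal sketch, assuming `#pragma omp teams num_teams(4) thread_limit(8)`;
// the push call is only needed when one of those clauses is present, and the
// outlined routine name is hypothetical:
//
//   extern void outlined_teams(kmp_int32 *gtid, kmp_int32 *btid);
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_teams(&loc, gtid, 4, 8); // num_teams=4, thread_limit=8
//   __kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams);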
584
585 // I don't think this function should ever have been exported.
586 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
587 // openmp code ever called it, but it's been exported from the RTL for so
588 // long that I'm afraid to remove the definition.
589 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
590
591 /*!
592 @ingroup PARALLEL
593 @param loc source location information
594 @param global_tid global thread number
595
596 Enter a serialized parallel construct. This interface is used to handle a
597 conditional parallel region, like this,
598 @code
599 #pragma omp parallel if (condition)
600 @endcode
601 when the condition is false.
602 */
603 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
604 // The implementation is now in kmp_runtime.cpp so that it can share static
605 // functions with kmp_fork_call since the tasks to be done are similar in
606 // each case.
607 __kmp_assert_valid_gtid(global_tid);
608 #if OMPT_SUPPORT
609 OMPT_STORE_RETURN_ADDRESS(global_tid);
610 #endif
611 __kmp_serialized_parallel(loc, global_tid);
612 }
613
614 /*!
615 @ingroup PARALLEL
616 @param loc source location information
617 @param global_tid global thread number
618
619 Leave a serialized parallel construct.
620 */
621 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
622 kmp_internal_control_t *top;
623 kmp_info_t *this_thr;
624 kmp_team_t *serial_team;
625
626 KC_TRACE(10,
627 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
628
629 /* skip all this code for autopar serialized loops since it results in
630 unacceptable overhead */
631 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
632 return;
633
634 // Not autopar code
635 __kmp_assert_valid_gtid(global_tid);
636 if (!TCR_4(__kmp_init_parallel))
637 __kmp_parallel_initialize();
638
639 __kmp_resume_if_soft_paused();
640
641 this_thr = __kmp_threads[global_tid];
642 serial_team = this_thr->th.th_serial_team;
643
644 kmp_task_team_t *task_team = this_thr->th.th_task_team;
645 // we need to wait for the proxy tasks before finishing the thread
646 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
647 task_team->tt.tt_hidden_helper_task_encountered))
648 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
649
650 KMP_MB();
651 KMP_DEBUG_ASSERT(serial_team);
652 KMP_ASSERT(serial_team->t.t_serialized);
653 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
654 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
655 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
656 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
657
658 #if OMPT_SUPPORT
659 if (ompt_enabled.enabled &&
660 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
661 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
662 if (ompt_enabled.ompt_callback_implicit_task) {
663 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
664 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
665 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
666 }
667
668 // clear the task id only after unlinking the task
669 ompt_data_t *parent_task_data;
670 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
671
672 if (ompt_enabled.ompt_callback_parallel_end) {
673 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
674 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
675 ompt_parallel_invoker_program | ompt_parallel_team,
676 OMPT_LOAD_RETURN_ADDRESS(global_tid));
677 }
678 __ompt_lw_taskteam_unlink(this_thr);
679 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
680 }
681 #endif
682
683 /* If necessary, pop the internal control stack values and replace the team
684 * values */
685 top = serial_team->t.t_control_stack_top;
686 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
687 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
688 serial_team->t.t_control_stack_top = top->next;
689 __kmp_free(top);
690 }
691
692 /* pop dispatch buffers stack */
693 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
694 {
695 dispatch_private_info_t *disp_buffer =
696 serial_team->t.t_dispatch->th_disp_buffer;
697 serial_team->t.t_dispatch->th_disp_buffer =
698 serial_team->t.t_dispatch->th_disp_buffer->next;
699 __kmp_free(disp_buffer);
700 }
701
702 /* pop the task team stack */
703 if (serial_team->t.t_serialized > 1) {
704 __kmp_pop_task_team_node(this_thr, serial_team);
705 }
706
707 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
708
709 --serial_team->t.t_serialized;
710 if (serial_team->t.t_serialized == 0) {
711
712 /* return to the parallel section */
713
714 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
715 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
716 __kmp_clear_x87_fpu_status_word();
717 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
718 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
719 }
720 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
721
722 __kmp_pop_current_task_from_thread(this_thr);
723 #if OMPD_SUPPORT
724 if (ompd_state & OMPD_ENABLE_BP)
725 ompd_bp_parallel_end();
726 #endif
727
728 this_thr->th.th_team = serial_team->t.t_parent;
729 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
730
731 /* restore values cached in the thread */
732 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
733 this_thr->th.th_team_master =
734 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
735 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
736
737 /* TODO the below shouldn't need to be adjusted for serialized teams */
738 this_thr->th.th_dispatch =
739 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
740
741 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
742 this_thr->th.th_current_task->td_flags.executing = 1;
743
744 if (__kmp_tasking_mode != tskm_immediate_exec) {
745 // Restore task state from serial team structure
746 KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||
747 serial_team->t.t_primary_task_state == 1);
748 this_thr->th.th_task_state =
749 (kmp_uint8)serial_team->t.t_primary_task_state;
750 // Copy the task team from the new child / old parent team to the thread.
751 this_thr->th.th_task_team =
752 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
753 KA_TRACE(20,
754 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
755 "team %p\n",
756 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
757 }
758 #if KMP_AFFINITY_SUPPORTED
759 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
760 __kmp_reset_root_init_mask(global_tid);
761 }
762 #endif
763 } else {
764 if (__kmp_tasking_mode != tskm_immediate_exec) {
765 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
766 "depth of serial team %p to %d\n",
767 global_tid, serial_team, serial_team->t.t_serialized));
768 }
769 }
770
771 serial_team->t.t_level--;
772 if (__kmp_env_consistency_check)
773 __kmp_pop_parallel(global_tid, NULL);
774 #if OMPT_SUPPORT
775 if (ompt_enabled.enabled)
776 this_thr->th.ompt_thread_info.state =
777 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
778 : ompt_state_work_parallel);
779 #endif
780 }
781
782 /*!
783 @ingroup SYNCHRONIZATION
784 @param loc source location information.
785
786 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
787 depending on the memory ordering convention obeyed by the compiler
788 even that may not be necessary).
789 */
790 void __kmpc_flush(ident_t *loc) {
791 KC_TRACE(10, ("__kmpc_flush: called\n"));
792
793 /* need explicit __mf() here since use volatile instead in library */
794 KMP_MFENCE(); /* Flush all pending memory write invalidates. */
795
796 #if OMPT_SUPPORT && OMPT_OPTIONAL
797 if (ompt_enabled.ompt_callback_flush) {
798 ompt_callbacks.ompt_callback(ompt_callback_flush)(
799 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
800 }
801 #endif
802 }
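// A minimal sketch: a standalone `#pragma omp flush` is expected to lower to a
// single call using the compiler-emitted location descriptor, e.g.
//
//   __kmpc_flush(&loc);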
803
804 /* -------------------------------------------------------------------------- */
805 /*!
806 @ingroup SYNCHRONIZATION
807 @param loc source location information
808 @param global_tid thread id.
809
810 Execute a barrier.
811 */
812 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
813 KMP_COUNT_BLOCK(OMP_BARRIER);
814 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
815 __kmp_assert_valid_gtid(global_tid);
816
817 if (!TCR_4(__kmp_init_parallel))
818 __kmp_parallel_initialize();
819
820 __kmp_resume_if_soft_paused();
821
822 if (__kmp_env_consistency_check) {
823 if (loc == 0) {
824 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
825 }
826 __kmp_check_barrier(global_tid, ct_barrier, loc);
827 }
828
829 #if OMPT_SUPPORT
830 ompt_frame_t *ompt_frame;
831 if (ompt_enabled.enabled) {
832 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
833 if (ompt_frame->enter_frame.ptr == NULL)
834 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
835 }
836 OMPT_STORE_RETURN_ADDRESS(global_tid);
837 #endif
838 __kmp_threads[global_tid]->th.th_ident = loc;
839 // TODO: explicit barrier_wait_id:
840 // this function is called when 'barrier' directive is present or
841 // implicit barrier at the end of a worksharing construct.
842 // 1) better to add a per-thread barrier counter to a thread data structure
843 // 2) set to 0 when a new team is created
844 // 4) no sync is required
845
846 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
847 #if OMPT_SUPPORT && OMPT_OPTIONAL
848 if (ompt_enabled.enabled) {
849 ompt_frame->enter_frame = ompt_data_none;
850 }
851 #endif
852 }
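// A minimal sketch of lowering an explicit `#pragma omp barrier`; `loc` is
// assumed to be the compiler-emitted ident_t for the directive:
//
//   __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));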
853
854 /* The BARRIER for a MASTER section is always explicit */
855 /*!
856 @ingroup WORK_SHARING
857 @param loc source location information.
858 @param global_tid global thread number.
859 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
860 */
861 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
862 int status = 0;
863
864 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
865 __kmp_assert_valid_gtid(global_tid);
866
867 if (!TCR_4(__kmp_init_parallel))
868 __kmp_parallel_initialize();
869
870 __kmp_resume_if_soft_paused();
871
872 if (KMP_MASTER_GTID(global_tid)) {
873 KMP_COUNT_BLOCK(OMP_MASTER);
874 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
875 status = 1;
876 }
877
878 #if OMPT_SUPPORT && OMPT_OPTIONAL
879 if (status) {
880 if (ompt_enabled.ompt_callback_masked) {
881 kmp_info_t *this_thr = __kmp_threads[global_tid];
882 kmp_team_t *team = this_thr->th.th_team;
883
884 int tid = __kmp_tid_from_gtid(global_tid);
885 ompt_callbacks.ompt_callback(ompt_callback_masked)(
886 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
887 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
888 OMPT_GET_RETURN_ADDRESS(0));
889 }
890 }
891 #endif
892
893 if (__kmp_env_consistency_check) {
894 #if KMP_USE_DYNAMIC_LOCK
895 if (status)
896 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
897 else
898 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
899 #else
900 if (status)
901 __kmp_push_sync(global_tid, ct_master, loc, NULL);
902 else
903 __kmp_check_sync(global_tid, ct_master, loc, NULL);
904 #endif
905 }
906
907 return status;
908 }
909
910 /*!
911 @ingroup WORK_SHARING
912 @param loc source location information.
913 @param global_tid global thread number.
914
915 Mark the end of a <tt>master</tt> region. This should only be called by the
916 thread that executes the <tt>master</tt> region.
917 */
918 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
919 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
920 __kmp_assert_valid_gtid(global_tid);
921 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
922 KMP_POP_PARTITIONED_TIMER();
923
924 #if OMPT_SUPPORT && OMPT_OPTIONAL
925 kmp_info_t *this_thr = __kmp_threads[global_tid];
926 kmp_team_t *team = this_thr->th.th_team;
927 if (ompt_enabled.ompt_callback_masked) {
928 int tid = __kmp_tid_from_gtid(global_tid);
929 ompt_callbacks.ompt_callback(ompt_callback_masked)(
930 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
931 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
932 OMPT_GET_RETURN_ADDRESS(0));
933 }
934 #endif
935
936 if (__kmp_env_consistency_check) {
937 if (KMP_MASTER_GTID(global_tid))
938 __kmp_pop_sync(global_tid, ct_master, loc);
939 }
940 }
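// A minimal sketch of lowering `#pragma omp master`; only the thread for which
// __kmpc_master() returned 1 executes the body and calls __kmpc_end_master():
//
//   if (__kmpc_master(&loc, gtid)) {
//     // ... master region body ...
//     __kmpc_end_master(&loc, gtid);
//   }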
941
942 /*!
943 @ingroup WORK_SHARING
944 @param loc source location information.
945 @param global_tid global thread number.
946 @param filter result of evaluating filter clause on thread global_tid, or zero
947 if no filter clause present
948 @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
949 */
950 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
951 int status = 0;
952 int tid;
953 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
954 __kmp_assert_valid_gtid(global_tid);
955
956 if (!TCR_4(__kmp_init_parallel))
957 __kmp_parallel_initialize();
958
959 __kmp_resume_if_soft_paused();
960
961 tid = __kmp_tid_from_gtid(global_tid);
962 if (tid == filter) {
963 KMP_COUNT_BLOCK(OMP_MASKED);
964 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
965 status = 1;
966 }
967
968 #if OMPT_SUPPORT && OMPT_OPTIONAL
969 if (status) {
970 if (ompt_enabled.ompt_callback_masked) {
971 kmp_info_t *this_thr = __kmp_threads[global_tid];
972 kmp_team_t *team = this_thr->th.th_team;
973 ompt_callbacks.ompt_callback(ompt_callback_masked)(
974 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
975 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
976 OMPT_GET_RETURN_ADDRESS(0));
977 }
978 }
979 #endif
980
981 if (__kmp_env_consistency_check) {
982 #if KMP_USE_DYNAMIC_LOCK
983 if (status)
984 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
985 else
986 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
987 #else
988 if (status)
989 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
990 else
991 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
992 #endif
993 }
994
995 return status;
996 }
997
998 /*!
999 @ingroup WORK_SHARING
1000 @param loc source location information.
1001 @param global_tid global thread number.
1002
1003 Mark the end of a <tt>masked</tt> region. This should only be called by the
1004 thread that executes the <tt>masked</tt> region.
1005 */
1006 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
1007 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
1008 __kmp_assert_valid_gtid(global_tid);
1009 KMP_POP_PARTITIONED_TIMER();
1010
1011 #if OMPT_SUPPORT && OMPT_OPTIONAL
1012 kmp_info_t *this_thr = __kmp_threads[global_tid];
1013 kmp_team_t *team = this_thr->th.th_team;
1014 if (ompt_enabled.ompt_callback_masked) {
1015 int tid = __kmp_tid_from_gtid(global_tid);
1016 ompt_callbacks.ompt_callback(ompt_callback_masked)(
1017 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
1018 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1019 OMPT_GET_RETURN_ADDRESS(0));
1020 }
1021 #endif
1022
1023 if (__kmp_env_consistency_check) {
1024 __kmp_pop_sync(global_tid, ct_masked, loc);
1025 }
1026 }
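// A minimal sketch of lowering `#pragma omp masked filter(expr)`; the caller
// evaluates the filter expression and passes the result, and only the thread
// for which __kmpc_masked() returned 1 calls __kmpc_end_masked():
//
//   if (__kmpc_masked(&loc, gtid, filter_value)) {
//     // ... masked region body ...
//     __kmpc_end_masked(&loc, gtid);
//   }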
1027
1028 /*!
1029 @ingroup WORK_SHARING
1030 @param loc source location information.
1031 @param gtid global thread number.
1032
1033 Start execution of an <tt>ordered</tt> construct.
1034 */
1035 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
1036 int cid = 0;
1037 kmp_info_t *th;
1038 KMP_DEBUG_ASSERT(__kmp_init_serial);
1039
1040 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
1041 __kmp_assert_valid_gtid(gtid);
1042
1043 if (!TCR_4(__kmp_init_parallel))
1044 __kmp_parallel_initialize();
1045
1046 __kmp_resume_if_soft_paused();
1047
1048 #if USE_ITT_BUILD
1049 __kmp_itt_ordered_prep(gtid);
1050 // TODO: ordered_wait_id
1051 #endif /* USE_ITT_BUILD */
1052
1053 th = __kmp_threads[gtid];
1054
1055 #if OMPT_SUPPORT && OMPT_OPTIONAL
1056 kmp_team_t *team;
1057 ompt_wait_id_t lck;
1058 void *codeptr_ra;
1059 OMPT_STORE_RETURN_ADDRESS(gtid);
1060 if (ompt_enabled.enabled) {
1061 team = __kmp_team_from_gtid(gtid);
1062 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1063 /* OMPT state update */
1064 th->th.ompt_thread_info.wait_id = lck;
1065 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1066
1067 /* OMPT event callback */
1068 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1069 if (ompt_enabled.ompt_callback_mutex_acquire) {
1070 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1071 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1072 codeptr_ra);
1073 }
1074 }
1075 #endif
1076
1077 if (th->th.th_dispatch->th_deo_fcn != 0)
1078 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1079 else
1080 __kmp_parallel_deo(&gtid, &cid, loc);
1081
1082 #if OMPT_SUPPORT && OMPT_OPTIONAL
1083 if (ompt_enabled.enabled) {
1084 /* OMPT state update */
1085 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1086 th->th.ompt_thread_info.wait_id = 0;
1087
1088 /* OMPT event callback */
1089 if (ompt_enabled.ompt_callback_mutex_acquired) {
1090 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1091 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1092 }
1093 }
1094 #endif
1095
1096 #if USE_ITT_BUILD
1097 __kmp_itt_ordered_start(gtid);
1098 #endif /* USE_ITT_BUILD */
1099 }
1100
1101 /*!
1102 @ingroup WORK_SHARING
1103 @param loc source location information.
1104 @param gtid global thread number.
1105
1106 End execution of an <tt>ordered</tt> construct.
1107 */
1108 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1109 int cid = 0;
1110 kmp_info_t *th;
1111
1112 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1113 __kmp_assert_valid_gtid(gtid);
1114
1115 #if USE_ITT_BUILD
1116 __kmp_itt_ordered_end(gtid);
1117 // TODO: ordered_wait_id
1118 #endif /* USE_ITT_BUILD */
1119
1120 th = __kmp_threads[gtid];
1121
1122 if (th->th.th_dispatch->th_dxo_fcn != 0)
1123 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1124 else
1125 __kmp_parallel_dxo(&gtid, &cid, loc);
1126
1127 #if OMPT_SUPPORT && OMPT_OPTIONAL
1128 OMPT_STORE_RETURN_ADDRESS(gtid);
1129 if (ompt_enabled.ompt_callback_mutex_released) {
1130 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1131 ompt_mutex_ordered,
1132 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1133 ->t.t_ordered.dt.t_value,
1134 OMPT_LOAD_RETURN_ADDRESS(gtid));
1135 }
1136 #endif
1137 }
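// A minimal sketch of an `ordered` region inside a loop worksharing construct;
// the enclosing loop is assumed to have been dispatched with an `ordered`
// clause, so this pair executes the body in logical iteration order:
//
//   // for each iteration handed out by the dispatcher:
//   __kmpc_ordered(&loc, gtid);
//   // ... ordered body ...
//   __kmpc_end_ordered(&loc, gtid);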
1138
1139 #if KMP_USE_DYNAMIC_LOCK
1140
1141 static __forceinline void
1142 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1143 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1144 // Pointer to the allocated indirect lock is written to crit, while indexing
1145 // is ignored.
1146 void *idx;
1147 kmp_indirect_lock_t **lck;
1148 lck = (kmp_indirect_lock_t **)crit;
1149 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1150 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1151 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1152 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1153 KA_TRACE(20,
1154 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1155 #if USE_ITT_BUILD
1156 __kmp_itt_critical_creating(ilk->lock, loc);
1157 #endif
1158 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1159 if (status == 0) {
1160 #if USE_ITT_BUILD
1161 __kmp_itt_critical_destroyed(ilk->lock);
1162 #endif
1163 // We don't really need to destroy the unclaimed lock here since it will be
1164 // cleaned up at program exit.
1165 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1166 }
1167 KMP_DEBUG_ASSERT(*lck != NULL);
1168 }
1169
1170 // Fast-path acquire tas lock
1171 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1172 { \
1173 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1174 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1175 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1176 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1177 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1178 kmp_uint32 spins; \
1179 KMP_FSYNC_PREPARE(l); \
1180 KMP_INIT_YIELD(spins); \
1181 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1182 do { \
1183 if (TCR_4(__kmp_nth) > \
1184 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1185 KMP_YIELD(TRUE); \
1186 } else { \
1187 KMP_YIELD_SPIN(spins); \
1188 } \
1189 __kmp_spin_backoff(&backoff); \
1190 } while ( \
1191 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1192 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1193 } \
1194 KMP_FSYNC_ACQUIRED(l); \
1195 }
1196
1197 // Fast-path test tas lock
1198 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1199 { \
1200 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1201 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1202 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1203 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1204 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1205 }
1206
1207 // Fast-path release tas lock
1208 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1209 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1210
1211 #if KMP_USE_FUTEX
1212
1213 #include <sys/syscall.h>
1214 #include <unistd.h>
1215 #ifndef FUTEX_WAIT
1216 #define FUTEX_WAIT 0
1217 #endif
1218 #ifndef FUTEX_WAKE
1219 #define FUTEX_WAKE 1
1220 #endif
1221
1222 // Fast-path acquire futex lock
1223 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1224 { \
1225 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1226 kmp_int32 gtid_code = (gtid + 1) << 1; \
1227 KMP_MB(); \
1228 KMP_FSYNC_PREPARE(ftx); \
1229 kmp_int32 poll_val; \
1230 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1231 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1232 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1233 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1234 if (!cond) { \
1235 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1236 poll_val | \
1237 KMP_LOCK_BUSY(1, futex))) { \
1238 continue; \
1239 } \
1240 poll_val |= KMP_LOCK_BUSY(1, futex); \
1241 } \
1242 kmp_int32 rc; \
1243 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1244 NULL, NULL, 0)) != 0) { \
1245 continue; \
1246 } \
1247 gtid_code |= 1; \
1248 } \
1249 KMP_FSYNC_ACQUIRED(ftx); \
1250 }
1251
1252 // Fast-path test futex lock
1253 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1254 { \
1255 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1256 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1257 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1258 KMP_FSYNC_ACQUIRED(ftx); \
1259 rc = TRUE; \
1260 } else { \
1261 rc = FALSE; \
1262 } \
1263 }
1264
1265 // Fast-path release futex lock
1266 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1267 { \
1268 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1269 KMP_MB(); \
1270 KMP_FSYNC_RELEASING(ftx); \
1271 kmp_int32 poll_val = \
1272 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1273 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1274 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1275 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1276 } \
1277 KMP_MB(); \
1278 KMP_YIELD_OVERSUB(); \
1279 }
1280
1281 #endif // KMP_USE_FUTEX
1282
1283 #else // KMP_USE_DYNAMIC_LOCK
1284
1285 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1286 ident_t const *loc,
1287 kmp_int32 gtid) {
1288 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1289
1290 // Because of the double-check, the following load doesn't need to be volatile
1291 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1292
1293 if (lck == NULL) {
1294 void *idx;
1295
1296 // Allocate & initialize the lock.
1297 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1298 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1299 __kmp_init_user_lock_with_checks(lck);
1300 __kmp_set_user_lock_location(lck, loc);
1301 #if USE_ITT_BUILD
1302 __kmp_itt_critical_creating(lck);
1303 // __kmp_itt_critical_creating() should be called *before* the first usage
1304 // of the underlying lock. It is the only place where we can guarantee it. There
1305 // is a chance the lock will be destroyed with no usage, but that is not a
1306 // problem, because this is not a real event seen by the user but rather the
1307 // setting of a name for an object (the lock). See more details in kmp_itt.h.
1308 #endif /* USE_ITT_BUILD */
1309
1310 // Use a cmpxchg instruction to slam the start of the critical section with
1311 // the lock pointer. If another thread beat us to it, deallocate the lock,
1312 // and use the lock that the other thread allocated.
1313 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1314
1315 if (status == 0) {
1316 // Deallocate the lock and reload the value.
1317 #if USE_ITT_BUILD
1318 __kmp_itt_critical_destroyed(lck);
1319 // Let ITT know the lock is destroyed and the same memory location may be reused
1320 // for another purpose.
1321 #endif /* USE_ITT_BUILD */
1322 __kmp_destroy_user_lock_with_checks(lck);
1323 __kmp_user_lock_free(&idx, gtid, lck);
1324 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1325 KMP_DEBUG_ASSERT(lck != NULL);
1326 }
1327 }
1328 return lck;
1329 }
1330
1331 #endif // KMP_USE_DYNAMIC_LOCK
1332
1333 /*!
1334 @ingroup WORK_SHARING
1335 @param loc source location information.
1336 @param global_tid global thread number.
1337 @param crit identity of the critical section. This could be a pointer to a lock
1338 associated with the critical section, or some other suitably unique value.
1339
1340 Enter code protected by a `critical` construct.
1341 This function blocks until the executing thread can enter the critical section.
1342 */
1343 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1344 kmp_critical_name *crit) {
1345 #if KMP_USE_DYNAMIC_LOCK
1346 #if OMPT_SUPPORT && OMPT_OPTIONAL
1347 OMPT_STORE_RETURN_ADDRESS(global_tid);
1348 #endif // OMPT_SUPPORT
1349 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1350 #else
1351 KMP_COUNT_BLOCK(OMP_CRITICAL);
1352 #if OMPT_SUPPORT && OMPT_OPTIONAL
1353 ompt_state_t prev_state = ompt_state_undefined;
1354 ompt_thread_info_t ti;
1355 #endif
1356 kmp_user_lock_p lck;
1357
1358 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1359 __kmp_assert_valid_gtid(global_tid);
1360
1361 // TODO: add THR_OVHD_STATE
1362
1363 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1364 KMP_CHECK_USER_LOCK_INIT();
1365
1366 if ((__kmp_user_lock_kind == lk_tas) &&
1367 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1368 lck = (kmp_user_lock_p)crit;
1369 }
1370 #if KMP_USE_FUTEX
1371 else if ((__kmp_user_lock_kind == lk_futex) &&
1372 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1373 lck = (kmp_user_lock_p)crit;
1374 }
1375 #endif
1376 else { // ticket, queuing or drdpa
1377 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1378 }
1379
1380 if (__kmp_env_consistency_check)
1381 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1382
1383 // since the critical directive binds to all threads, not just the current
1384 // team we have to check this even if we are in a serialized team.
1385 // also, even if we are the uber thread, we still have to acquire the lock,
1386 // as we have to contend with sibling threads.
1387
1388 #if USE_ITT_BUILD
1389 __kmp_itt_critical_acquiring(lck);
1390 #endif /* USE_ITT_BUILD */
1391 #if OMPT_SUPPORT && OMPT_OPTIONAL
1392 OMPT_STORE_RETURN_ADDRESS(global_tid);
1393 void *codeptr_ra = NULL;
1394 if (ompt_enabled.enabled) {
1395 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1396 /* OMPT state update */
1397 prev_state = ti.state;
1398 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1399 ti.state = ompt_state_wait_critical;
1400
1401 /* OMPT event callback */
1402 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1403 if (ompt_enabled.ompt_callback_mutex_acquire) {
1404 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1405 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1406 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1407 }
1408 }
1409 #endif
1410 // Value of 'crit' should be good for using as a critical_id of the critical
1411 // section directive.
1412 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1413
1414 #if USE_ITT_BUILD
1415 __kmp_itt_critical_acquired(lck);
1416 #endif /* USE_ITT_BUILD */
1417 #if OMPT_SUPPORT && OMPT_OPTIONAL
1418 if (ompt_enabled.enabled) {
1419 /* OMPT state update */
1420 ti.state = prev_state;
1421 ti.wait_id = 0;
1422
1423 /* OMPT event callback */
1424 if (ompt_enabled.ompt_callback_mutex_acquired) {
1425 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1426 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1427 }
1428 }
1429 #endif
1430 KMP_POP_PARTITIONED_TIMER();
1431
1432 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1433 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1434 #endif // KMP_USE_DYNAMIC_LOCK
1435 }
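// A minimal sketch of lowering `#pragma omp critical (name)`; the compiler is
// assumed to emit one zero-initialized kmp_critical_name per critical name and
// pass its address to both entry points:
//
//   static kmp_critical_name crit_name; // zero-initialized lock identity
//   __kmpc_critical(&loc, gtid, &crit_name);
//   // ... critical region body ...
//   __kmpc_end_critical(&loc, gtid, &crit_name);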
1436
1437 #if KMP_USE_DYNAMIC_LOCK
1438
1439 // Converts the given hint to an internal lock implementation
1440 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1441 #if KMP_USE_TSX
1442 #define KMP_TSX_LOCK(seq) lockseq_##seq
1443 #else
1444 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1445 #endif
1446
1447 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1448 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1449 #else
1450 #define KMP_CPUINFO_RTM 0
1451 #endif
1452
1453 // Hints that do not require further logic
1454 if (hint & kmp_lock_hint_hle)
1455 return KMP_TSX_LOCK(hle);
1456 if (hint & kmp_lock_hint_rtm)
1457 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1458 if (hint & kmp_lock_hint_adaptive)
1459 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1460
1461 // Rule out conflicting hints first by returning the default lock
1462 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1463 return __kmp_user_lock_seq;
1464 if ((hint & omp_lock_hint_speculative) &&
1465 (hint & omp_lock_hint_nonspeculative))
1466 return __kmp_user_lock_seq;
1467
1468 // Do not even consider speculation when it appears to be contended
1469 if (hint & omp_lock_hint_contended)
1470 return lockseq_queuing;
1471
1472 // Uncontended lock without speculation
1473 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1474 return lockseq_tas;
1475
1476 // Use RTM lock for speculation
1477 if (hint & omp_lock_hint_speculative)
1478 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1479
1480 return __kmp_user_lock_seq;
1481 }
1482
1483 #if OMPT_SUPPORT && OMPT_OPTIONAL
1484 #if KMP_USE_DYNAMIC_LOCK
1485 static kmp_mutex_impl_t
1486 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1487 if (user_lock) {
1488 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1489 case 0:
1490 break;
1491 #if KMP_USE_FUTEX
1492 case locktag_futex:
1493 return kmp_mutex_impl_queuing;
1494 #endif
1495 case locktag_tas:
1496 return kmp_mutex_impl_spin;
1497 #if KMP_USE_TSX
1498 case locktag_hle:
1499 case locktag_rtm_spin:
1500 return kmp_mutex_impl_speculative;
1501 #endif
1502 default:
1503 return kmp_mutex_impl_none;
1504 }
1505 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1506 }
1507 KMP_ASSERT(ilock);
1508 switch (ilock->type) {
1509 #if KMP_USE_TSX
1510 case locktag_adaptive:
1511 case locktag_rtm_queuing:
1512 return kmp_mutex_impl_speculative;
1513 #endif
1514 case locktag_nested_tas:
1515 return kmp_mutex_impl_spin;
1516 #if KMP_USE_FUTEX
1517 case locktag_nested_futex:
1518 #endif
1519 case locktag_ticket:
1520 case locktag_queuing:
1521 case locktag_drdpa:
1522 case locktag_nested_ticket:
1523 case locktag_nested_queuing:
1524 case locktag_nested_drdpa:
1525 return kmp_mutex_impl_queuing;
1526 default:
1527 return kmp_mutex_impl_none;
1528 }
1529 }
1530 #else
1531 // For locks without dynamic binding
1532 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1533 switch (__kmp_user_lock_kind) {
1534 case lk_tas:
1535 return kmp_mutex_impl_spin;
1536 #if KMP_USE_FUTEX
1537 case lk_futex:
1538 #endif
1539 case lk_ticket:
1540 case lk_queuing:
1541 case lk_drdpa:
1542 return kmp_mutex_impl_queuing;
1543 #if KMP_USE_TSX
1544 case lk_hle:
1545 case lk_rtm_queuing:
1546 case lk_rtm_spin:
1547 case lk_adaptive:
1548 return kmp_mutex_impl_speculative;
1549 #endif
1550 default:
1551 return kmp_mutex_impl_none;
1552 }
1553 }
1554 #endif // KMP_USE_DYNAMIC_LOCK
1555 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1556
1557 /*!
1558 @ingroup WORK_SHARING
1559 @param loc source location information.
1560 @param global_tid global thread number.
1561 @param crit identity of the critical section. This could be a pointer to a lock
1562 associated with the critical section, or some other suitably unique value.
1563 @param hint the lock hint.
1564
1565 Enter code protected by a `critical` construct with a hint. The hint value is
1566 used to suggest a lock implementation. This function blocks until the executing
1567 thread can enter the critical section unless the hint suggests use of
1568 speculative execution and the hardware supports it.
1569 */
1570 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1571 kmp_critical_name *crit, uint32_t hint) {
1572 KMP_COUNT_BLOCK(OMP_CRITICAL);
1573 kmp_user_lock_p lck;
1574 #if OMPT_SUPPORT && OMPT_OPTIONAL
1575 ompt_state_t prev_state = ompt_state_undefined;
1576 ompt_thread_info_t ti;
1577 // This is the case if called from __kmpc_critical:
1578 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1579 if (!codeptr)
1580 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1581 #endif
1582
1583 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1584 __kmp_assert_valid_gtid(global_tid);
1585
1586 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1587 // Check if it is initialized.
1588 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1589 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1590 if (*lk == 0) {
1591 if (KMP_IS_D_LOCK(lockseq)) {
1592 KMP_COMPARE_AND_STORE_ACQ32(
1593 (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
1594 KMP_GET_D_TAG(lockseq));
1595 } else {
1596 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1597 }
1598 }
1599 // Branch to access the actual lock object and perform the set operation. The
1600 // branching is unavoidable because this lock initialization does not follow
1601 // the normal dispatch path (the lock table is not used).
1602 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1603 lck = (kmp_user_lock_p)lk;
1604 if (__kmp_env_consistency_check) {
1605 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1606 __kmp_map_hint_to_lock(hint));
1607 }
1608 #if USE_ITT_BUILD
1609 __kmp_itt_critical_acquiring(lck);
1610 #endif
1611 #if OMPT_SUPPORT && OMPT_OPTIONAL
1612 if (ompt_enabled.enabled) {
1613 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1614 /* OMPT state update */
1615 prev_state = ti.state;
1616 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1617 ti.state = ompt_state_wait_critical;
1618
1619 /* OMPT event callback */
1620 if (ompt_enabled.ompt_callback_mutex_acquire) {
1621 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1622 ompt_mutex_critical, (unsigned int)hint,
1623 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1624 codeptr);
1625 }
1626 }
1627 #endif
1628 #if KMP_USE_INLINED_TAS
1629 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1630 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1631 } else
1632 #elif KMP_USE_INLINED_FUTEX
1633 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1634 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1635 } else
1636 #endif
1637 {
1638 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1639 }
1640 } else {
1641 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1642 lck = ilk->lock;
1643 if (__kmp_env_consistency_check) {
1644 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1645 __kmp_map_hint_to_lock(hint));
1646 }
1647 #if USE_ITT_BUILD
1648 __kmp_itt_critical_acquiring(lck);
1649 #endif
1650 #if OMPT_SUPPORT && OMPT_OPTIONAL
1651 if (ompt_enabled.enabled) {
1652 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1653 /* OMPT state update */
1654 prev_state = ti.state;
1655 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1656 ti.state = ompt_state_wait_critical;
1657
1658 /* OMPT event callback */
1659 if (ompt_enabled.ompt_callback_mutex_acquire) {
1660 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1661 ompt_mutex_critical, (unsigned int)hint,
1662 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1663 codeptr);
1664 }
1665 }
1666 #endif
1667 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1668 }
1669 KMP_POP_PARTITIONED_TIMER();
1670
1671 #if USE_ITT_BUILD
1672 __kmp_itt_critical_acquired(lck);
1673 #endif /* USE_ITT_BUILD */
1674 #if OMPT_SUPPORT && OMPT_OPTIONAL
1675 if (ompt_enabled.enabled) {
1676 /* OMPT state update */
1677 ti.state = prev_state;
1678 ti.wait_id = 0;
1679
1680 /* OMPT event callback */
1681 if (ompt_enabled.ompt_callback_mutex_acquired) {
1682 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1683 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1684 }
1685 }
1686 #endif
1687
1688 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1689 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1690 } // __kmpc_critical_with_hint
1691
1692 #endif // KMP_USE_DYNAMIC_LOCK
1693
1694 /*!
1695 @ingroup WORK_SHARING
1696 @param loc source location information.
1697 @param global_tid global thread number.
1698 @param crit identity of the critical section. This could be a pointer to a lock
1699 associated with the critical section, or some other suitably unique value.
1700
1701 Leave a critical section, releasing any lock that was held during its execution.
1702 */
1703 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1704 kmp_critical_name *crit) {
1705 kmp_user_lock_p lck;
1706
1707 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1708
1709 #if KMP_USE_DYNAMIC_LOCK
1710 int locktag = KMP_EXTRACT_D_TAG(crit);
1711 if (locktag) {
1712 lck = (kmp_user_lock_p)crit;
1713 KMP_ASSERT(lck != NULL);
1714 if (__kmp_env_consistency_check) {
1715 __kmp_pop_sync(global_tid, ct_critical, loc);
1716 }
1717 #if USE_ITT_BUILD
1718 __kmp_itt_critical_releasing(lck);
1719 #endif
1720 #if KMP_USE_INLINED_TAS
1721 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1722 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1723 } else
1724 #elif KMP_USE_INLINED_FUTEX
1725 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1726 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1727 } else
1728 #endif
1729 {
1730 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1731 }
1732 } else {
1733 kmp_indirect_lock_t *ilk =
1734 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1735 KMP_ASSERT(ilk != NULL);
1736 lck = ilk->lock;
1737 if (__kmp_env_consistency_check) {
1738 __kmp_pop_sync(global_tid, ct_critical, loc);
1739 }
1740 #if USE_ITT_BUILD
1741 __kmp_itt_critical_releasing(lck);
1742 #endif
1743 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1744 }
1745
1746 #else // KMP_USE_DYNAMIC_LOCK
1747
1748 if ((__kmp_user_lock_kind == lk_tas) &&
1749 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1750 lck = (kmp_user_lock_p)crit;
1751 }
1752 #if KMP_USE_FUTEX
1753 else if ((__kmp_user_lock_kind == lk_futex) &&
1754 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1755 lck = (kmp_user_lock_p)crit;
1756 }
1757 #endif
1758 else { // ticket, queuing or drdpa
1759 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1760 }
1761
1762 KMP_ASSERT(lck != NULL);
1763
1764 if (__kmp_env_consistency_check)
1765 __kmp_pop_sync(global_tid, ct_critical, loc);
1766
1767 #if USE_ITT_BUILD
1768 __kmp_itt_critical_releasing(lck);
1769 #endif /* USE_ITT_BUILD */
1770 // The value of 'crit' is suitable for use as the critical_id of the
1771 // critical section directive.
1772 __kmp_release_user_lock_with_checks(lck, global_tid);
1773
1774 #endif // KMP_USE_DYNAMIC_LOCK
1775
1776 #if OMPT_SUPPORT && OMPT_OPTIONAL
1777 /* OMPT release event triggers after lock is released; place here to trigger
1778 * for all #if branches */
1779 OMPT_STORE_RETURN_ADDRESS(global_tid);
1780 if (ompt_enabled.ompt_callback_mutex_released) {
1781 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1782 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1783 OMPT_LOAD_RETURN_ADDRESS(0));
1784 }
1785 #endif
1786
1787 KMP_POP_PARTITIONED_TIMER();
1788 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1789 }
1790
1791 /*!
1792 @ingroup SYNCHRONIZATION
1793 @param loc source location information
1794 @param global_tid thread id.
1795 @return one if the thread should execute the master block, zero otherwise
1796
1797 Start execution of a combined barrier and master. The barrier is executed inside
1798 this function.
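A sketch of the expected code generation (illustrative only):
@code
if (__kmpc_barrier_master(loc, gtid)) {
  /* ... master block, executed by exactly one thread ... */
  __kmpc_end_barrier_master(loc, gtid); // releases the waiting threads
}
@endcode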
1799 */
1800 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1801 int status;
1802 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1803 __kmp_assert_valid_gtid(global_tid);
1804
1805 if (!TCR_4(__kmp_init_parallel))
1806 __kmp_parallel_initialize();
1807
1808 __kmp_resume_if_soft_paused();
1809
1810 if (__kmp_env_consistency_check)
1811 __kmp_check_barrier(global_tid, ct_barrier, loc);
1812
1813 #if OMPT_SUPPORT
1814 ompt_frame_t *ompt_frame;
1815 if (ompt_enabled.enabled) {
1816 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1817 if (ompt_frame->enter_frame.ptr == NULL)
1818 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1819 }
1820 OMPT_STORE_RETURN_ADDRESS(global_tid);
1821 #endif
1822 #if USE_ITT_NOTIFY
1823 __kmp_threads[global_tid]->th.th_ident = loc;
1824 #endif
1825 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1826 #if OMPT_SUPPORT && OMPT_OPTIONAL
1827 if (ompt_enabled.enabled) {
1828 ompt_frame->enter_frame = ompt_data_none;
1829 }
1830 #endif
1831
1832 return (status != 0) ? 0 : 1;
1833 }
1834
1835 /*!
1836 @ingroup SYNCHRONIZATION
1837 @param loc source location information
1838 @param global_tid thread id.
1839
1840 Complete the execution of a combined barrier and master. This function should
1841 only be called at the completion of the <tt>master</tt> code. Other threads will
1842 still be waiting at the barrier and this call releases them.
1843 */
1844 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1845 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1846 __kmp_assert_valid_gtid(global_tid);
1847 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1848 }
1849
1850 /*!
1851 @ingroup SYNCHRONIZATION
1852 @param loc source location information
1853 @param global_tid thread id.
1854 @return one if the thread should execute the master block, zero otherwise
1855
1856 Start execution of a combined barrier and master(nowait) construct.
1857 The barrier is executed inside this function.
1858 There is no equivalent "end" function: the other threads do not wait for the master block to finish, so there is nothing to release at the end.
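A sketch of the expected code generation (illustrative only):
@code
if (__kmpc_barrier_master_nowait(loc, gtid)) {
  /* ... master block ... */
  /* no "end" call: the other threads are not waiting on this block */
}
@endcode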
1859 */
1860 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1861 kmp_int32 ret;
1862 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1863 __kmp_assert_valid_gtid(global_tid);
1864
1865 if (!TCR_4(__kmp_init_parallel))
1866 __kmp_parallel_initialize();
1867
1868 __kmp_resume_if_soft_paused();
1869
1870 if (__kmp_env_consistency_check) {
1871 if (loc == 0) {
1872 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1873 }
1874 __kmp_check_barrier(global_tid, ct_barrier, loc);
1875 }
1876
1877 #if OMPT_SUPPORT
1878 ompt_frame_t *ompt_frame;
1879 if (ompt_enabled.enabled) {
1880 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1881 if (ompt_frame->enter_frame.ptr == NULL)
1882 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1883 }
1884 OMPT_STORE_RETURN_ADDRESS(global_tid);
1885 #endif
1886 #if USE_ITT_NOTIFY
1887 __kmp_threads[global_tid]->th.th_ident = loc;
1888 #endif
1889 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1890 #if OMPT_SUPPORT && OMPT_OPTIONAL
1891 if (ompt_enabled.enabled) {
1892 ompt_frame->enter_frame = ompt_data_none;
1893 }
1894 #endif
1895
1896 ret = __kmpc_master(loc, global_tid);
1897
1898 if (__kmp_env_consistency_check) {
1899 /* __kmpc_end_master is not called for this construct, so the */
1900 /* corresponding actions of __kmpc_end_master are performed here */
1901 if (ret) {
1902 /* only one thread should do the pop since only */
1903 /* one did the push (see __kmpc_master()) */
1904 __kmp_pop_sync(global_tid, ct_master, loc);
1905 }
1906 }
1907
1908 return (ret);
1909 }
1910
1911 /* The BARRIER for a SINGLE process section is always explicit */
1912 /*!
1913 @ingroup WORK_SHARING
1914 @param loc source location information
1915 @param global_tid global thread number
1916 @return One if this thread should execute the single construct, zero otherwise.
1917
1918 Test whether to execute a <tt>single</tt> construct.
1919 There are no implicit barriers in the two "single" calls; rather, the compiler
1920 should introduce an explicit barrier if one is required.
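A sketch of the expected code generation for a single construct without a
nowait clause (illustrative only; with nowait the trailing barrier is omitted):
@code
if (__kmpc_single(loc, gtid)) {
  /* ... single block ... */
  __kmpc_end_single(loc, gtid);
}
__kmpc_barrier(loc, gtid); // explicit barrier emitted by the compiler
@endcode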
1921 */
1922
1923 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1924 __kmp_assert_valid_gtid(global_tid);
1925 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1926
1927 if (rc) {
1928 // We are going to execute the single statement, so we should count it.
1929 KMP_COUNT_BLOCK(OMP_SINGLE);
1930 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1931 }
1932
1933 #if OMPT_SUPPORT && OMPT_OPTIONAL
1934 kmp_info_t *this_thr = __kmp_threads[global_tid];
1935 kmp_team_t *team = this_thr->th.th_team;
1936 int tid = __kmp_tid_from_gtid(global_tid);
1937
1938 if (ompt_enabled.enabled) {
1939 if (rc) {
1940 if (ompt_enabled.ompt_callback_work) {
1941 ompt_callbacks.ompt_callback(ompt_callback_work)(
1942 ompt_work_single_executor, ompt_scope_begin,
1943 &(team->t.ompt_team_info.parallel_data),
1944 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1945 1, OMPT_GET_RETURN_ADDRESS(0));
1946 }
1947 } else {
1948 if (ompt_enabled.ompt_callback_work) {
1949 ompt_callbacks.ompt_callback(ompt_callback_work)(
1950 ompt_work_single_other, ompt_scope_begin,
1951 &(team->t.ompt_team_info.parallel_data),
1952 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1953 1, OMPT_GET_RETURN_ADDRESS(0));
1954 ompt_callbacks.ompt_callback(ompt_callback_work)(
1955 ompt_work_single_other, ompt_scope_end,
1956 &(team->t.ompt_team_info.parallel_data),
1957 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1958 1, OMPT_GET_RETURN_ADDRESS(0));
1959 }
1960 }
1961 }
1962 #endif
1963
1964 return rc;
1965 }
1966
1967 /*!
1968 @ingroup WORK_SHARING
1969 @param loc source location information
1970 @param global_tid global thread number
1971
1972 Mark the end of a <tt>single</tt> construct. This function should
1973 only be called by the thread that executed the block of code protected
1974 by the `single` construct.
1975 */
1976 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1977 __kmp_assert_valid_gtid(global_tid);
1978 __kmp_exit_single(global_tid);
1979 KMP_POP_PARTITIONED_TIMER();
1980
1981 #if OMPT_SUPPORT && OMPT_OPTIONAL
1982 kmp_info_t *this_thr = __kmp_threads[global_tid];
1983 kmp_team_t *team = this_thr->th.th_team;
1984 int tid = __kmp_tid_from_gtid(global_tid);
1985
1986 if (ompt_enabled.ompt_callback_work) {
1987 ompt_callbacks.ompt_callback(ompt_callback_work)(
1988 ompt_work_single_executor, ompt_scope_end,
1989 &(team->t.ompt_team_info.parallel_data),
1990 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1991 OMPT_GET_RETURN_ADDRESS(0));
1992 }
1993 #endif
1994 }
1995
1996 /*!
1997 @ingroup WORK_SHARING
1998 @param loc Source location
1999 @param global_tid Global thread id
2000
2001 Mark the end of a statically scheduled loop.
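A sketch of how a statically scheduled loop is typically lowered (illustrative
only; __kmpc_for_static_init_4 is declared elsewhere in the runtime, and n and
body() are placeholders):
@code
kmp_int32 last, lower = 0, upper = n - 1, stride = 1;
__kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lower, &upper,
                         &stride, 1, 1);
for (kmp_int32 i = lower; i <= upper; ++i)
  body(i);
__kmpc_for_static_fini(loc, gtid);
@endcode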
2002 */
2003 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
2004 KMP_POP_PARTITIONED_TIMER();
2005 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
2006
2007 #if OMPT_SUPPORT && OMPT_OPTIONAL
2008 if (ompt_enabled.ompt_callback_work) {
2009 ompt_work_t ompt_work_type = ompt_work_loop_static;
2010 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
2011 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2012 // Determine workshare type
2013 if (loc != NULL) {
2014 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
2015 ompt_work_type = ompt_work_loop_static;
2016 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
2017 ompt_work_type = ompt_work_sections;
2018 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
2019 ompt_work_type = ompt_work_distribute;
2020 } else {
2021 // use default set above.
2022 // a warning about this case is provided in __kmpc_for_static_init
2023 }
2024 KMP_DEBUG_ASSERT(ompt_work_type);
2025 }
2026 ompt_callbacks.ompt_callback(ompt_callback_work)(
2027 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
2028 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
2029 }
2030 #endif
2031 if (__kmp_env_consistency_check)
2032 __kmp_pop_workshare(global_tid, ct_pdo, loc);
2033 }
2034
2035 // User routines which take C-style (call-by-value) arguments, unlike the
2036 // equivalent Fortran routines
2037
2038 void ompc_set_num_threads(int arg) {
2039 // !!!!! TODO: check the per-task binding
2040 __kmp_set_num_threads(arg, __kmp_entry_gtid());
2041 }
2042
2043 void ompc_set_dynamic(int flag) {
2044 kmp_info_t *thread;
2045
2046 /* For the thread-private implementation of the internal controls */
2047 thread = __kmp_entry_thread();
2048
2049 __kmp_save_internal_controls(thread);
2050
2051 set__dynamic(thread, flag ? true : false);
2052 }
2053
2054 void ompc_set_nested(int flag) {
2055 kmp_info_t *thread;
2056
2057 /* For the thread-private internal controls implementation */
2058 thread = __kmp_entry_thread();
2059
2060 __kmp_save_internal_controls(thread);
2061
2062 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
2063 }
2064
2065 void ompc_set_max_active_levels(int max_active_levels) {
2066 /* TO DO */
2067 /* we want per-task implementation of this internal control */
2068
2069 /* For the per-thread internal controls implementation */
2070 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2071 }
2072
2073 void ompc_set_schedule(omp_sched_t kind, int modifier) {
2074 // !!!!! TODO: check the per-task binding
2075 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2076 }
2077
2078 int ompc_get_ancestor_thread_num(int level) {
2079 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
2080 }
2081
2082 int ompc_get_team_size(int level) {
2083 return __kmp_get_team_size(__kmp_entry_gtid(), level);
2084 }
2085
2086 /* OpenMP 5.0 Affinity Format API */
2087 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2088 if (!__kmp_init_serial) {
2089 __kmp_serial_initialize();
2090 }
2091 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2092 format, KMP_STRLEN(format) + 1);
2093 }
2094
2095 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2096 size_t format_size;
2097 if (!__kmp_init_serial) {
2098 __kmp_serial_initialize();
2099 }
2100 format_size = KMP_STRLEN(__kmp_affinity_format);
2101 if (buffer && size) {
2102 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2103 format_size + 1);
2104 }
2105 return format_size;
2106 }
2107
2108 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2109 int gtid;
2110 if (!TCR_4(__kmp_init_middle)) {
2111 __kmp_middle_initialize();
2112 }
2113 __kmp_assign_root_init_mask();
2114 gtid = __kmp_get_gtid();
2115 #if KMP_AFFINITY_SUPPORTED
2116 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2117 __kmp_affinity.flags.reset) {
2118 __kmp_reset_root_init_mask(gtid);
2119 }
2120 #endif
2121 __kmp_aux_display_affinity(gtid, format);
2122 }
2123
2124 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2125 char const *format) {
2126 int gtid;
2127 size_t num_required;
2128 kmp_str_buf_t capture_buf;
2129 if (!TCR_4(__kmp_init_middle)) {
2130 __kmp_middle_initialize();
2131 }
2132 __kmp_assign_root_init_mask();
2133 gtid = __kmp_get_gtid();
2134 #if KMP_AFFINITY_SUPPORTED
2135 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2136 __kmp_affinity.flags.reset) {
2137 __kmp_reset_root_init_mask(gtid);
2138 }
2139 #endif
2140 __kmp_str_buf_init(&capture_buf);
2141 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2142 if (buffer && buf_size) {
2143 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2144 capture_buf.used + 1);
2145 }
2146 __kmp_str_buf_free(&capture_buf);
2147 return num_required;
2148 }
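
/* Usage sketch (hypothetical, written with the user-level omp_* names that
   these entry points implement): the capture routine returns the number of
   characters required, so a caller can size its buffer with a first call.
   Per the OpenMP specification, a NULL format selects the current
   affinity-format setting.

     size_t n = omp_capture_affinity(NULL, 0, NULL); // query required size
     char *buf = (char *)malloc(n + 1);
     omp_capture_affinity(buf, n + 1, NULL);
     ...
     free(buf);

   If the supplied buffer is smaller than required, the result is truncated. */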
2149
2150 void kmpc_set_stacksize(int arg) {
2151 // __kmp_aux_set_stacksize initializes the library if needed
2152 __kmp_aux_set_stacksize(arg);
2153 }
2154
2155 void kmpc_set_stacksize_s(size_t arg) {
2156 // __kmp_aux_set_stacksize initializes the library if needed
2157 __kmp_aux_set_stacksize(arg);
2158 }
2159
2160 void kmpc_set_blocktime(int arg) {
2161 int gtid, tid, bt = arg;
2162 kmp_info_t *thread;
2163
2164 gtid = __kmp_entry_gtid();
2165 tid = __kmp_tid_from_gtid(gtid);
2166 thread = __kmp_thread_from_gtid(gtid);
2167
2168 __kmp_aux_convert_blocktime(&bt);
2169 __kmp_aux_set_blocktime(bt, thread, tid);
2170 }
2171
2172 void kmpc_set_library(int arg) {
2173 // __kmp_user_set_library initializes the library if needed
2174 __kmp_user_set_library((enum library_type)arg);
2175 }
2176
2177 void kmpc_set_defaults(char const *str) {
2178 // __kmp_aux_set_defaults initializes the library if needed
2179 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2180 }
2181
2182 void kmpc_set_disp_num_buffers(int arg) {
2183 // ignore after initialization because some teams have already
2184 // allocated dispatch buffers
2185 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2186 arg <= KMP_MAX_DISP_NUM_BUFF) {
2187 __kmp_dispatch_num_buffers = arg;
2188 }
2189 }
2190
2191 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2192 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2193 return -1;
2194 #else
2195 if (!TCR_4(__kmp_init_middle)) {
2196 __kmp_middle_initialize();
2197 }
2198 __kmp_assign_root_init_mask();
2199 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2200 #endif
2201 }
2202
2203 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2204 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2205 return -1;
2206 #else
2207 if (!TCR_4(__kmp_init_middle)) {
2208 __kmp_middle_initialize();
2209 }
2210 __kmp_assign_root_init_mask();
2211 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2212 #endif
2213 }
2214
2215 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2216 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2217 return -1;
2218 #else
2219 if (!TCR_4(__kmp_init_middle)) {
2220 __kmp_middle_initialize();
2221 }
2222 __kmp_assign_root_init_mask();
2223 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2224 #endif
2225 }
2226
2227 /* -------------------------------------------------------------------------- */
2228 /*!
2229 @ingroup THREADPRIVATE
2230 @param loc source location information
2231 @param gtid global thread number
2232 @param cpy_size size of the cpy_data buffer
2233 @param cpy_data pointer to data to be copied
2234 @param cpy_func helper function to call for copying data
2235 @param didit flag variable: 1=single thread; 0=not single thread
2236
2237 __kmpc_copyprivate implements the interface for the private data broadcast
2238 needed for the copyprivate clause associated with a single region in an
2239 OpenMP<sup>*</sup> program (both C and Fortran).
2240 All threads participating in the parallel region call this routine.
2241 One of the threads (called the single thread) should have the <tt>didit</tt>
2242 variable set to 1 and all other threads should have that variable set to 0.
2243 All threads pass a pointer to a data buffer (cpy_data) that they have built.
2244
2245 The OpenMP specification forbids the use of nowait on the single region when a
2246 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2247 barrier internally to avoid race conditions, so the code generation for the
2248 single region should avoid generating a barrier after the call to @ref
2249 __kmpc_copyprivate.
2250
2251 The <tt>gtid</tt> parameter is the global thread id for the current thread.
2252 The <tt>loc</tt> parameter is a pointer to source location information.
2253
2254 Internal implementation: the single thread first copies its descriptor
2255 address (cpy_data) to a team-private location; the other threads then each call
2256 the function pointed to by cpy_func, which performs the copy using their own
2257 cpy_data buffer as the destination and the single thread's buffer as the source.
2258
2259 The cpy_func routine used for the copy and the contents of the data area defined
2260 by cpy_data and cpy_size may be built in any fashion that will allow the copy
2261 to be done. For instance, the cpy_data buffer can hold the actual data to be
2262 copied or it may hold a list of pointers to the data. The cpy_func routine must
2263 interpret the cpy_data buffer appropriately.
2264
2265 The interface to cpy_func is as follows:
2266 @code
2267 void cpy_func( void *destination, void *source )
2268 @endcode
2269 where void *destination is the cpy_data pointer for the thread being copied to
2270 and void *source is the cpy_data pointer for the thread being copied from.
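As a rough illustration (the type and names below are hypothetical), a compiler
might pack pointers to the copyprivate variables into the cpy_data buffer and
emit a helper of the form:
@code
typedef struct { int *a; double *b; } cpy_t; // one pointer per copyprivate var
static void cpy_func(void *dst, void *src) {
  cpy_t *d = (cpy_t *)dst;
  cpy_t *s = (cpy_t *)src;
  *d->a = *s->a; // copy each variable from the single thread's instance
  *d->b = *s->b;
}
@endcode
Each thread would then call __kmpc_copyprivate(loc, gtid, sizeof(cpy_t),
&its_own_cpy_t, cpy_func, didit).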
2271 */
2272 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2273 void *cpy_data, void (*cpy_func)(void *, void *),
2274 kmp_int32 didit) {
2275 void **data_ptr;
2276 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2277 __kmp_assert_valid_gtid(gtid);
2278
2279 KMP_MB();
2280
2281 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2282
2283 if (__kmp_env_consistency_check) {
2284 if (loc == 0) {
2285 KMP_WARNING(ConstructIdentInvalid);
2286 }
2287 }
2288
2289 // ToDo: Optimize the following two barriers into some kind of split barrier
2290
2291 if (didit)
2292 *data_ptr = cpy_data;
2293
2294 #if OMPT_SUPPORT
2295 ompt_frame_t *ompt_frame;
2296 if (ompt_enabled.enabled) {
2297 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2298 if (ompt_frame->enter_frame.ptr == NULL)
2299 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2300 }
2301 OMPT_STORE_RETURN_ADDRESS(gtid);
2302 #endif
2303 /* This barrier is not a barrier region boundary */
2304 #if USE_ITT_NOTIFY
2305 __kmp_threads[gtid]->th.th_ident = loc;
2306 #endif
2307 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2308
2309 if (!didit)
2310 (*cpy_func)(cpy_data, *data_ptr);
2311
2312 // Consider next barrier a user-visible barrier for barrier region boundaries
2313 // Nesting checks are already handled by the single construct checks
2314 {
2315 #if OMPT_SUPPORT
2316 OMPT_STORE_RETURN_ADDRESS(gtid);
2317 #endif
2318 #if USE_ITT_NOTIFY
2319 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2320 // tasks can overwrite the location)
2321 #endif
2322 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2323 #if OMPT_SUPPORT && OMPT_OPTIONAL
2324 if (ompt_enabled.enabled) {
2325 ompt_frame->enter_frame = ompt_data_none;
2326 }
2327 #endif
2328 }
2329 }
2330
2331 /* --------------------------------------------------------------------------*/
2332 /*!
2333 @ingroup THREADPRIVATE
2334 @param loc source location information
2335 @param gtid global thread number
2336 @param cpy_data pointer to the data to be saved/copied or 0
2337 @return the saved pointer to the data
2338
2339 __kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: it only
2340 saves the pointer it is given (when that pointer is non-zero, i.e. the caller
2341 executed the single block) and returns that saved pointer to every caller (the
2342 single thread does not need it). This version does not do any actual data
2343 copying; the copy has to be done elsewhere, e.g. inline in the generated code.
2344 Consequently, unlike __kmpc_copyprivate, this function does not end with a
2345 barrier, so the generated code must place a barrier after all of the data has
2346 been copied.
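A sketch of the expected use (my_data and my_type are hypothetical):
@code
void *src = __kmpc_copyprivate_light(loc, gtid, didit ? &my_data : NULL);
if (!didit)
  my_data = *(my_type *)src; // copy done inline by the generated code
__kmpc_barrier(loc, gtid);   // barrier required after all copying is done
@endcode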
2347 */
2348 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2349 void **data_ptr;
2350
2351 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2352
2353 KMP_MB();
2354
2355 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2356
2357 if (__kmp_env_consistency_check) {
2358 if (loc == 0) {
2359 KMP_WARNING(ConstructIdentInvalid);
2360 }
2361 }
2362
2363 // ToDo: Optimize the following barrier
2364
2365 if (cpy_data)
2366 *data_ptr = cpy_data;
2367
2368 #if OMPT_SUPPORT
2369 ompt_frame_t *ompt_frame;
2370 if (ompt_enabled.enabled) {
2371 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2372 if (ompt_frame->enter_frame.ptr == NULL)
2373 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2374 OMPT_STORE_RETURN_ADDRESS(gtid);
2375 }
2376 #endif
2377 /* This barrier is not a barrier region boundary */
2378 #if USE_ITT_NOTIFY
2379 __kmp_threads[gtid]->th.th_ident = loc;
2380 #endif
2381 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2382
2383 return *data_ptr;
2384 }
2385
2386 /* -------------------------------------------------------------------------- */
2387
2388 #define INIT_LOCK __kmp_init_user_lock_with_checks
2389 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2390 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2391 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2392 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2393 #define ACQUIRE_NESTED_LOCK_TIMED \
2394 __kmp_acquire_nested_user_lock_with_checks_timed
2395 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2396 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2397 #define TEST_LOCK __kmp_test_user_lock_with_checks
2398 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2399 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2400 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2401
2402 // TODO: Make check abort messages use location info & pass it into
2403 // with_checks routines
2404
2405 #if KMP_USE_DYNAMIC_LOCK
2406
2407 // internal lock initializer
2408 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2409 kmp_dyna_lockseq_t seq) {
2410 if (KMP_IS_D_LOCK(seq)) {
2411 KMP_INIT_D_LOCK(lock, seq);
2412 #if USE_ITT_BUILD
2413 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2414 #endif
2415 } else {
2416 KMP_INIT_I_LOCK(lock, seq);
2417 #if USE_ITT_BUILD
2418 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2419 __kmp_itt_lock_creating(ilk->lock, loc);
2420 #endif
2421 }
2422 }
2423
2424 // internal nest lock initializer
2425 static __forceinline void
2426 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2427 kmp_dyna_lockseq_t seq) {
2428 #if KMP_USE_TSX
2429 // Don't have nested lock implementation for speculative locks
2430 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2431 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2432 seq = __kmp_user_lock_seq;
2433 #endif
2434 switch (seq) {
2435 case lockseq_tas:
2436 seq = lockseq_nested_tas;
2437 break;
2438 #if KMP_USE_FUTEX
2439 case lockseq_futex:
2440 seq = lockseq_nested_futex;
2441 break;
2442 #endif
2443 case lockseq_ticket:
2444 seq = lockseq_nested_ticket;
2445 break;
2446 case lockseq_queuing:
2447 seq = lockseq_nested_queuing;
2448 break;
2449 case lockseq_drdpa:
2450 seq = lockseq_nested_drdpa;
2451 break;
2452 default:
2453 seq = lockseq_nested_queuing;
2454 }
2455 KMP_INIT_I_LOCK(lock, seq);
2456 #if USE_ITT_BUILD
2457 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2458 __kmp_itt_lock_creating(ilk->lock, loc);
2459 #endif
2460 }
2461
2462 /* initialize the lock with a hint */
2463 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2464 uintptr_t hint) {
2465 KMP_DEBUG_ASSERT(__kmp_init_serial);
2466 if (__kmp_env_consistency_check && user_lock == NULL) {
2467 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2468 }
2469
2470 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2471
2472 #if OMPT_SUPPORT && OMPT_OPTIONAL
2473 // This is the case, if called from omp_init_lock_with_hint:
2474 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2475 if (!codeptr)
2476 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2477 if (ompt_enabled.ompt_callback_lock_init) {
2478 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2479 ompt_mutex_lock, (omp_lock_hint_t)hint,
2480 __ompt_get_mutex_impl_type(user_lock),
2481 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2482 }
2483 #endif
2484 }
2485
2486 /* initialize the nested lock with a hint */
2487 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2488 void **user_lock, uintptr_t hint) {
2489 KMP_DEBUG_ASSERT(__kmp_init_serial);
2490 if (__kmp_env_consistency_check && user_lock == NULL) {
2491 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2492 }
2493
2494 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2495
2496 #if OMPT_SUPPORT && OMPT_OPTIONAL
2497 // This is the case, if called from omp_init_lock_with_hint:
2498 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2499 if (!codeptr)
2500 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2501 if (ompt_enabled.ompt_callback_lock_init) {
2502 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2503 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2504 __ompt_get_mutex_impl_type(user_lock),
2505 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2506 }
2507 #endif
2508 }
2509
2510 #endif // KMP_USE_DYNAMIC_LOCK
2511
2512 /* initialize the lock */
2513 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2514 #if KMP_USE_DYNAMIC_LOCK
2515
2516 KMP_DEBUG_ASSERT(__kmp_init_serial);
2517 if (__kmp_env_consistency_check && user_lock == NULL) {
2518 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2519 }
2520 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2521
2522 #if OMPT_SUPPORT && OMPT_OPTIONAL
2523 // This is the case, if called from omp_init_lock_with_hint:
2524 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2525 if (!codeptr)
2526 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2527 if (ompt_enabled.ompt_callback_lock_init) {
2528 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2529 ompt_mutex_lock, omp_lock_hint_none,
2530 __ompt_get_mutex_impl_type(user_lock),
2531 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2532 }
2533 #endif
2534
2535 #else // KMP_USE_DYNAMIC_LOCK
2536
2537 static char const *const func = "omp_init_lock";
2538 kmp_user_lock_p lck;
2539 KMP_DEBUG_ASSERT(__kmp_init_serial);
2540
2541 if (__kmp_env_consistency_check) {
2542 if (user_lock == NULL) {
2543 KMP_FATAL(LockIsUninitialized, func);
2544 }
2545 }
2546
2547 KMP_CHECK_USER_LOCK_INIT();
2548
2549 if ((__kmp_user_lock_kind == lk_tas) &&
2550 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2551 lck = (kmp_user_lock_p)user_lock;
2552 }
2553 #if KMP_USE_FUTEX
2554 else if ((__kmp_user_lock_kind == lk_futex) &&
2555 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2556 lck = (kmp_user_lock_p)user_lock;
2557 }
2558 #endif
2559 else {
2560 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2561 }
2562 INIT_LOCK(lck);
2563 __kmp_set_user_lock_location(lck, loc);
2564
2565 #if OMPT_SUPPORT && OMPT_OPTIONAL
2566 // This is the case, if called from omp_init_lock_with_hint:
2567 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2568 if (!codeptr)
2569 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2570 if (ompt_enabled.ompt_callback_lock_init) {
2571 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2572 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2573 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2574 }
2575 #endif
2576
2577 #if USE_ITT_BUILD
2578 __kmp_itt_lock_creating(lck);
2579 #endif /* USE_ITT_BUILD */
2580
2581 #endif // KMP_USE_DYNAMIC_LOCK
2582 } // __kmpc_init_lock
2583
2584 /* initialize the nested lock */
2585 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2586 #if KMP_USE_DYNAMIC_LOCK
2587
2588 KMP_DEBUG_ASSERT(__kmp_init_serial);
2589 if (__kmp_env_consistency_check && user_lock == NULL) {
2590 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2591 }
2592 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2593
2594 #if OMPT_SUPPORT && OMPT_OPTIONAL
2595 // This is the case, if called from omp_init_lock_with_hint:
2596 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2597 if (!codeptr)
2598 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2599 if (ompt_enabled.ompt_callback_lock_init) {
2600 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2601 ompt_mutex_nest_lock, omp_lock_hint_none,
2602 __ompt_get_mutex_impl_type(user_lock),
2603 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2604 }
2605 #endif
2606
2607 #else // KMP_USE_DYNAMIC_LOCK
2608
2609 static char const *const func = "omp_init_nest_lock";
2610 kmp_user_lock_p lck;
2611 KMP_DEBUG_ASSERT(__kmp_init_serial);
2612
2613 if (__kmp_env_consistency_check) {
2614 if (user_lock == NULL) {
2615 KMP_FATAL(LockIsUninitialized, func);
2616 }
2617 }
2618
2619 KMP_CHECK_USER_LOCK_INIT();
2620
2621 if ((__kmp_user_lock_kind == lk_tas) &&
2622 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2623 OMP_NEST_LOCK_T_SIZE)) {
2624 lck = (kmp_user_lock_p)user_lock;
2625 }
2626 #if KMP_USE_FUTEX
2627 else if ((__kmp_user_lock_kind == lk_futex) &&
2628 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2629 OMP_NEST_LOCK_T_SIZE)) {
2630 lck = (kmp_user_lock_p)user_lock;
2631 }
2632 #endif
2633 else {
2634 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2635 }
2636
2637 INIT_NESTED_LOCK(lck);
2638 __kmp_set_user_lock_location(lck, loc);
2639
2640 #if OMPT_SUPPORT && OMPT_OPTIONAL
2641 // This is the case, if called from omp_init_lock_with_hint:
2642 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2643 if (!codeptr)
2644 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2645 if (ompt_enabled.ompt_callback_lock_init) {
2646 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2647 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2648 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2649 }
2650 #endif
2651
2652 #if USE_ITT_BUILD
2653 __kmp_itt_lock_creating(lck);
2654 #endif /* USE_ITT_BUILD */
2655
2656 #endif // KMP_USE_DYNAMIC_LOCK
2657 } // __kmpc_init_nest_lock
2658
2659 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2660 #if KMP_USE_DYNAMIC_LOCK
2661
2662 #if USE_ITT_BUILD
2663 kmp_user_lock_p lck;
2664 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2665 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2666 } else {
2667 lck = (kmp_user_lock_p)user_lock;
2668 }
2669 __kmp_itt_lock_destroyed(lck);
2670 #endif
2671 #if OMPT_SUPPORT && OMPT_OPTIONAL
2672 // This is the case, if called from omp_init_lock_with_hint:
2673 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2674 if (!codeptr)
2675 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2676 if (ompt_enabled.ompt_callback_lock_destroy) {
2677 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2678 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2679 }
2680 #endif
2681 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2682 #else
2683 kmp_user_lock_p lck;
2684
2685 if ((__kmp_user_lock_kind == lk_tas) &&
2686 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2687 lck = (kmp_user_lock_p)user_lock;
2688 }
2689 #if KMP_USE_FUTEX
2690 else if ((__kmp_user_lock_kind == lk_futex) &&
2691 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2692 lck = (kmp_user_lock_p)user_lock;
2693 }
2694 #endif
2695 else {
2696 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2697 }
2698
2699 #if OMPT_SUPPORT && OMPT_OPTIONAL
2700 // This is the case, if called from omp_init_lock_with_hint:
2701 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2702 if (!codeptr)
2703 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2704 if (ompt_enabled.ompt_callback_lock_destroy) {
2705 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2706 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2707 }
2708 #endif
2709
2710 #if USE_ITT_BUILD
2711 __kmp_itt_lock_destroyed(lck);
2712 #endif /* USE_ITT_BUILD */
2713 DESTROY_LOCK(lck);
2714
2715 if ((__kmp_user_lock_kind == lk_tas) &&
2716 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2717 ;
2718 }
2719 #if KMP_USE_FUTEX
2720 else if ((__kmp_user_lock_kind == lk_futex) &&
2721 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2722 ;
2723 }
2724 #endif
2725 else {
2726 __kmp_user_lock_free(user_lock, gtid, lck);
2727 }
2728 #endif // KMP_USE_DYNAMIC_LOCK
2729 } // __kmpc_destroy_lock
2730
2731 /* destroy the nested lock */
2732 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2733 #if KMP_USE_DYNAMIC_LOCK
2734
2735 #if USE_ITT_BUILD
2736 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2737 __kmp_itt_lock_destroyed(ilk->lock);
2738 #endif
2739 #if OMPT_SUPPORT && OMPT_OPTIONAL
2740 // This is the case, if called from omp_init_lock_with_hint:
2741 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2742 if (!codeptr)
2743 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2744 if (ompt_enabled.ompt_callback_lock_destroy) {
2745 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2746 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2747 }
2748 #endif
2749 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2750
2751 #else // KMP_USE_DYNAMIC_LOCK
2752
2753 kmp_user_lock_p lck;
2754
2755 if ((__kmp_user_lock_kind == lk_tas) &&
2756 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2757 OMP_NEST_LOCK_T_SIZE)) {
2758 lck = (kmp_user_lock_p)user_lock;
2759 }
2760 #if KMP_USE_FUTEX
2761 else if ((__kmp_user_lock_kind == lk_futex) &&
2762 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2763 OMP_NEST_LOCK_T_SIZE)) {
2764 lck = (kmp_user_lock_p)user_lock;
2765 }
2766 #endif
2767 else {
2768 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2769 }
2770
2771 #if OMPT_SUPPORT && OMPT_OPTIONAL
2772 // This is the case, if called from omp_init_lock_with_hint:
2773 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2774 if (!codeptr)
2775 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2776 if (ompt_enabled.ompt_callback_lock_destroy) {
2777 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2778 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2779 }
2780 #endif
2781
2782 #if USE_ITT_BUILD
2783 __kmp_itt_lock_destroyed(lck);
2784 #endif /* USE_ITT_BUILD */
2785
2786 DESTROY_NESTED_LOCK(lck);
2787
2788 if ((__kmp_user_lock_kind == lk_tas) &&
2789 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2790 OMP_NEST_LOCK_T_SIZE)) {
2791 ;
2792 }
2793 #if KMP_USE_FUTEX
2794 else if ((__kmp_user_lock_kind == lk_futex) &&
2795 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2796 OMP_NEST_LOCK_T_SIZE)) {
2797 ;
2798 }
2799 #endif
2800 else {
2801 __kmp_user_lock_free(user_lock, gtid, lck);
2802 }
2803 #endif // KMP_USE_DYNAMIC_LOCK
2804 } // __kmpc_destroy_nest_lock
2805
2806 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2807 KMP_COUNT_BLOCK(OMP_set_lock);
2808 #if KMP_USE_DYNAMIC_LOCK
2809 int tag = KMP_EXTRACT_D_TAG(user_lock);
2810 #if USE_ITT_BUILD
2811 __kmp_itt_lock_acquiring(
2812 (kmp_user_lock_p)
2813 user_lock); // itt function will get to the right lock object.
2814 #endif
2815 #if OMPT_SUPPORT && OMPT_OPTIONAL
2816 // This is the case, if called from omp_init_lock_with_hint:
2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818 if (!codeptr)
2819 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820 if (ompt_enabled.ompt_callback_mutex_acquire) {
2821 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2822 ompt_mutex_lock, omp_lock_hint_none,
2823 __ompt_get_mutex_impl_type(user_lock),
2824 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2825 }
2826 #endif
2827 #if KMP_USE_INLINED_TAS
2828 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2829 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2830 } else
2831 #elif KMP_USE_INLINED_FUTEX
2832 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2833 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2834 } else
2835 #endif
2836 {
2837 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2838 }
2839 #if USE_ITT_BUILD
2840 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2841 #endif
2842 #if OMPT_SUPPORT && OMPT_OPTIONAL
2843 if (ompt_enabled.ompt_callback_mutex_acquired) {
2844 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2845 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2846 }
2847 #endif
2848
2849 #else // KMP_USE_DYNAMIC_LOCK
2850
2851 kmp_user_lock_p lck;
2852
2853 if ((__kmp_user_lock_kind == lk_tas) &&
2854 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2855 lck = (kmp_user_lock_p)user_lock;
2856 }
2857 #if KMP_USE_FUTEX
2858 else if ((__kmp_user_lock_kind == lk_futex) &&
2859 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2860 lck = (kmp_user_lock_p)user_lock;
2861 }
2862 #endif
2863 else {
2864 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2865 }
2866
2867 #if USE_ITT_BUILD
2868 __kmp_itt_lock_acquiring(lck);
2869 #endif /* USE_ITT_BUILD */
2870 #if OMPT_SUPPORT && OMPT_OPTIONAL
2871 // This is the case, if called from omp_init_lock_with_hint:
2872 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2873 if (!codeptr)
2874 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2875 if (ompt_enabled.ompt_callback_mutex_acquire) {
2876 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2877 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2878 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2879 }
2880 #endif
2881
2882 ACQUIRE_LOCK(lck, gtid);
2883
2884 #if USE_ITT_BUILD
2885 __kmp_itt_lock_acquired(lck);
2886 #endif /* USE_ITT_BUILD */
2887
2888 #if OMPT_SUPPORT && OMPT_OPTIONAL
2889 if (ompt_enabled.ompt_callback_mutex_acquired) {
2890 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2891 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2892 }
2893 #endif
2894
2895 #endif // KMP_USE_DYNAMIC_LOCK
2896 }
2897
2898 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2899 #if KMP_USE_DYNAMIC_LOCK
2900
2901 #if USE_ITT_BUILD
2902 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2903 #endif
2904 #if OMPT_SUPPORT && OMPT_OPTIONAL
2905 // This is the case, if called from omp_init_lock_with_hint:
2906 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2907 if (!codeptr)
2908 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2909 if (ompt_enabled.enabled) {
2910 if (ompt_enabled.ompt_callback_mutex_acquire) {
2911 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2912 ompt_mutex_nest_lock, omp_lock_hint_none,
2913 __ompt_get_mutex_impl_type(user_lock),
2914 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2915 }
2916 }
2917 #endif
2918 int acquire_status =
2919 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2920 (void)acquire_status;
2921 #if USE_ITT_BUILD
2922 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2923 #endif
2924
2925 #if OMPT_SUPPORT && OMPT_OPTIONAL
2926 if (ompt_enabled.enabled) {
2927 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2928 if (ompt_enabled.ompt_callback_mutex_acquired) {
2929 // lock_first
2930 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2931 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2932 codeptr);
2933 }
2934 } else {
2935 if (ompt_enabled.ompt_callback_nest_lock) {
2936 // lock_next
2937 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2938 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2939 }
2940 }
2941 }
2942 #endif
2943
2944 #else // KMP_USE_DYNAMIC_LOCK
2945 int acquire_status;
2946 kmp_user_lock_p lck;
2947
2948 if ((__kmp_user_lock_kind == lk_tas) &&
2949 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2950 OMP_NEST_LOCK_T_SIZE)) {
2951 lck = (kmp_user_lock_p)user_lock;
2952 }
2953 #if KMP_USE_FUTEX
2954 else if ((__kmp_user_lock_kind == lk_futex) &&
2955 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2956 OMP_NEST_LOCK_T_SIZE)) {
2957 lck = (kmp_user_lock_p)user_lock;
2958 }
2959 #endif
2960 else {
2961 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2962 }
2963
2964 #if USE_ITT_BUILD
2965 __kmp_itt_lock_acquiring(lck);
2966 #endif /* USE_ITT_BUILD */
2967 #if OMPT_SUPPORT && OMPT_OPTIONAL
2968 // This is the case, if called from omp_init_lock_with_hint:
2969 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2970 if (!codeptr)
2971 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2972 if (ompt_enabled.enabled) {
2973 if (ompt_enabled.ompt_callback_mutex_acquire) {
2974 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2975 ompt_mutex_nest_lock, omp_lock_hint_none,
2976 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2977 codeptr);
2978 }
2979 }
2980 #endif
2981
2982 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2983
2984 #if USE_ITT_BUILD
2985 __kmp_itt_lock_acquired(lck);
2986 #endif /* USE_ITT_BUILD */
2987
2988 #if OMPT_SUPPORT && OMPT_OPTIONAL
2989 if (ompt_enabled.enabled) {
2990 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2991 if (ompt_enabled.ompt_callback_mutex_acquired) {
2992 // lock_first
2993 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2994 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2995 }
2996 } else {
2997 if (ompt_enabled.ompt_callback_nest_lock) {
2998 // lock_next
2999 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3000 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3001 }
3002 }
3003 }
3004 #endif
3005
3006 #endif // KMP_USE_DYNAMIC_LOCK
3007 }
3008
3009 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3010 #if KMP_USE_DYNAMIC_LOCK
3011
3012 int tag = KMP_EXTRACT_D_TAG(user_lock);
3013 #if USE_ITT_BUILD
3014 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3015 #endif
3016 #if KMP_USE_INLINED_TAS
3017 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3018 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
3019 } else
3020 #elif KMP_USE_INLINED_FUTEX
3021 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3022 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
3023 } else
3024 #endif
3025 {
3026 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3027 }
3028
3029 #if OMPT_SUPPORT && OMPT_OPTIONAL
3030 // This is the case, if called from omp_init_lock_with_hint:
3031 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3032 if (!codeptr)
3033 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3034 if (ompt_enabled.ompt_callback_mutex_released) {
3035 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3036 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3037 }
3038 #endif
3039
3040 #else // KMP_USE_DYNAMIC_LOCK
3041
3042 kmp_user_lock_p lck;
3043
3044 /* Can't use serial interval since not block structured */
3045 /* release the lock */
3046
3047 if ((__kmp_user_lock_kind == lk_tas) &&
3048 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3049 #if KMP_OS_LINUX && \
3050 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3051 // "fast" path implemented to fix customer performance issue
3052 #if USE_ITT_BUILD
3053 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3054 #endif /* USE_ITT_BUILD */
3055 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
3056 KMP_MB();
3057
3058 #if OMPT_SUPPORT && OMPT_OPTIONAL
3059 // This is the case, if called from omp_init_lock_with_hint:
3060 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3061 if (!codeptr)
3062 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3063 if (ompt_enabled.ompt_callback_mutex_released) {
3064 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3065 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3066 }
3067 #endif
3068
3069 return;
3070 #else
3071 lck = (kmp_user_lock_p)user_lock;
3072 #endif
3073 }
3074 #if KMP_USE_FUTEX
3075 else if ((__kmp_user_lock_kind == lk_futex) &&
3076 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3077 lck = (kmp_user_lock_p)user_lock;
3078 }
3079 #endif
3080 else {
3081 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3082 }
3083
3084 #if USE_ITT_BUILD
3085 __kmp_itt_lock_releasing(lck);
3086 #endif /* USE_ITT_BUILD */
3087
3088 RELEASE_LOCK(lck, gtid);
3089
3090 #if OMPT_SUPPORT && OMPT_OPTIONAL
3091 // This is the case, if called from omp_init_lock_with_hint:
3092 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3093 if (!codeptr)
3094 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3095 if (ompt_enabled.ompt_callback_mutex_released) {
3096 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3097 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3098 }
3099 #endif
3100
3101 #endif // KMP_USE_DYNAMIC_LOCK
3102 }
3103
3104 /* release the nested lock */
3105 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3106 #if KMP_USE_DYNAMIC_LOCK
3107
3108 #if USE_ITT_BUILD
3109 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3110 #endif
3111 int release_status =
3112 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3113 (void)release_status;
3114
3115 #if OMPT_SUPPORT && OMPT_OPTIONAL
3116 // This is the case, if called from omp_init_lock_with_hint:
3117 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3118 if (!codeptr)
3119 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3120 if (ompt_enabled.enabled) {
3121 if (release_status == KMP_LOCK_RELEASED) {
3122 if (ompt_enabled.ompt_callback_mutex_released) {
3123 // release_lock_last
3124 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3125 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3126 codeptr);
3127 }
3128 } else if (ompt_enabled.ompt_callback_nest_lock) {
3129 // release_lock_prev
3130 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3131 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3132 }
3133 }
3134 #endif
3135
3136 #else // KMP_USE_DYNAMIC_LOCK
3137
3138 kmp_user_lock_p lck;
3139
3140 /* Can't use serial interval since not block structured */
3141
3142 if ((__kmp_user_lock_kind == lk_tas) &&
3143 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3144 OMP_NEST_LOCK_T_SIZE)) {
3145 #if KMP_OS_LINUX && \
3146 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3147 // "fast" path implemented to fix customer performance issue
3148 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3149 #if USE_ITT_BUILD
3150 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3151 #endif /* USE_ITT_BUILD */
3152
3153 #if OMPT_SUPPORT && OMPT_OPTIONAL
3154 int release_status = KMP_LOCK_STILL_HELD;
3155 #endif
3156
3157 if (--(tl->lk.depth_locked) == 0) {
3158 TCW_4(tl->lk.poll, 0);
3159 #if OMPT_SUPPORT && OMPT_OPTIONAL
3160 release_status = KMP_LOCK_RELEASED;
3161 #endif
3162 }
3163 KMP_MB();
3164
3165 #if OMPT_SUPPORT && OMPT_OPTIONAL
3166 // This is the case, if called from omp_init_lock_with_hint:
3167 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3168 if (!codeptr)
3169 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3170 if (ompt_enabled.enabled) {
3171 if (release_status == KMP_LOCK_RELEASED) {
3172 if (ompt_enabled.ompt_callback_mutex_released) {
3173 // release_lock_last
3174 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3175 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3176 }
3177 } else if (ompt_enabled.ompt_callback_nest_lock) {
3178 // release_lock_previous
3179 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3180 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3181 }
3182 }
3183 #endif
3184
3185 return;
3186 #else
3187 lck = (kmp_user_lock_p)user_lock;
3188 #endif
3189 }
3190 #if KMP_USE_FUTEX
3191 else if ((__kmp_user_lock_kind == lk_futex) &&
3192 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3193 OMP_NEST_LOCK_T_SIZE)) {
3194 lck = (kmp_user_lock_p)user_lock;
3195 }
3196 #endif
3197 else {
3198 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3199 }
3200
3201 #if USE_ITT_BUILD
3202 __kmp_itt_lock_releasing(lck);
3203 #endif /* USE_ITT_BUILD */
3204
3205 int release_status;
3206 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3207 #if OMPT_SUPPORT && OMPT_OPTIONAL
3208 // This is the case, if called from omp_init_lock_with_hint:
3209 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3210 if (!codeptr)
3211 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3212 if (ompt_enabled.enabled) {
3213 if (release_status == KMP_LOCK_RELEASED) {
3214 if (ompt_enabled.ompt_callback_mutex_released) {
3215 // release_lock_last
3216 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3217 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3218 }
3219 } else if (ompt_enabled.ompt_callback_nest_lock) {
3220 // release_lock_previous
3221 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3222 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3223 }
3224 }
3225 #endif
3226
3227 #endif // KMP_USE_DYNAMIC_LOCK
3228 }
3229
3230 /* try to acquire the lock */
3231 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3232 KMP_COUNT_BLOCK(OMP_test_lock);
3233
3234 #if KMP_USE_DYNAMIC_LOCK
3235 int rc;
3236 int tag = KMP_EXTRACT_D_TAG(user_lock);
3237 #if USE_ITT_BUILD
3238 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3239 #endif
3240 #if OMPT_SUPPORT && OMPT_OPTIONAL
3241 // This is the case, if called from omp_init_lock_with_hint:
3242 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3243 if (!codeptr)
3244 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3245 if (ompt_enabled.ompt_callback_mutex_acquire) {
3246 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3247 ompt_mutex_test_lock, omp_lock_hint_none,
3248 __ompt_get_mutex_impl_type(user_lock),
3249 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3250 }
3251 #endif
3252 #if KMP_USE_INLINED_TAS
3253 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3254 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3255 } else
3256 #elif KMP_USE_INLINED_FUTEX
3257 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3258 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3259 } else
3260 #endif
3261 {
3262 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3263 }
3264 if (rc) {
3265 #if USE_ITT_BUILD
3266 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3267 #endif
3268 #if OMPT_SUPPORT && OMPT_OPTIONAL
3269 if (ompt_enabled.ompt_callback_mutex_acquired) {
3270 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3271 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3272 }
3273 #endif
3274 return FTN_TRUE;
3275 } else {
3276 #if USE_ITT_BUILD
3277 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3278 #endif
3279 return FTN_FALSE;
3280 }
3281
3282 #else // KMP_USE_DYNAMIC_LOCK
3283
3284 kmp_user_lock_p lck;
3285 int rc;
3286
3287 if ((__kmp_user_lock_kind == lk_tas) &&
3288 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3289 lck = (kmp_user_lock_p)user_lock;
3290 }
3291 #if KMP_USE_FUTEX
3292 else if ((__kmp_user_lock_kind == lk_futex) &&
3293 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3294 lck = (kmp_user_lock_p)user_lock;
3295 }
3296 #endif
3297 else {
3298 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3299 }
3300
3301 #if USE_ITT_BUILD
3302 __kmp_itt_lock_acquiring(lck);
3303 #endif /* USE_ITT_BUILD */
3304 #if OMPT_SUPPORT && OMPT_OPTIONAL
3305 // This is the case, if called from omp_init_lock_with_hint:
3306 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3307 if (!codeptr)
3308 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3309 if (ompt_enabled.ompt_callback_mutex_acquire) {
3310 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3311 ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3312 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3313 }
3314 #endif
3315
3316 rc = TEST_LOCK(lck, gtid);
3317 #if USE_ITT_BUILD
3318 if (rc) {
3319 __kmp_itt_lock_acquired(lck);
3320 } else {
3321 __kmp_itt_lock_cancelled(lck);
3322 }
3323 #endif /* USE_ITT_BUILD */
3324 #if OMPT_SUPPORT && OMPT_OPTIONAL
3325 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3326 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3327 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3328 }
3329 #endif
3330
3331 return (rc ? FTN_TRUE : FTN_FALSE);
3332
3333 /* Can't use serial interval since not block structured */
3334
3335 #endif // KMP_USE_DYNAMIC_LOCK
3336 }
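/* Illustrative user-level sketch (not part of the runtime): the lock looked up
   above under the name "omp_test_lock" is the user's OpenMP lock, so the
   typical caller pattern is assumed to be:

     omp_lock_t l;
     omp_init_lock(&l);
     if (omp_test_lock(&l)) {   // non-blocking attempt; nonzero iff acquired
       // ... protected work ...
       omp_unset_lock(&l);
     }
     omp_destroy_lock(&l);

   The FTN_TRUE / FTN_FALSE values returned here correspond to that
   nonzero / zero result. */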
3337
3338 /* try to acquire the lock */
3339 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3340 #if KMP_USE_DYNAMIC_LOCK
3341 int rc;
3342 #if USE_ITT_BUILD
3343 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3344 #endif
3345 #if OMPT_SUPPORT && OMPT_OPTIONAL
3346 // This is the case, if called from omp_init_lock_with_hint:
3347 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3348 if (!codeptr)
3349 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3350 if (ompt_enabled.ompt_callback_mutex_acquire) {
3351 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3352 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3353 __ompt_get_mutex_impl_type(user_lock),
3354 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3355 }
3356 #endif
3357 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3358 #if USE_ITT_BUILD
3359 if (rc) {
3360 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3361 } else {
3362 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3363 }
3364 #endif
3365 #if OMPT_SUPPORT && OMPT_OPTIONAL
3366 if (ompt_enabled.enabled && rc) {
3367 if (rc == 1) {
3368 if (ompt_enabled.ompt_callback_mutex_acquired) {
3369 // lock_first
3370 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3371 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3372 codeptr);
3373 }
3374 } else {
3375 if (ompt_enabled.ompt_callback_nest_lock) {
3376 // lock_next
3377 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3378 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3379 }
3380 }
3381 }
3382 #endif
3383 return rc;
3384
3385 #else // KMP_USE_DYNAMIC_LOCK
3386
3387 kmp_user_lock_p lck;
3388 int rc;
3389
3390 if ((__kmp_user_lock_kind == lk_tas) &&
3391 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3392 OMP_NEST_LOCK_T_SIZE)) {
3393 lck = (kmp_user_lock_p)user_lock;
3394 }
3395 #if KMP_USE_FUTEX
3396 else if ((__kmp_user_lock_kind == lk_futex) &&
3397 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3398 OMP_NEST_LOCK_T_SIZE)) {
3399 lck = (kmp_user_lock_p)user_lock;
3400 }
3401 #endif
3402 else {
3403 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3404 }
3405
3406 #if USE_ITT_BUILD
3407 __kmp_itt_lock_acquiring(lck);
3408 #endif /* USE_ITT_BUILD */
3409
3410 #if OMPT_SUPPORT && OMPT_OPTIONAL
3411 // This is the case, if called from omp_init_lock_with_hint:
3412 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3413 if (!codeptr)
3414 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3415 if (ompt_enabled.enabled &&
3416     ompt_enabled.ompt_callback_mutex_acquire) {
3417 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3418 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3419 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3420 codeptr);
3421 }
3422 #endif
3423
3424 rc = TEST_NESTED_LOCK(lck, gtid);
3425 #if USE_ITT_BUILD
3426 if (rc) {
3427 __kmp_itt_lock_acquired(lck);
3428 } else {
3429 __kmp_itt_lock_cancelled(lck);
3430 }
3431 #endif /* USE_ITT_BUILD */
3432 #if OMPT_SUPPORT && OMPT_OPTIONAL
3433 if (ompt_enabled.enabled && rc) {
3434 if (rc == 1) {
3435 if (ompt_enabled.ompt_callback_mutex_acquired) {
3436 // lock_first
3437 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3438 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3439 }
3440 } else {
3441 if (ompt_enabled.ompt_callback_nest_lock) {
3442 // lock_next
3443 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3444 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3445 }
3446 }
3447 }
3448 #endif
3449 return rc;
3450
3451 /* Can't use serial interval since not block structured */
3452
3453 #endif // KMP_USE_DYNAMIC_LOCK
3454 }
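/* Illustrative sketch (not part of the runtime): the nestable variant returns
   the nesting depth rather than a boolean, which is what the rc == 1 versus
   rc > 1 distinction in the OMPT reporting above relies on. Assuming the usual
   omp_test_nest_lock() entry path:

     omp_nest_lock_t nl;
     omp_init_nest_lock(&nl);
     int d1 = omp_test_nest_lock(&nl); // expected 1: first acquisition
     int d2 = omp_test_nest_lock(&nl); // expected 2: same owner, nested
     omp_unset_nest_lock(&nl);
     omp_unset_nest_lock(&nl);
     omp_destroy_nest_lock(&nl);
*/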
3455
3456 // Interface to fast scalable reduce methods routines
3457
3458 // keep the selected method in a thread local structure for cross-function
3459 // usage: will be used in __kmpc_end_reduce* functions;
3460 // another solution: to re-determine the method one more time in
3461 // __kmpc_end_reduce* functions (new prototype required then)
3462 // AT: which solution is better?
3463 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3464 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3465
3466 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3467 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
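/* Usage sketch for the two macros above, matching the pattern of the reduce
   entry points later in this file: the start routine stores the chosen method,
   and the corresponding end routine reads it back on the same thread.

     packed_reduction_method = __kmp_determine_reduction_method(
         loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
     __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
     // ... later, in __kmpc_end_reduce*():
     packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
*/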
3468
3469 // description of the packed_reduction_method variable: look at the macros in
3470 // kmp.h
3471
3472 // used in a critical section reduce block
3473 static __forceinline void
3474 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3475 kmp_critical_name *crit) {
3476
3477 // this lock was visible to a customer and to the threading profile tool as a
3478 // serial overhead span (although it's used for an internal purpose only)
3479 // why was it visible in previous implementation?
3480 // should we keep it visible in new reduce block?
3481 kmp_user_lock_p lck;
3482
3483 #if KMP_USE_DYNAMIC_LOCK
3484
3485 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3486 // Check if it is initialized.
3487 if (*lk == 0) {
3488 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3489 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3490 KMP_GET_D_TAG(__kmp_user_lock_seq));
3491 } else {
3492 __kmp_init_indirect_csptr(crit, loc, global_tid,
3493 KMP_GET_I_TAG(__kmp_user_lock_seq));
3494 }
3495 }
3496 // Branch for accessing the actual lock object and set operation. This
3497 // branching is inevitable since this lock initialization does not follow the
3498 // normal dispatch path (lock table is not used).
3499 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3500 lck = (kmp_user_lock_p)lk;
3501 KMP_DEBUG_ASSERT(lck != NULL);
3502 if (__kmp_env_consistency_check) {
3503 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3504 }
3505 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3506 } else {
3507 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3508 lck = ilk->lock;
3509 KMP_DEBUG_ASSERT(lck != NULL);
3510 if (__kmp_env_consistency_check) {
3511 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3512 }
3513 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3514 }
3515
3516 #else // KMP_USE_DYNAMIC_LOCK
3517
3518 // We know that the fast reduction code is only emitted by Intel compilers
3519 // with 32 byte critical sections. If there isn't enough space, then we
3520 // have to use a pointer.
3521 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3522 lck = (kmp_user_lock_p)crit;
3523 } else {
3524 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3525 }
3526 KMP_DEBUG_ASSERT(lck != NULL);
3527
3528 if (__kmp_env_consistency_check)
3529 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3530
3531 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3532
3533 #endif // KMP_USE_DYNAMIC_LOCK
3534 }
3535
3536 // used in a critical section reduce block
3537 static __forceinline void
3538 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3539 kmp_critical_name *crit) {
3540
3541 kmp_user_lock_p lck;
3542
3543 #if KMP_USE_DYNAMIC_LOCK
3544
3545 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3546 lck = (kmp_user_lock_p)crit;
3547 if (__kmp_env_consistency_check)
3548 __kmp_pop_sync(global_tid, ct_critical, loc);
3549 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3550 } else {
3551 kmp_indirect_lock_t *ilk =
3552 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3553 if (__kmp_env_consistency_check)
3554 __kmp_pop_sync(global_tid, ct_critical, loc);
3555 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3556 }
3557
3558 #else // KMP_USE_DYNAMIC_LOCK
3559
3560 // We know that the fast reduction code is only emitted by Intel compilers
3561 // with 32 byte critical sections. If there isn't enough space, then we have
3562 // to use a pointer.
3563 if (__kmp_base_user_lock_size > 32) {
3564 lck = *((kmp_user_lock_p *)crit);
3565 KMP_ASSERT(lck != NULL);
3566 } else {
3567 lck = (kmp_user_lock_p)crit;
3568 }
3569
3570 if (__kmp_env_consistency_check)
3571 __kmp_pop_sync(global_tid, ct_critical, loc);
3572
3573 __kmp_release_user_lock_with_checks(lck, global_tid);
3574
3575 #endif // KMP_USE_DYNAMIC_LOCK
3576 } // __kmp_end_critical_section_reduce_block
3577
3578 static __forceinline int
3579 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3580 int *task_state) {
3581 kmp_team_t *team;
3582
3583 // Check if we are inside the teams construct?
3584 if (th->th.th_teams_microtask) {
3585 *team_p = team = th->th.th_team;
3586 if (team->t.t_level == th->th.th_teams_level) {
3587 // This is reduction at teams construct.
3588 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3589 // Let's swap teams temporarily for the reduction.
3590 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3591 th->th.th_team = team->t.t_parent;
3592 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3593 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3594 *task_state = th->th.th_task_state;
3595 th->th.th_task_state = 0;
3596
3597 return 1;
3598 }
3599 }
3600 return 0;
3601 }
3602
3603 static __forceinline void
3604 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3605 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3606 th->th.th_info.ds.ds_tid = 0;
3607 th->th.th_team = team;
3608 th->th.th_team_nproc = team->t.t_nproc;
3609 th->th.th_task_team = team->t.t_task_team[task_state];
3610 __kmp_type_convert(task_state, &(th->th.th_task_state));
3611 }
3612
3613 /* 2.a.i. Reduce Block without a terminating barrier */
3614 /*!
3615 @ingroup SYNCHRONIZATION
3616 @param loc source location information
3617 @param global_tid global thread number
3618 @param num_vars number of items (variables) to be reduced
3619 @param reduce_size size of data in bytes to be reduced
3620 @param reduce_data pointer to data to be reduced
3621 @param reduce_func callback function providing reduction operation on two
3622 operands and returning result of reduction in lhs_data
3623 @param lck pointer to the unique lock data structure
3624 @result 1 for the primary thread, 0 for all other team threads, 2 for all team
3625 threads if atomic reduction needed
3626
3627 The nowait version is used for a reduce clause with the nowait argument.
3628 */
3629 kmp_int32
3630 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3631 size_t reduce_size, void *reduce_data,
3632 void (*reduce_func)(void *lhs_data, void *rhs_data),
3633 kmp_critical_name *lck) {
3634
3635 KMP_COUNT_BLOCK(REDUCE_nowait);
3636 int retval = 0;
3637 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3638 kmp_info_t *th;
3639 kmp_team_t *team;
3640 int teams_swapped = 0, task_state;
3641 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3642 __kmp_assert_valid_gtid(global_tid);
3643
3644 // why do we need this initialization here at all?
3645 // Reduction clause can not be used as a stand-alone directive.
3646
3647 // do not call __kmp_serial_initialize(), it will be called by
3648 // __kmp_parallel_initialize() if needed
3649 // possible detection of false-positive race by the threadchecker ???
3650 if (!TCR_4(__kmp_init_parallel))
3651 __kmp_parallel_initialize();
3652
3653 __kmp_resume_if_soft_paused();
3654
3655 // check correctness of reduce block nesting
3656 #if KMP_USE_DYNAMIC_LOCK
3657 if (__kmp_env_consistency_check)
3658 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3659 #else
3660 if (__kmp_env_consistency_check)
3661 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3662 #endif
3663
3664 th = __kmp_thread_from_gtid(global_tid);
3665 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3666
3667 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3668 // the value should be kept in a variable
3669 // the variable should be either a construct-specific or thread-specific
3670 // property, not a team specific property
3671 // (a thread can reach the next reduce block on the next construct, reduce
3672 // method may differ on the next construct)
3673 // an ident_t "loc" parameter could be used as a construct-specific property
3674 // (what if loc == 0?)
3675 // (if both construct-specific and team-specific variables were shared,
3676 // then unnecessary extra syncs would be needed)
3677 // a thread-specific variable is better regarding two issues above (next
3678 // construct and extra syncs)
3679 // a thread-specific "th_local.reduction_method" variable is used currently
3680 // each thread executes 'determine' and 'set' lines (no need to execute by one
3681 // thread, to avoid unnecessary extra syncs)
3682
3683 packed_reduction_method = __kmp_determine_reduction_method(
3684 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3685 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3686
3687 OMPT_REDUCTION_DECL(th, global_tid);
3688 if (packed_reduction_method == critical_reduce_block) {
3689
3690 OMPT_REDUCTION_BEGIN;
3691
3692 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3693 retval = 1;
3694
3695 } else if (packed_reduction_method == empty_reduce_block) {
3696
3697 OMPT_REDUCTION_BEGIN;
3698
3699 // usage: if team size == 1, no synchronization is required ( Intel
3700 // platforms only )
3701 retval = 1;
3702
3703 } else if (packed_reduction_method == atomic_reduce_block) {
3704
3705 retval = 2;
3706
3707 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3708 // won't be called by the code gen)
3709 // (it's not quite good, because the checking block has been closed by
3710 // this 'pop',
3711 // but atomic operation has not been executed yet, will be executed
3712 // slightly later, literally on next instruction)
3713 if (__kmp_env_consistency_check)
3714 __kmp_pop_sync(global_tid, ct_reduce, loc);
3715
3716 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3717 tree_reduce_block)) {
3718
3719 // AT: performance issue: a real barrier here
3720 // AT: (if primary thread is slow, other threads are blocked here waiting for
3721 // the primary thread to come and release them)
3722 // AT: (it's not what a customer might expect specifying NOWAIT clause)
3723 // AT: (specifying NOWAIT won't result in improvement of performance, it'll
3724 // be confusing to a customer)
3725 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3726 // might go faster and be more in line with sense of NOWAIT
3727 // AT: TO DO: do epcc test and compare times
3728
3729 // this barrier should be invisible to a customer and to the threading profile
3730 // tool (it's neither a terminating barrier nor customer's code, it's
3731 // used for an internal purpose)
3732 #if OMPT_SUPPORT
3733 // JP: can this barrier potentially lead to task scheduling?
3734 // JP: as long as there is a barrier in the implementation, OMPT should and
3735 // will provide the barrier events
3736 // so we set-up the necessary frame/return addresses.
3737 ompt_frame_t *ompt_frame;
3738 if (ompt_enabled.enabled) {
3739 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3740 if (ompt_frame->enter_frame.ptr == NULL)
3741 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3742 }
3743 OMPT_STORE_RETURN_ADDRESS(global_tid);
3744 #endif
3745 #if USE_ITT_NOTIFY
3746 __kmp_threads[global_tid]->th.th_ident = loc;
3747 #endif
3748 retval =
3749 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3750 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3751 retval = (retval != 0) ? (0) : (1);
3752 #if OMPT_SUPPORT && OMPT_OPTIONAL
3753 if (ompt_enabled.enabled) {
3754 ompt_frame->enter_frame = ompt_data_none;
3755 }
3756 #endif
3757
3758 // all other workers except primary thread should do this pop here
3759 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3760 if (__kmp_env_consistency_check) {
3761 if (retval == 0) {
3762 __kmp_pop_sync(global_tid, ct_reduce, loc);
3763 }
3764 }
3765
3766 } else {
3767
3768 // should never reach this block
3769 KMP_ASSERT(0); // "unexpected method"
3770 }
3771 if (teams_swapped) {
3772 __kmp_restore_swapped_teams(th, team, task_state);
3773 }
3774 KA_TRACE(
3775 10,
3776 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3777 global_tid, packed_reduction_method, retval));
3778
3779 return retval;
3780 }
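/* Codegen-style sketch (an assumption about typical compiler output, not a
   contract defined in this file): how the return value of
   __kmpc_reduce_nowait() is usually consumed for something like
   "#pragma omp for reduction(+ : sum) nowait". Names loc, gtid, red, reducer,
   sum_private and crit_name are placeholders for compiler-generated entities.

     switch (__kmpc_reduce_nowait(&loc, gtid, 1, sizeof(red), &red, reducer,
                                  &crit_name)) {
     case 1: // combine privates into the shared result, then close the block
       sum += sum_private;
       __kmpc_end_reduce_nowait(&loc, gtid, &crit_name);
       break;
     case 2: // atomic path: each thread adds sum_private to sum atomically;
             // __kmpc_end_reduce_nowait() is not emitted in this case
       break;
     default: // 0: this thread's value was already combined (tree barrier path)
       break;
     }
*/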
3781
3782 /*!
3783 @ingroup SYNCHRONIZATION
3784 @param loc source location information
3785 @param global_tid global thread id.
3786 @param lck pointer to the unique lock data structure
3787
3788 Finish the execution of a reduce nowait.
3789 */
3790 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3791 kmp_critical_name *lck) {
3792
3793 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3794
3795 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3796 __kmp_assert_valid_gtid(global_tid);
3797
3798 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3799
3800 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3801
3802 if (packed_reduction_method == critical_reduce_block) {
3803
3804 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3805 OMPT_REDUCTION_END;
3806
3807 } else if (packed_reduction_method == empty_reduce_block) {
3808
3809 // usage: if team size == 1, no synchronization is required ( on Intel
3810 // platforms only )
3811
3812 OMPT_REDUCTION_END;
3813
3814 } else if (packed_reduction_method == atomic_reduce_block) {
3815
3816 // neither primary thread nor other workers should get here
3817 // (code gen does not generate this call in case 2: atomic reduce block)
3818 // actually it's better to remove this elseif altogether;
3819 // after removal this value will be checked by the 'else' and will assert
3820
3821 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3822 tree_reduce_block)) {
3823
3824 // only primary thread gets here
3825 // OMPT: tree reduction is annotated in the barrier code
3826
3827 } else {
3828
3829 // should never reach this block
3830 KMP_ASSERT(0); // "unexpected method"
3831 }
3832
3833 if (__kmp_env_consistency_check)
3834 __kmp_pop_sync(global_tid, ct_reduce, loc);
3835
3836 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3837 global_tid, packed_reduction_method));
3838
3839 return;
3840 }
3841
3842 /* 2.a.ii. Reduce Block with a terminating barrier */
3843
3844 /*!
3845 @ingroup SYNCHRONIZATION
3846 @param loc source location information
3847 @param global_tid global thread number
3848 @param num_vars number of items (variables) to be reduced
3849 @param reduce_size size of data in bytes to be reduced
3850 @param reduce_data pointer to data to be reduced
3851 @param reduce_func callback function providing reduction operation on two
3852 operands and returning result of reduction in lhs_data
3853 @param lck pointer to the unique lock data structure
3854 @result 1 for the primary thread, 0 for all other team threads, 2 for all team
3855 threads if atomic reduction needed
3856
3857 A blocking reduce that includes an implicit barrier.
3858 */
3859 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3860 size_t reduce_size, void *reduce_data,
3861 void (*reduce_func)(void *lhs_data, void *rhs_data),
3862 kmp_critical_name *lck) {
3863 KMP_COUNT_BLOCK(REDUCE_wait);
3864 int retval = 0;
3865 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3866 kmp_info_t *th;
3867 kmp_team_t *team;
3868 int teams_swapped = 0, task_state;
3869
3870 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3871 __kmp_assert_valid_gtid(global_tid);
3872
3873 // why do we need this initialization here at all?
3874 // Reduction clause can not be a stand-alone directive.
3875
3876 // do not call __kmp_serial_initialize(), it will be called by
3877 // __kmp_parallel_initialize() if needed
3878 // possible detection of false-positive race by the threadchecker ???
3879 if (!TCR_4(__kmp_init_parallel))
3880 __kmp_parallel_initialize();
3881
3882 __kmp_resume_if_soft_paused();
3883
3884 // check correctness of reduce block nesting
3885 #if KMP_USE_DYNAMIC_LOCK
3886 if (__kmp_env_consistency_check)
3887 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3888 #else
3889 if (__kmp_env_consistency_check)
3890 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3891 #endif
3892
3893 th = __kmp_thread_from_gtid(global_tid);
3894 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3895
3896 packed_reduction_method = __kmp_determine_reduction_method(
3897 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3898 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3899
3900 OMPT_REDUCTION_DECL(th, global_tid);
3901
3902 if (packed_reduction_method == critical_reduce_block) {
3903
3904 OMPT_REDUCTION_BEGIN;
3905 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3906 retval = 1;
3907
3908 } else if (packed_reduction_method == empty_reduce_block) {
3909
3910 OMPT_REDUCTION_BEGIN;
3911 // usage: if team size == 1, no synchronization is required ( Intel
3912 // platforms only )
3913 retval = 1;
3914
3915 } else if (packed_reduction_method == atomic_reduce_block) {
3916
3917 retval = 2;
3918
3919 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3920 tree_reduce_block)) {
3921
3922 // case tree_reduce_block:
3923 // this barrier should be visible to a customer and to the threading profile
3924 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3925 #if OMPT_SUPPORT
3926 ompt_frame_t *ompt_frame;
3927 if (ompt_enabled.enabled) {
3928 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3929 if (ompt_frame->enter_frame.ptr == NULL)
3930 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3931 }
3932 OMPT_STORE_RETURN_ADDRESS(global_tid);
3933 #endif
3934 #if USE_ITT_NOTIFY
3935 __kmp_threads[global_tid]->th.th_ident =
3936 loc; // needed for correct notification of frames
3937 #endif
3938 retval =
3939 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3940 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3941 retval = (retval != 0) ? (0) : (1);
3942 #if OMPT_SUPPORT && OMPT_OPTIONAL
3943 if (ompt_enabled.enabled) {
3944 ompt_frame->enter_frame = ompt_data_none;
3945 }
3946 #endif
3947
3948 // all other workers except primary thread should do this pop here
3949 // (none of other workers except primary will enter __kmpc_end_reduce())
3950 if (__kmp_env_consistency_check) {
3951 if (retval == 0) { // 0: all other workers; 1: primary thread
3952 __kmp_pop_sync(global_tid, ct_reduce, loc);
3953 }
3954 }
3955
3956 } else {
3957
3958 // should never reach this block
3959 KMP_ASSERT(0); // "unexpected method"
3960 }
3961 if (teams_swapped) {
3962 __kmp_restore_swapped_teams(th, team, task_state);
3963 }
3964
3965 KA_TRACE(10,
3966 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3967 global_tid, packed_reduction_method, retval));
3968 return retval;
3969 }
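/* Sketch of the expected pairing for this blocking variant (an assumption based
   on the consistency-check pops above, not a definitive codegen contract):
   threads that receive 0 were already released inside the tree barrier and do
   not call __kmpc_end_reduce(); threads that receive 1 or 2 finish the block
   and reach the terminating barrier through __kmpc_end_reduce().

     int res = __kmpc_reduce(&loc, gtid, 1, sizeof(red), &red, reducer, &crit);
     if (res == 1) {
       sum += sum_private;                   // combine (critical/empty method)
       __kmpc_end_reduce(&loc, gtid, &crit); // releases lock, hits barrier
     } else if (res == 2) {
       // atomic add of sum_private into sum, then:
       __kmpc_end_reduce(&loc, gtid, &crit); // barrier only
     }
*/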
3970
3971 /*!
3972 @ingroup SYNCHRONIZATION
3973 @param loc source location information
3974 @param global_tid global thread id.
3975 @param lck pointer to the unique lock data structure
3976
3977 Finish the execution of a blocking reduce.
3978 The <tt>lck</tt> pointer must be the same as that used in the corresponding
3979 start function.
3980 */
3981 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3982 kmp_critical_name *lck) {
3983
3984 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3985 kmp_info_t *th;
3986 kmp_team_t *team;
3987 int teams_swapped = 0, task_state;
3988
3989 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3990 __kmp_assert_valid_gtid(global_tid);
3991
3992 th = __kmp_thread_from_gtid(global_tid);
3993 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3994
3995 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3996
3997 // this barrier should be visible to a customer and to the threading profile
3998 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3999 OMPT_REDUCTION_DECL(th, global_tid);
4000
4001 if (packed_reduction_method == critical_reduce_block) {
4002 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
4003
4004 OMPT_REDUCTION_END;
4005
4006 // TODO: implicit barrier: should be exposed
4007 #if OMPT_SUPPORT
4008 ompt_frame_t *ompt_frame;
4009 if (ompt_enabled.enabled) {
4010 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4011 if (ompt_frame->enter_frame.ptr == NULL)
4012 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4013 }
4014 OMPT_STORE_RETURN_ADDRESS(global_tid);
4015 #endif
4016 #if USE_ITT_NOTIFY
4017 __kmp_threads[global_tid]->th.th_ident = loc;
4018 #endif
4019 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4020 #if OMPT_SUPPORT && OMPT_OPTIONAL
4021 if (ompt_enabled.enabled) {
4022 ompt_frame->enter_frame = ompt_data_none;
4023 }
4024 #endif
4025
4026 } else if (packed_reduction_method == empty_reduce_block) {
4027
4028 OMPT_REDUCTION_END;
4029
4030 // usage: if team size==1, no synchronization is required (Intel platforms only)
4031
4032 // TODO: implicit barrier: should be exposed
4033 #if OMPT_SUPPORT
4034 ompt_frame_t *ompt_frame;
4035 if (ompt_enabled.enabled) {
4036 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4037 if (ompt_frame->enter_frame.ptr == NULL)
4038 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4039 }
4040 OMPT_STORE_RETURN_ADDRESS(global_tid);
4041 #endif
4042 #if USE_ITT_NOTIFY
4043 __kmp_threads[global_tid]->th.th_ident = loc;
4044 #endif
4045 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4046 #if OMPT_SUPPORT && OMPT_OPTIONAL
4047 if (ompt_enabled.enabled) {
4048 ompt_frame->enter_frame = ompt_data_none;
4049 }
4050 #endif
4051
4052 } else if (packed_reduction_method == atomic_reduce_block) {
4053
4054 #if OMPT_SUPPORT
4055 ompt_frame_t *ompt_frame;
4056 if (ompt_enabled.enabled) {
4057 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4058 if (ompt_frame->enter_frame.ptr == NULL)
4059 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4060 }
4061 OMPT_STORE_RETURN_ADDRESS(global_tid);
4062 #endif
4063 // TODO: implicit barrier: should be exposed
4064 #if USE_ITT_NOTIFY
4065 __kmp_threads[global_tid]->th.th_ident = loc;
4066 #endif
4067 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4068 #if OMPT_SUPPORT && OMPT_OPTIONAL
4069 if (ompt_enabled.enabled) {
4070 ompt_frame->enter_frame = ompt_data_none;
4071 }
4072 #endif
4073
4074 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4075 tree_reduce_block)) {
4076
4077 // only primary thread executes here (primary releases all other workers)
4078 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4079 global_tid);
4080
4081 } else {
4082
4083 // should never reach this block
4084 KMP_ASSERT(0); // "unexpected method"
4085 }
4086 if (teams_swapped) {
4087 __kmp_restore_swapped_teams(th, team, task_state);
4088 }
4089
4090 if (__kmp_env_consistency_check)
4091 __kmp_pop_sync(global_tid, ct_reduce, loc);
4092
4093 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4094 global_tid, packed_reduction_method));
4095
4096 return;
4097 }
4098
4099 #undef __KMP_GET_REDUCTION_METHOD
4100 #undef __KMP_SET_REDUCTION_METHOD
4101
4102 /* end of interface to fast scalable reduce routines */
4103
4104 kmp_uint64 __kmpc_get_taskid() {
4105
4106 kmp_int32 gtid;
4107 kmp_info_t *thread;
4108
4109 gtid = __kmp_get_gtid();
4110 if (gtid < 0) {
4111 return 0;
4112 }
4113 thread = __kmp_thread_from_gtid(gtid);
4114 return thread->th.th_current_task->td_task_id;
4115
4116 } // __kmpc_get_taskid
4117
4118 kmp_uint64 __kmpc_get_parent_taskid() {
4119
4120 kmp_int32 gtid;
4121 kmp_info_t *thread;
4122 kmp_taskdata_t *parent_task;
4123
4124 gtid = __kmp_get_gtid();
4125 if (gtid < 0) {
4126 return 0;
4127 }
4128 thread = __kmp_thread_from_gtid(gtid);
4129 parent_task = thread->th.th_current_task->td_parent;
4130 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4131
4132 } // __kmpc_get_parent_taskid
4133
4134 /*!
4135 @ingroup WORK_SHARING
4136 @param loc source location information.
4137 @param gtid global thread number.
4138 @param num_dims number of associated doacross loops.
4139 @param dims info on loops bounds.
4140
4141 Initialize doacross loop information.
4142 Expect the compiler to send us inclusive bounds,
4143 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
4144 */
4145 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4146 const struct kmp_dim *dims) {
4147 __kmp_assert_valid_gtid(gtid);
4148 int j, idx;
4149 kmp_int64 last, trace_count;
4150 kmp_info_t *th = __kmp_threads[gtid];
4151 kmp_team_t *team = th->th.th_team;
4152 kmp_uint32 *flags;
4153 kmp_disp_t *pr_buf = th->th.th_dispatch;
4154 dispatch_shared_info_t *sh_buf;
4155
4156 KA_TRACE(
4157 20,
4158 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4159 gtid, num_dims, !team->t.t_serialized));
4160 KMP_DEBUG_ASSERT(dims != NULL);
4161 KMP_DEBUG_ASSERT(num_dims > 0);
4162
4163 if (team->t.t_serialized) {
4164 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4165 return; // no dependencies if team is serialized
4166 }
4167 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4168 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4169 // the next loop
4170 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4171
4172 // Save bounds info into allocated private buffer
4173 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4174 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4175 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4176 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4177 pr_buf->th_doacross_info[0] =
4178 (kmp_int64)num_dims; // first element is number of dimensions
4179 // Save also address of num_done in order to access it later without knowing
4180 // the buffer index
4181 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4182 pr_buf->th_doacross_info[2] = dims[0].lo;
4183 pr_buf->th_doacross_info[3] = dims[0].up;
4184 pr_buf->th_doacross_info[4] = dims[0].st;
4185 last = 5;
4186 for (j = 1; j < num_dims; ++j) {
4187 kmp_int64
4188 range_length; // To keep ranges of all dimensions but the first dims[0]
4189 if (dims[j].st == 1) { // most common case
4190 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4191 range_length = dims[j].up - dims[j].lo + 1;
4192 } else {
4193 if (dims[j].st > 0) {
4194 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4195 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4196 } else { // negative increment
4197 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4198 range_length =
4199 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4200 }
4201 }
4202 pr_buf->th_doacross_info[last++] = range_length;
4203 pr_buf->th_doacross_info[last++] = dims[j].lo;
4204 pr_buf->th_doacross_info[last++] = dims[j].up;
4205 pr_buf->th_doacross_info[last++] = dims[j].st;
4206 }
4207
4208 // Compute total trip count.
4209 // Start with range of dims[0] which we don't need to keep in the buffer.
4210 if (dims[0].st == 1) { // most common case
4211 trace_count = dims[0].up - dims[0].lo + 1;
4212 } else if (dims[0].st > 0) {
4213 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4214 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4215 } else { // negative increment
4216 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4217 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4218 }
4219 for (j = 1; j < num_dims; ++j) {
4220 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4221 }
4222 KMP_DEBUG_ASSERT(trace_count > 0);
4223
4224 // Check if shared buffer is not occupied by other loop (idx -
4225 // __kmp_dispatch_num_buffers)
4226 if (idx != sh_buf->doacross_buf_idx) {
4227 // Shared buffer is occupied, wait for it to be free
4228 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4229 __kmp_eq_4, NULL);
4230 }
4231 #if KMP_32_BIT_ARCH
4232 // Check if we are the first thread. After the CAS the first thread gets 0,
4233 // others get 1 if initialization is in progress, allocated pointer otherwise.
4234 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4235 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4236 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4237 #else
4238 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4239 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4240 #endif
4241 if (flags == NULL) {
4242 // we are the first thread, allocate the array of flags
4243 size_t size =
4244 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4245 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4246 KMP_MB();
4247 sh_buf->doacross_flags = flags;
4248 } else if (flags == (kmp_uint32 *)1) {
4249 #if KMP_32_BIT_ARCH
4250 // initialization is still in progress, need to wait
4251 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4252 #else
4253 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4254 #endif
4255 KMP_YIELD(TRUE);
4256 KMP_MB();
4257 } else {
4258 KMP_MB();
4259 }
4260 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4261 pr_buf->th_doacross_flags =
4262 sh_buf->doacross_flags; // save private copy in order to not
4263 // touch shared buffer on each iteration
4264 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4265 }
4266
4267 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4268 __kmp_assert_valid_gtid(gtid);
4269 kmp_int64 shft;
4270 size_t num_dims, i;
4271 kmp_uint32 flag;
4272 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4273 kmp_info_t *th = __kmp_threads[gtid];
4274 kmp_team_t *team = th->th.th_team;
4275 kmp_disp_t *pr_buf;
4276 kmp_int64 lo, up, st;
4277
4278 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4279 if (team->t.t_serialized) {
4280 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4281 return; // no dependencies if team is serialized
4282 }
4283
4284 // calculate sequential iteration number and check out-of-bounds condition
4285 pr_buf = th->th.th_dispatch;
4286 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4287 num_dims = (size_t)pr_buf->th_doacross_info[0];
4288 lo = pr_buf->th_doacross_info[2];
4289 up = pr_buf->th_doacross_info[3];
4290 st = pr_buf->th_doacross_info[4];
4291 #if OMPT_SUPPORT && OMPT_OPTIONAL
4292 SimpleVLA<ompt_dependence_t> deps(num_dims);
4293 #endif
4294 if (st == 1) { // most common case
4295 if (vec[0] < lo || vec[0] > up) {
4296 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4297 "bounds [%lld,%lld]\n",
4298 gtid, vec[0], lo, up));
4299 return;
4300 }
4301 iter_number = vec[0] - lo;
4302 } else if (st > 0) {
4303 if (vec[0] < lo || vec[0] > up) {
4304 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4305 "bounds [%lld,%lld]\n",
4306 gtid, vec[0], lo, up));
4307 return;
4308 }
4309 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4310 } else { // negative increment
4311 if (vec[0] > lo || vec[0] < up) {
4312 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4313 "bounds [%lld,%lld]\n",
4314 gtid, vec[0], lo, up));
4315 return;
4316 }
4317 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4318 }
4319 #if OMPT_SUPPORT && OMPT_OPTIONAL
4320 deps[0].variable.value = iter_number;
4321 deps[0].dependence_type = ompt_dependence_type_sink;
4322 #endif
4323 for (i = 1; i < num_dims; ++i) {
4324 kmp_int64 iter, ln;
4325 size_t j = i * 4;
4326 ln = pr_buf->th_doacross_info[j + 1];
4327 lo = pr_buf->th_doacross_info[j + 2];
4328 up = pr_buf->th_doacross_info[j + 3];
4329 st = pr_buf->th_doacross_info[j + 4];
4330 if (st == 1) {
4331 if (vec[i] < lo || vec[i] > up) {
4332 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4333 "bounds [%lld,%lld]\n",
4334 gtid, vec[i], lo, up));
4335 return;
4336 }
4337 iter = vec[i] - lo;
4338 } else if (st > 0) {
4339 if (vec[i] < lo || vec[i] > up) {
4340 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4341 "bounds [%lld,%lld]\n",
4342 gtid, vec[i], lo, up));
4343 return;
4344 }
4345 iter = (kmp_uint64)(vec[i] - lo) / st;
4346 } else { // st < 0
4347 if (vec[i] > lo || vec[i] < up) {
4348 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4349 "bounds [%lld,%lld]\n",
4350 gtid, vec[i], lo, up));
4351 return;
4352 }
4353 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4354 }
4355 iter_number = iter + ln * iter_number;
4356 #if OMPT_SUPPORT && OMPT_OPTIONAL
4357 deps[i].variable.value = iter;
4358 deps[i].dependence_type = ompt_dependence_type_sink;
4359 #endif
4360 }
4361 shft = iter_number % 32; // use 32-bit granularity
4362 iter_number >>= 5; // divided by 32
4363 flag = 1 << shft;
4364 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4365 KMP_YIELD(TRUE);
4366 }
4367 KMP_MB();
4368 #if OMPT_SUPPORT && OMPT_OPTIONAL
4369 if (ompt_enabled.ompt_callback_dependences) {
4370 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4371 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4372 }
4373 #endif
4374 KA_TRACE(20,
4375 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4376 gtid, (iter_number << 5) + shft));
4377 }
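/* Worked example of the linearization above (illustrative numbers): two
   dimensions, both with stride 1, dim 0 spanning 0..9 and dim 1 spanning 0..4,
   so the kept range length of dim 1 is ln = 5. For the iteration vector (3, 2):
       iter_number = 2 + 5 * 3 = 17
   and the waiter spins on bit 17 % 32 = 17 of flags word 17 >> 5 = 0 until the
   matching __kmpc_doacross_post() sets it. */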
4378
4379 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4380 __kmp_assert_valid_gtid(gtid);
4381 kmp_int64 shft;
4382 size_t num_dims, i;
4383 kmp_uint32 flag;
4384 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4385 kmp_info_t *th = __kmp_threads[gtid];
4386 kmp_team_t *team = th->th.th_team;
4387 kmp_disp_t *pr_buf;
4388 kmp_int64 lo, st;
4389
4390 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4391 if (team->t.t_serialized) {
4392 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4393 return; // no dependencies if team is serialized
4394 }
4395
4396 // calculate sequential iteration number (same as in "wait" but no
4397 // out-of-bounds checks)
4398 pr_buf = th->th.th_dispatch;
4399 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4400 num_dims = (size_t)pr_buf->th_doacross_info[0];
4401 lo = pr_buf->th_doacross_info[2];
4402 st = pr_buf->th_doacross_info[4];
4403 #if OMPT_SUPPORT && OMPT_OPTIONAL
4404 SimpleVLA<ompt_dependence_t> deps(num_dims);
4405 #endif
4406 if (st == 1) { // most common case
4407 iter_number = vec[0] - lo;
4408 } else if (st > 0) {
4409 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4410 } else { // negative increment
4411 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4412 }
4413 #if OMPT_SUPPORT && OMPT_OPTIONAL
4414 deps[0].variable.value = iter_number;
4415 deps[0].dependence_type = ompt_dependence_type_source;
4416 #endif
4417 for (i = 1; i < num_dims; ++i) {
4418 kmp_int64 iter, ln;
4419 size_t j = i * 4;
4420 ln = pr_buf->th_doacross_info[j + 1];
4421 lo = pr_buf->th_doacross_info[j + 2];
4422 st = pr_buf->th_doacross_info[j + 4];
4423 if (st == 1) {
4424 iter = vec[i] - lo;
4425 } else if (st > 0) {
4426 iter = (kmp_uint64)(vec[i] - lo) / st;
4427 } else { // st < 0
4428 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4429 }
4430 iter_number = iter + ln * iter_number;
4431 #if OMPT_SUPPORT && OMPT_OPTIONAL
4432 deps[i].variable.value = iter;
4433 deps[i].dependence_type = ompt_dependence_type_source;
4434 #endif
4435 }
4436 #if OMPT_SUPPORT && OMPT_OPTIONAL
4437 if (ompt_enabled.ompt_callback_dependences) {
4438 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4439 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4440 }
4441 #endif
4442 shft = iter_number % 32; // use 32-bit granularity
4443 iter_number >>= 5; // divided by 32
4444 flag = 1 << shft;
4445 KMP_MB();
4446 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4447 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4448 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4449 (iter_number << 5) + shft));
4450 }
4451
4452 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4453 __kmp_assert_valid_gtid(gtid);
4454 kmp_int32 num_done;
4455 kmp_info_t *th = __kmp_threads[gtid];
4456 kmp_team_t *team = th->th.th_team;
4457 kmp_disp_t *pr_buf = th->th.th_dispatch;
4458
4459 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4460 if (team->t.t_serialized) {
4461 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4462 return; // nothing to do
4463 }
4464 num_done =
4465 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4466 if (num_done == th->th.th_team_nproc) {
4467 // we are the last thread, need to free shared resources
4468 int idx = pr_buf->th_doacross_buf_idx - 1;
4469 dispatch_shared_info_t *sh_buf =
4470 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4471 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4472 (kmp_int64)&sh_buf->doacross_num_done);
4473 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4474 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4475 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4476 sh_buf->doacross_flags = NULL;
4477 sh_buf->doacross_num_done = 0;
4478 sh_buf->doacross_buf_idx +=
4479 __kmp_dispatch_num_buffers; // free buffer for future re-use
4480 }
4481 // free private resources (need to keep buffer index forever)
4482 pr_buf->th_doacross_flags = NULL;
4483 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4484 pr_buf->th_doacross_info = NULL;
4485 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4486 }
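/* Lowering sketch for the four doacross entry points above (an assumption about
   typical compiler output for "#pragma omp for ordered(1)" with
   depend(sink: i - 1) / depend(source); loc, lb, ub and n are placeholders):

     struct kmp_dim dims[1];
     dims[0].lo = 0;
     dims[0].up = n - 1; // inclusive upper bound, as __kmpc_doacross_init expects
     dims[0].st = 1;
     __kmpc_doacross_init(&loc, gtid, 1, dims);
     for (kmp_int64 i = lb; i <= ub; ++i) {
       kmp_int64 sink_vec[1] = {i - 1};
       __kmpc_doacross_wait(&loc, gtid, sink_vec); // ordered depend(sink: i - 1)
       // ... loop body ...
       kmp_int64 src_vec[1] = {i};
       __kmpc_doacross_post(&loc, gtid, src_vec); // ordered depend(source)
     }
     __kmpc_doacross_fini(&loc, gtid);
*/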
4487
4488 /* OpenMP 5.1 Memory Management routines */
4489 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4490 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4491 }
4492
4493 void *omp_aligned_alloc(size_t align, size_t size,
4494 omp_allocator_handle_t allocator) {
4495 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4496 }
4497
4498 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4499 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4500 }
4501
4502 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4503 omp_allocator_handle_t allocator) {
4504 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4505 }
4506
4507 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4508 omp_allocator_handle_t free_allocator) {
4509 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4510 free_allocator);
4511 }
4512
4513 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4514 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4515 }
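/* Minimal usage sketch for the thin wrappers above, using the spec-defined
   omp_default_mem_alloc allocator:

     void *p = omp_aligned_alloc(64, 1024, omp_default_mem_alloc);
     void *q = omp_calloc(16, sizeof(double), omp_default_mem_alloc);
     p = omp_realloc(p, 2048, omp_default_mem_alloc, omp_default_mem_alloc);
     omp_free(q, omp_default_mem_alloc);
     omp_free(p, omp_default_mem_alloc);
*/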
4516 /* end of OpenMP 5.1 Memory Management routines */
4517
4518 int __kmpc_get_target_offload(void) {
4519 if (!__kmp_init_serial) {
4520 __kmp_serial_initialize();
4521 }
4522 return __kmp_target_offload;
4523 }
4524
4525 int __kmpc_pause_resource(kmp_pause_status_t level) {
4526 if (!__kmp_init_serial) {
4527 return 1; // Can't pause if runtime is not initialized
4528 }
4529 return __kmp_pause_resource(level);
4530 }
4531
4532 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4533 if (!__kmp_init_serial)
4534 __kmp_serial_initialize();
4535
4536 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4537
4538 #if OMPT_SUPPORT
4539 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4540 ompt_callbacks.ompt_callback(ompt_callback_error)(
4541 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4542 OMPT_GET_RETURN_ADDRESS(0));
4543 }
4544 #endif // OMPT_SUPPORT
4545
4546 char *src_loc;
4547 if (loc && loc->psource) {
4548 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4549 src_loc =
4550 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4551 __kmp_str_loc_free(&str_loc);
4552 } else {
4553 src_loc = __kmp_str_format("unknown");
4554 }
4555
4556 if (severity == severity_warning)
4557 KMP_WARNING(UserDirectedWarning, src_loc, message);
4558 else
4559 KMP_FATAL(UserDirectedError, src_loc, message);
4560
4561 __kmp_str_free(&src_loc);
4562 }
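/* Hedged sketch: this entry point is a natural target for the OpenMP error
   directive; a compiler could plausibly lower
     #pragma omp error at(execution) severity(warning) message("bad input")
   to something like
     __kmpc_error(&loc, severity_warning, "bad input");
   The exact lowering is compiler-specific and only assumed here. */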
4563
4564 // Mark begin of scope directive.
4565 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4566 // reserved is for extension of scope directive and not used.
4567 #if OMPT_SUPPORT && OMPT_OPTIONAL
4568 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4569 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4570 int tid = __kmp_tid_from_gtid(gtid);
4571 ompt_callbacks.ompt_callback(ompt_callback_work)(
4572 ompt_work_scope, ompt_scope_begin,
4573 &(team->t.ompt_team_info.parallel_data),
4574 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4575 OMPT_GET_RETURN_ADDRESS(0));
4576 }
4577 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4578 }
4579
4580 // Mark end of scope directive
4581 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4582 // reserved is for extension of scope directive and not used.
4583 #if OMPT_SUPPORT && OMPT_OPTIONAL
4584 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4585 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4586 int tid = __kmp_tid_from_gtid(gtid);
4587 ompt_callbacks.ompt_callback(ompt_callback_work)(
4588 ompt_work_scope, ompt_scope_end,
4589 &(team->t.ompt_team_info.parallel_data),
4590 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4591 OMPT_GET_RETURN_ADDRESS(0));
4592 }
4593 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4594 }
4595
4596 #ifdef KMP_USE_VERSION_SYMBOLS
4597 // For GOMP compatibility there are two versions of each omp_* API.
4598 // One is the plain C symbol and one is the Fortran symbol with an appended
4599 // underscore. When we implement a specific ompc_* version of an omp_*
4600 // function, we want the plain GOMP versioned symbol to alias the ompc_* version
4601 // instead of the Fortran versions in kmp_ftn_entry.h
4602 extern "C" {
4603 // Have to undef these from omp.h so they aren't translated into
4604 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4605 #ifdef omp_set_affinity_format
4606 #undef omp_set_affinity_format
4607 #endif
4608 #ifdef omp_get_affinity_format
4609 #undef omp_get_affinity_format
4610 #endif
4611 #ifdef omp_display_affinity
4612 #undef omp_display_affinity
4613 #endif
4614 #ifdef omp_capture_affinity
4615 #undef omp_capture_affinity
4616 #endif
4617 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4618 "OMP_5.0");
4619 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4620 "OMP_5.0");
4621 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4622 "OMP_5.0");
4623 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4624 "OMP_5.0");
4625 } // extern "C"
4626 #endif
4627