Lines Matching +full:non +full:- +full:volatile
2 * kmp_dispatch.h: dynamic scheduling - iteration initialization and dispatch.
5 //===----------------------------------------------------------------------===//
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //===----------------------------------------------------------------------===//
16 /* ------------------------------------------------------------------------ */
17 /* ------------------------------------------------------------------------ */
30 #include "ompt-internal.h"
31 #include "ompt-specific.h"
34 /* ------------------------------------------------------------------------ */
35 /* ------------------------------------------------------------------------ */
58 dispatch_shared_info_template<T> volatile *sh, kmp_int32 *p_last, T *p_lb,
82 /* parm[1-4] are used in different ways by different scheduling algorithms */
86 // b) all parm1-4 are in the same cache line.
87 // Because parm1-4 are used together, performance seems to be better
97 UT pchunks; // total number of chunks for processes with p-core
98 UT num_procs_with_pcore; // number of threads with p-core
161 /* chunk index under dynamic, number of idle threads under static-steal;
163 volatile UT iteration;
164 volatile ST num_done;
165 volatile UT ordered_iteration;
167 UT ordered_dummy[KMP_MAX_ORDERED - 3];
178 volatile kmp_uint32 buffer_index;
179 volatile kmp_int32 doacross_buf_idx; // teamwise index
193 /* ------------------------------------------------------------------------ */
194 /* ------------------------------------------------------------------------ */
199 template <typename T> static __forceinline T test_then_add(volatile T *p, T d);
202 __forceinline kmp_int32 test_then_add<kmp_int32>(volatile kmp_int32 *p,
210 __forceinline kmp_int64 test_then_add<kmp_int64>(volatile kmp_int64 *p,
218 template <typename T> static __forceinline T test_then_inc_acq(volatile T *p);
221 __forceinline kmp_int32 test_then_inc_acq<kmp_int32>(volatile kmp_int32 *p) {
228 __forceinline kmp_int64 test_then_inc_acq<kmp_int64>(volatile kmp_int64 *p) {
235 template <typename T> static __forceinline T test_then_inc(volatile T *p);
238 __forceinline kmp_int32 test_then_inc<kmp_int32>(volatile kmp_int32 *p) {
245 __forceinline kmp_int64 test_then_inc<kmp_int64>(volatile kmp_int64 *p) {
253 static __forceinline kmp_int32 compare_and_swap(volatile T *p, T c, T s);
256 __forceinline kmp_int32 compare_and_swap<kmp_int32>(volatile kmp_int32 *p,
262 __forceinline kmp_int32 compare_and_swap<kmp_int64>(volatile kmp_int64 *p,
276 Waits until function returns non-zero when called with *spinner and checker.
279 UT is unsigned 4- or 8-byte type
280 spinner - memory location to check value
281 checker - value which spinner is >, <, ==, etc.
282 pred - predicate function to perform binary comparison of some sort
284 obj -- is higher-level synchronization object to report to ittnotify. It
289 same address, not an address of low-level spinner.
294 static UT __kmp_wait(volatile UT *spinner, UT checker,
297 volatile UT *spin = spinner;
310 /* GEH - remove this since it was accidentally introduced when kmp_wait was
322 /* ------------------------------------------------------------------------ */
323 /* ------------------------------------------------------------------------ */
332 KMP_DEBUG_ASSERT(th->th.th_dispatch);
337 th->th.th_dispatch->th_dispatch_pr_current);
338 if (pr->pushed_ws != ct_none) {
347 if (!th->th.th_team->t.t_serialized) {
350 th->th.th_dispatch->th_dispatch_sh_current);
355 th->th.th_dispatch->th_dispatch_pr_current);
357 lower = pr->u.p.ordered_lower;
361 if (pr->ordered_bumped) {
362 struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
365 &p->stack_data[p->w_top]);
378 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
382 __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
392 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
408 KMP_DEBUG_ASSERT(th->th.th_dispatch);
413 th->th.th_dispatch->th_dispatch_pr_current);
414 if (pr->pushed_ws != ct_none) {
419 if (!th->th.th_team->t.t_serialized) {
422 th->th.th_dispatch->th_dispatch_sh_current);
426 th->th.th_dispatch->th_dispatch_pr_current);
429 KMP_FSYNC_RELEASING(CCAST(UT *, &sh->u.s.ordered_iteration));
432 if (pr->ordered_bumped != 0) {
433 struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
434 /* How to test it? - OM */
437 &p->stack_data[p->w_top]);
444 pr->ordered_bumped += 1;
448 gtid, pr->ordered_bumped));
453 test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
460 /* Computes and returns x to the power of y, where y must be a non-negative integer
489 /* Note: On Windows* OS on IA-32 architecture and Intel(R) 64, at
493 Windows* OS on IA-32 architecture. The lack of precision is not
505 // Parameters of the guided-iterative algorithm: