
2  * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
5 //===----------------------------------------------------------------------===//
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //===----------------------------------------------------------------------===//
21 #include "ompt-specific.h"
57 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; in __kmp_trace_task_stack()
58 kmp_taskdata_t **stack_top = task_stack->ts_top; in __kmp_trace_task_stack()
59 kmp_int32 entries = task_stack->ts_entries; in __kmp_trace_task_stack()
66 location, gtid, entries, task_stack->ts_first_block, stack_top)); in __kmp_trace_task_stack()
72 KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]); in __kmp_trace_task_stack()
77 stack_block = stack_block->sb_prev; in __kmp_trace_task_stack()
78 stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE]; in __kmp_trace_task_stack()
82 stack_top--; in __kmp_trace_task_stack()
83 entries--; in __kmp_trace_task_stack()
88 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); in __kmp_trace_task_stack()
95 KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]); in __kmp_trace_task_stack()
110 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; in __kmp_init_task_stack()
114 first_block = &task_stack->ts_first_block; in __kmp_init_task_stack()
115 task_stack->ts_top = (kmp_taskdata_t **)first_block; in __kmp_init_task_stack()
120 task_stack->ts_entries = TASK_STACK_EMPTY; in __kmp_init_task_stack()
121 first_block->sb_next = NULL; in __kmp_init_task_stack()
122 first_block->sb_prev = NULL; in __kmp_init_task_stack()
131 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; in __kmp_free_task_stack()
132 kmp_stack_block_t *stack_block = &task_stack->ts_first_block; in __kmp_free_task_stack()
134 KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY); in __kmp_free_task_stack()
137 kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL; in __kmp_free_task_stack()
139 stack_block->sb_next = NULL; in __kmp_free_task_stack()
140 stack_block->sb_prev = NULL; in __kmp_free_task_stack()
141 if (stack_block != &task_stack->ts_first_block) { in __kmp_free_task_stack()
148 task_stack->ts_entries = 0; in __kmp_free_task_stack()
149 task_stack->ts_top = NULL; in __kmp_free_task_stack()
160 // GEH - need to consider what to do if tt_threads_data not allocated yet in __kmp_push_task_stack()
162 &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)]; in __kmp_push_task_stack()
163 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; in __kmp_push_task_stack()
165 if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) { in __kmp_push_task_stack()
169 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); in __kmp_push_task_stack()
170 KMP_DEBUG_ASSERT(task_stack->ts_top != NULL); in __kmp_push_task_stack()
176 *(task_stack->ts_top) = tied_task; in __kmp_push_task_stack()
179 task_stack->ts_top++; in __kmp_push_task_stack()
180 task_stack->ts_entries++; in __kmp_push_task_stack()
182 if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) { in __kmp_push_task_stack()
185 (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE); in __kmp_push_task_stack()
188 if (stack_block->sb_next != in __kmp_push_task_stack()
190 task_stack->ts_top = &stack_block->sb_next->sb_block[0]; in __kmp_push_task_stack()
195 task_stack->ts_top = &new_block->sb_block[0]; in __kmp_push_task_stack()
196 stack_block->sb_next = new_block; in __kmp_push_task_stack()
197 new_block->sb_prev = stack_block; in __kmp_push_task_stack()
198 new_block->sb_next = NULL; in __kmp_push_task_stack()
219 // GEH - need to consider what to do if tt_threads_data not allocated yet in __kmp_pop_task_stack()
221 &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)]; in __kmp_pop_task_stack()
222 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; in __kmp_pop_task_stack()
225 if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) { in __kmp_pop_task_stack()
230 KMP_DEBUG_ASSERT(task_stack->ts_top != NULL); in __kmp_pop_task_stack()
231 KMP_DEBUG_ASSERT(task_stack->ts_entries > 0); in __kmp_pop_task_stack()
237 if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) { in __kmp_pop_task_stack()
238 kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top); in __kmp_pop_task_stack()
240 stack_block = stack_block->sb_prev; in __kmp_pop_task_stack()
241 task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE]; in __kmp_pop_task_stack()
245 task_stack->ts_top--; in __kmp_pop_task_stack()
246 task_stack->ts_entries--; in __kmp_pop_task_stack()
248 tied_task = *(task_stack->ts_top); in __kmp_pop_task_stack()
251 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); in __kmp_pop_task_stack()
266 if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
270 kmp_taskdata_t *current = taskcurr->td_last_tied;
273 if (current->td_flags.tasktype == TASK_EXPLICIT ||
274 current->td_taskwait_thread > 0) { // <= 0 on barrier
275 kmp_int32 level = current->td_level;
276 kmp_taskdata_t *parent = tasknew->td_parent;
277 while (parent != current && parent->td_level > level) {
279 parent = parent->td_parent;
287 kmp_depnode_t *node = tasknew->td_depnode;
289 if (!tasknew->is_taskgraph && UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
291 if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
293 for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
294 KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
295 if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
298 for (int j = i - 1; j >= 0; --j)
299 __kmp_release_lock(node->dn.mtx_locks[j], gtid);
303 node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
309 // Re-allocates a task deque for a particular thread, copies the content from
314 kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
315 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
326 for (i = thread_data->td.td_deque_head, j = 0; j < size;
327 i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
328 new_deque[j] = thread_data->td.td_deque[i];
330 __kmp_free(thread_data->td.td_deque);
332 thread_data->td.td_deque_head = 0;
333 thread_data->td.td_deque_tail = size;
334 thread_data->td.td_deque = new_deque;
335 thread_data->td.td_deque_size = new_size;
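
The grow-and-copy above, condensed into a self-contained sketch (simplified types and plain malloc, not the runtime's code): the circular deque is unrolled starting at td_deque_head, so afterwards head is 0 and tail equals the old size.

#include <stdlib.h>

typedef struct { void **buf; int size, head, tail; } deque_t; // simplified stand-in

// Double the deque, copying entries in logical order starting at head; the
// runtime only grows a deque when it is full, so all 'size' slots are copied.
static int grow_deque(deque_t *d) {
  int new_size = 2 * d->size;
  void **bigger = (void **)malloc(sizeof(void *) * (size_t)new_size);
  if (bigger == NULL)
    return -1;
  for (int i = d->head, j = 0; j < d->size; i = (i + 1) % d->size, j++)
    bigger[j] = d->buf[i];
  free(d->buf);
  d->buf = bigger;
  d->head = 0;
  d->tail = d->size; // the old capacity's worth of tasks now occupies the front
  d->size = new_size;
  return 0;
}
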
340 kmp_thread_data_t *thread_data = &l->td;
341 __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
342 thread_data->td.td_deque_last_stolen = -1;
346 thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
348 thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
353 // allocates a new deque and puts it into a sorted (high -> low) list of deques.
354 // Deques of non-default priority tasks are shared between all threads in the team,
355 // as opposed to per-thread deques of tasks with default priority.
356 // The function is called under the lock task_team->tt.tt_task_pri_lock.
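
A minimal sketch (simplified node type, not kmp_task_pri_t itself) of keeping that list sorted from highest to lowest priority; in the runtime the walk happens while holding tt_task_pri_lock, and an existing node of equal priority is reused rather than duplicated.

typedef struct pri_node { int priority; struct pri_node *next; } pri_node_t;

// Insert n so the list stays ordered high -> low; returns the (possibly new) head.
static pri_node_t *insert_by_priority(pri_node_t *head, pri_node_t *n) {
  if (head == NULL || n->priority > head->priority) {
    n->next = head; // n becomes the new highest-priority entry
    return n;
  }
  pri_node_t *cur = head;
  while (cur->next != NULL && cur->next->priority > n->priority)
    cur = cur->next;
  n->next = cur->next; // lands after the last strictly higher-priority node
  cur->next = n;
  return head;
}
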
360 kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
361 if (lst->priority == pri) {
363 thread_data = &lst->td;
364 } else if (lst->priority < pri) {
368 thread_data = &list->td;
369 list->priority = pri;
370 list->next = lst;
371 task_team->tt.tt_task_pri_list = list;
372 } else { // task_team->tt.tt_task_pri_list->priority > pri
373 kmp_task_pri_t *next_queue = lst->next;
374 while (next_queue && next_queue->priority > pri) {
376 next_queue = lst->next;
378 // lst->priority > pri && (next == NULL || pri >= next->priority)
382 thread_data = &list->td;
383 list->priority = pri;
384 list->next = NULL;
385 lst->next = list;
386 } else if (next_queue->priority == pri) {
388 thread_data = &next_queue->td;
389 } else { // lst->priority > pri > next->priority
392 thread_data = &list->td;
393 list->priority = pri;
394 list->next = next_queue;
395 lst->next = list;
412 kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
414 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
415 if (task_team->tt.tt_task_pri_list == NULL) {
418 thread_data = &list->td;
419 list->priority = pri;
420 list->next = NULL;
421 task_team->tt.tt_task_pri_list = list;
426 __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
428 if (lst->priority == pri) {
430 thread_data = &lst->td;
432 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
434 __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
439 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
441 if (TCR_4(thread_data->td.td_deque_ntasks) >=
442 TASK_DEQUE_SIZE(thread_data->td)) {
445 thread->th.th_current_task)) {
446 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
456 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
457 TASK_DEQUE_SIZE(thread_data->td));
459 thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
461 thread_data->td.td_deque_tail =
462 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
463 TCW_4(thread_data->td.td_deque_ntasks,
464 TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
465 KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
469 gtid, taskdata, thread_data->td.td_deque_ntasks,
470 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
471 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
472 task_team->tt.tt_num_task_pri++; // atomic inc
484 if (UNLIKELY(taskdata->td_flags.hidden_helper &&
493 kmp_task_team_t *task_team = thread->th.th_task_team;
500 if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
503 kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
512 if (UNLIKELY(taskdata->td_flags.task_serial)) {
525 KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
526 KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);
528 if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
530 int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority);
535 thread_data = &task_team->tt.tt_threads_data[tid];
540 if (UNLIKELY(thread_data->td.td_deque == NULL)) {
546 if (TCR_4(thread_data->td.td_deque_ntasks) >=
547 TASK_DEQUE_SIZE(thread_data->td)) {
550 thread->th.th_current_task)) {
556 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
558 if (TCR_4(thread_data->td.td_deque_ntasks) >=
559 TASK_DEQUE_SIZE(thread_data->td)) {
567 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
569 if (TCR_4(thread_data->td.td_deque_ntasks) >=
570 TASK_DEQUE_SIZE(thread_data->td)) {
573 thread->th.th_current_task)) {
574 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
586 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
587 TASK_DEQUE_SIZE(thread_data->td));
589 thread_data->td.td_deque[thread_data->td.td_deque_tail] =
592 thread_data->td.td_deque_tail =
593 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
594 TCW_4(thread_data->td.td_deque_ntasks,
595 TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
596 KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
600 gtid, taskdata, thread_data->td.td_deque_ntasks,
601 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
603 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
616 0, this_thr, this_thr->th.th_current_task,
617 this_thr->th.th_current_task->td_parent));
619 this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;
624 0, this_thr, this_thr->th.th_current_task,
625 this_thr->th.th_current_task->td_parent));
641 tid, this_thr, this_thr->th.th_current_task,
642 team->t.t_implicit_task_taskdata[tid].td_parent));
647 if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
648 team->t.t_implicit_task_taskdata[0].td_parent =
649 this_thr->th.th_current_task;
650 this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
653 team->t.t_implicit_task_taskdata[tid].td_parent =
654 team->t.t_implicit_task_taskdata[0].td_parent;
655 this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
661 tid, this_thr, this_thr->th.th_current_task,
662 team->t.t_implicit_task_taskdata[tid].td_parent));
679 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
681 // mark currently executing task as suspended
682 // TODO: GEH - make sure root team implicit task is initialized properly.
683 // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
684 current_task->td_flags.executing = 0;
688 if (taskdata->td_flags.tiedness == TASK_TIED) {
693 // mark starting task as executing and as current task
694 thread->th.th_current_task = taskdata;
696 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
697 taskdata->td_flags.tiedness == TASK_UNTIED);
698 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
699 taskdata->td_flags.tiedness == TASK_UNTIED);
700 taskdata->td_flags.started = 1;
701 taskdata->td_flags.executing = 1;
702 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
703 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
706 // APT: yes, we will pass location here.
716 //------------------------------------------------------------------------------
723 task->ompt_task_info.task_data.value = 0;
724 task->ompt_task_info.frame.exit_frame = ompt_data_none;
725 task->ompt_task_info.frame.enter_frame = ompt_data_none;
726 task->ompt_task_info.frame.exit_frame_flags =
728 task->ompt_task_info.frame.enter_frame_flags =
730 task->ompt_task_info.dispatch_chunk.start = 0;
731 task->ompt_task_info.dispatch_chunk.iterations = 0;
735 // Build and trigger task-begin event
741 if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
743 __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
748 &(current_task->ompt_task_info.task_data), status,
749 &(taskdata->ompt_task_info.task_data));
751 taskdata->ompt_task_info.scheduling_parent = current_task;
755 // Build and trigger final task-schedule event
761 if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
762 taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
768 &(taskdata->ompt_task_info.task_data), status,
769 (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
780 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
786 if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
789 kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
796 taskdata->td_flags.task_serial =
802 if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
803 current_task->ompt_task_info.frame.enter_frame.ptr =
804 taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
805 current_task->ompt_task_info.frame.enter_frame_flags =
806 taskdata->ompt_task_info.frame.exit_frame_flags =
810 ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
812 &(parent_info->task_data), &(parent_info->frame),
813 &(taskdata->ompt_task_info.task_data),
867 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
893 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
894 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
895 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
896 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
897 KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
898 taskdata->td_flags.task_serial == 1);
899 KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
901 // Clear data to not be re-used later by mistake.
902 task->data1.destructors = NULL;
903 task->data2.priority = 0;
905 taskdata->td_flags.freed = 1;
908 if (!taskdata->is_taskgraph) {
918 taskdata->td_flags.complete = 0;
919 taskdata->td_flags.started = 0;
920 taskdata->td_flags.freed = 0;
921 taskdata->td_flags.executing = 0;
922 taskdata->td_flags.task_serial =
923 (taskdata->td_parent->td_flags.final ||
924 taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser);
926 // taskdata->td_allow_completion_event.pending_events_count = 1;
927 KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
928 KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
930 KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
949 (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
950 !taskdata->td_flags.proxy;
951 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
953 kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
958 kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
964 // --- Deallocate my ancestor task ---
973 if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
974 if (taskdata->td_dephash) { // do we need to cleanup dephash?
975 int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
976 kmp_tasking_flags_t flags_old = taskdata->td_flags;
981 RCAST(kmp_int32 *, &taskdata->td_flags),
988 __kmp_dephash_free_entries(thread, taskdata->td_dephash);
994 // Predecrement simulated by "- 1" calculation
995 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
1014 kmp_tasking_flags_t flags = taskdata->td_flags;
1019 KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
1021 if (taskdata->td_taskgroup && taskdata->is_taskgraph)
1022 ret = ret || KMP_ATOMIC_LD_ACQ(&taskdata->td_taskgroup->count) > 0;
1042 thread->th.th_task_team; // might be NULL for serial teams...
1044 …// to avoid seg fault when we need to access taskdata->td_flags after free when using vanilla task…
1054 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
1057 is_taskgraph = taskdata->is_taskgraph;
1062 if (taskdata->td_flags.tiedness == TASK_TIED) {
1067 if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
1070 kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
1079 KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
1080 resumed_task = taskdata->td_parent; // In a serialized task, the resumed
1083 thread->th.th_current_task = resumed_task; // restore current_task
1084 resumed_task->td_flags.executing = 1; // resume previous task
1093 // GEH - note tasking_ser => task_serial
1095 (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
1096 taskdata->td_flags.task_serial);
1097 if (taskdata->td_flags.task_serial) {
1099 resumed_task = taskdata->td_parent; // In a serialized task, the resumed
1113 if (UNLIKELY(taskdata->td_flags.destructors_thunk)) {
1114 kmp_routine_entry_t destr_thunk = task->data1.destructors;
1119 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
1120 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
1121 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
1124 if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)) {
1125 if (taskdata->td_allow_completion_event.type ==
1128 __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
1129 if (taskdata->td_allow_completion_event.type ==
1132 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
1133 taskdata->td_flags.executing = 0; // suspend the finishing task
1146 taskdata->td_flags.proxy = TASK_PROXY; // proxify!
1149 __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
1153 // Tasks with valid target async handles must be re-enqueued.
1154 if (taskdata->td_target_data.async_handle != NULL) {
1170 taskdata->td_flags.complete = 1; // mark the task as completed
1172 taskdata->td_flags.onced = 1; // mark the task as ran once already
1184 // Predecrement simulated by "- 1" calculation
1186 children = -1 +
1188 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
1191 if (taskdata->td_taskgroup && !taskdata->is_taskgraph)
1193 if (taskdata->td_taskgroup)
1195 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
1196 } else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
1197 task_team->tt.tt_hidden_helper_task_encountered)) {
1206 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
1207 taskdata->td_flags.executing = 0; // suspend the finishing task
1210 if (taskdata->td_flags.hidden_helper) {
1225 thread->th.th_current_task = resumed_task;
1229 // TODO: GEH - make sure root team implicit task is initialized properly.
1230 // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
1231 resumed_task->td_flags.executing = 1; // resume previous task
1235 taskdata->td_taskgroup) {
1239 // taskdata->started, etc. If we release the barrier earlier, these
1241 // non-TDG implementation because we never reuse a task(data) structure
1242 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
1270 ompt_frame->enter_frame = ompt_data_none;
1271 ompt_frame->enter_frame_flags =
1333 kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];
1340 task->td_task_id = KMP_GEN_TASK_ID();
1341 task->td_team = team;
1342 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info
1344 task->td_ident = loc_ref;
1345 task->td_taskwait_ident = NULL;
1346 task->td_taskwait_counter = 0;
1347 task->td_taskwait_thread = 0;
1349 task->td_flags.tiedness = TASK_TIED;
1350 task->td_flags.tasktype = TASK_IMPLICIT;
1351 task->td_flags.proxy = TASK_FULL;
1354 task->td_flags.task_serial = 1;
1355 task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1356 task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1358 task->td_flags.started = 1;
1359 task->td_flags.executing = 1;
1360 task->td_flags.complete = 0;
1361 task->td_flags.freed = 0;
1363 task->td_flags.onced = 0;
1366 task->td_depnode = NULL;
1367 task->td_last_tied = task;
1368 task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1371 KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
1373 KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
1374 task->td_taskgroup = NULL; // An implicit task does not have taskgroup
1375 task->td_dephash = NULL;
1378 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
1379 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
1397 kmp_taskdata_t *task = thread->th.th_current_task;
1398 if (task->td_dephash) {
1400 task->td_flags.complete = 1;
1402 task->td_flags.onced = 1;
1404 children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
1405 kmp_tasking_flags_t flags_old = task->td_flags;
1409 if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
1414 thread->th.th_info.ds.ds_gtid, task));
1415 __kmp_dephash_free_entries(thread, task->td_dephash);
1426 kmp_taskdata_t *task = thread->th.th_current_task;
1427 if (task && task->td_dephash) {
1428 __kmp_dephash_free(thread, task->td_dephash);
1429 task->td_dephash = NULL;
1434 // between structures co-allocated using a single malloc() call
1436 if (size & (val - 1)) {
1437 size &= ~(val - 1);
1438 if (size <= KMP_SIZE_T_MAX - val) {
1464 kmp_team_t *team = thread->th.th_team;
1465 kmp_taskdata_t *parent_task = thread->th.th_current_task;
1471 if (flags->hidden_helper) {
1477 flags->hidden_helper = FALSE;
1487 if (parent_task->td_flags.final) {
1488 if (flags->merged_if0) {
1490 flags->final = 1;
1493 if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
1497 KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
1503 if (UNLIKELY(flags->proxy == TASK_PROXY ||
1504 flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
1505 if (flags->proxy == TASK_PROXY) {
1506 flags->tiedness = TASK_UNTIED;
1507 flags->merged_if0 = 1;
1511 if ((thread->th.th_task_team) == NULL) {
1514 KMP_DEBUG_ASSERT(team->t.t_serialized);
1519 thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
1521 kmp_task_team_t *task_team = thread->th.th_task_team;
1529 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
1530 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
1532 if (thread_data->td.td_deque == NULL) {
1537 if ((flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) &&
1538 task_team->tt.tt_found_proxy_tasks == FALSE)
1539 TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
1540 if (flags->hidden_helper &&
1541 task_team->tt.tt_hidden_helper_task_encountered == FALSE)
1542 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
1569 KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
1570 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
1572 KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
1573 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
1577 task->shareds = &((char *)taskdata)[shareds_offset];
1579 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
1582 task->shareds = NULL;
1584 task->routine = task_entry;
1585 task->part_id = 0; // AC: Always start with 0 part id
1587 taskdata->td_task_id = KMP_GEN_TASK_ID();
1588 taskdata->td_team = thread->th.th_team;
1589 taskdata->td_alloc_thread = thread;
1590 taskdata->td_parent = parent_task;
1591 taskdata->td_level = parent_task->td_level + 1; // increment nesting level
1592 KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
1593 taskdata->td_ident = loc_ref;
1594 taskdata->td_taskwait_ident = NULL;
1595 taskdata->td_taskwait_counter = 0;
1596 taskdata->td_taskwait_thread = 0;
1597 KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
1599 if (flags->proxy == TASK_FULL)
1600 copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
1602 taskdata->td_flags = *flags;
1603 taskdata->td_task_team = thread->th.th_task_team;
1604 taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
1605 taskdata->td_flags.tasktype = TASK_EXPLICIT;
1608 if (flags->hidden_helper) {
1610 taskdata->td_team = shadow_thread->th.th_team;
1611 taskdata->td_task_team = shadow_thread->th.th_task_team;
1614 // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
1615 taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1617 // GEH - TODO: fix this to copy parent task's value of team_serial flag
1618 taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1620 // GEH - Note we serialize the task if the team is serialized to make sure
1624 taskdata->td_flags.task_serial =
1625 (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
1626 taskdata->td_flags.tasking_ser || flags->merged_if0);
1628 taskdata->td_flags.started = 0;
1629 taskdata->td_flags.executing = 0;
1630 taskdata->td_flags.complete = 0;
1631 taskdata->td_flags.freed = 0;
1633 taskdata->td_flags.onced = 0;
1635 KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
1637 KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
1638 taskdata->td_taskgroup =
1639 parent_task->td_taskgroup; // task inherits taskgroup from the parent task
1640 taskdata->td_dephash = NULL;
1641 taskdata->td_depnode = NULL;
1642 taskdata->td_target_data.async_handle = NULL;
1643 if (flags->tiedness == TASK_UNTIED)
1644 taskdata->td_last_tied = NULL; // will be set when the task is scheduled
1646 taskdata->td_last_tied = taskdata;
1647 taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1655 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
1656 if (parent_task->td_taskgroup)
1657 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
1660 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
1661 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
1663 if (flags->hidden_helper) {
1664 taskdata->td_flags.task_serial = FALSE;
1672 if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
1674 taskdata->is_taskgraph = 1;
1675 taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
1676 taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
1680 gtid, taskdata, taskdata->td_parent));
1692 input_flags->native = FALSE;
1696 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1697 input_flags->proxy ? "proxy" : "",
1698 input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
1735 @return Returns non-zero if registering affinity information was not successful.
1765 if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&
1766 taskdata->td_flags.complete == 1)) {
1768 // its bottom-half finish
1790 oldInfo = thread->th.ompt_thread_info;
1791 thread->th.ompt_thread_info.wait_id = 0;
1792 thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
1795 taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1800 if (taskdata->td_flags.proxy != TASK_PROXY) {
1809 kmp_team_t *this_team = thread->th.th_team;
1810 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
1811 if ((taskgroup && taskgroup->cancel_request) ||
1812 (this_team->t.t_cancel_request == cancel_parallel)) {
1819 ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
1834 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
1835 taskdata->td_last_tied = current_task->td_last_tied;
1836 KMP_DEBUG_ASSERT(taskdata->td_last_tied);
1869 taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) {
1871 instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
1874 &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
1876 taskdata->ompt_task_info.dispatch_chunk = {0, 0};
1888 __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
1889 current_task->td_flags.tasktype == TASK_IMPLICIT;
1893 if (thread->th.th_bar_arrive_time)
1896 kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
1902 if (taskdata->td_target_data.async_handle != NULL) {
1905 // instead of re-executing the routine.
1907 tgt_target_nowait_query(&taskdata->td_target_data.async_handle);
1910 if (task->routine != NULL) {
1912 if (taskdata->td_flags.native) {
1913 ((void (*)(void *))(*(task->routine)))(task->shareds);
1917 (*(task->routine))(gtid, task);
1924 // Barrier imbalance - adjust arrive time with the task duration
1925 thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
1928 KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent
1938 if (taskdata->td_flags.proxy != TASK_PROXY) {
1941 thread->th.ompt_thread_info = oldInfo;
1942 if (taskdata->td_flags.tiedness == TASK_TIED) {
1943 taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
1951 else if (UNLIKELY(ompt_enabled.enabled && taskdata->td_flags.target)) {
1963 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1983 parent = new_taskdata->td_parent;
1986 &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
1987 &(new_taskdata->ompt_task_info.task_data),
1999 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
2000 new_taskdata->td_flags.task_serial = 1;
2012 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
2018 // __kmp_omp_task: Schedule a non-thread-switchable task for execution
2021 // new_task:non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
2034 if (new_taskdata->is_taskgraph &&
2035 __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
2036 kmp_tdg_info_t *tdg = new_taskdata->tdg;
2038 if (new_taskdata->td_task_id >= new_taskdata->tdg->map_size) {
2039 __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
2042 if (new_taskdata->td_task_id >= tdg->map_size) {
2043 kmp_uint old_size = tdg->map_size;
2045 kmp_node_info_t *old_record = tdg->record_map;
2050 tdg->record_map = new_record;
2066 tdg->map_size = new_size;
2068 __kmp_release_bootstrap_lock(&tdg->graph_lock);
2071 if (tdg->record_map[new_taskdata->td_task_id].task == nullptr) {
2072 tdg->record_map[new_taskdata->td_task_id].task = new_task;
2073 tdg->record_map[new_taskdata->td_task_id].parent_task =
2074 new_taskdata->td_parent;
2075 KMP_ATOMIC_INC(&tdg->num_tasks);
2082 if (new_taskdata->td_flags.proxy == TASK_PROXY ||
2085 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
2087 new_taskdata->td_flags.task_serial = 1;
2092 kmp_team_t *team = this_thr->th.th_team;
2093 kmp_int32 nthreads = this_thr->th.th_team_nproc;
2095 kmp_info_t *thread = team->t.t_threads[i];
2098 if (thread->th.th_sleep_loc != NULL) {
2108 // non-thread-switchable task from the parent thread only!
2112 // new_task: non-thread-switchable task thunk allocated by
2134 if (!new_taskdata->td_flags.started) {
2136 parent = new_taskdata->td_parent;
2137 if (!parent->ompt_task_info.frame.enter_frame.ptr) {
2138 parent->ompt_task_info.frame.enter_frame.ptr =
2143 &(parent->ompt_task_info.task_data),
2144 &(parent->ompt_task_info.frame),
2145 &(new_taskdata->ompt_task_info.task_data),
2153 new_taskdata->ompt_task_info.scheduling_parent,
2155 new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
2167 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
2178 // new_task: non-thread-switchable task thunk allocated by
2199 if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
2200 parent = new_taskdata->td_parent;
2201 if (!parent->ompt_task_info.frame.enter_frame.ptr)
2202 parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2205 &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
2206 &(new_taskdata->ompt_task_info.task_data),
2219 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
2239 taskdata = thread->th.th_current_task;
2246 my_task_data = &(taskdata->ompt_task_info.task_data);
2249 taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;
2270 taskdata->td_taskwait_counter += 1;
2271 taskdata->td_taskwait_ident = loc_ref;
2272 taskdata->td_taskwait_thread = gtid + 1;
2282 !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
2284 must_wait = must_wait || (thread->th.th_task_team != NULL &&
2285 thread->th.th_task_team->tt.tt_found_proxy_tasks);
2289 (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
2290 thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);
2295 &(taskdata->td_incomplete_child_tasks)),
2297 while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
2305 KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children
2310 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2324 taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
2374 taskdata = thread->th.th_current_task;
2381 taskdata->td_taskwait_counter += 1;
2382 taskdata->td_taskwait_ident = loc_ref;
2383 taskdata->td_taskwait_thread = gtid + 1;
2391 if (!taskdata->td_flags.team_serial) {
2392 kmp_task_team_t *task_team = thread->th.th_task_team;
2397 thread->th.ompt_thread_info.ompt_task_yielded = 1;
2405 thread->th.ompt_thread_info.ompt_task_yielded = 0;
2416 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2442 /*! 1 - use lazy alloc/init (e.g. big objects, num tasks < num threads) */
2453 // three compiler-generated routines (init, fini are optional):
2469 // three compiler-generated routines (init, fini are optional):
2485 // three compiler-generated routines (init, fini are optional):
2508 } // non-NULL reduce_orig means new interface used
2528 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2529 kmp_uint32 nth = thread->th.th_team_nproc;
2546 size_t size = data[i].reduce_size - 1;
2547 // round the size up to cache line per thread-specific item
2548 size += CACHE_LINE - size % CACHE_LINE;
2558 // allocate cache-line aligned block and fill it with zeros
2562 // initialize all thread-specific items
2574 tg->reduce_data = (void *)arr;
2575 tg->reduce_num_data = num;
2588 Note: this entry supposes the optional compiler-generated initializer routine
2589 has single parameter - pointer to object to be initialized. That means
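
For illustration only (not part of the runtime), an initializer with the single-parameter shape this note describes simply receives a pointer to the private copy it must set up:

// Hypothetical compiler-generated initializer matching the contract above.
static void red_init_sum(void *priv) { *(long *)priv = 0; }
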
2596 if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
2598 this_tdg->rec_taskred_data =
2600 this_tdg->rec_num_taskred = num;
2601 KMP_MEMCPY(this_tdg->rec_taskred_data, data,
2617 Note: this entry supposes the optional compiler-generated initializer routine
2623 if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
2625 this_tdg->rec_taskred_data =
2627 this_tdg->rec_num_taskred = num;
2628 KMP_MEMCPY(this_tdg->rec_taskred_data, data,
2650 tg->reduce_data = (void *)arr;
2651 tg->reduce_num_data = num;
2659 @return The pointer to per-thread data
2661 Get thread-specific location of data item
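
As a usage illustration, a task body can ask the runtime for its thread-specific copy of a reduction item and accumulate into it. The declaration and parameter meanings below (global thread id, taskgroup pointer, address of the original item) are assumptions of this sketch, not taken from this excerpt.

// Illustrative only; parameter order/meaning assumed as noted above.
extern "C" void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *orig);

static void add_contribution(int gtid, void *tg, long *orig_sum, long val) {
  long *priv = (long *)__kmpc_task_reduction_get_th_data(gtid, tg, orig_sum);
  *priv += val; // combined into *orig_sum when the taskgroup completes
}
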
2666 kmp_int32 nth = thread->th.th_team_nproc;
2672 tg = thread->th.th_current_task->td_taskgroup;
2676 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
2679 if ((thread->th.th_current_task->is_taskgraph) &&
2681 __kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) {
2682 tg = thread->th.th_current_task->td_taskgroup;
2684 KMP_ASSERT(tg->reduce_data != NULL);
2685 arr = (kmp_taskred_data_t *)(tg->reduce_data);
2686 num = tg->reduce_num_data;
2692 arr = (kmp_taskred_data_t *)(tg->reduce_data);
2693 num = tg->reduce_num_data;
2725 KMP_ASSERT(tg->parent);
2726 tg = tg->parent;
2735 kmp_int32 nth = th->th.th_team_nproc;
2740 kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
2741 kmp_int32 num = tg->reduce_num_data;
2770 tg->reduce_data = NULL;
2771 tg->reduce_num_data = 0;
2778 __kmp_thread_free(th, tg->reduce_data);
2779 tg->reduce_data = NULL;
2780 tg->reduce_num_data = 0;
2788 kmp_int32 nth = thr->th.th_team_nproc;
2793 gtid, thr->th.th_current_task->td_taskgroup));
2794 return (void *)thr->th.th_current_task->td_taskgroup;
2796 kmp_team_t *team = thr->th.th_team;
2799 reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
2801 __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
2808 KMP_MEMCPY(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t));
2810 KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
2811 KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
2812 KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
2815 (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) ==
2820 tg = thr->th.th_current_task->td_taskgroup;
2837 Note: this entry supposes the optional compiler-generated initializer routine
2838 has single parameter - pointer to object to be initialized. That means
2859 Note: this entry supposes the optional compiler-generated initializer routine
2884 kmp_taskdata_t *taskdata = thread->th.th_current_task;
2888 KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
2889 KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
2890 tg_new->parent = taskdata->td_taskgroup;
2891 tg_new->reduce_data = NULL;
2892 tg_new->reduce_num_data = 0;
2893 tg_new->gomp_data = NULL;
2894 taskdata->td_taskgroup = tg_new;
2901 kmp_team_t *team = thread->th.th_team;
2902 ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
2904 ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;
2918 kmp_taskdata_t *taskdata = thread->th.th_current_task;
2919 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
2928 team = thread->th.th_team;
2929 my_task_data = taskdata->ompt_task_info.task_data;
2931 my_parallel_data = team->t.ompt_team_info.parallel_data;
2943 // mark task as waiting not on a barrier
2944 taskdata->td_taskwait_counter += 1;
2945 taskdata->td_taskwait_ident = loc;
2946 taskdata->td_taskwait_thread = gtid + 1;
2964 if (!taskdata->td_flags.team_serial ||
2965 (thread->th.th_task_team != NULL &&
2966 (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
2967 thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
2969 RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
2970 while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
2976 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting
2988 KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with descendants
2991 KMP_DEBUG_ASSERT(taskgroup->count == 0);
2993 if (taskgroup->reduce_data != NULL &&
2994 !taskgroup->gomp_data) { // need to reduce?
2997 kmp_team_t *t = thread->th.th_team;
2998 kmp_taskred_data_t *arr = (kmp_taskred_data_t *)taskgroup->reduce_data;
3001 if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
3004 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
3005 if (cnt == thread->th.th_team_nproc - 1) {
3012 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
3013 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
3019 } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
3023 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
3024 if (cnt == thread->th.th_team_nproc - 1) {
3030 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
3031 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
3043 taskdata->td_taskgroup = taskgroup->parent;
3065 int ntasks = task_team->tt.tt_num_task_pri;
3073 if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks,
3074 ntasks - 1))
3076 ntasks = task_team->tt.tt_num_task_pri;
3085 kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
3088 thread_data = &list->td;
3089 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3090 deque_ntasks = thread_data->td.td_deque_ntasks;
3092 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3095 list = list->next;
3099 int target = thread_data->td.td_deque_head;
3100 current = __kmp_threads[gtid]->th.th_current_task;
3101 taskdata = thread_data->td.td_deque[target];
3104 thread_data->td.td_deque_head =
3105 (target + 1) & TASK_DEQUE_MASK(thread_data->td);
3107 if (!task_team->tt.tt_untied_task_encountered) {
3109 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3113 thread_data->td.td_deque_tail));
3114 task_team->tt.tt_num_task_pri++; // atomic inc, restore value
3121 target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
3122 taskdata = thread_data->td.td_deque[target];
3131 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3136 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3137 task_team->tt.tt_num_task_pri++; // atomic inc, restore value
3143 target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
3144 thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
3148 thread_data->td.td_deque_tail ==
3149 (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)));
3150 thread_data->td.td_deque_tail = target; // tail -= 1 (wrapped))
3152 thread_data->td.td_deque_ntasks = deque_ntasks - 1;
3153 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3168 KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
3171 thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
3174 gtid, thread_data->td.td_deque_ntasks,
3175 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3177 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
3181 gtid, thread_data->td.td_deque_ntasks,
3182 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3186 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3188 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
3189 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3193 gtid, thread_data->td.td_deque_ntasks,
3194 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3198 tail = (thread_data->td.td_deque_tail - 1) &
3199 TASK_DEQUE_MASK(thread_data->td); // Wrap index.
3200 taskdata = thread_data->td.td_deque[tail];
3203 thread->th.th_current_task)) {
3205 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3209 gtid, thread_data->td.td_deque_ntasks,
3210 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3214 thread_data->td.td_deque_tail = tail;
3215 TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
3217 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3221 gtid, taskdata, thread_data->td.td_deque_ntasks,
3222 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3245 threads_data = task_team->tt.tt_threads_data;
3248 KMP_DEBUG_ASSERT(victim_tid < task_team->tt.tt_nproc);
3251 victim_thr = victim_td->td.td_thr;
3257 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
3258 victim_td->td.td_deque_tail));
3260 if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
3264 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
3265 victim_td->td.td_deque_tail));
3269 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
3271 int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
3274 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3278 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3282 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
3283 current = __kmp_threads[gtid]->th.th_current_task;
3284 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
3287 victim_td->td.td_deque_head =
3288 (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
3290 if (!task_team->tt.tt_untied_task_encountered) {
3292 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3296 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3301 target = victim_td->td.td_deque_head;
3304 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
3305 taskdata = victim_td->td.td_deque[target];
3314 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3318 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3324 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
3325 victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
3329 victim_td->td.td_deque_tail ==
3330 (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
3331 victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped))
3334 // We need to un-mark this victim as a finished victim. This must be done
3347 TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
3349 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3356 ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3376 kmp_task_team_t *task_team = thread->th.th_task_team;
3380 kmp_taskdata_t *current_task = thread->th.th_current_task;
3382 kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
3383 tid = thread->th.th_info.ds.ds_tid;
3395 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
3396 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3400 nthreads = task_team->tt.tt_nproc;
3401 unfinished_threads = &(task_team->tt.tt_unfinished_threads);
3408 if (task_team->tt.tt_num_task_pri) { // get priority task first
3418 if (victim_tid == -2) { // haven't stolen anything yet
3421 -1) // if we have a last stolen from victim, get the thread
3424 if (victim_tid != -1) { // found last victim
3432 victim_tid = __kmp_get_random(thread) % (nthreads - 1);
3449 (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
3472 // The pre-refactored code did not try more than 1 successful new
3478 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
3479 victim_tid = -2; // no successful victim found
3506 if (flag == NULL || (!final_spin && flag->done_check())) {
3513 if (thread->th.th_task_team == NULL) {
3532 KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks) == 0) {
3538 kmp_int32 count = -1 +
3547 // It is now unsafe to reference thread->th.th_team !!!
3548 // Decrementing task_team->tt.tt_unfinished_threads can allow the primary
3552 if (flag != NULL && flag->done_check()) {
3563 if (thread->th.th_task_team == NULL) {
3574 if (flag == NULL || (!final_spin && flag->done_check())) {
3584 KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks))
3670 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
3672 nthreads = task_team->tt.tt_nproc;
3674 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
3687 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3699 if (i == this_thr->th.th_info.ds.ds_tid) {
3708 if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
3769 __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
3770 KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
3773 thread_data->td.td_deque_last_stolen = -1;
3775 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
3776 KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
3777 KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
3786 thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
3788 thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
3795 if (thread_data->td.td_deque != NULL) {
3796 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3797 TCW_4(thread_data->td.td_deque_ntasks, 0);
3798 __kmp_free(thread_data->td.td_deque);
3799 thread_data->td.td_deque = NULL;
3800 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3805 if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
3814 // the lock allocs or enlarges the array and re-initializes the array elements.
3816 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
3817 // The current size is given by task_team -> tt.tt_max_threads.
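
A generic sketch of the grow-under-lock behaviour this comment describes, with simplified stand-in types (the real code uses tt_threads_lock, tt_threads_data and tt_max_threads, and re-checks tt_found_tasks after taking the lock):

#include <stdlib.h>
#include <string.h>
#include <pthread.h>

typedef struct { int tid; } thr_data_t; // stand-in for kmp_thread_data_t
typedef struct {
  pthread_mutex_t threads_lock;
  thr_data_t *threads_data;
  int max_threads;
} team_tasks_t;

// Only the caller holding the lock grows the array; existing entries are
// preserved and the newly added tail entries start zeroed.
static void grow_threads_data(team_tasks_t *tt, int nproc) {
  pthread_mutex_lock(&tt->threads_lock);
  if (nproc > tt->max_threads) {
    thr_data_t *bigger = (thr_data_t *)calloc((size_t)nproc, sizeof(*bigger));
    if (bigger != NULL) {
      if (tt->threads_data != NULL)
        memcpy(bigger, tt->threads_data,
               (size_t)tt->max_threads * sizeof(*bigger));
      free(tt->threads_data);
      tt->threads_data = bigger;
      tt->max_threads = nproc;
    }
  }
  pthread_mutex_unlock(&tt->threads_lock);
}
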
3824 if (TCR_4(task_team->tt.tt_found_tasks)) {
3829 threads_data_p = &task_team->tt.tt_threads_data;
3830 nthreads = task_team->tt.tt_nproc;
3831 maxthreads = task_team->tt.tt_max_threads;
3836 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3838 if (!TCR_4(task_team->tt.tt_found_tasks)) {
3840 kmp_team_t *team = thread->th.th_team;
3892 task_team->tt.tt_max_threads = nthreads;
3901 thread_data->td.td_thr = team->t.t_threads[i];
3903 if (thread_data->td.td_deque_last_stolen >= nthreads) {
3907 thread_data->td.td_deque_last_stolen = -1;
3912 TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
3915 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3923 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3924 if (task_team->tt.tt_threads_data != NULL) {
3926 for (i = 0; i < task_team->tt.tt_max_threads; i++) {
3927 __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
3929 __kmp_free(task_team->tt.tt_threads_data);
3930 task_team->tt.tt_threads_data = NULL;
3932 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3939 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3940 if (task_team->tt.tt_task_pri_list != NULL) {
3941 kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
3943 kmp_task_pri_t *next = list->next;
3944 __kmp_free_task_deque(&list->td);
3948 task_team->tt.tt_task_pri_list = NULL;
3950 __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3955 int team_nth = team->t.t_nproc;
3957 if (!task_team->tt.tt_active || team_nth != task_team->tt.tt_nproc) {
3958 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3959 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3960 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3961 TCW_4(task_team->tt.tt_nproc, team_nth);
3962 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, team_nth);
3963 TCW_4(task_team->tt.tt_active, TRUE);
3976 (thread ? __kmp_gtid_from_thread(thread) : -1), team));
3983 TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
3984 task_team->tt.tt_next = NULL;
3996 __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
3997 __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
4003 &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
4006 CCAST(kmp_uint32 *, &task_team->tt.tt_active),
4007 sizeof(task_team->tt.tt_active));
4010 // task_team->tt.tt_threads_data = NULL;
4011 // task_team->tt.tt_max_threads = 0;
4012 // task_team->tt.tt_next = NULL;
4019 (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
4020 KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
4029 thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
4034 KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
4035 task_team->tt.tt_next = __kmp_free_task_teams;
4053 __kmp_free_task_teams = task_team->tt.tt_next;
4054 task_team->tt.tt_next = NULL;
4057 if (task_team->tt.tt_threads_data != NULL) {
4060 if (task_team->tt.tt_task_pri_list != NULL) {
4074 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4076 (kmp_task_team_list_t *)(&team->t.t_task_team[0]);
4079 node->task_team = current->task_team;
4080 node->next = current->next;
4081 thread->th.th_task_team = current->task_team = NULL;
4082 current->next = node;
4087 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4089 (kmp_task_team_list_t *)(&team->t.t_task_team[0]);
4090 if (current->task_team) {
4091 __kmp_free_task_team(thread, current->task_team);
4093 kmp_task_team_list_t *next = current->next;
4095 current->task_team = next->task_team;
4096 current->next = next->next;
4099 thread->th.th_task_team = current->task_team;
4118 // TODO: GEH - this may be wrong because some sync would be necessary
4122 thread = thread->th.th_next_pool) {
4126 if (TCR_PTR(thread->th.th_task_team) == NULL) {
4132 // TODO: GEH - add this check for Linux* OS / OS X* as well?
4134 thread->th.th_task_team = NULL;
4148 if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
4175 if (team == this_thr->th.th_serial_team ||
4176 team == this_thr->th.th_root->r.r_root_team) {
4177 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4178 if (team->t.t_task_team[0] == NULL) {
4179 team->t.t_task_team[0] = __kmp_allocate_task_team(this_thr, team);
4183 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[0], team));
4186 __kmp_task_team_init(team->t.t_task_team[0], team);
4194 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL) {
4195 team->t.t_task_team[this_thr->th.th_task_state] =
4200 team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
4201 this_thr->th.th_task_state));
4210 int other_team = 1 - this_thr->th.th_task_state;
4212 if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
4213 team->t.t_task_team[other_team] = __kmp_allocate_task_team(this_thr, team);
4217 team->t.t_task_team[other_team], team->t.t_id, other_team));
4220 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
4227 team->t.t_task_team[other_team], team->t.t_id, other_team));
4236 kmp_task_team_t *task_team = team->t.t_task_team[i];
4241 for (int j = 0; j < task_team->tt.tt_nproc; ++j) {
4242 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
4243 if (thread_data->td.td_deque == NULL) {
4256 KMP_DEBUG_ASSERT(team != this_thr->th.th_serial_team);
4257 KMP_DEBUG_ASSERT(team != this_thr->th.th_root->r.r_root_team);
4261 this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);
4265 TCW_PTR(this_thr->th.th_task_team,
4266 team->t.t_task_team[this_thr->th.th_task_state]);
4270 __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
4271 team->t.t_id, this_thr->th.th_task_state));
4283 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
4286 KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
4298 &task_team->tt.tt_unfinished_threads),
4309 TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
4310 TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
4311 KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
4312 TCW_SYNC_4(task_team->tt.tt_active, FALSE);
4315 TCW_PTR(this_thr->th.th_task_team, NULL);
4327 &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
4355 // - the queue for that thread was created
4356 // - there's space in that queue
4362 kmp_task_team_t *task_team = taskdata->td_task_team;
4371 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
4373 if (thread_data->td.td_deque == NULL) {
4382 if (TCR_4(thread_data->td.td_deque_ntasks) >=
4383 TASK_DEQUE_SIZE(thread_data->td)) {
4391 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
4394 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
4395 if (TCR_4(thread_data->td.td_deque_ntasks) >=
4396 TASK_DEQUE_SIZE(thread_data->td)) {
4403 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
4405 if (TCR_4(thread_data->td.td_deque_ntasks) >=
4406 TASK_DEQUE_SIZE(thread_data->td)) {
4413 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
4422 thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
4424 thread_data->td.td_deque_tail =
4425 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
4426 TCW_4(thread_data->td.td_deque_ntasks,
4427 TCR_4(thread_data->td.td_deque_ntasks) + 1);
4434 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
4441 - the top half is the one that can be done from a thread outside the team
4442 - the bottom half must be run from a thread within the team
4449 - things that can be run before queuing the bottom half
4450 - things that must be run after queuing the bottom half
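
A minimal sketch of the ordering those bullets describe, using hypothetical helper names rather than the runtime's routines: part of the top half runs first, the bottom half is queued to a thread inside the team, and only afterwards is the parent's incomplete-child count released so barriers cannot be passed too early.

typedef struct proxy_task { int parent_incomplete_children; } proxy_task_t;

static void first_top_half(proxy_task_t *t) { (void)t; /* marking safe from any thread */ }
static void enqueue_bottom_half(proxy_task_t *t) { (void)t; /* push onto a team thread's deque */ }
static void second_top_half(proxy_task_t *t) { t->parent_incomplete_children--; }

static void finish_proxy(proxy_task_t *t) {
  first_top_half(t);      // things that can run before queuing the bottom half
  enqueue_bottom_half(t); // the bottom half must execute inside the team
  second_top_half(t);     // must run only after the bottom half is queued
}
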
4457 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
4458 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4459 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
4460 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
4462 taskdata->td_flags.complete = 1; // mark the task as completed
4464 taskdata->td_flags.onced = 1;
4467 if (taskdata->td_taskgroup)
4468 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
4472 KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG);
4478 // Predecrement simulated by "- 1" calculation
4479 children = -1 +
4481 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
4485 KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG);
4492 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4493 KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
4498 while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) &
4521 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4538 kmp_team_t *team = taskdata->td_team;
4539 kmp_int32 nthreads = team->t.t_nproc;
4550 thread = team->t.t_threads[k];
4562 thread = team->t.t_threads[i];
4563 if (thread->th.th_sleep_loc != NULL) {
4587 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4604 if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
4605 td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
4606 td->td_allow_completion_event.ed.task = task;
4607 __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
4609 return &td->td_allow_completion_event;
4613 if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
4614 kmp_task_t *ptask = event->ed.task;
4622 __kmp_acquire_tas_lock(&event->lock, gtid);
4623 if (taskdata->td_flags.proxy == TASK_PROXY) {
4633 event->type = KMP_EVENT_UNINITIALIZED;
4634 __kmp_release_tas_lock(&event->lock, gtid);
4645 kmp_team_t *team = taskdata->td_team;
4647 if (thread->th.th_team == team) {
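/* How this machinery surfaces in user code: the event initialized above is
   the one handed back for the OpenMP 5.0 detach clause, and omp_fulfill_event()
   eventually reaches __kmp_fulfill_event. async_start() below is a
   hypothetical placeholder for any asynchronous API whose completion callback
   fulfills the event. */
#include <omp.h>

extern void async_start(omp_event_handle_t ev); // hypothetical async API

void detach_example(void) {
  omp_event_handle_t ev;
#pragma omp task detach(ev)
  {
    // The task body may return immediately; the task only completes once
    // omp_fulfill_event(ev) has been called (here, by async_start's callback).
    async_start(ev);
  }
#pragma omp taskwait // also waits for the event to be fulfilled
}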
4665 // indicating whether we need to update task->td_task_id
4675 kmp_taskdata_t *parent_task = taskdata_src->td_parent; // same parent task
4681 KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
4683 KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
4684 task_size = taskdata_src->td_size_alloc;
4700 if (!taskdata->is_taskgraph || taskloop_recur)
4701 taskdata->td_task_id = KMP_GEN_TASK_ID();
4702 else if (taskdata->is_taskgraph &&
4703 __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
4704 taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
4706 taskdata->td_task_id = KMP_GEN_TASK_ID();
4708 if (task->shareds != NULL) { // need setup shareds pointer
4709 shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
4710 task->shareds = &((char *)taskdata)[shareds_offset];
4711 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
4714 taskdata->td_alloc_thread = thread;
4715 taskdata->td_parent = parent_task;
4717 taskdata->td_taskgroup = parent_task->td_taskgroup;
4720 if (taskdata->td_flags.tiedness == TASK_TIED)
4721 taskdata->td_last_tied = taskdata;
4725 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
4726 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
4727 if (parent_task->td_taskgroup)
4728 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
4731 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
4732 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
4737 thread, taskdata, taskdata->td_parent));
4740 __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
4765 lower_offset((char *)lb - (char *)task),
4766 upper_offset((char *)ub - (char *)task) {
4779 if (!taskdata->td_flags.native) {
4783 if (taskdata->td_size_loop_bounds == 4) {
4784 kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
4787 kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
4801 if (!taskdata->td_flags.native) {
4805 if (taskdata->td_size_loop_bounds == 4) {
4806 kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
4809 kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
4821 if (!taskdata->td_flags.native) {
4825 if (taskdata->td_size_loop_bounds == 4) {
4826 kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
4829 kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
4840 if (!taskdata->td_flags.native) {
4844 if (taskdata->td_size_loop_bounds == 4) {
4845 kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
4848 kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
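/* Sketch of the layout those accessors rely on (non-GOMP path, toy code): the
   loop bounds sit in the first two slots of task->shareds, read as 32- or
   64-bit values depending on td_size_loop_bounds. */
#include <cstdint>

static uint64_t toy_get_lb(void *shareds, int bound_size) {
  return bound_size == 4 ? (uint64_t)((int32_t *)shareds)[0]  // lb is slot 0
                         : ((uint64_t *)shareds)[0];
}

static uint64_t toy_get_ub(void *shareds, int bound_size) {
  return bound_size == 4 ? (uint64_t)((int32_t *)shareds)[1]  // ub is slot 1
                         : ((uint64_t *)shareds)[1];
}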
4892 kmp_taskdata_t *current_task = thread->th.th_current_task;
4909 chunk_minus_1 = grainsize - 1;
4912 --extras; // first extras iterations get bigger chunk (grainsize+1)
4918 if (i == num_tasks - 1) {
4925 KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
4926 if ((kmp_uint64)st > ub_glob - upper)
4930 if (upper - ub_glob < (kmp_uint64)(-st))
4945 // adjust task-specific bounds
4947 if (next_taskdata->td_flags.native) {
4948 next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
4966 OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
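/* Worked example of the distribution implied above, assuming the usual
   num_tasks-mode relationship grainsize = tc / num_tasks and
   extras = tc % num_tasks (toy code; the runtime derives these values before
   reaching the linear spawning loop): the first `extras` chunks get one extra
   iteration, as the comment about "bigger chunk (grainsize+1)" says. */
#include <cstdio>

static void toy_chunk_sizes(unsigned long long tc, unsigned long long num_tasks) {
  unsigned long long grainsize = tc / num_tasks;
  unsigned long long extras = tc % num_tasks;
  for (unsigned long long i = 0; i < num_tasks; ++i)
    std::printf("chunk %llu: %llu iterations\n", i,
                grainsize + (i < extras ? 1 : 0));
  // e.g. tc = 10, num_tasks = 4  ->  chunk sizes 3, 3, 2, 2
}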
5013 (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
5014 kmp_task_t *task = p->task;
5015 kmp_uint64 *lb = p->lb;
5016 kmp_uint64 *ub = p->ub;
5017 void *task_dup = p->task_dup;
5019 kmp_int64 st = p->st;
5020 kmp_uint64 ub_glob = p->ub_glob;
5021 kmp_uint64 num_tasks = p->num_tasks;
5022 kmp_uint64 grainsize = p->grainsize;
5023 kmp_uint64 extras = p->extras;
5024 kmp_int64 last_chunk = p->last_chunk;
5025 kmp_uint64 tc = p->tc;
5026 kmp_uint64 num_t_min = p->num_t_min;
5028 void *codeptr_ra = p->codeptr_ra;
5098 // kmp_taskdata_t *current_task = thread->th.th_current_task;
5101 (char *)lb - (char *)task; // remember offset of lb in the task structure
5103 (char *)ub - (char *)task; // remember offset of ub in the task structure
5115 kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
5120 tc1 = tc - tc0;
5124 ext1 = extras - n_tsk0; // remaining extras
5126 tc1 = tc - tc0;
5131 tc0 = tc - tc1;
5133 ub0 = lower + st * (tc0 - 1);
5151 kmp_taskdata_t *current_task = thread->th.th_current_task;
5152 thread->th.th_current_task = taskdata->td_parent;
5157 thread->th.th_current_task = current_task;
5158 __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
5159 p->task = next_task;
5160 p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
5161 p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
5162 p->task_dup = task_dup;
5163 p->st = st;
5164 p->ub_glob = ub_glob;
5165 p->num_tasks = n_tsk1;
5166 p->grainsize = grainsize;
5167 p->extras = ext1;
5168 p->last_chunk = last_chunk1;
5169 p->tc = tc1;
5170 p->num_t_min = num_t_min;
5172 p->codeptr_ra = codeptr_ra;
5177 new_task_data->tdg = taskdata->tdg;
5178 new_task_data->is_taskgraph = 0;
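/* Sketch of the recursion the arithmetic above drives (hypothetical names; the
   range split itself, tc0/tc1 and ub0 above, is elided): the chunk count is
   halved, the upper half is packaged into a task carrying the
   __taskloop_params_t filled in above, and the lower half stays with the
   current call; once a half holds no more than num_t_min chunks it is spawned
   linearly. */
struct toy_range; // stand-in for the packaged taskloop parameters
void toy_spawn_second_half(toy_range *, unsigned long long n1);
void toy_taskloop_linear(toy_range *, unsigned long long n);

void toy_taskloop_recur(toy_range *r, unsigned long long num_tasks,
                        unsigned long long num_t_min) {
  if (num_tasks <= num_t_min) {
    toy_taskloop_linear(r, num_tasks); // few enough chunks: spawn them directly
    return;
  }
  unsigned long long n0 = num_tasks / 2;  // handled by this call
  unsigned long long n1 = num_tasks - n0; // scheduled as a task (n_tsk1 above)
  toy_spawn_second_half(r, n1);           // the spawned task recurses itself
  toy_taskloop_recur(r, n0, num_t_min);
}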
5236 kmp_taskdata_t *current_task = thread->th.th_current_task;
5245 tc = upper - lower + 1;
5247 tc = (lower - upper) / (-st) + 1;
5249 tc = (upper - lower) / st + 1;
5252 KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
5255 // do not execute anything for zero-trip loop
5265 ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
5266 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
5273 KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);
5279 grainsize = thread->th.th_team_nproc * 10;
5299 num_tasks = (tc + grainsize - 1) / grainsize;
5300 last_chunk = tc - (num_tasks * grainsize);
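// Worked example of the two computations just above: for lb = 0, ub = 9,
// st = 2 the trip count is tc = (9 - 0) / 2 + 1 = 5; with grainsize = 2 this
// branch yields num_tasks = (5 + 2 - 1) / 2 = 3 and
// last_chunk = 5 - 3 * 2 = -1, i.e. the final chunk falls one iteration short
// of the requested grainsize (chunk sizes 2, 2, 1).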
5321 // Also require GOMP_taskloop to reduce to linear (taskdata->td_flags.native)
5322 if (if_val == 0) { // if(0) specified, mark task as serial
5323 taskdata->td_flags.task_serial = 1;
5324 taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
5332 // !taskdata->td_flags.native => currently force linear spawning of tasks
5334 } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
5361 ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
5362 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
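/* User-level view, for orientation (standard OpenMP, not a runtime API): a
   taskloop construct such as the one below is what feeds this routine, with
   grainsize()/num_tasks() selecting the chunking branch above. */
void scale(float *a, int n, float s) {
#pragma omp parallel
#pragma omp single
#pragma omp taskloop grainsize(64)
  for (int i = 0; i < n; ++i)
    a[i] *= s;
}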
5442 kmp_taskdata_t *taskdata = thread->th.th_current_task;
5447 return &taskdata->td_target_data.async_handle;
5463 kmp_taskdata_t *taskdata = thread->th.th_current_task;
5468 return taskdata->td_task_team != NULL;
5487 (__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE))
5495 kmp_int32 tdg_id = tdg->tdg_id;
5502 kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
5514 kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
5515 kmp_int32 *successors = tdg->record_map[i].successors;
5518 fprintf(tdg_file, " %d -> %d \n", i, successors[j]);
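// Shape of the output the loop above produces, assuming the usual Graphviz
// wrapper around the " %d -> %d " edge lines (the wrapper itself is not part
// of this listing): for a three-task chain the body would contain
//    0 -> 1
//    1 -> 2
// and the file can be rendered with, e.g., `dot -Tpng tdg.dot -o tdg.png`.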
5530 KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_READY);
5532 tdg->tdg_id, tdg->num_roots));
5533 kmp_node_info_t *this_record_map = tdg->record_map;
5534 kmp_int32 *this_root_tasks = tdg->root_tasks;
5535 kmp_int32 this_num_roots = tdg->num_roots;
5536 kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
5539 kmp_taskdata_t *parent_task = thread->th.th_current_task;
5541 if (tdg->rec_taskred_data) {
5542 __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
5548 td->td_parent = parent_task;
5552 this_record_map[j].parent_task->td_taskgroup;
5556 KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_incomplete_child_tasks);
5559 KMP_ATOMIC_INC(&parent_taskgroup->count);
5561 td->td_taskgroup = parent_taskgroup;
5562 } else if (td->td_taskgroup != nullptr) {
5564 td->td_taskgroup = nullptr;
5566 if (this_record_map[j].parent_task->td_flags.tasktype == TASK_EXPLICIT)
5567 KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_allocated_child_tasks);
5574 tdg->tdg_id, tdg->num_roots));
5589 tdg->tdg_id = tdg_id;
5590 tdg->map_size = INIT_MAPSIZE;
5591 tdg->num_roots = -1;
5592 tdg->root_tasks = nullptr;
5593 tdg->tdg_status = KMP_TDG_RECORDING;
5594 tdg->rec_num_taskred = 0;
5595 tdg->rec_taskred_data = nullptr;
5596 KMP_ATOMIC_ST_RLX(&tdg->num_tasks, 0);
5612 __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
5615 // __kmpc_start_record_task: Wrapper around __kmp_start_record to mark
5662 kmp_node_info_t *this_record_map = tdg->record_map;
5663 kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
5666 kmp_int32 this_map_size = tdg->map_size;
5677 tdg->map_size = this_map_size;
5678 tdg->num_roots = this_num_roots;
5679 tdg->root_tasks = this_root_tasks;
5680 KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_RECORDING);
5681 tdg->tdg_status = KMP_TDG_READY;
5683 if (thread->th.th_current_task->td_dephash) {
5684 __kmp_dephash_free(thread, thread->th.th_current_task->td_dephash);
5685 thread->th.th_current_task->td_dephash = NULL;
5699 // __kmpc_end_record_task: wrapper around __kmp_end_record to mark
5714 // TODO: use input_flags->nowait
5716 if (__kmp_tdg_is_recording(tdg->tdg_status))