#if ENABLE_LIBOMPTARGET
static void (*tgt_target_nowait_query)(void **);

void __kmp_init_target_task() {
  *(void **)(&tgt_target_nowait_query) =
      KMP_DLSYM("__tgt_target_nowait_query");
}
#endif

static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id);

#ifdef BUILD_TIED_TASK_STACK
// __kmp_trace_task_stack: print the tied tasks from the task stack in order
// below the top task in the stack.
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  while (entries != 0) {
    // Move to the previous block once all entries of the current one are done.
    // Note the parentheses: '==' binds tighter than '&' in C++.
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // Finish bookkeeping.
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
// __kmp_init_task_stack: initialize the task stack when thread_data is created.
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // Set up the first block of the stack.
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // Initialize the stack to be empty.
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
// __kmp_free_task_stack: free all blocks chained beyond the first and reset
// the stack to empty.
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_free(stack_block); // only the first block is embedded in the stack
    }
    stack_block = next_block;
  }
  // Reinitialize the stack to be empty.
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
// __kmp_push_task_stack: push a suspended tied task onto the thread's stack,
// growing the stack by a linked block when the current block fills.
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));

  // Store the entry first, then do the bookkeeping.
  *(task_stack->ts_top) = tied_task;
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find the beginning of this task block.
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    if (stack_block->sb_next !=
        NULL) { // reset ts_top to the beginning of the next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // allocate a new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
// __kmp_pop_task_stack: pop the suspended tied task, walking back to the
// previous block when the current one empties.
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // Fix up ts_top if we need to pop from the previous block.
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // Finish bookkeeping.
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
#endif // BUILD_TIED_TASK_STACK
  for (int j = i - 1; j >= 0; --j)

// __kmp_realloc_task_deque: double a thread's task deque, copying the live
// entries to the new storage in head-to-tail order.
  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;

  thread_data->td.td_deque_last_stolen = -1;
  KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
                "for thread_data %p\n",
                __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data));
    // Found an existing queue whose priority matches pri.
    thread_data = &lst->td;

    // Otherwise a new queue is allocated and its deque used.
    thread_data = &list->td;

  // Walk the list, which is kept sorted by descending priority.
  while (next_queue && next_queue->priority > pri) {
    lst = next_queue;
    next_queue = lst->next;
  }
  if (next_queue == NULL) {
    // No queue with priority pri yet; use the newly allocated one.
    thread_data = &list->td;
  } else if (next_queue->priority == pri) {
    // Found an existing queue of tasks with the given priority.
    thread_data = &next_queue->td;
  } else {
    // Insert the newly allocated queue between the existing ones.
    thread_data = &list->td;
    list->next = next_queue;
  }
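// The list walked above is kept sorted by descending priority. A minimal
// illustrative sketch of the same insertion logic (names hypothetical):
//
//   struct PriQueue { int priority; PriQueue *next; };
//   // Insert q so the list stays sorted high-to-low; lst is the head.
//   void insert_sorted(PriQueue *&lst, PriQueue *q) {
//     if (!lst || lst->priority < q->priority) {
//       q->next = lst; lst = q; return;
//     }
//     PriQueue *cur = lst;
//     while (cur->next && cur->next->priority > q->priority)
//       cur = cur->next;
//     q->next = cur->next; // insert between cur and cur->next
//     cur->next = q;
//   }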
  KA_TRACE(20,
           ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",
            gtid, taskdata, pri));

      thread_data = &list->td;

      thread_data = &lst->td;

  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    }
  }

  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // adjust task count

  KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "
                "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

    KA_TRACE(20,
             ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
              gtid, counter, taskdata));

    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;

  // No lock needed since only the owner can allocate.
  if (UNLIKELY(thread_data->td.td_deque == NULL)) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    }
  }

  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    // Recheck under the lock; the deque may have filled in the meantime.
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_enable_task_throttling &&
          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      }
    }
  }

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // adjust task count

  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
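// The two routines above treat th_current_task as an intrusive stack linked
// through td_parent. A minimal sketch of that discipline (types simplified,
// names illustrative):
//
//   struct Task { Task *td_parent; };
//   struct Thread { Task *current; };
//   void push_task(Thread &t, Task &nt) {
//     nt.td_parent = t.current; // remember what to resume
//     t.current = &nt;
//   }
//   void pop_task(Thread &t) { t.current = t.current->td_parent; }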
  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

#ifdef BUILD_TIED_TASK_STACK
  __kmp_push_task_stack(gtid, thread, taskdata);
#endif /* BUILD_TIED_TASK_STACK */

  // Mark the starting task as executing and as the current task.
  thread->th.th_current_task = taskdata;

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = ompt_data_none;
  task->ompt_task_info.frame.enter_frame = ompt_data_none;
  task->ompt_task_info.frame.exit_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.frame.enter_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.dispatch_chunk.start = 0;
  task->ompt_task_info.dispatch_chunk.iterations = 0;
  ompt_task_status_t status = ompt_task_switch;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
                                      ompt_task_status_t status) {
  if (UNLIKELY(taskdata->td_taskgroup &&
               KMP_ATOMIC_LD_RLX(&(taskdata->td_taskgroup->cancel_request)))) {
    status = ompt_task_cancel;
  }
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
  }
}
                                               void *return_address) {
  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));

    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));

#if OMPT_SUPPORT
  if (ompt) {
    if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
      current_task->ompt_task_info.frame.enter_frame.ptr =
          taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
      current_task->ompt_task_info.frame.enter_frame_flags =
          taskdata->ompt_task_info.frame.exit_frame_flags =
              ompt_frame_application | ompt_frame_framepointer;
    }
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent_info->task_data), &(parent_info->frame),
          &(taskdata->ompt_task_info.task_data),
          TASK_TYPE_DETAILS_FORMAT(taskdata), 0, return_address);
    }
    __ompt_task_start(task, current_task, gtid);
  }
#endif // OMPT_SUPPORT

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
#endif // OMPT_SUPPORT

void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
                                   OMPT_GET_FRAME_ADDRESS(1),
                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
    return;
  }
#endif
  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
  KA_TRACE(10,
           ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
            gtid, loc_ref, new_taskdata, current_task));

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, new_taskdata));
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  task->data1.destructors = NULL;

  if (!taskdata->is_taskgraph) {
    __kmp_fast_free(thread, taskdata);
  }

  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
  // Go up the ancestor tree and free every ancestor whose allocated-children
  // count (which covers the task itself plus its children) has reached zero.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    // Stop at the implicit task; only clean up its dephash if it is complete
    // and has no remaining incomplete children.
    if (children == 0 && flags_old.complete == 1) {
      KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
                     "dephash of implicit task %p\n",
                     gtid, taskdata));
      __kmp_dephash_free_entries(thread, taskdata->td_dephash);
    }
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
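// A hedged sketch of the counting scheme above (atomics elided for brevity;
// every name here is illustrative, not the runtime's API): each task's
// counter covers the task itself plus its allocated children, so freeing a
// task releases one reference on its parent.
//
//   int release(Task *t) { return --t->allocated_child_tasks; }
//   void free_and_ancestors(Task *t) {
//     while (release(t) == 0 && t->parent && !t->is_implicit) {
//       Task *p = t->parent;
//       free_task(t); // safe: no children or siblings reference it anymore
//       t = p;
//     }
//   }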
  kmp_task_team_t *task_team =
      thread->th.th_task_team; // might be NULL for serial teams

  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                "task %p\n",
                gtid, taskdata, resumed_task));

  is_taskgraph = taskdata->is_taskgraph;

#ifdef BUILD_TIED_TASK_STACK
  __kmp_pop_task_stack(gtid, thread, taskdata);
#endif /* BUILD_TIED_TASK_STACK */

    KA_TRACE(20,
             ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
              gtid, counter, taskdata));

      // An untied task that is not yet done: just switch back.
      if (resumed_task == NULL) {
        // In a serialized task, the resumed task is the parent.
        resumed_task = taskdata->td_parent;
      }
      thread->th.th_current_task = resumed_task; // restore current_task

      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
                    "resuming task %p\n",
                    gtid, taskdata, resumed_task));
      return;
  if (resumed_task == NULL) {
    // In a serialized task, the resumed task is the parent.
    resumed_task = taskdata->td_parent;
  }

  destr_thunk(gtid, task); // destruct thunk for firstprivates, if any

  bool completed = true;

      // Detachable task whose event is still pending: report detach.
      __ompt_task_finish(task, resumed_task, ompt_task_detach);

      __ompt_task_finish(task, resumed_task, ompt_task_switch);

    __ompt_task_finish(task, resumed_task, ompt_task_complete);

  KA_TRACE(20,
           ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
            gtid, taskdata, children));

  thread->th.th_current_task = resumed_task; // restore current_task

  KA_TRACE(10,
           ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
            gtid, taskdata, resumed_task));
  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));

  __kmp_task_finish<ompt>(gtid, task, NULL); // this routine provides the task
  // to resume

  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));

#if OMPT_SUPPORT
  if (ompt) {
    ompt_frame_t *ompt_frame;
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame = ompt_data_none;
    ompt_frame->enter_frame_flags =
        ompt_frame_runtime | ompt_frame_framepointer;
  }
#endif

#if OMPT_SUPPORT
void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                       kmp_task_t *task) {
  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}
#endif // OMPT_SUPPORT

void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
    return;
  }
#endif
  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
}
  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));

  __kmp_task_finish<false>(gtid, task,
                           NULL); // Not sure how to find task to resume

  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));

  task->td_team = team;
  task->td_ident = loc_ref;
  task->td_taskwait_ident = NULL;
  task->td_taskwait_counter = 0;
  task->td_taskwait_thread = 0;

  // All implicit tasks are executed immediately, not deferred.
  task->td_flags.task_serial = 1;
  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  task->td_flags.started = 1;
  task->td_flags.executing = 1;
  task->td_flags.complete = 0;
  task->td_flags.freed = 0;
  task->td_flags.onced = 0;

  task->td_depnode = NULL;

  if (set_curr_task) { // only do this init the first time the thread is created
    task->td_taskgroup = NULL; // an implicit task does not have a taskgroup
    task->td_dephash = NULL;
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  }

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(task, tid);
#endif

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
}
  if (task->td_dephash) {
    int children;
    task->td_flags.complete = 1;
    task->td_flags.onced = 1;
    children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
    kmp_tasking_flags_t flags_old = task->td_flags;
    if (children == 0 && flags_old.complete == 1) {
      KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "
                     "dephash of implicit task %p\n",
                     thread->th.th_info.ds.ds_gtid, task));
      __kmp_dephash_free_entries(thread, task->td_dephash);
    }
  }

  task->td_dephash = NULL;
                                    size_t sizeof_kmp_task_t,
                                    size_t sizeof_shareds,
                                    kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  size_t shareds_offset;

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

    KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);

    if ((thread->th.th_task_team) == NULL) {
      KA_TRACE(30,
               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
                gtid));
      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
    }

      KA_TRACE(
          30,
          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
      kmp_int32 tid = thread->th.th_info.ds.ds_tid;

      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(thread, thread_data);
      }

  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);

#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_S390X || !KMP_HAVE_QUAD

  if (sizeof_shareds > 0) {
    // Avoid a double allocation by placing the shareds after the taskdata.
    task->shareds = &((char *)taskdata)[shareds_offset];
  }

  taskdata->td_team = thread->th.th_team;

    // Hidden-helper tasks are owned by a shadow thread's team/task team.
    taskdata->td_team = shadow_thread->th.th_team;
    taskdata->td_task_team = shadow_thread->th.th_task_team;

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, gtid);
#endif

#if OMPX_TASKGRAPH
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
      (task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) {
    taskdata->is_taskgraph = 1;
    taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
  }
#endif
  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  kmp_task_t *retval;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;

  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                input_flags->proxy ? "proxy" : "",
                input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                            sizeof_shareds, task_entry);

  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));

  return retval;
}
                                         size_t sizeof_kmp_task_t,
                                         size_t sizeof_shareds,
                                         kmp_routine_entry_t task_entry,
                                         kmp_int64 device_id) {
  auto *input_flags = reinterpret_cast<kmp_tasking_flags_t *>(&flags);
  input_flags->target = 1; // target tasks are untied by the specification
  if (__kmp_enable_hidden_helper)
    input_flags->hidden_helper = TRUE;
  KA_TRACE(
      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
           gtid, taskdata, current_task));

    // A proxy task that was already completed runs only its bottom-half finish.
    KA_TRACE(
        30,
        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
         gtid, taskdata));

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
                  "proxy task %p, resuming task %p\n",
                  gtid, taskdata, current_task));
    return;

#if OMPT_SUPPORT
  // Store the thread's OMPT state and restore it after the task runs.
  ompt_thread_info_t oldInfo;
  if (UNLIKELY(ompt_enabled.enabled)) {
    oldInfo = thread->th.ompt_thread_info;
    thread->th.ompt_thread_info.wait_id = 0;
    thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
                                            ? ompt_state_work_serial
                                            : ompt_state_work_parallel;
  }
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
    ompt_data_t *task_data;
    if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
      __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
      ompt_callbacks.ompt_callback(ompt_callback_cancel)(
          task_data,
          ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
                                                    : ompt_cancel_parallel) |
              ompt_cancel_discarded_task,
          NULL);
    }
#endif

#if KMP_STATS_ENABLED
    // Switch the stats state depending on the enclosing construct.
    switch (KMP_GET_THREAD_STATE()) {
    case FORK_JOIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
      break;
    }
#endif // KMP_STATS_ENABLED

#if OMPT_SUPPORT
    if (UNLIKELY(ompt_enabled.enabled))
      __ompt_task_start(task, current_task, gtid);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&
                 taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) {
      ompt_data_t instance = ompt_data_none;
      instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
          ompt_dispatch_taskloop_chunk, instance);
      taskdata->ompt_task_info.dispatch_chunk = {0, 0};
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_task_begin();
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    kmp_uint64 cur_time;
    if (kmp_itt_count_task) {
      // Time outer-level explicit tasks to adjust barrier imbalance time.
      if (thread->th.th_bar_arrive_time)
        cur_time = __itt_get_timestamp();
      else
        kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
    }
#endif

#if ENABLE_LIBOMPTARGET
    // target-nowait tasks may re-enqueue via their async handle
#endif
#ifdef KMP_GOMP_COMPAT
    if (taskdata->td_flags.native) {
      ((void (*)(void *))(*(task->routine)))(task->shareds);
    } else
#endif /* KMP_GOMP_COMPAT */
    {
      (*(task->routine))(gtid, task);
    }

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (kmp_itt_count_task) {
      // Barrier imbalance: adjust arrive time with the task duration.
      thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
    }
#endif

#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_task_end();
#endif

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    thread->th.ompt_thread_info = oldInfo;
    taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
    __kmp_task_finish<true>(gtid, task, current_task);
  } else
#endif
    __kmp_task_finish<false>(gtid, task, current_task);

      __ompt_task_finish(task, current_task, ompt_task_switch);

  KA_TRACE(
      30,
      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
       gtid, taskdata, current_task));
  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

#if OMPT_SUPPORT
  kmp_taskdata_t *parent;
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent = new_taskdata->td_parent;
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
          OMPT_LOAD_RETURN_ADDRESS(gtid));
    }
  }
#endif

  KA_TRACE(
      10,
      ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
       "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
       gtid, loc_ref, new_taskdata));

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
                                bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

#if OMPX_TASKGRAPH
  if (new_taskdata->is_taskgraph &&
      __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
    kmp_tdg_info_t *tdg = new_taskdata->tdg;
    // Extend the record_map if the task id exceeds the current capacity.
    if (new_taskdata->td_task_id >= new_taskdata->tdg->map_size) {
      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
      if (new_taskdata->td_task_id >= tdg->map_size) {
        kmp_uint old_size = tdg->map_size;
        kmp_uint new_size = old_size * 2;
        kmp_node_info_t *old_record = tdg->record_map;
        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
            new_size * sizeof(kmp_node_info_t));
        KMP_MEMCPY(new_record, old_record, old_size * sizeof(kmp_node_info_t));
        tdg->record_map = new_record;

        for (kmp_int i = old_size; i < new_size; i++) {
          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
              __kmp_successors_size * sizeof(kmp_int32));
          new_record[i].task = nullptr;
          new_record[i].successors = successorsList;
          new_record[i].nsuccessors = 0;
          new_record[i].npredecessors = 0;
          new_record[i].successors_size = __kmp_successors_size;
        }
        // Update the size last so other threads keep using old_record until
        // map_size is published.
        tdg->map_size = new_size;
      }
      __kmp_release_bootstrap_lock(&tdg->graph_lock);
    }
    // Record the task.
    if (tdg->record_map[new_taskdata->td_task_id].task == nullptr) {
      tdg->record_map[new_taskdata->td_task_id].task = new_task;
      tdg->record_map[new_taskdata->td_task_id].parent_task =
          new_taskdata->td_parent;
    }
  }
#endif

  if (serialize_immediate)
  // Wake the other threads in the team so they can help execute tasks.
  kmp_int32 nthreads = this_thr->th.th_team_nproc;
  for (int i = 0; i < nthreads; ++i) {
    kmp_info_t *thread = team->t.t_threads[i];
    if (thread == this_thr)
      continue;
    if (thread->th.th_sleep_loc != NULL) {
      __kmp_null_resume_wrapper(thread);
    }
  }
#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));

#if OMPT_SUPPORT
  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled)) {
    if (!new_taskdata->td_flags.started) {
      OMPT_STORE_RETURN_ADDRESS(gtid);
      parent = new_taskdata->td_parent;
      if (!parent->ompt_task_info.frame.enter_frame.ptr) {
        parent->ompt_task_info.frame.enter_frame.ptr =
            OMPT_GET_FRAME_ADDRESS(0);
      }
      if (ompt_enabled.ompt_callback_task_create) {
        ompt_callbacks.ompt_callback(ompt_callback_task_create)(
            &(parent->ompt_task_info.task_data),
            &(parent->ompt_task_info.frame),
            &(new_taskdata->ompt_task_info.task_data),
            TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
            OMPT_LOAD_RETURN_ADDRESS(gtid));
      }
    } else {
      // We are scheduling the continuation of an UNTIED task.
      // Scheduling back to the parent task.
      __ompt_task_finish(new_task,
                         new_taskdata->ompt_task_info.scheduling_parent,
                         ompt_task_switch);
      new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
    }
  }
#endif

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));

#if OMPT_SUPPORT
  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
    parent = new_taskdata->td_parent;
    if (!parent->ompt_task_info.frame.enter_frame.ptr)
      parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, codeptr_ra);
    }
  }
#endif

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
                                              void *frame_address,
                                              void *return_address) {
  kmp_taskdata_t *taskdata = nullptr;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));

    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;

    if (ompt) {
      my_task_data = &(taskdata->ompt_task_info.task_data);
      my_parallel_data = OMPT_CUR_TEAM_DATA(thread);

      taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;

      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }
      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

    void *itt_sync_obj = NULL;
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);

    must_wait = must_wait || (thread->th.th_task_team != NULL &&
                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
    // If a hidden helper task has been encountered, we must wait here too.
    must_wait =
        must_wait ||
        (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);

    if (must_wait) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *,
                &(taskdata->td_incomplete_child_tasks)),
          0U);
    }

    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt) {
      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_NOINLINE
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
                                            return_address);
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
}
  kmp_taskdata_t *taskdata = NULL;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                gtid, loc_ref, end_part));

    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;

      void *itt_sync_obj = NULL;
      KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);

      kmp_task_team_t *task_team = thread->th.th_task_team;
      if (task_team != NULL) {
#if OMPT_SUPPORT
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 1;
#endif
#if OMPT_SUPPORT
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 0;
#endif
      }

      KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);

  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));
template <typename T>
void *__kmp_task_reduction_init(int gtid, int num, T *data) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
  kmp_uint32 nth = thread->th.th_team_nproc;
  kmp_taskred_data_t *arr;

  if (nth == 1) {
    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
                  gtid, tg));
    return (void *)tg;
  }
  KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
                gtid, tg, num));
  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thread, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    size_t size = data[i].reduce_size - 1;
    // Round the per-thread item up to a cache line.
    size += CACHE_LINE - size % CACHE_LINE;
    arr[i].reduce_shar = data[i].reduce_shar;
    arr[i].reduce_size = size;
    arr[i].reduce_comb = data[i].reduce_comb;
    arr[i].reduce_init = data[i].reduce_init;
    arr[i].reduce_fini = data[i].reduce_fini;
    if (!arr[i].flags.lazy_priv) {
      // Allocate a cache-line aligned block for all threads' private copies.
      arr[i].reduce_priv = __kmp_allocate(nth * size);
      arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
      if (arr[i].reduce_init != NULL) {
        // Initialize all thread-specific items.
        for (size_t j = 0; j < nth; ++j) {
          __kmp_call_init<T>(arr[i], j * size);
        }
      }
    } else {
      // Only allocate space for pointers now; objects are created lazily.
      arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
    }
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
  return (void *)tg;
}
#if OMPX_TASKGRAPH
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
    kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
    this_tdg->rec_taskred_data =
        __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
    this_tdg->rec_num_taskred = num;
    KMP_MEMCPY(this_tdg->rec_taskred_data, data,
               sizeof(kmp_task_red_input_t) * num);
  }
#endif

#if OMPX_TASKGRAPH
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
    kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
    this_tdg->rec_taskred_data =
        __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
    this_tdg->rec_num_taskred = num;
  }
#endif
template <typename T>
static void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
                                           kmp_taskgroup_t *tg,
                                           void *reduce_data) {
  kmp_taskred_data_t *arr;
  KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
                " from data %p\n",
                thr, tg, reduce_data));
  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thr, num * sizeof(kmp_taskred_data_t));
  // Threads share private copies, thunk routines, sizes, flags, etc.
  KMP_MEMCPY(arr, reduce_data, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    arr[i].reduce_shar = data[i].reduce_shar; // init unshared items
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
}
  kmp_int32 nth = thread->th.th_team_nproc;
  if (nth == 1)
    return data; // nothing to do

  kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
  if (tg == NULL)
    tg = thread->th.th_current_task->td_taskgroup;
  kmp_taskred_data_t *arr;
  kmp_int32 num;
  kmp_int32 tid = thread->th.th_info.ds.ds_tid;

#if OMPX_TASKGRAPH
  if ((thread->th.th_current_task->is_taskgraph) &&
      (!__kmp_tdg_is_recording(
          __kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) {
    tg = thread->th.th_current_task->td_taskgroup;
  }
#endif

  while (tg != NULL) {
    arr = (kmp_taskred_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
    for (int i = 0; i < num; ++i) {
      if (!arr[i].flags.lazy_priv) {
        if (data == arr[i].reduce_shar ||
            (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
          return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
      } else {
        // Check the shared location first.
        void **p_priv = (void **)(arr[i].reduce_priv);
        if (data == arr[i].reduce_shar)
          goto found;
        // Check whether we got some thread-specific location as parameter.
        for (int j = 0; j < nth; ++j)
          if (data == p_priv[j])
            goto found;
        continue; // not found, continue search
      found:
        if (p_priv[tid] == NULL) {
          // Allocate the thread-specific object lazily.
          p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
          if (arr[i].reduce_init != NULL) {
            if (arr[i].reduce_orig != NULL) { // new interface
              ((void (*)(void *, void *))arr[i].reduce_init)(
                  p_priv[tid], arr[i].reduce_orig);
            } else { // old interface (single parameter)
              ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]);
            }
          }
        }
        return p_priv[tid];
      }
    }
    tg = tg->parent;
  }
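// The address computation above locates a thread's private copy inside one
// cache-line-padded block. A small worked sketch (sizes hypothetical):
//
//   // reduce_size is rounded up to a CACHE_LINE multiple at init time, so
//   // with reduce_size = 128 and tid = 3 the private copy starts at
//   // (char *)reduce_priv + 3 * 128.
//   char *priv_of(void *reduce_priv, int tid, size_t reduce_size) {
//     return (char *)reduce_priv + tid * reduce_size;
//   }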
  for (int i = 0; i < num; ++i) {
    void *sh_data = arr[i].reduce_shar;
    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
    void (*f_comb)(void *, void *) =
        (void (*)(void *, void *))(arr[i].reduce_comb);
    if (!arr[i].flags.lazy_priv) {
      void *pr_data = arr[i].reduce_priv;
      size_t size = arr[i].reduce_size;
      for (int j = 0; j < nth; ++j) {
        void *priv_data = (char *)pr_data + j * size;
        f_comb(sh_data, priv_data); // combine results
        if (f_fini)
          f_fini(priv_data); // finalize if needed
      }
    } else {
      void **pr_data = (void **)(arr[i].reduce_priv);
      for (int j = 0; j < nth; ++j) {
        if (pr_data[j] != NULL) {
          f_comb(sh_data, pr_data[j]); // combine results
          if (f_fini)
            f_fini(pr_data[j]); // finalize if needed
          __kmp_free(pr_data[j]);
        }
      }
    }
    __kmp_free(arr[i].reduce_priv);
  }
template <typename T>
void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                         int num, T *data) {
  if (nth == 1) {
    KA_TRACE(10,
             ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
              gtid, thr->th.th_current_task->td_taskgroup));
    return (void *)thr->th.th_current_task->td_taskgroup;
  }

  if (reduce_data == NULL &&
      __kmp_atomic_compare_store(&tg->reduce_data, reduce_data, (void *)1)) {
    // Single thread enters this block to initialize common reduction data.
  } else {
    // Wait for the initializing thread, then copy its reduction data.
    tg = thr->th.th_current_task->td_taskgroup;
    __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
  }
}

void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                          int num, void *data) {
  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    kmp_team_t *team = thread->th.th_team;
    ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
    // FIXME: I think this is wrong for lwt!
    ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;

    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
  int thread_finished = FALSE;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_data_t my_task_data;
  ompt_data_t my_parallel_data;
  void *codeptr = nullptr;
  if (UNLIKELY(ompt_enabled.enabled)) {
    team = thread->th.th_team;
    my_task_data = taskdata->ompt_task_info.task_data;
    // FIXME: I think this is wrong for lwt!
    my_parallel_data = team->t.ompt_team_info.parallel_data;
    codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }
#endif

  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));

    void *itt_sync_obj = NULL;
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif

    if (!taskdata->td_flags.team_serial ||
        (thread->th.th_task_team != NULL &&
         (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
          thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
    }

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif

    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);

    // Task-reduction teardown: the last thread to arrive combines the
    // per-thread copies and frees the bookkeeping arrays.
    void *priv0 = arr[0].reduce_priv;
    if (cnt == thread->th.th_team_nproc - 1) {
      __kmp_task_reduction_fini(thread, taskgroup);
    }
    if (cnt == thread->th.th_team_nproc - 1) {
      __kmp_task_reduction_fini(thread, taskgroup);
    }

  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
                gtid, taskdata));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
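// For orientation, a hedged sketch of the lowering that reaches the two entry
// points above for "#pragma omp taskgroup" (codegen details vary by compiler):
//
//   void user_code(ident_t *loc, kmp_int32 gtid) {
//     __kmpc_taskgroup(loc, gtid);      // push a new kmp_taskgroup_t
//     /* ... spawn tasks; each registers with the group's counter ... */
//     __kmpc_end_taskgroup(loc, gtid);  // wait for the counter to reach 0
//   }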
      KA_TRACE(
          20,
          ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid));

  } while (ntasks > 0);
  KA_TRACE(20, ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n",
                gtid));

    thread_data = &list->td;
    do {
      deque_ntasks = thread_data->td.td_deque_ntasks;
      if (deque_ntasks == 0) {
        KA_TRACE(20, ("__kmp_get_priority_task: T#%d No tasks to get from %p\n",
                      gtid, thread_data));
      }
    } while (deque_ntasks == 0);

  int target = thread_data->td.td_deque_head;
  taskdata = thread_data->td.td_deque[target];
  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
    // Bump the head pointer and wrap.
    thread_data->td.td_deque_head =
        (target + 1) & TASK_DEQUE_MASK(thread_data->td);
  } else {
    if (!task_team->tt.tt_untied_task_encountered) {
      // The TSC does not allow stealing the victim task.
      KA_TRACE(20, ("__kmp_get_priority_task(exit #3): T#%d could not get task "
                    "from %p: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, thread_data, task_team, deque_ntasks, target,
                    thread_data->td.td_deque_tail));
      return NULL;
    }
    int i;
    // Walk through the deque trying to steal any allowed task.
    taskdata = NULL;
    for (i = 1; i < deque_ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
      taskdata = thread_data->td.td_deque[target];
      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current))
        break; // found a task to execute
      taskdata = NULL;
    }
    if (taskdata == NULL) {
      // No appropriate candidate found to execute.
      KA_TRACE(
          10, ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
               "%p: task_team=%p ntasks=%d head=%u tail=%u\n",
               gtid, thread_data, task_team, deque_ntasks,
               thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
      return NULL;
    }
    int prev = target;
    for (i = i + 1; i < deque_ntasks; ++i) {
      // Shift the remaining tasks in the deque left by one.
      target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
      thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
      prev = target;
    }
    KMP_DEBUG_ASSERT(
        thread_data->td.td_deque_tail ==
        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)));
    thread_data->td.td_deque_tail = target; // tail -= 1 (wrapped)
  }
  thread_data->td.td_deque_ntasks = deque_ntasks - 1;
  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
                gtid, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  // Recheck after acquiring the deque lock.
  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  tail = (thread_data->td.td_deque_tail - 1) &
         TASK_DEQUE_MASK(thread_data->td); // wrap index
  taskdata = thread_data->td.td_deque[tail];

  if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
                             thread->th.th_current_task)) {
    // The TSC does not allow removing the tail task.
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  thread_data->td.td_deque_tail = tail;
  TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);

  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
                "ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
                                    std::atomic<kmp_int32> *unfinished_threads,
                                    int *thread_finished,
                                    kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *current;
  kmp_thread_data_t *victim_td;
  kmp_info_t *victim_thr;

  victim_td = &threads_data[victim_tid];
  victim_thr = victim_td->td.td_thr;

  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
                "task_team=%p ntasks=%d head=%u tail=%u\n",
                gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                victim_td->td.td_deque_tail));

  if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
    KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
                  "task_team=%p ntasks=%d head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                  victim_td->td.td_deque_tail));
    return NULL;
  }

  int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
  // Check again after acquiring the lock.
  if (ntasks == 0) {
    KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
                  "task_team=%p ntasks=%d head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                  victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
    return NULL;
  }

  taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
    // Bump the head pointer and wrap.
    victim_td->td.td_deque_head =
        (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
  } else {
    if (!task_team->tt.tt_untied_task_encountered) {
      // The TSC does not allow stealing the victim task.
      KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
      return NULL;
    }
    int i;
    // Walk through the victim's deque trying to steal any allowed task.
    int target = victim_td->td.td_deque_head;
    taskdata = NULL;
    for (i = 1; i < ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      taskdata = victim_td->td.td_deque[target];
      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current))
        break; // found a victim task
      taskdata = NULL;
    }
    if (taskdata == NULL) {
      // No appropriate candidate to steal found.
      KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
      return NULL;
    }
    int prev = target;
    for (i = i + 1; i < ntasks; ++i) {
      // Shift the remaining tasks in the deque left by one.
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
      prev = target;
    }
    KMP_DEBUG_ASSERT(
        victim_td->td.td_deque_tail ==
        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
    victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
  }
  if (*thread_finished) {
    // Un-mark this victim as a finished victim before releasing the lock, or
    // other threads might be prematurely released from the barrier.
    kmp_int32 count = KMP_ATOMIC_INC(unfinished_threads);
    KA_TRACE(
        20,
        ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
         gtid, count + 1, task_team));
    *thread_finished = FALSE;
  }
  TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);

  KA_TRACE(10,
           ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
            "task_team=%p ntasks=%d head=%u tail=%u\n",
            gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
            ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
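// Note the asymmetry the two routines above implement: the owner pops from
// the tail (LIFO, cache-friendly), while thieves steal from the head (FIFO,
// oldest task first). A minimal sketch over a mask-wrapped ring (names
// illustrative, locking elided):
//
//   void *owner_pop(void **dq, unsigned &tail, unsigned mask) {
//     tail = (tail - 1) & mask; // back up over the newest entry
//     return dq[tail];
//   }
//   void *thief_steal(void **dq, unsigned &head, unsigned mask) {
//     void *t = dq[head];       // take the oldest entry
//     head = (head + 1) & mask;
//     return t;
//   }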
  std::atomic<kmp_int32> *unfinished_threads;
  kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
                      tid = thread->th.th_info.ds.ds_tid;

  if (task_team == NULL || current_task == NULL)
    return FALSE;

  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
                "*thread_finished=%d\n",
                gtid, final_spin, *thread_finished));

  while (1) { // Outer loop keeps trying to find tasks in case of single thread
              // getting tasks from target constructs.
    while (1) { // Inner loop to find a task and execute it.
      task = NULL;
      if (task == NULL && use_own_tasks) { // check our own deque first
        task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
      }
      if ((task == NULL) && (nthreads > 1)) { // steal a task as a last resort
        use_own_tasks = 0;
        // Try to steal from the last place we stole from successfully.
        if (victim_tid == -2) { // haven't stolen anything yet
          victim_tid = threads_data[tid].td.td_deque_last_stolen;
          if (victim_tid != -1)
            other_thread = threads_data[victim_tid].td.td_thr;
        }
        if (victim_tid != -1) { // found last victim
          // keep it
        } else if (!new_victim) { // no recent steals; pick a random victim
          do { // find a different thread to steal work from
            victim_tid = __kmp_get_random(thread) % (nthreads - 1);
            if (victim_tid >= tid) {
              ++victim_tid; // adjust random distribution to exclude self
            }
            other_thread = threads_data[victim_tid].td.td_thr;
            // If the victim is sleeping, wake it up (it may have missed the
            // wake-up in __kmp_enable_tasking).
            if ((TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
                 NULL)) {
              __kmp_null_resume_wrapper(other_thread);
            }
            break;
          } while (1);
        }
        task = __kmp_steal_task(victim_tid, gtid, task_team,
                                unfinished_threads, thread_finished,
                                is_constrained);
      }
      if (task != NULL) { // remember the victim we stole from
        if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
          threads_data[tid].td.td_deque_last_stolen = victim_tid;
        }
      }
      if (task == NULL)
        break;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
        if (itt_sync_obj == NULL) { // at the fork barrier the object may not
          // be available yet
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        }
        __kmp_itt_task_starting(itt_sync_obj);
      }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      __kmp_invoke_task(gtid, task, current_task);
#if USE_ITT_BUILD
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
#endif /* USE_ITT_BUILD */

      if (flag == NULL || (!final_spin && flag->done_check())) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
      if (thread->th.th_task_team == NULL) {
        break;
      }
      // If a stolen task spawned tasks onto our own deque, go back to it.
      if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
                      "other tasks, restart\n",
                      gtid));
        use_own_tasks = 1;
        new_victim = 0;
      }
    }

    // Task sources exhausted. In the final spin of a barrier, decrement the
    // unfinished-thread count once; that may satisfy the termination condition.
    if (final_spin) {
      if (!*thread_finished) {
        kmp_int32 count = KMP_ATOMIC_DEC(unfinished_threads) - 1;
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
                      "unfinished_threads to %d task_team=%p\n",
                      gtid, count, task_team));
        *thread_finished = TRUE;
      }

      if (flag != NULL && flag->done_check()) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
    }

    // If this thread's task team is NULL, the primary thread has recognized
    // that there are no more tasks; we are done.
    if (thread->th.th_task_team == NULL) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
      return FALSE;
    }

    if (flag == NULL || (!final_spin && flag->done_check())) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
                gtid));
      return TRUE;
    }

    // We could be getting tasks from target constructs; if this is the only
    // thread, keep trying to execute tasks from our own queue.
    if (nthreads == 1 &&
        KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks))
      use_own_tasks = 1;
    else {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
      return FALSE;
    }
  }
template <bool C, bool S>
int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
                           kmp_flag_32<C, S> *flag, int final_spin,
                           int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                           kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

template <bool C, bool S>
int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
                           kmp_flag_64<C, S> *flag, int final_spin,
                           int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                           kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

template <bool C, bool S>
static int __kmp_atomic_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
    int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  int nthreads, i, is_init_thread;

  KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));

  if (!is_init_thread) {
    // Some other thread already set up the threads array.
    KA_TRACE(
        20,
        ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
         __kmp_gtid_from_thread(this_thr)));
    return;
  }

    for (i = 0; i < nthreads; i++) {
      void *sleep_loc;
      kmp_info_t *thread = threads_data[i].td.td_thr;

      if (i == this_thr->th.th_info.ds.ds_tid) {
        continue;
      }
      // Since we haven't locked the thread's suspend mutex, there is a small
      // window where the thread may be sleeping without th_sleep_loc set yet.
      if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
          NULL) {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
        __kmp_null_resume_wrapper(thread);
      } else {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
      }
    }

  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));
3781 (
"__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
3795 if (thread_data->td.td_deque != NULL) {
3797 TCW_4(thread_data->td.td_deque_ntasks, 0);
3799 thread_data->td.td_deque = NULL;
3803#ifdef BUILD_TIED_TASK_STACK
3805 if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
  int is_init_thread = FALSE;

      is_init_thread = TRUE;
      if (maxthreads < nthreads) {

        if (*threads_data_p != NULL) {
          kmp_thread_data_t *old_data = *threads_data_p;
          kmp_thread_data_t *new_data = NULL;

          KE_TRACE(
              10,
              ("__kmp_realloc_task_threads_data: T#%d reallocating "
               "threads data for task_team %p, new_size = %d, old_size = %d\n",
               __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
          // Reallocate threads_data to have more elements than the current
          // array. Cannot use __kmp_thread_realloc() because threads may not
          // be around for kmp_reap_task_team().
          new_data = (kmp_thread_data_t *)__kmp_allocate(
              nthreads * sizeof(kmp_thread_data_t));
          KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                       (void *)old_data,
                       maxthreads * sizeof(kmp_thread_data_t));

#ifdef BUILD_TIED_TASK_STACK
          // GEH: Figure out if this is the right thing to do
          for (i = maxthreads; i < nthreads; i++) {
            kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
            __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
          }
#endif // BUILD_TIED_TASK_STACK
          // Install the new data and free the old data.
          (*threads_data_p) = new_data;
          __kmp_free(old_data);
        } else {
          KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
                        "threads data for task_team %p, size = %d\n",
                        __kmp_gtid_from_thread(thread), task_team, nthreads));
          *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
              nthreads * sizeof(kmp_thread_data_t));
#ifdef BUILD_TIED_TASK_STACK
          // GEH: Figure out if this is the right thing to do
          for (i = 0; i < nthreads; i++) {
            kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
            __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
          }
#endif // BUILD_TIED_TASK_STACK
        }
      }

      // Initialize threads_data pointers back to thread_info structures.
      for (i = 0; i < nthreads; i++) {
        kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
        thread_data->td.td_thr = team->t.t_threads[i];

        if (thread_data->td.td_deque_last_stolen >= nthreads) {
          // The last-stolen field survives across teams and barriers, and the
          // number of threads may have changed; reset it.
          thread_data->td.td_deque_last_stolen = -1;
        }
      }

  return is_init_thread;
    while (list != NULL) {
      kmp_task_pri_t *next = list->next;
      __kmp_free_task_deque(&list->td);
      __kmp_free(list);
      list = next;
    }

  int team_nth = team->t.t_nproc;
  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), team));

  if (task_team == NULL) {
    KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
                  "task team for team %p\n",
                  __kmp_gtid_from_thread(thread), team));
    // Allocate a new task team; it will be first in the pool.
    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
    // Suppress race-condition detection on synchronization flags in debug
    // mode; this helps analyze library internals without false positives.
    __itt_suppress_mark_range(
        __itt_suppress_range, __itt_suppress_threading_errors,
        &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
    __itt_suppress_mark_range(__itt_suppress_range,
                              __itt_suppress_threading_errors,
                              CCAST(kmp_uint32 *, &task_team->tt.tt_active),
                              sizeof(task_team->tt.tt_active));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
  }

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
                "unfinished_threads init'd to %d\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
                KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
  KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
                thread ? __kmp_gtid_from_thread(thread) : -1, task_team));

  // Push: detach the current task team and save it on the thread's list.
  thread->th.th_task_team = current->task_team = NULL;
  current->next = node;

  // Pop: restore the most recently saved task team.
  thread->th.th_task_team = current->task_team;
       thread = thread->th.th_next_pool) {
    volatile void *sleep_loc;

    if (TCR_PTR(thread->th.th_task_team) == NULL) {
      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                    __kmp_gtid_from_thread(thread)));
      continue;
    }
#if KMP_OS_WINDOWS
    // TODO: GEH - add this check for Linux* OS / OS X* as well?
    if (!__kmp_is_thread_alive(thread, &exit_val)) {
      thread->th.th_task_team = NULL;
      continue;
    }
#endif

    KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
                  "unreference task_team\n",
                  __kmp_gtid_from_thread(thread)));

      // If the thread is sleeping, awaken it.
      if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
          NULL) {
        KA_TRACE(
            10,
            ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
             __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
        __kmp_null_resume_wrapper(thread);
      }
  }
  if (team == this_thr->th.th_serial_team ||
      team == this_thr->th.th_root->r.r_root_team) {
    if (team->t.t_task_team[0] == NULL) {
      team->t.t_task_team[0] = __kmp_allocate_task_team(this_thr, team);
      KA_TRACE(
          20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
               " for serial/root team %p\n",
               __kmp_gtid_from_thread(this_thr), team->t.t_task_team[0], team));
    }
    return;
  }

  // If this task team hasn't been created yet, allocate it.
  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL) {
    team->t.t_task_team[this_thr->th.th_task_state] =
        __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
                  " for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
                  this_thr->th.th_task_state));
  }

  // After threads exit the release phase, they will call sync and then point
  // to the other task_team; make sure it is allocated and initialized too.
  int other_team = 1 - this_thr->th.th_task_state;
  if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
    team->t.t_task_team[other_team] = __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created second new "
                  "task_team %p for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[other_team], team->t.t_id, other_team));
  } else { // leave the old task team in place, reset it for the next region
    KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d reset next task_team "
                  "%p for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[other_team], team->t.t_id, other_team));
  }

  // For regular threads, task enabling is done when a task is first pushed;
  // hidden helper threads need their deques ahead of time to avoid races.
  for (int i = 0; i < 2; ++i) {
    if (thread_data->td.td_deque == NULL) {
      __kmp_alloc_task_deque(__kmp_hidden_helper_threads[0], thread_data);
    }
  }
}

// __kmp_task_team_sync: propagate task team data from the team struct to this
// thread, just after the release phase of a team barrier.
void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
  // Toggle th_task_state to switch which task_team this thread refers to.
  this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);

  // It is now safe to propagate the task team pointer.
  TCW_PTR(this_thr->th.th_task_team,
          team->t.t_task_team[this_thr->th.th_task_state]);
  KA_TRACE(
      20,
      ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
       "%p from Team #%d (parity=%d)\n",
       __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
       team->t.t_id, this_thr->th.th_task_state));
}
  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

    KA_TRACE(20, ("__kmp_task_team_wait: Primary T#%d waiting for all tasks "
                  "(for unfinished_threads to reach 0) on task_team = %p\n",
                  __kmp_gtid_from_thread(this_thr), task_team));
    // Worker threads may have dropped through to the release phase but could
    // still be executing tasks; wait here for them to complete.
    kmp_flag_32<false, false> flag(
        RCAST(std::atomic<kmp_uint32> *,
              &task_team->tt.tt_unfinished_threads),
        0U);

    KA_TRACE(
        20,
        ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
         "setting active to false, setting local and team's pointer to NULL\n",
         __kmp_gtid_from_thread(this_thr), task_team));
    TCW_PTR(this_thr->th.th_task_team, NULL);

  std::atomic<kmp_uint32> *spin = RCAST(
      std::atomic<kmp_uint32> *,
      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
                taskdata, tid));

  if (thread_data->td.td_deque == NULL) {
    // There is no queue in this thread; go find another one. We are
    // guaranteed that at least one thread has a queue.
    KA_TRACE(
        30,
        ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
         tid, taskdata));
    return result;
  }

  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(
        30,
        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
         taskdata, tid));

    // If this deque is bigger than the pass ratio, give another thread a
    // chance before expanding.
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      // Expand the deque to push a task that is not allowed to execute here.
      __kmp_realloc_task_deque(thread, thread_data);
    }
  } else {
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
                    "thread %d.\n",
                    taskdata, tid));
      goto release_and_exit;
    }
  }

  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1);

  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
                taskdata, tid));
#define PROXY_TASK_FLAG 0x40000000
  KA_TRACE(
      10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
           gtid, taskdata));
  KA_TRACE(
      10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
           gtid, taskdata));

  // Hand the task to the team's threads in round-robin fashion.
      thread = team->t.t_threads[k];
      k = (k + 1) % nthreads;

    // Wake any sleeping threads so the given task gets executed.
    for (int i = 0; i < nthreads; ++i) {
      thread = team->t.t_threads[i];
      if (thread->th.th_sleep_loc != NULL) {
        __kmp_null_resume_wrapper(thread);
      }
    }

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
       taskdata));
  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
       taskdata));

  bool detached = false;

#if OMPT_SUPPORT
      // The event is fulfilled before detach, so no late finish is needed.
      __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
#endif

#if OMPT_SUPPORT
      __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
#endif

      // If the task detached, complete the proxy task from the right context.
      if (thread->th.th_team == team) {
        __kmpc_proxy_task_completed(gtid, ptask);
      } else {
        __kmpc_proxy_task_completed_ooo(ptask);
      }
                                 ,
                                 int taskloop_recur
) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  size_t shareds_offset;
  size_t task_size;

  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
                task_src));

  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
                task_size));
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
  KMP_MEMCPY(taskdata, taskdata_src, task_size);

#if OMPX_TASKGRAPH
  if (!taskdata->is_taskgraph || taskloop_recur)
    taskdata->td_task_id = KMP_GEN_TASK_ID();
  else if (taskdata->is_taskgraph &&
           __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
    taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
#endif

  // Set up the shareds pointer, which the memcpy left pointing at the source.
  shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
  task->shareds = &((char *)taskdata)[shareds_offset];

  KA_TRACE(
      20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
           thread, taskdata, taskdata->td_parent));

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
#endif
  size_t lower_offset;
  size_t upper_offset;

  kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
        lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}

  // get_lb, get_ub, set_lb, and set_ub each carry the same GOMP-compat guard,
  // because GOMP tasks may store loop bounds as 4-byte values:
#if defined(KMP_GOMP_COMPAT)
      if (taskdata->td_size_loop_bounds == 4) {
#if defined(KMP_GOMP_COMPAT)
      if (taskdata->td_size_loop_bounds == 4) {
#if defined(KMP_GOMP_COMPAT)
      if (taskdata->td_size_loop_bounds == 4) {
#if defined(KMP_GOMP_COMPAT)
      if (taskdata->td_size_loop_bounds == 4) {
  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                       (last_chunk < 0 ? last_chunk : extras));

  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
                "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
                gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
                ub_glob, st, task_dup));

  // Launch num_tasks tasks, assigning grainsize iterations to each task.
  for (i = 0; i < num_tasks; ++i) {
    kmp_uint64 chunk_minus_1;
    if (extras == 0) {
      chunk_minus_1 = grainsize - 1;
    } else {
      chunk_minus_1 = grainsize;
      --extras; // first extras iterations get a bigger chunk (grainsize+1)
    }
    upper = lower + st * chunk_minus_1;
    if (i == num_tasks - 1) {
      // Schedule the last task, setting the lastprivate flag if needed.
      if (st == 1) { // most common case
        if (upper == ub_glob)
          lastpriv = 1;
      } else if (st > 0) { // positive loop stride
        if ((kmp_uint64)st > ub_glob - upper)
          lastpriv = 1;
      }
    }

    next_task_bounds.set_lb(lower);
    if (next_taskdata->td_flags.native) {
      next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
    } else {
      next_task_bounds.set_ub(upper);
    }
    if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates, etc.
      ptask_dup(next_task, task, lastpriv);
    KA_TRACE(40,
             ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
              "upper %lld stride %lld, (offsets %p %p)\n",
              gtid, i, next_task, lower, upper, st,
              next_task_bounds.get_lower_offset(),
              next_task_bounds.get_upper_offset()));
#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
                              lower, upper, st);
    }
#endif
  }

  // Free the pattern task and exit.
  __kmp_task_finish<false>(gtid, task, current_task);
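// Worked example of the chunking arithmetic used above: with tc = 10
// iterations and num_tasks = 3, grainsize = tc / num_tasks = 3 and
// extras = tc % num_tasks = 1, so the first task runs grainsize + 1 = 4
// iterations and the remaining two run 3 each (4 + 3 + 3 == 10). The loop
// above realizes this by handing out the larger chunks first, decrementing
// extras as it goes.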
  void *task_dup = p->task_dup;
  void *codeptr_ra = p->codeptr_ra;

  KA_TRACE(20,
           ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
            st, task_dup));

  if (num_tasks > num_t_min)
    __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, last_chunk, tc, num_t_min,
                         codeptr_ra, task_dup);
  else
    __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
                          codeptr_ra, task_dup);

  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
  KA_TRACE(20,
           ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
            st, task_dup));

  // Split the loop into two halves.
  kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
  kmp_uint64 gr_size0 = grainsize;
  kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
  kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task

  size_t lower_offset = (char *)lb - (char *)task;
  size_t upper_offset = (char *)ub - (char *)task;

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                       (last_chunk < 0 ? last_chunk : extras));

  kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
  if (last_chunk < 0) {
    ext0 = ext1 = 0;
    last_chunk1 = last_chunk;
    tc0 = grainsize * n_tsk0;
    tc1 = tc - tc0;
  } else if (n_tsk0 <= extras) {
    gr_size0++; // integrate extras into grainsize
    ext0 = 0; // no extra iterations in the 1st half
    ext1 = extras - n_tsk0; // remaining extras
    tc0 = gr_size0 * n_tsk0;
    tc1 = tc - tc0;
  } else { // n_tsk0 > extras
    ext1 = 0; // no extra iterations in the 2nd half
    ext0 = extras;
    tc1 = grainsize * n_tsk1;
    tc0 = tc - tc1;
  }
  ub0 = lower + st * (tc0 - 1);
  lb1 = ub0 + st;

  // Create the pattern task for the 2nd half of the loop: adjust its lower
  // bound (the upper bound is unchanged).
  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
  if (ptask_dup != NULL) // construct firstprivates, etc.
    ptask_dup(next_task, task, 0);

  // Create an auxiliary task for the 2nd half of the loop.
  thread->th.th_current_task = taskdata->td_parent;
  // Restore the current task.
  thread->th.th_current_task = current_task;
  p->task = next_task;
  p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
  p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
  p->task_dup = task_dup;
  p->ub_glob = ub_glob;
  p->num_tasks = n_tsk1;
  p->grainsize = grainsize;
  p->last_chunk = last_chunk1;
  p->num_t_min = num_t_min;
  p->codeptr_ra = codeptr_ra;

#if OMPX_TASKGRAPH
  new_task_data->tdg = taskdata->tdg;
  new_task_data->is_taskgraph = 0;
#endif

  // Execute the 1st half of the current subrange.
  if (n_tsk0 > num_t_min)
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
                         ext0, last_chunk0, tc0, num_t_min,
                         codeptr_ra, task_dup);
  else
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
                          gr_size0, ext0, last_chunk0, tc0,
                          codeptr_ra, task_dup);

  KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
                           int modifier, void *task_dup) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d, %d), dup %p\n",
                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
                task_dup));

  // Compute the trip count.
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // Free the pattern task and exit.
    __kmp_task_finish<false>(gtid, task, current_task);
    return;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_work)) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif

  if (num_tasks_min == 0)
    // TODO: can we choose a better default heuristic?
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);

  // Compute num_tasks/grainsize based on the input provided.
  switch (sched) {
  case 0: // no schedule clause specified; choose the default
    // Try to schedule (team_size * 10) tasks.
    grainsize = thread->th.th_team_nproc * static_cast<kmp_uint64>(10);
    KMP_FALLTHROUGH();
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1;
      grainsize = tc; // too big grainsize requested, adjust values
      extras = 0;
    } else {
      if (modifier) {
        num_tasks = (tc + grainsize - 1) / grainsize;
        last_chunk = tc - (num_tasks * grainsize);
        extras = 0;
      } else {
        num_tasks = tc / grainsize;
        // Adjust grainsize for a balanced distribution of iterations.
        grainsize = tc / num_tasks;
        extras = tc % num_tasks;
      }
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                       (last_chunk < 0 ? last_chunk : extras));

  if (if_val == 0) { // if(0) specified, mark the task as serial
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
                          OMPT_GET_RETURN_ADDRESS(0), task_dup);
  } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, last_chunk, tc, num_tasks_min,
                         OMPT_GET_RETURN_ADDRESS(0), task_dup);
  } else {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
                          OMPT_GET_RETURN_ADDRESS(0), task_dup);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_work)) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
  KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 0, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}

void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                       kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                       int nogroup, int sched, kmp_uint64 grainsize,
                       int modifier, void *task_dup) {
  KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 modifier, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
}
static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
  kmp_tdg_info_t *res = nullptr;
  if (__kmp_max_tdgs == 0)
    return res;

  if (__kmp_global_tdgs == NULL)
    __kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate(
        sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs);

  if ((__kmp_global_tdgs[tdg_id]) &&
      (__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE))
    res = __kmp_global_tdgs[tdg_id];
  return res;
}
// __kmp_print_tdg_dot: print the TDG to a dot file.
void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
  kmp_int32 tdg_id = tdg->tdg_id;
  KA_TRACE(10, ("__kmp_print_tdg_dot(enter): T#%d tdg_id=%d \n", gtid, tdg_id));

  char file_name[20];
  sprintf(file_name, "tdg_%d.dot", tdg_id);
  kmp_safe_raii_file_t tdg_file(file_name, "w");

  kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
  fprintf(tdg_file,
          "digraph TDG {\n"
          "   compound=true\n"
          "   subgraph cluster {\n"
          "      label=TDG_%d\n",
          tdg_id);
  for (kmp_int32 i = 0; i < num_tasks; i++) {
    fprintf(tdg_file, "      %d[style=bold]\n", i);
  }
  fprintf(tdg_file, "   }\n");
  for (kmp_int32 i = 0; i < num_tasks; i++) {
    kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
    kmp_int32 *successors = tdg->record_map[i].successors;
    if (nsuccessors > 0) {
      for (kmp_int32 j = 0; j < nsuccessors; j++)
        fprintf(tdg_file, "   %d -> %d \n", i, successors[j]);
    }
  }
  fprintf(tdg_file, "}");
  KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n", gtid, tdg_id));
}
// __kmp_exec_tdg: launch the recorded TDG by re-parenting its tasks and
// spawning the root tasks.
void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
  KA_TRACE(10, ("__kmp_exec_tdg(enter): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));
  kmp_node_info_t *this_record_map = tdg->record_map;
  kmp_int32 *this_root_tasks = tdg->root_tasks;
  kmp_int32 this_num_roots = tdg->num_roots;
  kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);

  if (tdg->rec_taskred_data) {
    __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
  }

  for (kmp_int32 j = 0; j < this_num_tasks; j++) {
    this_record_map[j].parent_task = parent_task;

    kmp_taskgroup_t *parent_taskgroup =
        this_record_map[j].parent_task->td_taskgroup;

    KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter,
                      this_record_map[j].npredecessors);

    if (parent_taskgroup) {
      KMP_ATOMIC_INC(&parent_taskgroup->count);
    }
  }

  for (kmp_int32 j = 0; j < this_num_roots; ++j) {
    __kmp_omp_task(gtid, this_record_map[this_root_tasks[j]].task, true);
  }
  KA_TRACE(10, ("__kmp_exec_tdg(exit): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));
}
static inline void __kmp_start_record(kmp_int32 gtid,
                                      kmp_taskgraph_flags_t *flags,
                                      kmp_int32 tdg_id) {
  kmp_tdg_info_t *tdg =
      (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
  __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
  // Initialize the TDG structure.
  tdg->tdg_id = tdg_id;
  tdg->map_size = INIT_MAPSIZE;
  tdg->num_roots = -1;
  tdg->root_tasks = nullptr;
  tdg->tdg_status = KMP_TDG_RECORDING;
  tdg->rec_num_taskred = 0;
  tdg->rec_taskred_data = nullptr;

  // Initialize the list of nodes in this TDG.
  kmp_node_info_t *this_record_map =
      (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
  for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
    kmp_int32 *successorsList =
        (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
    this_record_map[i].task = nullptr;
    this_record_map[i].successors = successorsList;
    this_record_map[i].nsuccessors = 0;
    this_record_map[i].npredecessors = 0;
    this_record_map[i].successors_size = __kmp_successors_size;
  }

  __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
}
  kmp_taskgraph_flags_t *flags = (kmp_taskgraph_flags_t *)&input_flags;
  KA_TRACE(10,
           ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d tdg_id=%d\n",
            gtid, loc_ref, input_flags, tdg_id));

  if (__kmp_max_tdgs == 0) {
    KA_TRACE(
        10,
        ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d tdg_id = %d, "
         "__kmp_max_tdgs = 0\n",
         gtid, loc_ref, input_flags, tdg_id));
    return 1;
  }

  if (kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id)) {
    // The TDG was already recorded: replay it instead.
    __kmp_exec_tdg(gtid, tdg);
    res = 0;
  } else {
    __kmp_curr_tdg_idx = tdg_id;
    __kmp_start_record(gtid, flags, tdg_id);
    res = 1;
  }
  KA_TRACE(10, ("__kmpc_start_record_task(exit): T#%d TDG %d starts to %s\n",
                gtid, tdg_id, res ? "record" : "execute"));
void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
  // Store the roots.
  kmp_node_info_t *this_record_map = tdg->record_map;
  kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
  kmp_int32 this_map_size = tdg->map_size;
  kmp_int32 this_num_roots = 0;
  kmp_info_t *thread = __kmp_threads[gtid];

  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
    if (this_record_map[i].npredecessors == 0) {
      this_root_tasks[this_num_roots++] = i;
    }
  }

  // Update the TDG with roots info and map size.
  tdg->map_size = this_map_size;
  tdg->num_roots = this_num_roots;
  tdg->root_tasks = this_root_tasks;
  tdg->tdg_status = KMP_TDG_READY;

  if (thread->th.th_current_task->td_dephash) {
    __kmp_dephash_free(thread, thread->th.th_current_task->td_dephash);
    thread->th.th_current_task->td_dephash = NULL;
  }

  // Reset the predecessor counters for replay.
  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
    KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
                      this_record_map[i].npredecessors);
  }

  if (__kmp_tdg_dot)
    __kmp_print_tdg_dot(tdg, gtid);
}
  kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id);

  KA_TRACE(10, ("__kmpc_end_record_task(enter): T#%d loc=%p finishes recording"
                " tdg=%d with flags=%d\n",
                gtid, loc_ref, tdg_id, input_flags));
  if (__kmp_max_tdgs) {
    if (__kmp_tdg_is_recording(tdg->tdg_status))
      __kmp_end_record(gtid, tdg);
  }
  KA_TRACE(10, ("__kmpc_end_record_task(exit): T#%d loc=%p finished recording"
                " tdg=%d, its status is now READY\n",
                gtid, loc_ref, tdg_id));
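// A hedged usage sketch of the record-and-replay pair above: the first call
// with a fresh tdg_id records the task graph, and subsequent calls replay the
// stored graph instead of re-running the recording body.
//
//   void run_graph(ident_t *loc, kmp_int32 gtid, kmp_int32 tdg_id) {
//     kmp_int32 recording =
//         __kmpc_start_record_task(loc, gtid, /*input_flags=*/0, tdg_id);
//     if (recording) {
//       /* ... spawn the tasks that make up the graph ... */
//     } // on replay (recording == 0) the stored graph was already executed
//     __kmpc_end_record_task(loc, gtid, /*input_flags=*/0, tdg_id);
//   }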
void * target(void *task)
int task_entry(kmp_int32 gtid, kmp_task_t *task)
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
This class safely opens and closes a C-style FILE* object using RAII semantics.
kmp_uint64 get_ub() const
void set_ub(kmp_uint64 ub)
size_t get_lower_offset() const
void set_lb(kmp_uint64 lb)
size_t get_upper_offset() const
kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
kmp_uint64 get_lb() const
kmp_int32(*)(kmp_int32, void *) kmp_routine_entry_t
struct kmp_taskred_data kmp_taskred_data_t
Internal struct for reduction data item related info saved by the library.
struct kmp_task_red_input kmp_task_red_input_t
Internal struct for reduction data item related info set up by compiler.
struct kmp_taskred_flags kmp_taskred_flags_t
Flags for special info per task reduction item.
struct kmp_taskred_input kmp_taskred_input_t
Internal struct for reduction data item related info set up by compiler.
void * __kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data)
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup)
void * __kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws, int num, void *data)
void * __kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num, void *data)
bool __kmpc_omp_has_task_team(kmp_int32 gtid)
void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask)
void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws)
kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins, kmp_task_affinity_info_t *affin_list)
void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, int modifier, void *task_dup)
void * __kmpc_task_reduction_init(int gtid, int num, void *data)
void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask)
void * __kmpc_taskred_init(int gtid, int num, void *data)
void ** __kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid)
kmp_info_t * __kmp_hidden_helper_main_thread
kmp_global_t __kmp_global
void __kmp_hidden_helper_worker_thread_signal()
#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time)
#define KMP_MAX_BLOCKTIME
#define INITIAL_TASK_DEQUE_SIZE
#define KMP_TASKDATA_TO_TASK(taskdata)
#define KMP_NOT_SAFE_TO_REAP
#define TASK_DEQUE_MASK(td)
unsigned short __kmp_get_random(kmp_info_t *thread)
kmp_tasking_mode_t __kmp_tasking_mode
void __kmp_abort_thread(void)
volatile kmp_info_t * __kmp_thread_pool
#define KMP_GEN_TASK_ID()
int __kmp_omp_cancellation
#define TASK_DEQUE_SIZE(td)
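INITIAL_TASK_DEQUE_SIZE, TASK_DEQUE_SIZE, and TASK_DEQUE_MASK together encode the usual power-of-two ring buffer: because the capacity is a power of two, the mask is size - 1, and a bitwise AND replaces the modulo when wrapping head and tail indices. A sketch of that indexing scheme under those assumptions (struct and field names are illustrative):

#include <cstdint>

// Illustrative power-of-two ring buffer in the TASK_DEQUE_MASK style.
struct ring_deque_t {
  void **slots;
  uint32_t size;   // capacity, always a power of two (e.g. 1u << 8)
  uint32_t head;   // index of the oldest entry
  uint32_t tail;   // index one past the newest entry
  uint32_t ntasks; // current number of entries
};

// mask = size - 1, so (i + 1) & mask wraps without a division
static inline uint32_t deque_mask(const ring_deque_t &d) { return d.size - 1; }

static bool deque_push(ring_deque_t &d, void *task) {
  if (d.ntasks == d.size)
    return false; // full; a real runtime would grow the deque here
  d.slots[d.tail] = task;
  d.tail = (d.tail + 1) & deque_mask(d);
  ++d.ntasks;
  return true;
}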
#define KMP_GTID_TO_SHADOW_GTID(gtid)
#define __kmp_get_thread()
#define TASK_CURRENT_NOT_QUEUED
static int __kmp_tid_from_gtid(int gtid)
volatile int __kmp_init_hidden_helper
KMP_EVENT_UNINITIALIZED
KMP_EVENT_ALLOW_COMPLETION
volatile int __kmp_init_middle
#define KMP_CHECK_UPDATE(a, b)
#define KMP_TASK_TO_TASKDATA(task)
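KMP_TASK_TO_TASKDATA and its inverse KMP_TASKDATA_TO_TASK rely on a single allocation in which the internal kmp_taskdata_t bookkeeping block sits immediately before the user-visible kmp_task_t, so each conversion is one pointer increment or decrement. A simplified sketch of that layout trick (stand-in types, not the runtime's):

// The two views live back to back in one allocation, so converting
// between them is pure pointer arithmetic.
struct taskdata_t { int internal_bookkeeping; }; // stand-in
struct task_t { void *shareds; };                // stand-in

static inline taskdata_t *task_to_taskdata(task_t *task) {
  return ((taskdata_t *)task) - 1; // bookkeeping precedes the task
}
static inline task_t *taskdata_to_task(taskdata_t *td) {
  return (task_t *)(td + 1); // task payload follows the bookkeeping
}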
union KMP_ALIGN_CACHE kmp_thread_data kmp_thread_data_t
#define TASK_SUCCESSFULLY_PUSHED
#define __kmp_thread_malloc(th, size)
void __kmp_middle_initialize(void)
static void copy_icvs(kmp_internal_control_t *dst, kmp_internal_control_t *src)
#define KMP_TASKING_ENABLED(task_team)
kmp_info_t ** __kmp_threads
#define KMP_HIDDEN_HELPER_THREAD(gtid)
int __kmp_enable_task_throttling
int __kmp_task_stealing_constraint
#define KMP_INIT_YIELD(count)
#define KMP_INIT_BACKOFF(time)
volatile int __kmp_init_parallel
kmp_int32 __kmp_enable_hidden_helper
#define __kmp_allocate(size)
enum library_type __kmp_library
kmp_uint64 __kmp_taskloop_min_tasks
std::atomic< kmp_int32 > __kmp_unexecuted_hidden_helper_tasks
kmp_info_t ** __kmp_hidden_helper_threads
#define __kmp_thread_calloc(th, nelem, elsize)
bool __kmp_wpolicy_passive
void __kmp_hidden_helper_initialize()
kmp_int32 __kmp_max_task_priority
static void __kmp_assert_valid_gtid(kmp_int32 gtid)
static kmp_info_t * __kmp_thread_from_gtid(int gtid)
static int __kmp_gtid_from_thread(const kmp_info_t *thr)
struct kmp_taskdata kmp_taskdata_t
union KMP_ALIGN_CACHE kmp_info kmp_info_t
#define __kmp_thread_free(th, ptr)
#define KMP_DEBUG_USE_VAR(x)
#define KMP_BUILD_ASSERT(expr)
#define KMP_DEBUG_ASSERT(cond)
#define KMP_ASSERT2(cond, msg)
unsigned long long kmp_uint64
static volatile kmp_i18n_cat_status_t status
#define KMP_FSYNC_RELEASING(obj)
#define KMP_FSYNC_ACQUIRED(obj)
#define KMP_FSYNC_SPIN_ACQUIRED(obj)
#define KMP_FSYNC_CANCEL(obj)
#define KMP_FSYNC_SPIN_PREPARE(obj)
#define USE_ITT_BUILD_ARG(x)
#define KMP_FSYNC_SPIN_INIT(obj, spin)
int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid)
void __kmp_init_tas_lock(kmp_tas_lock_t *lck)
int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid)
static void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck)
static int __kmp_test_lock(kmp_lock_t *lck, kmp_int32 gtid)
static int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck)
static void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid)
static void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck)
#define KMP_BOOTSTRAP_LOCK_INITIALIZER(lock)
#define KMP_ATOMIC_AND(p, v)
#define KMP_ATOMIC_ST_REL(p, v)
bool __kmp_atomic_compare_store(std::atomic< T > *p, T expected, T desired)
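By its signature, __kmp_atomic_compare_store is a convenience wrapper over std::atomic compare-and-swap; a sketch of what such a wrapper looks like (the memory orders here are an assumption, not confirmed by this listing):

#include <atomic>

// Returns true iff *p still held `expected` and was atomically replaced
// by `desired`. compare_exchange_strong takes `expected` by reference;
// taking it by value here hides the write-back of the observed value.
template <typename T>
bool atomic_compare_store(std::atomic<T> *p, T expected, T desired) {
  return p->compare_exchange_strong(expected, desired,
                                    std::memory_order_acq_rel,
                                    std::memory_order_relaxed);
}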
#define KMP_ATOMIC_LD_ACQ(p)
#define KMP_ATOMIC_ST_RLX(p, v)
#define KMP_FALLTHROUGH()
#define KMP_ATOMIC_DEC(p)
#define KMP_ATOMIC_LD_RLX(p)
#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv)
unsigned long kmp_uintptr_t
#define KMP_ATOMIC_OR(p, v)
#define KMP_ATOMIC_INC(p)
#define KMP_MEMCPY_S(dst, bsz, src, cnt)
Functions for collecting statistics.
#define KMP_PUSH_PARTITIONED_TIMER(name)
#define KMP_GET_THREAD_STATE()
#define KMP_POP_PARTITIONED_TIMER()
#define KMP_SET_THREAD_STATE_BLOCK(state_name)
#define KMP_TIME_PARTITIONED_BLOCK(name)
#define KMP_COUNT_BLOCK(n)
static void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h)
static void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h)
static void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task)
void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team)
template int __kmp_atomic_execute_tasks_64< true, false >(kmp_info_t *, kmp_int32, kmp_atomic_flag_64< true, false > *, int, int *USE_ITT_BUILD_ARG(void *), kmp_int32)
void __kmp_call_init(kmp_taskred_data_t &item, size_t j)
void __kmp_call_init< kmp_task_red_input_t >(kmp_taskred_data_t &item, size_t offset)
void(* p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32)
void * __kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws, int num, T *data)
static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg)
static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task)
kmp_task_t * __kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t task_entry)
void __kmp_reap_task_teams(void)
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part)
kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task)
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *current_task)
void __kmp_pop_task_team_node(kmp_info_t *thread, kmp_team_t *team)
int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64< C, S > *flag, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
void __kmp_wait_to_unref_task_teams(void)
static kmp_task_team_t * __kmp_allocate_task_team(kmp_info_t *thread, kmp_team_t *team)
void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team)
void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data, kmp_taskgroup_t *tg, void *reduce_data)
void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task)
static void __kmp_realloc_task_deque(kmp_info_t *thread, kmp_thread_data_t *thread_data)
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask)
void __kmpc_end_taskgroup(ident_t *loc, int gtid)
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task)
static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata)
static int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
kmp_bootstrap_lock_t __kmp_task_team_lock
static void __kmp_free_task_and_ancestors(kmp_int32 gtid, kmp_taskdata_t *taskdata, kmp_info_t *thread)
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task)
static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *current_task)
static kmp_thread_data_t * __kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri)
template int __kmp_execute_tasks_64< false, true >(kmp_info_t *, kmp_int32, kmp_flag_64< false, true > *, int, int *USE_ITT_BUILD_ARG(void *), kmp_int32)
void __kmp_finish_implicit_task(kmp_info_t *thread)
template int __kmp_execute_tasks_32< false, false >(kmp_info_t *, kmp_int32, kmp_flag_32< false, false > *, int, int *USE_ITT_BUILD_ARG(void *), kmp_int32)
struct __taskloop_params __taskloop_params_t
kmp_event_t * __kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid, kmp_task_t *task)
static void __kmp_free_task_threads_data(kmp_task_team_t *task_team)
void __kmp_assign_orig(kmp_taskred_data_t &item, T &src)
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, void *frame_address, void *return_address)
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg)
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata, kmp_info_t *thread)
template int __kmp_execute_tasks_64< true, false >(kmp_info_t *, kmp_int32, kmp_flag_64< true, false > *, int, int *USE_ITT_BUILD_ARG(void *), kmp_int32)
int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64< C, S > *flag, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait)
kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task)
static kmp_task_pri_t * __kmp_alloc_task_pri_list()
static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, int modifier, void *task_dup)
kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, void *codeptr_ra)
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task)
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task)
static kmp_task_t * __kmp_steal_task(kmp_int32 victim_tid, kmp_int32 gtid, kmp_task_team_t *task_team, std::atomic< kmp_int32 > *unfinished_threads, int *thread_finished, kmp_int32 is_constrained)
void * __kmp_task_reduction_init(int gtid, int num, T *data)
static void __kmp_enable_tasking(kmp_task_team_t *task_team, kmp_info_t *this_thr)
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task, kmp_int32 pass)
int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32< C, S > *flag, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
static kmp_task_t * __kmp_get_priority_task(kmp_int32 gtid, kmp_task_team_t *task_team, kmp_int32 is_constrained)
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid)
void __kmp_free_implicit_task(kmp_info_t *thread)
int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
static size_t __kmp_round_up_to_val(size_t size, size_t val)
kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t task_entry, kmp_int64 device_id)
void __kmp_fulfill_event(kmp_event_t *event)
void __kmp_assign_orig< kmp_taskred_input_t >(kmp_taskred_data_t &item, kmp_taskred_input_t &src)
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task, void *frame_address, void *return_address)
void __kmp_assign_orig< kmp_task_red_input_t >(kmp_taskred_data_t &item, kmp_task_red_input_t &src)
static int __kmp_realloc_task_threads_data(kmp_info_t *thread, kmp_task_team_t *task_team)
template int __kmp_atomic_execute_tasks_64< false, true >(kmp_info_t *, kmp_int32, kmp_atomic_flag_64< false, true > *, int, int *USE_ITT_BUILD_ARG(void *), kmp_int32)
static kmp_task_team_t * __kmp_free_task_teams
kmp_task_t * __kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src)
kmp_task_t * __kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t task_entry)
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid)
int __kmp_taskloop_task(int gtid, void *ptask)
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team, int tid)
void __kmp_push_task_team_node(kmp_info_t *thread, kmp_team_t *team)
static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata)
static void __kmp_free_task_pri_list(kmp_task_team_t *task_team)
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained, const kmp_taskdata_t *tasknew, const kmp_taskdata_t *taskcurr)
static kmp_task_t * __kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid, kmp_task_team_t *task_team, kmp_int32 is_constrained)
void __kmpc_taskgroup(ident_t *loc, int gtid)
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, bool serialize_immediate)
static void __kmp_task_team_init(kmp_task_team_t *task_team, kmp_team_t *team)
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr)
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team)
static void __kmp_alloc_task_deque(kmp_info_t *thread, kmp_thread_data_t *thread_data)
static void __kmp_free_task_deque(kmp_thread_data_t *thread_data)
static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *taskdata, kmp_task_team_t *task_team, kmp_int32 pri)
void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start=0)
void __kmp_call_init< kmp_taskred_input_t >(kmp_taskred_data_t &item, size_t offset)
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, kmp_uint64 ub_glob, kmp_uint64 num_tasks, kmp_uint64 grainsize, kmp_uint64 extras, kmp_int64 last_chunk, kmp_uint64 tc, void *task_dup)
void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *, kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64, kmp_uint64, void *)
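The taskloop machinery above splits a trip count tc into num_tasks chunks of roughly grainsize iterations, with extras chunks receiving one additional iteration so the remainder is spread evenly. A worked example of that arithmetic, following the straightforward policy the parameter names suggest rather than the exact implementation:

#include <cstdint>
#include <cstdio>

// With tc = 1000 and grainsize = 300: num_tasks = 1000 / 300 = 3,
// extras = 1000 % 3 = 1, so the chunk sizes are 334, 333, 333.
int main() {
  uint64_t tc = 1000, grainsize = 300;
  uint64_t num_tasks = tc / grainsize; // 3 chunks
  uint64_t extras = tc % num_tasks;    // 1 chunk gets one extra iteration
  for (uint64_t t = 0; t < num_tasks; ++t) {
    uint64_t chunk = tc / num_tasks + (t < extras ? 1 : 0);
    std::printf("task %llu: %llu iterations\n", (unsigned long long)t,
                (unsigned long long)chunk);
  }
  return 0;
}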
static bool __kmp_track_children_task(kmp_taskdata_t *taskdata)
static void __kmp_null_resume_wrapper(kmp_info_t *thr)
__attribute__((noinline))
ompt_callbacks_active_t ompt_enabled
ompt_callbacks_internal_t ompt_callbacks
#define OMPT_GET_RETURN_ADDRESS(level)
#define TASK_TYPE_DETAILS_FORMAT(info)
#define OMPT_GET_FRAME_ADDRESS(level)
ompt_team_info_t * __ompt_get_teaminfo(int depth, int *size)
int __ompt_get_task_info_internal(int ancestor_level, int *type, ompt_data_t **task_data, ompt_frame_t **task_frame, ompt_data_t **parallel_data, int *thread_num)
ompt_task_info_t * __ompt_get_task_info_object(int depth)
kmp_lock_t * mtx_locks[MAX_MTX_DEPS]
kmp_int32 tt_found_proxy_tasks
KMP_ALIGN_CACHE std::atomic< kmp_int32 > tt_unfinished_threads
kmp_bootstrap_lock_t tt_task_pri_lock
std::atomic< kmp_int32 > tt_num_task_pri
kmp_bootstrap_lock_t tt_threads_lock
kmp_int32 tt_untied_task_encountered
kmp_task_pri_t * tt_task_pri_list
kmp_int32 tt_hidden_helper_task_encountered
kmp_thread_data_t * tt_threads_data
KMP_ALIGN_CACHE volatile kmp_uint32 tt_active
kmp_task_team_t * tt_next
union kmp_event_t::@12 ed
kmp_task_team_list_t * next
kmp_task_team_t * task_team
void * shareds
pointer to block of pointers to shared vars
kmp_uint32 td_taskwait_counter
ident_t * td_taskwait_ident
kmp_task_team_t * td_task_team
kmp_dephash_t * td_dephash
kmp_taskdata_t * td_parent
std::atomic< kmp_int32 > td_incomplete_child_tasks
std::atomic< kmp_int32 > td_untied_count
kmp_taskgroup_t * td_taskgroup
kmp_info_p * td_alloc_thread
kmp_depnode_t * td_depnode
kmp_int32 td_taskwait_thread
kmp_tasking_flags_t td_flags
kmp_taskdata_t * td_last_tied
KMP_ALIGN_CACHE kmp_internal_control_t td_icvs
kmp_event_t td_allow_completion_event
kmp_target_data_t td_target_data
KMP_ALIGN_CACHE std::atomic< kmp_int32 > td_allocated_child_tasks
std::atomic< kmp_int32 > cancel_request
std::atomic< kmp_int32 > count
struct kmp_taskgroup * parent
kmp_int32 reduce_num_data
unsigned priority_specified
unsigned destructors_thunk
Internal struct for reduction data item related info saved by the library.
void * reduce_init
data initialization routine (two parameters)
void * reduce_priv
array of thread specific items
void * reduce_pend
end of private data for faster comparison op
void * reduce_comb
data combiner routine
kmp_taskred_flags_t flags
flags for additional info from compiler
void * reduce_fini
data finalization routine
size_t reduce_size
size of data item
void * reduce_shar
shared between tasks item to reduce into
void * reduce_orig
original item (can be used in UDR initializer)
Flags for special info per task reduction item.
unsigned lazy_priv
1 - use lazy alloc/init (e.g. big objects, #tasks < #threads)
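Taken together, these fields describe everything the runtime needs to privatize and later combine one reduction item. A hedged sketch of how compiler-generated code might populate one kmp_taskred_input_t and register it via __kmpc_taskred_init; the callback bodies and the sum variable are invented for the example, while the field names follow the listing above:

#include <cstddef>

// Local mirror of the input record, with the field meanings documented
// above (this sketch redeclares it rather than including kmp.h).
struct kmp_taskred_flags_t {
  unsigned lazy_priv : 1;
  unsigned reserved31 : 31;
};
struct kmp_taskred_input_t {
  void *reduce_shar;         // shared item to reduce into
  void *reduce_orig;         // original item (for UDR initializers)
  size_t reduce_size;        // size of one data item
  void *reduce_init;         // initialization routine (two parameters)
  void *reduce_fini;         // finalization routine
  void *reduce_comb;         // combiner routine
  kmp_taskred_flags_t flags; // extra info from the compiler
};

extern "C" void *__kmpc_taskred_init(int gtid, int num, void *data);

static void red_init(void *priv, void *orig) { (void)orig; *(long *)priv = 0; }
static void red_comb(void *shar, void *priv) { *(long *)shar += *(long *)priv; }

// Register a single sum reduction over a long (illustrative).
void register_sum_reduction(int gtid, long *sum) {
  kmp_taskred_input_t in = {};
  in.reduce_shar = sum;
  in.reduce_orig = sum;
  in.reduce_size = sizeof(long);
  in.reduce_init = (void *)red_init;
  in.reduce_fini = nullptr; // a plain long needs no finalizer
  in.reduce_comb = (void *)red_comb;
  (void)__kmpc_taskred_init(gtid, /*num=*/1, &in);
}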
ompt_data_t parallel_data
int(* routine)(int, struct task *)
int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val)