32#ifdef KMP_SUPPORT_GRAPH_OUTPUT
33static std::atomic<kmp_int32> kmp_node_id_seed = 0;
45#ifdef KMP_SUPPORT_GRAPH_OUTPUT
48#if USE_ITT_BUILD && USE_ITT_NOTIFY
49 __itt_sync_create(node,
"OMP task dep node", NULL, 0);
60size_t sizes[] = {997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029};
66 return ((
addr >> 6) ^ (
addr >> 2)) % hsize;
75 return current_dephash;
78 size_t size_to_allocate =
82 h = (
kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
100 for (
size_t i = 0;
i < current_dephash->
size;
i++) {
102 for (entry = current_dephash->
buckets[
i]; entry; entry = next) {
111 h->buckets[new_bucket] = entry;
117 __kmp_fast_free(thread, current_dephash);
151 for (
size_t i = 0;
i < h_size;
i++)
161 if (
h->nelements != 0 &&
h->nconflicts /
h->size >= 1) {
191 h->buckets[bucket] = entry;
213 new_head->
next = list;
224 if (source->
dn.
task && sink_task) {
227 KMP_ASSERT(task_source->is_taskgraph == task_sink->is_taskgraph);
229 if (task_sink->is_taskgraph &&
230 __kmp_tdg_is_recording(task_sink->tdg->tdg_status)) {
231 kmp_node_info_t *source_info =
232 &task_sink->tdg->record_map[task_source->
td_task_id];
234 for (
int i = 0;
i < source_info->nsuccessors;
i++) {
235 if (source_info->successors[
i] == task_sink->
td_task_id) {
241 if (source_info->nsuccessors >= source_info->successors_size) {
242 source_info->successors_size = 2 * source_info->successors_size;
243 kmp_int32 *old_succ_ids = source_info->successors;
245 source_info->successors_size *
sizeof(
kmp_int32));
246 source_info->successors = new_succ_ids;
250 source_info->successors[source_info->nsuccessors] = task_sink->
td_task_id;
251 source_info->nsuccessors++;
253 kmp_node_info_t *sink_info =
254 &(task_sink->tdg->record_map[task_sink->
td_task_id]);
255 sink_info->npredecessors++;
259#ifdef KMP_SUPPORT_GRAPH_OUTPUT
269#if OMPT_SUPPORT && OMPT_OPTIONAL
275 ompt_data_t *sink_data;
279 sink_data = &
__kmp_threads[gtid]->th.ompt_thread_info.task_data;
282 &(task_source->ompt_task_info.task_data), sink_data);
308 kmp_tdg_status tdg_status = KMP_TDG_NONE;
311 if (td->is_taskgraph)
313 if (__kmp_tdg_is_recording(tdg_status))
320 if (!
dep->dn.successors ||
dep->dn.successors->node != node) {
322 if (!(__kmp_tdg_is_recording(tdg_status)) &&
task)
326 KA_TRACE(40, (
"__kmp_process_deps: T#%d adding dependence from %p to "
336 return npredecessors;
349 kmp_tdg_status tdg_status = KMP_TDG_NONE;
352 if (td->is_taskgraph)
354 if (__kmp_tdg_is_recording(tdg_status) && sink->
dn.
task)
364 if (!(__kmp_tdg_is_recording(tdg_status)) &&
task)
368 KA_TRACE(40, (
"__kmp_process_deps: T#%d adding dependence from %p to "
373 if (__kmp_tdg_is_recording(tdg_status)) {
375 if (tdd->is_taskgraph) {
390 return npredecessors;
396 KA_TRACE(30, (
"__kmp_process_dep_all: T#%d processing dep_all, "
397 "dep_barrier = %d\n",
416 for (
size_t i = 0;
i <
h->size;
i++) {
445 KA_TRACE(30, (
"__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
447 return npredecessors;
450template <
bool filter>
455 KA_TRACE(30, (
"__kmp_process_deps<%d>: T#%d processing %d dependences : "
456 "dep_barrier = %d\n",
457 filter, gtid, ndeps, dep_barrier));
464 if (filter &&
dep->base_addr == 0)
562 KA_TRACE(30, (
"__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
563 gtid, npredecessors));
564 return npredecessors;
567#define NO_DEP_BARRIER (false)
568#define DEP_BARRIER (true)
577 int i, n_mtxs = 0, dep_all = 0;
581 KA_TRACE(20, (
"__kmp_check_deps: T#%d checking dependences for task %p : %d "
582 "possibly aliased dependences, %d non-aliased dependences : "
583 "dep_barrier=%d .\n",
584 gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));
588 for (
i = 0;
i < ndeps;
i++) {
589 if (dep_list[
i].base_addr != 0 &&
595 for (
int j =
i + 1;
j < ndeps;
j++) {
596 if (dep_list[
i].base_addr == dep_list[
j].base_addr) {
633 npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
634 ndeps, dep_list,
task);
635 npredecessors += __kmp_process_deps<false>(
636 gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list,
task);
653 KA_TRACE(20, (
"__kmp_check_deps: T#%d found %d predecessors for task %p \n",
654 gtid, npredecessors, taskdata));
658 return npredecessors > 0 ? true :
false;
684 KA_TRACE(10, (
"__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
685 loc_ref, new_taskdata));
692 if (new_taskdata->is_taskgraph &&
693 __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
694 kmp_tdg_info_t *tdg = new_taskdata->tdg;
696 if (new_taskdata->
td_task_id >= tdg->map_size) {
698 if (new_taskdata->
td_task_id >= tdg->map_size) {
701 kmp_node_info_t *old_record = tdg->record_map;
703 new_size *
sizeof(kmp_node_info_t));
705 old_size *
sizeof(kmp_node_info_t));
706 tdg->record_map = new_record;
712 __kmp_successors_size *
sizeof(
kmp_int32));
713 new_record[
i].task =
nullptr;
714 new_record[
i].successors = successorsList;
715 new_record[
i].nsuccessors = 0;
716 new_record[
i].npredecessors = 0;
717 new_record[
i].successors_size = __kmp_successors_size;
726 tdg->record_map[new_taskdata->
td_task_id].task = new_task;
727 tdg->record_map[new_taskdata->
td_task_id].parent_task =
734 if (!current_task->ompt_task_info.frame.enter_frame.ptr)
735 current_task->ompt_task_info.frame.enter_frame.ptr =
739 &(current_task->ompt_task_info.task_data),
740 &(current_task->ompt_task_info.frame),
741 &(new_taskdata->ompt_task_info.task_data),
743 OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
746 new_taskdata->ompt_task_info.frame.enter_frame.ptr =
752 if (ndeps + ndeps_noalias > 0 &&
ompt_enabled.ompt_callback_dependences) {
755 int ompt_ndeps = ndeps + ndeps_noalias;
756 ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
757 thread, (ndeps + ndeps_noalias) *
sizeof(ompt_dependence_t));
761 for (
i = 0;
i < ndeps;
i++) {
762 ompt_deps[
i].variable.ptr = (
void *)dep_list[
i].base_addr;
764 ompt_deps[
i].dependence_type = ompt_dependence_type_out_all_memory;
766 ompt_deps[
i].dependence_type = ompt_dependence_type_inout;
767 else if (dep_list[
i].flags.
out)
768 ompt_deps[
i].dependence_type = ompt_dependence_type_out;
769 else if (dep_list[
i].flags.
in)
770 ompt_deps[
i].dependence_type = ompt_dependence_type_in;
771 else if (dep_list[
i].flags.
mtx)
772 ompt_deps[
i].dependence_type = ompt_dependence_type_mutexinoutset;
773 else if (dep_list[
i].flags.
set)
774 ompt_deps[
i].dependence_type = ompt_dependence_type_inoutset;
775 else if (dep_list[
i].flags.
all)
776 ompt_deps[
i].dependence_type = ompt_dependence_type_out_all_memory;
778 for (
i = 0;
i < ndeps_noalias;
i++) {
779 ompt_deps[ndeps +
i].variable.ptr = (
void *)noalias_dep_list[
i].base_addr;
781 ompt_deps[ndeps +
i].dependence_type =
782 ompt_dependence_type_out_all_memory;
783 else if (noalias_dep_list[
i].flags.
in && noalias_dep_list[
i].
flags.
out)
784 ompt_deps[ndeps +
i].dependence_type = ompt_dependence_type_inout;
785 else if (noalias_dep_list[
i].flags.
out)
786 ompt_deps[ndeps +
i].dependence_type = ompt_dependence_type_out;
787 else if (noalias_dep_list[
i].flags.
in)
788 ompt_deps[ndeps +
i].dependence_type = ompt_dependence_type_in;
789 else if (noalias_dep_list[
i].flags.
mtx)
790 ompt_deps[ndeps +
i].dependence_type =
791 ompt_dependence_type_mutexinoutset;
792 else if (noalias_dep_list[
i].flags.
set)
793 ompt_deps[ndeps +
i].dependence_type = ompt_dependence_type_inoutset;
794 else if (noalias_dep_list[
i].flags.
all)
795 ompt_deps[ndeps +
i].dependence_type =
796 ompt_dependence_type_out_all_memory;
799 &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
802 KMP_OMPT_DEPS_FREE(thread, ompt_deps);
815 if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
834 KA_TRACE(10, (
"__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
836 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
837 gtid, loc_ref, new_taskdata));
840 current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
846 KA_TRACE(10, (
"__kmpc_omp_task_with_deps(exit): T#%d ignored dependences "
847 "for task (serialized) loc=%p task=%p\n",
848 gtid, loc_ref, new_taskdata));
851 KA_TRACE(10, (
"__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
853 "loc=%p task=%p, transferring to __kmp_omp_task\n",
854 gtid, loc_ref, new_taskdata));
859 current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
867 ompt_data_t *taskwait_task_data) {
870 taskwait_task_data, ompt_taskwait_complete, NULL);
872 current_task->ompt_task_info.frame.enter_frame.ptr = NULL;
873 *taskwait_task_data = ompt_data_none;
892 noalias_dep_list,
false);
905 KA_TRACE(10, (
"__kmpc_omp_taskwait_deps(enter): T#%d loc=%p nowait#%d\n",
906 gtid, loc_ref, has_no_wait));
907 if (ndeps == 0 && ndeps_noalias == 0) {
908 KA_TRACE(10, (
"__kmpc_omp_taskwait_deps(exit): T#%d has no dependences to "
909 "wait upon : loc=%p\n",
923 ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data;
926 if (!current_task->ompt_task_info.frame.enter_frame.ptr)
927 current_task->ompt_task_info.frame.enter_frame.ptr =
931 &(current_task->ompt_task_info.task_data),
932 &(current_task->ompt_task_info.frame), taskwait_task_data,
933 ompt_task_taskwait | ompt_task_undeferred | ompt_task_mergeable, 1,
934 OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
940 if (ndeps + ndeps_noalias > 0 &&
ompt_enabled.ompt_callback_dependences) {
943 int ompt_ndeps = ndeps + ndeps_noalias;
944 ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
945 thread, (ndeps + ndeps_noalias) *
sizeof(ompt_dependence_t));
949 for (
i = 0;
i < ndeps;
i++) {
950 ompt_deps[
i].variable.ptr = (
void *)dep_list[
i].base_addr;
952 ompt_deps[
i].dependence_type = ompt_dependence_type_inout;
953 else if (dep_list[
i].flags.
out)
954 ompt_deps[
i].dependence_type = ompt_dependence_type_out;
955 else if (dep_list[
i].flags.
in)
956 ompt_deps[
i].dependence_type = ompt_dependence_type_in;
957 else if (dep_list[
i].flags.
mtx)
958 ompt_deps[ndeps +
i].dependence_type =
959 ompt_dependence_type_mutexinoutset;
960 else if (dep_list[
i].flags.
set)
961 ompt_deps[ndeps +
i].dependence_type = ompt_dependence_type_inoutset;
963 for (
i = 0;
i < ndeps_noalias;
i++) {
964 ompt_deps[ndeps +
i].variable.ptr = (
void *)noalias_dep_list[
i].base_addr;
965 if (noalias_dep_list[
i].flags.
in && noalias_dep_list[
i].
flags.
out)
966 ompt_deps[ndeps +
i].dependence_type = ompt_dependence_type_inout;
967 else if (noalias_dep_list[
i].flags.
out)
968 ompt_deps[ndeps +
i].dependence_type = ompt_dependence_type_out;
969 else if (noalias_dep_list[
i].flags.
in)
970 ompt_deps[ndeps +
i].dependence_type = ompt_dependence_type_in;
971 else if (noalias_dep_list[
i].flags.
mtx)
972 ompt_deps[ndeps +
i].dependence_type =
973 ompt_dependence_type_mutexinoutset;
974 else if (noalias_dep_list[
i].flags.
set)
975 ompt_deps[ndeps +
i].dependence_type = ompt_dependence_type_inoutset;
978 taskwait_task_data, ompt_deps, ompt_ndeps);
981 KMP_OMPT_DEPS_FREE(thread, ompt_deps);
994 ignore && thread->th.th_task_team != NULL &&
995 thread->th.th_task_team->tt.tt_found_proxy_tasks ==
FALSE &&
996 thread->th.th_task_team->tt.tt_hidden_helper_task_encountered ==
FALSE;
997 ignore = ignore || current_task->
td_dephash == NULL;
1000 KA_TRACE(10, (
"__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
1001 "dependences : loc=%p\n",
1004 __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
1014 noalias_dep_list)) {
1015 KA_TRACE(10, (
"__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
1016 "dependences : loc=%p\n",
1019 __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
1024 int thread_finished =
FALSE;
1040 __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
1042 KA_TRACE(10, (
"__kmpc_omp_taskwait_deps(exit): T#%d finished waiting : loc=%p\
kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list)
void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t new_size
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync p
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function h
#define TASK_CURRENT_NOT_QUEUED
#define KMP_TASK_TO_TASKDATA(task)
#define __kmp_thread_malloc(th, size)
kmp_info_t ** __kmp_threads
int __kmp_task_stealing_constraint
#define __kmp_allocate(size)
static void __kmp_assert_valid_gtid(kmp_int32 gtid)
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, bool serialize_immediate)
union KMP_ALIGN_CACHE kmp_info kmp_info_t
#define __kmp_thread_free(th, ptr)
#define KMP_DEBUG_ASSERT(cond)
void __kmp_printf(char const *format,...)
#define USE_ITT_BUILD_ARG(x)
static void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck)
static void __kmp_init_lock(kmp_lock_t *lck)
static int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck)
#define KMP_ATOMIC_ST_REL(p, v)
#define KMP_ATOMIC_ST_RLX(p, v)
#define KMP_ATOMIC_INC(p)
kmp_depnode_list_t * __kmpc_task_get_successors(kmp_task_t *task)
static void __kmp_init_node(kmp_depnode_t *node)
@ KMP_DEPHASH_MASTER_SIZE
static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_dephash_t **hash, bool dep_barrier, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list)
static kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread, kmp_task_t *task, kmp_depnode_t *node, kmp_depnode_list_t *plist)
static kmp_int32 __kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h, bool dep_barrier, kmp_task_t *task)
kmp_base_depnode_t * __kmpc_task_get_depnode(kmp_task_t *task)
static kmp_dephash_t * __kmp_dephash_extend(kmp_info_t *thread, kmp_dephash_t *current_dephash)
void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list, kmp_int32 has_no_wait)
static kmp_dephash_entry * __kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t **hash, kmp_intptr_t addr)
static size_t __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize)
static void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source, kmp_depnode_t *sink, kmp_task_t *sink_task)
static kmp_int32 __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash, bool dep_barrier, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_task_t *task)
static kmp_dephash_t * __kmp_dephash_create(kmp_info_t *thread, kmp_taskdata_t *current_task)
static kmp_depnode_list_t * __kmp_add_node(kmp_info_t *thread, kmp_depnode_list_t *list, kmp_depnode_t *node)
static kmp_depnode_t * __kmp_node_ref(kmp_depnode_t *node)
#define KMP_ACQUIRE_DEPNODE(gtid, n)
static void __kmp_depnode_list_free(kmp_info_t *thread, kmp_depnode_list *list)
static void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node)
#define KMP_RELEASE_DEPNODE(gtid, n)
ompt_callbacks_active_t ompt_enabled
ompt_callbacks_internal_t ompt_callbacks
#define TASK_TYPE_DETAILS_FORMAT(info)
#define OMPT_GET_FRAME_ADDRESS(level)
char const * psource
String describing the source location.
std::atomic< kmp_int32 > npredecessors
std::atomic< kmp_int32 > nrefs
kmp_lock_t * mtx_locks[MAX_MTX_DEPS]
kmp_depnode_list_t * successors
kmp_int32 tt_found_proxy_tasks
kmp_int32 tt_hidden_helper_task_encountered
struct kmp_depend_info::@8::@10 flags
kmp_dephash_entry_t * next_in_bucket
kmp_depnode_list_t * last_set
kmp_depnode_list_t * prev_set
kmp_dephash_entry_t ** buckets
kmp_depnode_list_t * next
kmp_dephash_t * td_dephash
kmp_taskdata_t * td_parent
kmp_depnode_t * td_depnode
kmp_tasking_flags_t td_flags