#ifdef KMP_SUPPORT_GRAPH_OUTPUT
static std::atomic<kmp_int32> kmp_node_id_seed = 0;
#endif
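The seed hands out one unique id per depnode for the optional graph dump. For illustration only, the same handout with plain std::atomic (the names here are hypothetical, not runtime API):

#include <atomic>
#include <cstdint>

static std::atomic<int32_t> node_id_seed{0};

// Each caller gets a distinct id; fetch_add returns the previous value,
// which is what a KMP_ATOMIC_INC-style macro wraps.
static int32_t next_node_id() { return node_id_seed.fetch_add(1); }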
// Inside __kmp_init_node(): a freshly initialized depnode gets a unique id
// for the optional dependence-graph dump and is registered with ITT for
// synchronization tracing.
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
#endif
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  __itt_sync_create(node, "OMP task dep node", NULL, 0);
#endif
size_t sizes[] = {997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029};

static inline size_t __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
  return ((addr >> 6) ^ (addr >> 2)) % hsize;
}
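The hash xors two shifted copies of the address, discarding the low alignment bits before the modulo. A standalone sketch (hypothetical driver, same arithmetic) to see the bucket spread:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

// Same computation as __kmp_dephash_hash, on plain integer types.
static size_t dephash_hash(intptr_t addr, size_t hsize) {
  return ((addr >> 6) ^ (addr >> 2)) % hsize;
}

int main() {
  // Feed it a run of 64-byte-spaced addresses, as produced by
  // cache-aligned allocations, and print the resulting buckets.
  for (intptr_t addr = 0x1000; addr < 0x1000 + 8 * 64; addr += 64)
    std::printf("%" PRIxPTR " -> bucket %zu\n", (uintptr_t)addr,
                dephash_hash(addr, 997));
  return 0;
}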
static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
                                           kmp_dephash_t *current_dephash) {
  kmp_dephash_t *h;

  size_t gen = current_dephash->generation + 1;
  if (gen >= MAX_GEN)
    return current_dephash; // already at the largest size in sizes[]
  size_t new_size = sizes[gen];

  size_t size_to_allocate =
      new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
  // ... set up the new header (size, generation, buckets pointer) ...
  for (size_t i = 0; i < new_size; i++)
    h->buckets[i] = NULL;

  // Re-insert every entry of the old table, rehashed for the new size.
  for (size_t i = 0; i < current_dephash->size; i++) {
    kmp_dephash_entry_t *entry, *next;
    for (entry = current_dephash->buckets[i]; entry; entry = next) {
      next = entry->next_in_bucket;
      size_t new_bucket = __kmp_dephash_hash(entry->addr, h->size);
      entry->next_in_bucket = h->buckets[new_bucket];
      h->buckets[new_bucket] = entry;
    }
  }

  // The entries now live in the new table; free only the old container.
  __kmp_fast_free(thread, current_dephash);
  return h;
}
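Each extension climbs one step up the sizes[] ladder above: bucket counts that each roughly double the previous one, so a table is rehashed only O(log n) times over its lifetime. A quick illustrative check of the ratios:

#include <cstddef>
#include <cstdio>

int main() {
  size_t sizes[] = {997,   2003,  4001,   8191,  16001,
                    32003, 64007, 131071, 270029};
  for (int g = 1; g < 9; g++)
    std::printf("generation %d: %zu buckets (x%.2f)\n", g, sizes[g],
                (double)sizes[g] / (double)sizes[g - 1]);
  return 0;
}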
  // Tail of __kmp_dephash_create(): a fresh table starts with every bucket
  // empty.
  for (size_t i = 0; i < h_size; i++)
    h->buckets[i] = 0;
static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
                                             kmp_dephash_t **hash,
                                             kmp_intptr_t addr) {
  kmp_dephash_t *h = *hash;
  // Extend once the conflict count reaches the bucket count, i.e. once the
  // average chain length has passed one.
  if (h->nelements != 0 && h->nconflicts / h->size >= 1) {
    *hash = __kmp_dephash_extend(thread, h);
    h = *hash;
  }
  size_t bucket = __kmp_dephash_hash(addr, h->size);
  // ... search the bucket for addr; if absent, allocate a new entry and
  // prepend it to the chain:
  entry->next_in_bucket = h->buckets[bucket];
  h->buckets[bucket] = entry;
  // ... update h->nelements / h->nconflicts and return the entry ...
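The trigger is an integer load-factor test: extend once nconflicts (entries that landed behind another entry) reaches the bucket count. A toy chained table with the same policy (illustrative only, not runtime code):

#include <cstdio>
#include <vector>

struct ToyTable {
  size_t size, nelements = 0, nconflicts = 0;
  std::vector<std::vector<long>> buckets;
  ToyTable() : size(7), buckets(7) {}

  void insert(long key) {
    if (nelements != 0 && nconflicts / size >= 1) // same trigger as above
      grow();
    auto &b = buckets[key % size];
    if (!b.empty())
      nconflicts++; // this entry chains behind an existing one
    b.push_back(key);
    nelements++;
  }

  void grow() { // the runtime walks the sizes[] ladder instead
    std::vector<std::vector<long>> nb(2 * size + 1);
    nconflicts = 0;
    for (auto &b : buckets)
      for (long k : b) {
        auto &dst = nb[k % nb.size()];
        if (!dst.empty())
          nconflicts++;
        dst.push_back(k);
      }
    buckets.swap(nb);
    size = buckets.size();
  }
};

int main() {
  ToyTable t;
  for (long k = 0; k < 200; k++)
    t.insert(k * 64);
  std::printf("buckets=%zu elements=%zu conflicts=%zu\n", t.size, t.nelements,
              t.nconflicts);
  return 0;
}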
  // __kmp_add_node() prepends: successor lists grow LIFO, newest first.
  new_head->node = __kmp_node_ref(node);
  new_head->next = list;
  return new_head;
static inline void __kmp_track_dependence(kmp_int32 gtid,
                                          kmp_depnode_t *source,
                                          kmp_depnode_t *sink,
                                          kmp_task_t *sink_task) {
#if OMPX_TASKGRAPH
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
  if (source->dn.task && sink_task) {
    // A dependence between a task inside the TDG and a task outside it is
    // not supported.
    KMP_ASSERT(task_source->is_taskgraph == task_sink->is_taskgraph);
  }
  if (task_sink->is_taskgraph &&
      __kmp_tdg_is_recording(task_sink->tdg->tdg_status)) {
    kmp_node_info_t *source_info =
        &task_sink->tdg->record_map[task_source->td_tdg_task_id];
    // Record the edge only once.
    bool exists = false;
    for (int i = 0; i < source_info->nsuccessors; i++) {
      if (source_info->successors[i] == task_sink->td_tdg_task_id) {
        exists = true;
        break;
      }
    }
    if (!exists) {
      // Double the successor array when it is full.
      if (source_info->nsuccessors >= source_info->successors_size) {
        kmp_uint old_size = source_info->successors_size;
        source_info->successors_size = 2 * source_info->successors_size;
        kmp_int32 *old_succ_ids = source_info->successors;
        kmp_int32 *new_succ_ids = (kmp_int32 *)__kmp_allocate(
            source_info->successors_size * sizeof(kmp_int32));
        KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
        source_info->successors = new_succ_ids;
        __kmp_free(old_succ_ids);
      }
      source_info->successors[source_info->nsuccessors] =
          task_sink->td_tdg_task_id;
      source_info->nsuccessors++;
      kmp_node_info_t *sink_info =
          &(task_sink->tdg->record_map[task_sink->td_tdg_task_id]);
      sink_info->npredecessors++;
    }
  }
#endif
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  // Do not use sink->dn.task here: it is only filled in after the
  // dependences have been processed.
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
  __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
               task_source->td_ident->psource, sink->dn.id,
               task_sink->td_ident->psource);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // OMPT tracks the dependence (source blocks sink). If there is no sink
  // task (e.g. a taskwait), report against the encountering thread instead.
  if (ompt_enabled.ompt_callback_task_dependence) {
    kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
    ompt_data_t *sink_data;
    if (sink_task)
      sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)->ompt_task_info.task_data);
    else
      sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data;
    ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
        &(task_source->ompt_task_info.task_data), sink_data);
  }
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}
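On the tool side, these reports arrive through ompt_callback_task_dependence. A minimal OMPT tool sketch that registers for the edge reports (assumes an OMPT-enabled runtime; error handling omitted):

#include <omp-tools.h>
#include <cstdio>

// Tool-side counterpart: receive the source -> sink edges reported above.
static void on_task_dependence(ompt_data_t *src_task_data,
                               ompt_data_t *sink_task_data) {
  std::printf("dependence edge: %p -> %p\n", (void *)src_task_data,
              (void *)sink_task_data);
}

static int tool_initialize(ompt_function_lookup_t lookup,
                           int initial_device_num, ompt_data_t *tool_data) {
  auto set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
  set_callback(ompt_callback_task_dependence,
               (ompt_callback_t)on_task_dependence);
  return 1; // non-zero keeps the tool active
}

static void tool_finalize(ompt_data_t *tool_data) {}

// The runtime looks this symbol up at startup.
extern "C" ompt_start_tool_result_t *
ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
  static ompt_start_tool_result_t result = {tool_initialize, tool_finalize,
                                            {0}};
  return &result;
}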
static inline kmp_int32
__kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
                             kmp_task_t *task, kmp_depnode_t *node,
                             kmp_depnode_list_t *plist) {
  kmp_int32 npredecessors = 0;
  // For each predecessor depnode 'dep' on plist:
#if OMPX_TASKGRAPH
  kmp_tdg_status tdg_status = KMP_TDG_NONE;
  if (task) {
    kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
    if (td->is_taskgraph)
      tdg_status = KMP_TASK_TO_TASKDATA(task)->tdg->tdg_status;
    if (__kmp_tdg_is_recording(tdg_status))
      __kmp_track_dependence(gtid, dep, node, task);
  }
#endif
  // ... then, under the depnode lock, skip the edge if node is already the
  // newest successor of dep:
  if (!dep->dn.successors || dep->dn.successors->node != node) {
#if OMPX_TASKGRAPH
    if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
      __kmp_track_dependence(gtid, dep, node, task);
    dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
    KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                  "%p\n",
                  gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
                  KMP_TASK_TO_TASKDATA(task)));
    npredecessors++;
  }
  // ...
  return npredecessors;
}
static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
                                                     kmp_info_t *thread,
                                                     kmp_task_t *task,
                                                     kmp_depnode_t *source,
                                                     kmp_depnode_t *sink) {
  kmp_int32 npredecessors = 0;
#if OMPX_TASKGRAPH
  kmp_tdg_status tdg_status = KMP_TDG_NONE;
  if (task) {
    kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
    if (td->is_taskgraph)
      tdg_status = KMP_TASK_TO_TASKDATA(task)->tdg->tdg_status;
    if (__kmp_tdg_is_recording(tdg_status) && sink->dn.task)
      __kmp_track_dependence(gtid, sink, source, task);
  }
#endif
  // ... under the depnode lock, add source as a successor of sink ...
#if OMPX_TASKGRAPH
  if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
    __kmp_track_dependence(gtid, sink, source, task);
  // ...
  KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                "%p\n",
                gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
                KMP_TASK_TO_TASKDATA(task)));
#if OMPX_TASKGRAPH
  if (__kmp_tdg_is_recording(tdg_status)) {
    kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
    if (tdd->is_taskgraph) {
      // A TDG task that has already completed or been reset must not be
      // counted as a predecessor.
      if (tdd->td_flags.onced)
        npredecessors--;
    }
  }
#endif
  npredecessors++;
  // ...
  return npredecessors;
}
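Both overloads guard the test-and-prepend with the sink's depnode lock, so a predecessor completing concurrently (in __kmp_release_deps) either sees the new successor or has already cleared dn.task. Schematically:

// Schematic of the linking protocol shared by both overloads:
//
//   if (sink->dn.task) {               // predecessor still pending?
//     KMP_ACQUIRE_DEPNODE(gtid, sink);
//     if (sink->dn.task) {             // re-check under the lock
//       if (newest successor != node)  // avoid duplicate edges
//         prepend node to sink->dn.successors and count a predecessor;
//     }
//     KMP_RELEASE_DEPNODE(gtid, sink);
//   }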
static kmp_int32 __kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node,
                                       kmp_dephash_t *h, bool dep_barrier,
                                       kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
                "dep_barrier = %d\n",
                gtid, dep_barrier));
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;
  // ... link against the previous omp_all_memory node, then treat the
  // omp_all_memory dependence as OUT against every dephash entry:
  for (size_t i = 0; i < h->size; i++) {
    // ... for each non-empty bucket, link node behind the entry's last
    // set or last output, then take a fresh reference on node ...
  }
  KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
                npredecessors));
  return npredecessors;
}
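User code reaches this path through an omp_all_memory dependence (OpenMP 5.1), which orders the task after every tracked address; that is exactly what the bucket sweep implements. Illustrative usage (requires a 5.1-capable compiler):

#include <cstdio>

int main() {
  int a = 0, b = 0;
#pragma omp parallel
#pragma omp single
  {
#pragma omp task depend(out : a)
    a = 1;
#pragma omp task depend(out : b)
    b = 2;
    // Ordered after both tasks above via __kmp_process_dep_all.
#pragma omp task depend(inout : omp_all_memory)
    std::printf("a=%d b=%d\n", a, b);
#pragma omp taskwait
  }
  return 0;
}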
template <bool filter>
static kmp_int32 __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node,
                                    kmp_dephash_t **hash, bool dep_barrier,
                                    kmp_int32 ndeps,
                                    kmp_depend_info_t *dep_list,
                                    kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependences : "
                "dep_barrier = %d\n",
                filter, gtid, ndeps, dep_barrier));
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;
  for (kmp_int32 i = 0; i < ndeps; i++) {
    const kmp_depend_info_t *dep = &dep_list[i];
    if (filter && dep->base_addr == 0)
      continue; // was merged into a duplicate entry by __kmp_check_deps
    // ... find the dephash entry for dep->base_addr and link node behind
    // the entry's last outs/sets according to dep->flags ...
  }
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
                gtid, npredecessors));
  return npredecessors;
}
#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)
// Returns true if the task has any outstanding dependences; false if it is
// ready to run.
static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
                             kmp_task_t *task, kmp_dephash_t **hash,
                             bool dep_barrier, kmp_int32 ndeps,
                             kmp_depend_info_t *dep_list,
                             kmp_int32 ndeps_noalias,
                             kmp_depend_info_t *noalias_dep_list) {
  int i, n_mtxs = 0, dep_all = 0;
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependences for task %p : %d "
                "possibly aliased dependences, %d non-aliased dependences : "
                "dep_barrier=%d .\n",
                gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));

  // Filter aliased entries: merge the flags of duplicates of the same base
  // address into the later duplicate and mark the earlier one as processed
  // by zeroing its base_addr.
  for (i = 0; i < ndeps; i++) {
    if (dep_list[i].base_addr != 0 &&
        dep_list[i].base_addr != (kmp_intptr_t)KMP_SIZE_T_MAX) {
      for (int j = i + 1; j < ndeps; j++) {
        if (dep_list[i].base_addr == dep_list[j].base_addr) {
          // ... combine flags into dep_list[j], then dep_list[i].base_addr = 0
        }
      }
    }
  }
  // ... (mutexinoutset and omp_all_memory handling elided) ...

  int npredecessors;
  npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
                                           ndeps, dep_list, task);
  npredecessors += __kmp_process_deps<false>(
      gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
  // ...
  KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
                gtid, npredecessors, taskdata));
  return npredecessors > 0;
}
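The effect of the duplicate filter is visible from user code: listing the same variable in two depend clauses yields a single combined dependence rather than two. For example:

#include <cstdio>

int main() {
  int x = 0;
#pragma omp parallel
#pragma omp single
  {
    // Two dep_list entries with the same base address reach
    // __kmp_check_deps and are merged into one inout dependence.
#pragma omp task depend(in : x) depend(out : x)
    x++;
#pragma omp task depend(in : x)
    std::printf("x = %d\n", x); // runs after the first task
#pragma omp taskwait
  }
  return 0;
}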
kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                                    kmp_task_t *new_task, kmp_int32 ndeps,
                                    kmp_depend_info_t *dep_list,
                                    kmp_int32 ndeps_noalias,
                                    kmp_depend_info_t *noalias_dep_list) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
#if OMPX_TASKGRAPH
  // While recording a taskgraph, make sure record_map can hold this task.
  if (new_taskdata->is_taskgraph &&
      __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
    kmp_tdg_info_t *tdg = new_taskdata->tdg;
    if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
      // Re-check the size under the lock.
      if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
        kmp_uint old_size = tdg->map_size;
        kmp_uint new_size = old_size * 2;
        kmp_node_info_t *old_record = tdg->record_map;
        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
            new_size * sizeof(kmp_node_info_t));
        KMP_MEMCPY(new_record, old_record, old_size * sizeof(kmp_node_info_t));
        tdg->record_map = new_record;
        __kmp_free(old_record);
        // Initialize the fresh upper half of the map.
        for (kmp_uint i = old_size; i < new_size; i++) {
          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
              __kmp_successors_size * sizeof(kmp_int32));
          new_record[i].task = nullptr;
          new_record[i].successors = successorsList;
          new_record[i].nsuccessors = 0;
          new_record[i].npredecessors = 0;
          new_record[i].successors_size = __kmp_successors_size;
        }
        // Publish the new size last, so other threads never see the new
        // map_size paired with the old record_map.
        tdg->map_size = new_size;
      }
      __kmp_release_bootstrap_lock(&tdg->graph_lock);
    }
    tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
    tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
        new_taskdata->td_parent;
  }
#endif
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(current_task->ompt_task_info.task_data),
          &(current_task->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
    }
    new_taskdata->ompt_task_info.frame.enter_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    int ompt_ndeps = ndeps + ndeps_noalias;
    ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
        thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));
    for (i = 0; i < ndeps; i++) {
      ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
      if (dep_list[i].base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX)
        ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
      else if (dep_list[i].flags.in && dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        ompt_deps[i].dependence_type = ompt_dependence_type_in;
      else if (dep_list[i].flags.mtx)
        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
      else if (dep_list[i].flags.set)
        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
      else if (dep_list[i].flags.all)
        ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_out_all_memory;
      else if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
      else if (noalias_dep_list[i].flags.mtx)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_mutexinoutset;
      else if (noalias_dep_list[i].flags.set)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
      else if (noalias_dep_list[i].flags.all)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_out_all_memory;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
    /* We can now free the allocated memory for the dependences */
    KMP_OMPT_DEPS_FREE(thread, ompt_deps);
  }
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
  // Serialized and final tasks skip dependence processing unless proxy or
  // hidden-helper tasks may be in flight.
  bool serial = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  // ...
  if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
    // ... create current_task->td_dephash on first use, then allocate and
    // initialize this task's depnode (new_taskdata->td_depnode = node) ...
    if (__kmp_check_deps(gtid, node, new_task, &current_task->td_dephash,
                         NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                         noalias_dep_list)) {
      KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
                    "dependences: "
                    "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
                    gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
      if (ompt_enabled.enabled)
        current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
#endif
      return TASK_CURRENT_NOT_QUEUED;
    }
  } else {
    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences "
                  "for task (serialized) loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata));
  }
  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
                "dependences : "
                "loc=%p task=%p, transferring to __kmp_omp_task\n",
                gtid, loc_ref, new_taskdata));

  kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
#endif
  return ret;
}
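The two mapping loops above differ only in the list they read and the offset they write at. A hypothetical helper (a refactoring sketch, not upstream code) that factors out the flag-to-type translation, mirroring the if/else chains:

// Hypothetical refactoring sketch: translate one kmp_depend_info_t into
// the OMPT dependence type, exactly as the chains above do.
static ompt_dependence_type_t
__ompt_dependence_type_from_dep(const kmp_depend_info_t &d) {
  if (d.base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX)
    return ompt_dependence_type_out_all_memory; // omp_all_memory entry
  if (d.flags.in && d.flags.out)
    return ompt_dependence_type_inout;
  if (d.flags.out)
    return ompt_dependence_type_out;
  if (d.flags.in)
    return ompt_dependence_type_in;
  if (d.flags.mtx)
    return ompt_dependence_type_mutexinoutset;
  if (d.flags.set)
    return ompt_dependence_type_inoutset;
  return ompt_dependence_type_out_all_memory; // flags.all
}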
#if OMPT_SUPPORT
static void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
                                       ompt_data_t *taskwait_task_data) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        taskwait_task_data, ompt_taskwait_complete, NULL);
  }
  current_task->ompt_task_info.frame.enter_frame.ptr = NULL;
  *taskwait_task_data = ompt_data_none;
}
#endif /* OMPT_SUPPORT */
void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                          kmp_depend_info_t *noalias_dep_list) {
  __kmpc_omp_taskwait_deps_51(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
                              noalias_dep_list, false);
}
void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid,
                                 kmp_int32 ndeps, kmp_depend_info_t *dep_list,
                                 kmp_int32 ndeps_noalias,
                                 kmp_depend_info_t *noalias_dep_list,
                                 kmp_int32 has_no_wait) {
  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(enter): T#%d loc=%p nowait#%d\n",
                gtid, loc_ref, has_no_wait));
  if (ndeps == 0 && ndeps_noalias == 0) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no dependences to "
                  "wait upon : loc=%p\n",
                  gtid, loc_ref));
    return;
  }
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
#if OMPT_SUPPORT
  // The taskwait is reported to the tool as an undeferred, mergeable task.
  ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data;
  KMP_DEBUG_ASSERT(taskwait_task_data->ptr == NULL);
  if (ompt_enabled.enabled) {
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(current_task->ompt_task_info.task_data),
          &(current_task->ompt_task_info.frame), taskwait_task_data,
          ompt_task_taskwait | ompt_task_undeferred | ompt_task_mergeable, 1,
          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
    }
  }
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    int ompt_ndeps = ndeps + ndeps_noalias;
    ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
        thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));
    for (i = 0; i < ndeps; i++) {
      ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
      if (dep_list[i].flags.in && dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        ompt_deps[i].dependence_type = ompt_dependence_type_in;
      else if (dep_list[i].flags.mtx)
        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
      else if (dep_list[i].flags.set)
        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
      else if (noalias_dep_list[i].flags.mtx)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_mutexinoutset;
      else if (noalias_dep_list[i].flags.set)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        taskwait_task_data, ompt_deps, ompt_ndeps);
    /* We can now free the allocated memory for the dependences */
    KMP_OMPT_DEPS_FREE(thread, ompt_deps);
  }
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
  // The wait can be skipped entirely when the encountering task is
  // serialized (serial team, serialized tasking, or final), no proxy or
  // hidden-helper tasks are in flight, and nothing has been tracked yet.
  bool ignore = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  ignore =
      ignore && thread->th.th_task_team != NULL &&
      thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE &&
      thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE;
  ignore = ignore || current_task->td_dephash == NULL;
  if (ignore) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
                  "dependences : loc=%p\n",
                  gtid, loc_ref));
#if OMPT_SUPPORT
    __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
    return;
  }
  kmp_depnode_t node = {0};
  __kmp_init_node(&node, /*on_stack=*/true);

  if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
                        DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                        noalias_dep_list)) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
                  "dependences : loc=%p\n",
                  gtid, loc_ref));
#if OMPT_SUPPORT
    __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
    // Wait until the last __kmp_release_deps() has dropped its reference
    // before the stack-allocated node goes out of scope.
    kmp_int32 nrefs;
    while ((nrefs = node.dn.nrefs) > 3) {
      KMP_DEBUG_ASSERT((nrefs & 1) == 1); // on-stack nodes keep the low bit set
      KMP_YIELD(TRUE);
    }
    return;
  }

  // Run other tasks until all predecessors of the taskwait node complete.
  int thread_finished = FALSE;
  kmp_flag_32<false, false> flag(
      (std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
  while (node.dn.npredecessors > 0) {
    flag.execute_tasks(thread, gtid, FALSE,
                       &thread_finished USE_ITT_BUILD_ARG(NULL),
                       __kmp_task_stealing_constraint);
  }

  // Again, let the releasing tasks drain their references to the node.
  kmp_int32 nrefs;
  while ((nrefs = node.dn.nrefs) > 3) {
    KMP_DEBUG_ASSERT((nrefs & 1) == 1);
    KMP_YIELD(TRUE);
  }
#if OMPT_SUPPORT
  __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d finished waiting : "
                "loc=%p\n",
                gtid, loc_ref));
}