13#ifndef __STDC_FORMAT_MACROS
14#define __STDC_FORMAT_MACROS
29#include <sys/resource.h>
31#include <unordered_map>
// Select an implementation for KMP_FALLTHROUGH(): an annotation marking an
// intentional switch-case fall-through.  Preference order: C++17
// [[fallthrough]], Intel compiler (no-op), clang::fallthrough, GNU
// __attribute__((__fallthrough__)); otherwise a no-op.
40#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
41#define KMP_FALLTHROUGH() [[fallthrough]]
43#elif defined(__INTEL_COMPILER)
44#define KMP_FALLTHROUGH() ((void)0)
45#elif __has_cpp_attribute(clang::fallthrough)
46#define KMP_FALLTHROUGH() [[clang::fallthrough]]
47#elif __has_attribute(fallthrough) || __GNUC__ >= 7
48#define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
// Fallback when no fall-through annotation is available.
50#define KMP_FALLTHROUGH() ((void)0)
// Fragment of the ArcherFlags struct: parses the ARCHER_OPTIONS environment
// string into tool options.  NOTE(review): this chunk has interior lines
// elided (embedded original line numbers jump); the struct header and several
// member declarations are not visible here.
58#if (LLVM_VERSION) >= 40
64 int report_data_leak{0};
66 std::atomic<int> all_memory{0};
// Constructor: tokenizes the space-separated option string and sscanf-matches
// each "key=value" token against the known flags below.
68 ArcherFlags(
const char *env) {
70 std::vector<std::string> tokens;
73 std::istringstream iss(str);
75 while (std::getline(iss, token,
' '))
76 tokens.push_back(token);
78 for (std::vector<std::string>::iterator it = tokens.begin();
79 it != tokens.end(); ++it) {
80#if (LLVM_VERSION) >= 40
81 if (sscanf(it->c_str(),
"flush_shadow=%d", &flush_shadow))
84 if (sscanf(it->c_str(),
"print_max_rss=%d", &print_max_rss))
86 if (sscanf(it->c_str(),
"verbose=%d", &verbose))
88 if (sscanf(it->c_str(),
"report_data_leak=%d", &report_data_leak))
90 if (sscanf(it->c_str(),
"enable=%d", &enabled))
92 if (sscanf(it->c_str(),
"ignore_serial=%d", &ignore_serial))
94 if (sscanf(it->c_str(),
"all_memory=%d", &tmp_int)) {
// Unrecognized tokens are reported to stderr (continuation elided).
98 std::cerr <<
"Illegal values for ARCHER_OPTIONS variable: " << token
// Fragment of the TsanFlags struct: parses the TSAN_OPTIONS environment
// string, only to detect whether ignore_noninstrumented_modules was set
// (used later to decide whether to print a warning).  Interior lines elided.
107 int ignore_noninstrumented_modules;
109 TsanFlags(
const char *env) : ignore_noninstrumented_modules(0) {
111 std::vector<std::string> tokens;
112 std::string str(env);
113 auto end = str.end();
114 auto it = str.begin();
// Separator predicate for TSAN_OPTIONS: space, comma, colon, newline, tab
// (and at least one more alternative elided after the trailing '||').
115 auto is_sep = [](
char c) {
116 return c ==
' ' || c ==
',' || c ==
':' || c ==
'\n' || c ==
'\t' ||
// Tokenize [it, end) on the separator predicate.
120 auto next_it = std::find_if(it,
end, is_sep);
121 tokens.emplace_back(it, next_it);
128 for (
const auto &token : tokens) {
131 if (sscanf(token.c_str(),
"ignore_noninstrumented_modules=%d",
132 &ignore_noninstrumented_modules))
140#if (LLVM_VERSION) >= 40
// If the real TSan annotation entry points are not already declared, declare
// weak function pointers for them (resolved via dlsym at initialization).
148#ifndef TsanHappensBefore
// Declares a function pointer `name` defaulting to the no-op __ompt_tsan_func.
152#define DECLARE_TSAN_FUNCTION(name, ...) \
153 static void (*name)(__VA_ARGS__) = __ompt_tsan_func<__VA_ARGS__>;
// Signature fragments of the DECLARE_TSAN_FUNCTION invocations for the
// Annotate* entry points (macro names elided in this chunk).
160 const volatile void *)
162 const volatile
void *)
166 const volatile
void *,
size_t)
// Convenience wrappers that pass source location to the TSan annotations.
174#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)
177#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)
180#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
183#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
// Deliberately empty: no clock deletion annotation is needed.
186#define TsanDeleteClock(cv)
189#define TsanNewMemory(addr, size) \
190 AnnotateNewMemory(__FILE__, __LINE__, addr, size)
// NOTE(review): FreeMemory maps to AnnotateNewMemory as well — freeing resets
// the shadow state the same way fresh memory does.
191#define TsanFreeMemory(addr, size) \
192 AnnotateNewMemory(__FILE__, __LINE__, addr, size)
196#define TsanFuncEntry(pc) __tsan_func_entry(pc)
197#define TsanFuncExit() __tsan_func_exit()
// Body fragment of my_next_id(): returns a process-wide unique, monotonically
// increasing id via an atomic fetch-and-add on a function-local static.
206 static uint64_t
ID = 0;
207 uint64_t
ret = __sync_fetch_and_add(&
ID, 1);
// DataPool<T>: a thread-local free-list allocator for T objects.  Objects are
// carved out of 64-byte-padded malloc'd slabs; objects returned by the owning
// thread go to DataPointer, objects returned by other threads go (under
// DPMutex) to RemoteDataPointer and are reclaimed in bulk via swap.
// NOTE(review): interior lines are elided throughout this chunk.
216template <
typename T>
struct DataPool final {
217 static __thread DataPool<T> *ThreadDataPool;
218 std::mutex DPMutex{};
221 std::vector<T *> DataPointer{};
222 std::vector<T *> RemoteDataPointer{};
225 std::list<void *> memory;
228 std::atomic<int> remote{0};
// Bookkeeping accessors (counters declared in elided lines).
236 int getRemote() {
return remoteReturn + remote; }
237 int getLocal() {
return localReturn; }
239 int getTotal() {
return total; }
// getMissing(): objects handed out but not yet returned to either list.
241 return total - DataPointer.size() - RemoteDataPointer.size();
// Reclaim remotely-returned objects: swap the remote list in under the lock.
247 const std::lock_guard<std::mutex>
lock(DPMutex);
249 DataPointer.swap(RemoteDataPointer);
// Grow the pool: allocate one slab holding `ndatas` objects, each padded to a
// 64-byte multiple (cache-line sized) to avoid false sharing.
254 size_t elemSize =
sizeof(T);
255 size_t paddedSize = (((elemSize - 1) / 64) + 1) * 64;
258 char *datas = (
char *)malloc(ndatas * paddedSize);
259 memory.push_back(datas);
260 for (
int i = 0;
i < ndatas;
i++) {
261 DataPointer.push_back(
new (datas +
i * paddedSize) T(
this));
// getData() fragment: refill if empty, then pop from the local free list.
269 if (DataPointer.empty())
271 ret = DataPointer.back();
272 DataPointer.pop_back();
// Return path for the owning thread: no lock needed.
277 void returnOwnData(T *
data) {
278 DataPointer.emplace_back(
data);
// Return path for foreign threads: locked push onto the remote list.
285 void returnData(T *
data) {
286 const std::lock_guard<std::mutex>
lock(DPMutex);
287 RemoteDataPointer.emplace_back(
data);
// Destructor fragment: optionally report leaked objects, then destroy all
// pooled objects and free the slabs.
297 if (
archer_flags->report_data_leak && getMissing() != 0) {
298 printf(
"ERROR: While freeing DataPool (%s) we are missing %i data "
300 __PRETTY_FUNCTION__, getMissing());
303 for (
auto i : DataPointer)
306 for (
auto i : RemoteDataPointer)
309 for (
auto i : memory)
// DataPoolEntry<T>: CRTP base for pool-allocated objects.  Remembers the
// owning pool; on Delete() (name elided) the object is Reset() and returned
// via the cheap same-thread path when possible, else the locked remote path.
315template <
typename T>
struct DataPoolEntry {
318 static T *New() {
return DataPool<T>::ThreadDataPool->getData(); }
// Release fragment: reset derived state, then return to the owner pool.
321 static_cast<T *
>(
this)->Reset();
322 if (owner == DataPool<T>::ThreadDataPool)
323 owner->returnOwnData(
static_cast<T *
>(
this));
// Different thread than the owner: use the mutex-protected remote return.
325 owner->returnData(
static_cast<T *
>(
this));
328 DataPoolEntry(DataPool<T> *dp) : owner(dp) {}
// DependencyData: pool-allocated per-variable storage for modeling task
// dependencies; exposes stable addresses used as TSan sync objects for the
// in / out / inoutset dependency kinds (member declarations elided in this
// chunk).  Fix: `return ∈ }` was mojibake — the original `&in;` was
// swallowed as an HTML entity (U+2208); restored to match the sibling
// accessors below.
331struct DependencyData;
332typedef DataPool<DependencyData> DependencyDataPool;
334__thread DependencyDataPool *DependencyDataPool::ThreadDataPool =
nullptr;
337struct DependencyData final : DataPoolEntry<DependencyData> {
341 void *GetInPtr() {
return &in; }
342 void *GetOutPtr() {
return &out; }
343 void *GetInoutsetPtr() {
return &inoutset; }
347 static DependencyData *New() {
return DataPoolEntry<DependencyData>::New(); }
349 DependencyData(DataPool<DependencyData> *dp)
350 : DataPoolEntry<DependencyData>(dp) {}
// TaskDependency: a resolved dependence of one task, caching the three sync
// addresses from the shared DependencyData plus the dependence kind.
// AnnotateBegin/AnnotateEnd (bodies largely elided) emit the TSan
// happens-before/after edges appropriate for the dependence type.
353struct TaskDependency {
357 ompt_dependence_type_t type;
358 TaskDependency(DependencyData *depData, ompt_dependence_type_t type)
359 : inPtr(depData->GetInPtr()), outPtr(depData->GetOutPtr()),
360 inoutsetPtr(depData->GetInoutsetPtr()), type(type) {}
// Called when the dependent task starts: acquire edges per dependence kind.
361 void AnnotateBegin() {
362 if (type == ompt_dependence_type_out ||
363 type == ompt_dependence_type_inout ||
364 type == ompt_dependence_type_mutexinoutset) {
368 }
else if (type == ompt_dependence_type_in) {
371 }
else if (type == ompt_dependence_type_inoutset) {
// AnnotateEnd fragment: release edges mirroring the cases above.
377 if (type == ompt_dependence_type_out ||
378 type == ompt_dependence_type_inout ||
379 type == ompt_dependence_type_mutexinoutset) {
381 }
else if (type == ompt_dependence_type_in) {
383 }
else if (type == ompt_dependence_type_inoutset) {
// ParallelData: pool-allocated per-parallel-region bookkeeping.  Barrier[2]
// (declaration elided) supplies sync addresses: Barrier[1] doubles as the
// whole-region sync object, Barrier[Index] as the alternating barrier object.
390typedef DataPool<ParallelData> ParallelDataPool;
392__thread ParallelDataPool *ParallelDataPool::ThreadDataPool =
nullptr;
395struct ParallelData final : DataPoolEntry<ParallelData> {
404 void *GetParallelPtr() {
return &(Barrier[1]); }
406 void *GetBarrierPtr(
unsigned Index) {
return &(Barrier[
Index]); }
408 ParallelData *Init(
const void *codeptr) {
// Factory: grab a pooled object and (re)initialize it for this region.
415 static ParallelData *New(
const void *codeptr) {
416 return DataPoolEntry<ParallelData>::New()->Init(codeptr);
419 ParallelData(DataPool<ParallelData> *dp) : DataPoolEntry<ParallelData>(dp) {}
// Recover the ParallelData stored in the OMPT parallel_data slot.
422static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) {
423 return reinterpret_cast<ParallelData *
>(parallel_data->ptr);
// Taskgroup: pool-allocated node of a taskgroup stack; Ptr is the TSan sync
// address, Parent links to the enclosing taskgroup.
427typedef DataPool<Taskgroup> TaskgroupPool;
428template <> __thread TaskgroupPool *TaskgroupPool::ThreadDataPool =
nullptr;
431struct Taskgroup final : DataPoolEntry<Taskgroup> {
438 void *GetPtr() {
return &Ptr; }
440 Taskgroup *Init(Taskgroup *
parent) {
447 static Taskgroup *New(Taskgroup *Parent) {
448 return DataPoolEntry<Taskgroup>::New()->Init(Parent);
451 Taskgroup(DataPool<Taskgroup> *dp) : DataPoolEntry<Taskgroup>(dp) {}
// Extra flag OR'd into TaskType, outside the ompt_task_* bit range.
454enum ArcherTaskFlag { ArcherTaskFulfilled = 0x00010000 };
// TaskData: pool-allocated per-task state — barrier slot, refcount, links to
// parent/team/taskgroup, and the task's resolved dependencies.  Many interior
// lines are elided in this chunk.
457typedef DataPool<TaskData> TaskDataPool;
458template <> __thread TaskDataPool *TaskDataPool::ThreadDataPool =
nullptr;
461struct TaskData final : DataPoolEntry<TaskData> {
// Which of the team's two alternating barrier sync objects this task uses.
473 char BarrierIndex{0};
476 bool InBarrier{
false};
// Refcount starts at 1 for the task itself; freed when it drops to 0.
485 std::atomic_int RefCount{1};
488 TaskData *Parent{
nullptr};
491 ParallelData *Team{
nullptr};
495 Taskgroup *TaskGroup{
nullptr};
// malloc'd array of DependencyCount entries (see ompt_tsan_dependences).
498 TaskDependency *Dependencies{
nullptr};
501 unsigned DependencyCount{0};
// Owned map, shared by all children of this task, from dependence variable
// address to its DependencyData.
507 std::unordered_map<void *, DependencyData *> *DependencyMap{
nullptr};
// Predicates over the ompt task-type bitmask (plus the Archer extra flag).
513 bool isIncluded() {
return TaskType & ompt_task_undeferred; }
514 bool isUntied() {
return TaskType & ompt_task_untied; }
515 bool isFinal() {
return TaskType & ompt_task_final; }
516 bool isMergable() {
return TaskType & ompt_task_mergeable; }
517 bool isMerged() {
return TaskType & ompt_task_merged; }
519 bool isExplicit() {
return TaskType & ompt_task_explicit; }
520 bool isImplicit() {
return TaskType & ompt_task_implicit; }
521 bool isInitial() {
return TaskType & ompt_task_initial; }
522 bool isTarget() {
return TaskType & ompt_task_target; }
524 bool isFulfilled() {
return TaskType & ArcherTaskFulfilled; }
525 void setFulfilled() { TaskType |= ArcherTaskFulfilled; }
// AllMemory[2] (declaration elided): slot 0 flags an omp_all_memory
// dependence; slots 0/1 double as last/next sync addresses.
527 void setAllMemoryDep() { AllMemory[0] = 1; }
528 bool hasAllMemoryDep() {
return AllMemory[0]; }
530 void *GetTaskPtr() {
return &Task; }
532 void *GetTaskwaitPtr() {
return &Taskwait; }
534 void *GetLastAllMemoryPtr() {
return AllMemory; }
535 void *GetNextAllMemoryPtr() {
return AllMemory + 1; }
// Init for an explicit task with a parent task.  NOTE(review): in this
// fragment the Parent dereference appears before the nullptr check — lines
// between are elided, so ordering here cannot be trusted as-is.
537 TaskData *Init(TaskData *
parent,
int taskType) {
541 BarrierIndex = Parent->BarrierIndex;
542 if (Parent !=
nullptr) {
546 TaskGroup = Parent->TaskGroup;
// Init for an implicit task tied directly to a team.
551 TaskData *Init(ParallelData *team,
int taskType) {
// Reset fragment: release per-task dependency storage before pooling.
568 for (
auto i : *DependencyMap)
570 delete DependencyMap;
572 DependencyMap =
nullptr;
575 Dependencies =
nullptr;
// Factories: pooled allocation + Init.
582 static TaskData *New(TaskData *
parent,
int taskType) {
583 return DataPoolEntry<TaskData>::New()->Init(
parent, taskType);
586 static TaskData *New(ParallelData *team,
int taskType) {
587 return DataPoolEntry<TaskData>::New()->Init(team, taskType);
590 TaskData(DataPool<TaskData> *dp) : DataPoolEntry<TaskData>(dp) {}
// ToTaskData fragment: recover TaskData from the OMPT task_data slot.
596 return reinterpret_cast<TaskData *
>(task_data->ptr);
// Per-wait_id mutexes, used to serialize mutex-acquired/released callbacks.
601static std::unordered_map<ompt_wait_id_t, std::mutex>
Locks;
// ompt_tsan_thread_begin fragment: create the four thread-local data pools
// (the elided lines between the allocations are TsanNewMemory annotations).
605 ompt_data_t *thread_data) {
606 ParallelDataPool::ThreadDataPool =
new ParallelDataPool;
608 sizeof(ParallelDataPool::ThreadDataPool));
609 TaskgroupPool::ThreadDataPool =
new TaskgroupPool;
611 sizeof(TaskgroupPool::ThreadDataPool));
612 TaskDataPool::ThreadDataPool =
new TaskDataPool;
614 sizeof(TaskDataPool::ThreadDataPool));
615 DependencyDataPool::ThreadDataPool =
new DependencyDataPool;
617 sizeof(DependencyDataPool::ThreadDataPool));
// ompt_tsan_thread_end fragment: tear the pools down in the same order.
623 delete ParallelDataPool::ThreadDataPool;
624 delete TaskgroupPool::ThreadDataPool;
625 delete TaskDataPool::ThreadDataPool;
626 delete DependencyDataPool::ThreadDataPool;
// ompt_tsan_parallel_begin fragment: allocate a ParallelData for the new
// region and stash it in the OMPT parallel_data slot.
633 const ompt_frame_t *parent_task_frame,
634 ompt_data_t *parallel_data,
635 uint32_t requested_team_size,
int flag,
636 const void *codeptr_ra) {
637 ParallelData *Data = ParallelData::New(codeptr_ra);
638 parallel_data->ptr = Data;
// ompt_tsan_parallel_end fragment: recover the region's data; optionally
// flush TSan shadow memory when the runtime reports idle (flush_shadow flag).
646 ompt_data_t *task_data,
int flag,
647 const void *codeptr_ra) {
650 ParallelData *Data = ToParallelData(parallel_data);
656#if (LLVM_VERSION >= 40)
657 if (&__archer_get_omp_status) {
658 if (__archer_get_omp_status() == 0 &&
archer_flags->flush_shadow)
659 __tsan_flush_memory();
// ompt_tsan_implicit_task fragment: on scope_begin of an initial task,
// lazily create the ParallelData; then create the implicit TaskData.
665 ompt_data_t *parallel_data,
666 ompt_data_t *task_data,
667 unsigned int team_size,
668 unsigned int thread_num,
int type) {
670 case ompt_scope_begin:
671 if (
type & ompt_task_initial) {
672 parallel_data->ptr = ParallelData::New(
nullptr);
674 task_data->ptr = TaskData::New(ToParallelData(parallel_data),
type);
// scope_end: sanity-check single end + completed children, and for the
// initial task also release the team's ParallelData.
678 case ompt_scope_end: {
681 assert(Data->freed == 0 &&
"Implicit task end should only be called once!");
684 assert(Data->RefCount == 1 &&
685 "All tasks should have finished at the implicit barrier!");
686 if (
type & ompt_task_initial) {
687 Data->Team->Delete();
693 case ompt_scope_beginend:
// ompt_tsan_sync_region fragment.  scope_begin: all barrier kinds share one
// path keyed on the task's alternating BarrierIndex; taskgroup pushes a new
// Taskgroup node.
701 ompt_scope_endpoint_t endpoint,
702 ompt_data_t *parallel_data,
703 ompt_data_t *task_data,
704 const void *codeptr_ra) {
707 case ompt_scope_begin:
708 case ompt_scope_beginend:
711 case ompt_sync_region_barrier_implementation:
712 case ompt_sync_region_barrier_implicit:
713 case ompt_sync_region_barrier_explicit:
714 case ompt_sync_region_barrier_implicit_parallel:
715 case ompt_sync_region_barrier_implicit_workshare:
716 case ompt_sync_region_barrier_teams:
717 case ompt_sync_region_barrier: {
718 char BarrierIndex = Data->BarrierIndex;
727 Data->InBarrier =
true;
734 case ompt_sync_region_taskwait:
737 case ompt_sync_region_taskgroup:
738 Data->TaskGroup = Taskgroup::New(Data->TaskGroup);
741 case ompt_sync_region_reduction:
// scope_end half: leave the barrier and flip to the other barrier slot.
745 if (endpoint == ompt_scope_begin)
751 case ompt_sync_region_barrier_implementation:
752 case ompt_sync_region_barrier_implicit:
753 case ompt_sync_region_barrier_explicit:
754 case ompt_sync_region_barrier_implicit_parallel:
755 case ompt_sync_region_barrier_implicit_workshare:
756 case ompt_sync_region_barrier_teams:
757 case ompt_sync_region_barrier: {
760 Data->InBarrier =
false;
764 char BarrierIndex = Data->BarrierIndex;
// Alternate between the team's two barrier sync objects.
774 Data->BarrierIndex = (BarrierIndex + 1) % 2;
778 case ompt_sync_region_taskwait: {
779 if (Data->execution > 1)
// taskgroup end: pop the taskgroup node and return it to the pool.
784 case ompt_sync_region_taskgroup: {
785 assert(Data->TaskGroup !=
nullptr &&
786 "Should have at least one taskgroup!");
792 Taskgroup *Parent = Data->TaskGroup->Parent;
793 Data->TaskGroup->Delete();
794 Data->TaskGroup = Parent;
798 case ompt_sync_region_reduction:
// ompt_tsan_reduction fragment: toggles TSan write-ignore around reductions
// (bodies elided).
808 ompt_scope_endpoint_t endpoint,
809 ompt_data_t *parallel_data,
810 ompt_data_t *task_data,
811 const void *codeptr_ra) {
813 case ompt_scope_begin:
815 case ompt_sync_region_reduction:
824 case ompt_sync_region_reduction:
831 case ompt_scope_beginend:
// ompt_tsan_task_create fragment: allocate TaskData for the new task and
// store it in new_task_data->ptr.  Initial tasks also get a fresh
// ParallelData; undeferred (included) and explicit/target tasks take the
// other branches (allocation lines partially elided).
842 ompt_data_t *parent_task_data,
843 const ompt_frame_t *parent_frame,
844 ompt_data_t *new_task_data,
845 int type,
int has_dependences,
846 const void *codeptr_ra)
849 assert(new_task_data->ptr == NULL &&
850 "Task data should be initialized to NULL");
851 if (
type & ompt_task_initial) {
852 ompt_data_t *parallel_data;
855 ParallelData *PData = ParallelData::New(
nullptr);
856 parallel_data->ptr = PData;
858 Data = TaskData::New(PData,
type);
859 new_task_data->ptr = Data;
860 }
else if (
type & ompt_task_undeferred) {
862 new_task_data->ptr = Data;
863 }
else if (
type & ompt_task_explicit ||
type & ompt_task_target) {
865 new_task_data->ptr = Data;
// freeTask fragment: walk up the parent chain, releasing each task whose
// refcount hits zero (the decrement happens in the loop condition).
876 while (
task !=
nullptr && --
task->RefCount == 0) {
877 TaskData *Parent =
task->Parent;
// releaseDependencies fragment: emit release (AnnotateEnd) edges for either
// the all_memory pseudo-dependence or each recorded dependency.
889 if (
task->hasAllMemoryDep()) {
892 }
else if (
task->DependencyCount)
895 for (
unsigned i = 0;
i <
task->DependencyCount;
i++) {
896 task->Dependencies[
i].AnnotateEnd();
// acquireDependencies fragment: mirror of the above with AnnotateBegin.
902 if (
task->hasAllMemoryDep())
904 else if (
task->DependencyCount)
907 for (
unsigned i = 0;
i <
task->DependencyCount;
i++) {
908 task->Dependencies[
i].AnnotateBegin();
// completeTask / endTask / switchTasks / startTask fragments: publish
// happens-before edges to the barrier, taskgroup, and successor task sync
// objects as tasks finish, suspend, and resume (most lines elided).
916 if (FromTask->isFulfilled())
920 if (!FromTask->isIncluded()) {
923 ParallelData *PData = FromTask->Team;
929 if (FromTask->TaskGroup !=
nullptr) {
949 if (FromTask && FromTask->InBarrier) {
954 if (ToTask && ToTask->InBarrier) {
976 if (ToTask->execution == 0) {
// ompt_tsan_task_schedule fragment: dispatch on the prior task's status to
// the helpers above (case bodies elided).
986 ompt_task_status_t prior_task_status,
987 ompt_data_t *second_task_data) {
1011 TaskData *FromTask =
ToTaskData(first_task_data);
1012 TaskData *ToTask =
ToTaskData(second_task_data);
1014 switch (prior_task_status) {
1015 case ompt_task_early_fulfill:
1017 FromTask->setFulfilled();
1019 case ompt_task_late_fulfill:
1024 case ompt_taskwait_complete:
1028 case ompt_task_complete:
1034 case ompt_task_cancel:
1041 case ompt_task_detach:
1047 case ompt_task_yield:
1052 case ompt_task_switch:
// ompt_tsan_dependences fragment: record this task's dependences.  For each
// dependence variable, look up (or create) the parent's shared
// DependencyData, then placement-new a TaskDependency into a malloc'd array.
1061 const ompt_dependence_t *deps,
int ndeps) {
1065 if (!Data->Parent) {
// Lazily create the parent's variable -> DependencyData map.
1069 if (!Data->Parent->DependencyMap)
1070 Data->Parent->DependencyMap =
1071 new std::unordered_map<void *, DependencyData *>();
1072 Data->Dependencies =
1073 (TaskDependency *)malloc(
sizeof(TaskDependency) * ndeps);
1074 Data->DependencyCount = ndeps;
// i indexes deps[]; d indexes the (possibly shorter) Dependencies array.
1075 for (
int i = 0,
d = 0;
i < ndeps;
i++,
d++) {
// omp_all_memory dependences are tracked by a flag, not an array entry.
1076 if (deps[
i].dependence_type == ompt_dependence_type_out_all_memory ||
1077 deps[
i].dependence_type == ompt_dependence_type_inout_all_memory) {
1078 Data->setAllMemoryDep();
1079 Data->DependencyCount--;
// Warn when all_memory is seen but the tool was started without all_memory=1.
1081 printf(
"The application uses omp_all_memory, but Archer was\n"
1082 "started to not consider omp_all_memory. This can lead\n"
1083 "to false data race alerts.\n"
1084 "Include all_memory=1 in ARCHER_OPTIONS to consider\n"
1085 "omp_all_memory from the beginning.\n");
// insert() yields the existing entry or a new nullptr slot to fill.
1091 auto ret = Data->Parent->DependencyMap->insert(
1092 std::make_pair(deps[
i].variable.ptr,
nullptr));
1094 ret.first->second = DependencyData::New();
1096 new ((
void *)(Data->Dependencies +
d))
1097 TaskDependency(
ret.first->second, deps[
i].dependence_type);
// ompt_tsan_mutex_acquired / mutex_released fragments: serialize on the
// per-wait_id mutex from Locks (annotation lines elided).
1107 const void *codeptr_ra) {
1113 std::mutex &Lock =
Locks[wait_id];
1121 const void *codeptr_ra) {
1123 std::mutex &Lock =
Locks[wait_id];
// Registers ompt_tsan_<event> as the handler for ompt_callback_<event> and
// warns if the runtime supports it below the requested guarantee level.
1131#define SET_OPTIONAL_CALLBACK_T(event, type, result, level) \
1133 ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event; \
1134 result = ompt_set_callback(ompt_callback_##event, \
1135 (ompt_callback_t)tsan_##event); \
1136 if (result < level) \
1137 printf("Registered callback '" #event "' is not supported at " #level \
1142#define SET_CALLBACK_T(event, type) \
1145 SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always); \
1148#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)
// Resolves a TSan entry point by name via dlsym, warning when absent; the
// Silent variant assigns without diagnostics.
1150#define findTsanFunction(f, fSig) \
1152 void *fp = dlsym(RTLD_DEFAULT, #f); \
1156 printf("Unable to find TSan function " #f ".\n"); \
1159#define findTsanFunctionSilent(f, fSig) f = fSig dlsym(RTLD_DEFAULT, #f)
// ompt_tsan_initialize fragment: read TSAN_OPTIONS, resolve the OMPT lookup
// entry points, dlsym the TSan annotation functions, and register callbacks
// (registration lines elided).
1162 ompt_data_t *tool_data) {
1163 const char *options = getenv(
"TSAN_OPTIONS");
1164 TsanFlags tsan_flags(options);
1167 (ompt_set_callback_t)lookup(
"ompt_set_callback");
1169 std::cerr <<
"Could not set callback, exiting..." << std::endl;
1173 (ompt_get_parallel_info_t)lookup(
"ompt_get_parallel_info");
1177 fprintf(stderr,
"Could not get inquiry function 'ompt_get_parallel_info', "
// findTsanFunction invocations: signatures of the Annotate* entry points.
1183 (
void (*)(
const char *,
int,
const volatile void *)));
1185 (
void (*)(
const char *,
int,
const volatile void *)));
1186 findTsanFunction(AnnotateIgnoreWritesBegin, (
void (*)(
const char *,
int)));
1190 (
void (*)(
const char *,
int,
const volatile void *,
size_t)));
// Warn if TSan will scan non-instrumented modules (false-positive source).
1210 if (!tsan_flags.ignore_noninstrumented_modules)
1212 "Warning: please export "
1213 "TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
1214 "to avoid false positive reports from the OpenMP runtime!\n"); 
// ompt_tsan_finalize fragment: optionally print peak RSS (print_max_rss).
1226 getrusage(RUSAGE_SELF, &
end);
1227 printf(
"MAX RSS[KiB] during execution: %ld\n",
end.ru_maxrss);
// ompt_start_tool fragment: parse ARCHER_OPTIONS, bail out when disabled or
// when the process is not running under TSan (probed via __tsan_init).
1236 const char *options = getenv(
"ARCHER_OPTIONS");
1240 std::cout <<
"Archer disabled, stopping operation" << std::endl;
1256 void (*__tsan_init)(
void) =
nullptr;
1263 std::cout <<
"Archer detected OpenMP application without TSan; "
1264 "stopping operation"
1271 std::cout <<
"Archer detected OpenMP application with TSan, supplying "
1272 "OpenMP synchronization semantics"
static ompt_set_callback_t ompt_set_callback
static ompt_get_parallel_info_t ompt_get_parallel_info
static ompt_get_thread_data_t ompt_get_thread_data
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp end
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id parent
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
static kmp_bootstrap_lock_t lock
struct ompt_start_tool_result_t ompt_start_tool_result_t
unsigned * Index(unsigned *p, unsigned i, unsigned j, unsigned bound2)
static ompt_start_tool_result_t * ompt_start_tool_result
static void __ompt_tsan_func(Args...)
#define TsanHappensBefore(cv)
#define TsanFuncEntry(pc)
static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data, const ompt_frame_t *parent_task_frame, ompt_data_t *parallel_data, uint32_t requested_team_size, int flag, const void *codeptr_ra)
OMPT event callbacks for handling parallel regions.
static TaskData * ToTaskData(ompt_data_t *task_data)
#define findTsanFunctionSilent(f, fSig)
static uint64_t my_next_id()
#define DECLARE_TSAN_FUNCTION(name,...)
static void ompt_tsan_sync_region(ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
static void freeTask(TaskData *task)
static void switchTasks(TaskData *FromTask, TaskData *ToTask)
#define TsanHappensAfter(cv)
static void endTask(TaskData *FromTask)
static ArcherFlags * archer_flags
static void ompt_tsan_dependences(ompt_data_t *task_data, const ompt_dependence_t *deps, int ndeps)
static void ompt_tsan_thread_end(ompt_data_t *thread_data)
#define SET_CALLBACK_T(event, type)
static void ompt_tsan_task_create(ompt_data_t *parent_task_data, const ompt_frame_t *parent_frame, ompt_data_t *new_task_data, int type, int has_dependences, const void *codeptr_ra)
OMPT event callbacks for handling tasks.
#define TsanIgnoreWritesEnd()
static std::unordered_map< ompt_wait_id_t, std::mutex > Locks
Store a mutex for each wait_id to resolve race condition with callbacks.
static void ompt_tsan_task_schedule(ompt_data_t *first_task_data, ompt_task_status_t prior_task_status, ompt_data_t *second_task_data)
static void acquireDependencies(TaskData *task)
#define SET_CALLBACK(event)
static void completeTask(TaskData *FromTask)
static void ompt_tsan_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra)
OMPT event callbacks for handling locking.
static void suspendTask(TaskData *FromTask)
static std::mutex LocksMutex
#define SET_OPTIONAL_CALLBACK_T(event, type, result, level)
static void ompt_tsan_parallel_end(ompt_data_t *parallel_data, ompt_data_t *task_data, int flag, const void *codeptr_ra)
static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra)
static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num, ompt_data_t *tool_data)
#define TsanNewMemory(addr, size)
#define KMP_FALLTHROUGH()
static void ompt_tsan_finalize(ompt_data_t *tool_data)
static int hasReductionCallback
static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, unsigned int team_size, unsigned int thread_num, int type)
#define findTsanFunction(f, fSig)
static void ompt_tsan_reduction(ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
#define TsanIgnoreWritesBegin()
static void ompt_tsan_thread_begin(ompt_thread_t thread_type, ompt_data_t *thread_data)
static void startTask(TaskData *ToTask)
static void releaseDependencies(TaskData *task)
__attribute__((noinline))