13#ifndef KMP_WAIT_RELEASE_H
14#define KMP_WAIT_RELEASE_H
140template <
typename PtrType, flag_type FlagType,
bool Sleepable>
/*!
 * Replace the stored location pointer.
 * @param new_loc in   pointer that is assigned to loc
 */
163 void set(
volatile PtrType *new_loc) {
loc = new_loc; }
172 return traits_type::tcr(*(this->
get())) ==
checker;
183 return traits_type::tcr(*(this->
get())) !=
checker;
188 (
void)traits_type::test_then_add4((
volatile PtrType *)this->
get());
196 return *(this->
get());
198 return traits_type::test_then_or((
volatile PtrType *)this->
get(),
209 traits_type::test_then_and((
volatile PtrType *)this->
get(),
234template <
typename PtrType, flag_type FlagType,
bool Sleepable>
/*! Pointer to the atomic flag location to wait on. */
237 std::atomic<PtrType> *
loc;
/*!
 * @result the pointer held in loc (the location itself, not the value
 * stored there)
 */
252 std::atomic<PtrType> *
get() {
return loc; }
/*!
 * Replace the stored location pointer.
 * @param new_loc in   pointer that is assigned to loc
 */
256 void set(std::atomic<PtrType> *new_loc) {
loc = new_loc; }
/*!
 * @result the value currently stored at loc, read with
 * std::memory_order_acquire
 */
258 PtrType
load() {
// loc is dereferenced unconditionally; it must be non-null here.
return loc->load(std::memory_order_acquire); }
286 return *(this->
get());
323static void __ompt_implicit_task_end(
kmp_info_t *this_thr,
324 ompt_state_t ompt_state,
326 int ds_tid = this_thr->th.th_info.ds.ds_tid;
327 if (ompt_state == ompt_state_wait_barrier_implicit_parallel ||
328 ompt_state == ompt_state_wait_barrier_teams) {
329 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
331 void *codeptr = NULL;
332 ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
333 if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
334 sync_kind = ompt_sync_region_barrier_teams;
337 sync_kind, ompt_scope_end, NULL, tId, codeptr);
341 sync_kind, ompt_scope_end, NULL, tId, codeptr);
346 int flags = this_thr->th.ompt_thread_info.parallel_flags;
347 flags = (flags & ompt_parallel_league) ? ompt_task_initial
348 : ompt_task_implicit;
350 ompt_scope_end, NULL, tId, 0, ds_tid, flags);
353 this_thr->th.ompt_thread_info.state = ompt_state_idle;
355 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
366template <
class C,
bool final_spin,
bool Cancellable =
false,
367 bool Sleepable =
true>
371#if USE_ITT_BUILD && USE_ITT_NOTIFY
372 volatile void *spin =
flag->get();
376 int tasks_completed =
FALSE;
386 if (
flag->done_check()) {
390 th_gtid = this_thr->th.th_info.ds.ds_gtid;
401 (
"__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid,
flag));
457 ompt_state_t ompt_entry_state;
460 ompt_entry_state = this_thr->th.ompt_thread_info.state;
462 (ompt_entry_state != ompt_state_wait_barrier_implicit_parallel &&
463 ompt_entry_state != ompt_state_wait_barrier_teams) ||
466 if (this_thr->th.th_team)
467 team = this_thr->th.th_team->t.ompt_serialized_team_info;
471 tId = OMPT_CUR_TASK_DATA(this_thr);
474 tId = &(this_thr->th.ompt_thread_info.task_data);
477 this_thr->th.th_task_team == NULL)) {
479 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
492#ifdef KMP_ADJUST_BLOCKTIME
494 (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
499 hibernate = this_thr->th.th_team_bt_intervals;
501 hibernate = this_thr->th.th_team_bt_intervals;
513 KF_TRACE(20, (
"__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
521 hibernate_goal =
KMP_NOW() + this_thr->th.th_team_bt_intervals;
530 while (
flag->notdone_check()) {
533 task_team = this_thr->th.th_task_team;
541 if (task_team != NULL) {
545 this_thr, th_gtid, final_spin,
554 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
556 this_thr->th.th_task_team = NULL;
578 if (this_thr->th.th_stats->isIdle() &&
634#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
635 if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
636 KF_TRACE(50, (
"__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
637 flag->mwait(th_gtid);
640 KF_TRACE(50, (
"__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
645 flag->suspend(th_gtid);
650#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
666 ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
670 __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
671 ompt_exit_state = this_thr->th.ompt_thread_info.state;
674 if (ompt_exit_state == ompt_state_idle) {
675 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
684 this_thr->th.th_stats->resetIdleFlag();
696 if (tasks_completed) {
700 std::atomic<kmp_int32> *unfinished_threads =
710#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
714static inline void __kmp_mwait_template(
int th_gtid,
C *
flag) {
718 KF_TRACE(30, (
"__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
727 volatile void *spin =
flag->get();
730 if (!
flag->done_check()) {
732 th->th.th_active =
FALSE;
733 if (th->th.th_active_in_pool) {
734 th->th.th_active_in_pool =
FALSE;
738 flag->set_sleeping();
739 KF_TRACE(50, (
"__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
741 if (__kmp_umwait_enabled) {
742 __kmp_umonitor(cacheline);
745 if (__kmp_mwait_enabled) {
746 __kmp_mm_monitor(cacheline, 0, 0);
752 if (
flag->done_check())
753 flag->unset_sleeping();
757 th->th.th_sleep_loc_type =
flag->get_type();
759 KF_TRACE(50, (
"__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
761 if (__kmp_umwait_enabled) {
762 __kmp_umwait(1, 100);
765 if (__kmp_mwait_enabled) {
766 __kmp_mm_mwait(0, __kmp_mwait_hints);
769 KF_TRACE(50, (
"__kmp_mwait_template: T#%d mwait done\n", th_gtid));
772 if (
flag->is_sleeping())
773 flag->unset_sleeping();
774 TCW_PTR(th->th.th_sleep_loc, NULL);
778 th->th.th_active =
TRUE;
779 if (
TCR_4(th->th.th_in_pool)) {
781 th->th.th_active_in_pool =
TRUE;
785 KF_TRACE(30, (
"__kmp_mwait_template: T#%d exit\n", th_gtid));
797 KF_TRACE(20, (
"__kmp_release: T#%d releasing flag(%x)\n", gtid,
flag->get()));
801 flag->internal_release();
803 KF_TRACE(100, (
"__kmp_release: T#%d set new spin=%d\n", gtid,
flag->get(),
809 if (
flag->is_any_sleeping()) {
810 for (
unsigned int i = 0;
i <
flag->get_num_waiters(); ++
i) {
814 int wait_gtid = waiter->th.th_info.ds.ds_gtid;
816 KF_TRACE(50, (
"__kmp_release: T#%d waking up thread T#%d since sleep "
818 gtid, wait_gtid,
flag->get()));
819 flag->resume(wait_gtid);
826template <
bool Cancellable,
bool Sleepable>
836#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
/*!
 * Forward to __kmp_mwait_32 with this flag object.
 * @param th_gtid in   passed through unchanged as the first argument
 */
837 void mwait(
int th_gtid) { __kmp_mwait_32(th_gtid,
this); }
844 this_thr, gtid,
this, final_spin,
850 return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
853 return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
860template <
bool Cancellable,
bool Sleepable>
872#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
/*!
 * Forward to __kmp_mwait_64 with this flag object.
 * @param th_gtid in   passed through unchanged as the first argument
 */
873 void mwait(
int th_gtid) { __kmp_mwait_64(th_gtid,
this); }
880 this_thr, gtid,
this, final_spin,
886 return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
889 return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
896template <
bool Cancellable,
bool Sleepable>
907 std::atomic<bool> *
loc)
/*!
 * Forward to __kmp_atomic_mwait_64 with this flag object.
 * @param th_gtid in   passed through unchanged as the first argument
 */
910 void mwait(
int th_gtid) { __kmp_atomic_mwait_64(th_gtid,
this); }
916 this_thr, gtid,
this, final_spin,
944 unsigned char &byteref(
volatile kmp_uint64 *
loc,
size_t offset) {
960 flag_switch(false), bt(bar_t),
966 return byteref(&old_loc, offset) ==
checker;
973 if (byteref(
get(), offset) != 1 && !flag_switch)
975 else if (flag_switch) {
986 byteref(
get(), offset) = 1;
989 byteref(&
mask, offset) = 1;
995 __kmp_wait_template<kmp_flag_oncore, TRUE>(
998 __kmp_wait_template<kmp_flag_oncore, FALSE>(
1003#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
/*!
 * Forward to __kmp_mwait_oncore with this flag object.
 * @param th_gtid in   passed through unchanged as the first argument
 */
1004 void mwait(
int th_gtid) { __kmp_mwait_oncore(th_gtid,
this); }
1012 this_thr, gtid,
this, final_spin,
1014 if (ompd_state & OMPD_ENABLE_BP)
1019 this_thr, gtid,
this, final_spin,
1029 void *
flag =
CCAST(
void *, thr->th.th_sleep_loc);
1048 KF_TRACE(100, (
"__kmp_null_resume_wrapper: flag type %d is unset\n",
type));
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
kmp_atomic_flag_64(std::atomic< kmp_uint64 > *p, kmp_uint64 c, std::atomic< bool > *loc)
bool wait(kmp_info_t *this_thr, int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj))
kmp_atomic_flag_64(std::atomic< kmp_uint64 > *p, kmp_uint64 c)
void suspend(int th_gtid)
kmp_atomic_flag_64(std::atomic< kmp_uint64 > *p)
kmp_atomic_flag_64(std::atomic< kmp_uint64 > *p, kmp_info_t *thr)
kmp_flag_32(std::atomic< kmp_uint32 > *p, kmp_uint32 c)
bool wait(kmp_info_t *this_thr, int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj))
kmp_flag_32(std::atomic< kmp_uint32 > *p, kmp_info_t *thr)
kmp_flag_32(std::atomic< kmp_uint32 > *p)
void suspend(int th_gtid)
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
bool wait(kmp_info_t *this_thr, int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj))
kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
kmp_flag_64(volatile kmp_uint64 *p)
kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
void suspend(int th_gtid)
kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic< bool > *loc)
Base class for wait/release atomic flag.
std::atomic< PtrType > * loc
Pointer to flag location to wait on.
bool is_sleeping_val(PtrType old_loc)
bool is_sleeping()
Test whether there are threads sleeping on the flag.
kmp_flag_atomic(std::atomic< PtrType > *p, PtrType c)
kmp_flag_atomic(std::atomic< PtrType > *p)
kmp_flag_atomic(std::atomic< PtrType > *p, kmp_info_t *thr)
bool done_check_val(PtrType old_loc)
kmp_flag_atomic(std::atomic< PtrType > *p, PtrType c, std::atomic< bool > *sloc)
void set(std::atomic< PtrType > *new_loc)
PtrType checker
Flag==checker means it has been released.
std::atomic< PtrType > * get()
flag_traits< FlagType > traits_type
Base class for wait/release volatile flag.
kmp_flag_native(volatile PtrType *p, PtrType c)
kmp_flag_native(volatile PtrType *p)
PtrType checker
When flag==checker, it has been released.
void set(volatile PtrType *new_loc)
flag_traits< FlagType > traits_type
bool is_sleeping_val(PtrType old_loc)
virtual bool notdone_check()
virtual bool done_check_val(PtrType old_loc)
virtual ~kmp_flag_native()
bool is_sleeping()
Test whether there are threads sleeping on the flag.
virtual bool done_check()
kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic< bool > *sloc)
bool done_check() override
void wait(kmp_info_t *this_thr, int final_spin)
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
void suspend(int th_gtid)
virtual ~kmp_flag_oncore() override
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t, kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
bool done_check_val(kmp_uint64 old_loc) override
kmp_flag_oncore(volatile kmp_uint64 *p)
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
enum barrier_type get_bt()
bool notdone_check() override
Base class for all flags.
flag_properties t
"Type" of the flag in loc
kmp_flag(std::atomic< bool > *sloc)
kmp_uint32 num_waiting_threads
Num threads sleeping on this thread.
kmp_info_t * waiting_threads[1]
kmp_uint32 get_num_waiters()
flag_traits< FlagType > traits_type
enum barrier_type get_bt()
kmp_info_t * get_waiter(kmp_uint32 i)
Parameter i (in): index into waiting_threads.
std::atomic< bool > * sleepLoc
void set_waiter(kmp_info_t *thr)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int mask
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync p
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
kmp_global_t __kmp_global
#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time)
kmp_pause_status_t __kmp_pause_status
#define KMP_MAX_BLOCKTIME
#define KMP_NOT_SAFE_TO_REAP
void __kmp_unlock_suspend_mx(kmp_info_t *th)
int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, int *thread_finished, kmp_int32 is_constrained)
kmp_tasking_mode_t __kmp_tasking_mode
void __kmp_abort_thread(void)
volatile int __kmp_init_gtid
void __kmp_suspend_64(int th_gtid, kmp_flag_64< C, S > *flag)
@ atomic_flag64
atomic 64 bit flags
@ flag_oncore
special 64-bit flag for on-core barrier (hierarchical)
@ flag32
atomic 32 bit flags
#define KMP_BARRIER_SLEEP_STATE
void __kmp_suspend_32(int th_gtid, kmp_flag_32< C, S > *flag)
void __kmp_hidden_helper_worker_thread_wait()
int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64< C, S > *flag, int final_spin, int *thread_finished, kmp_int32 is_constrained)
std::atomic< int > __kmp_thread_pool_active_nth
#define KMP_MASTER_TID(tid)
#define KMP_TASKING_ENABLED(task_team)
kmp_info_t ** __kmp_threads
#define KMP_BARRIER_SWITCH_TO_OWN_FLAG
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag)
#define KMP_INIT_YIELD(count)
void __kmp_lock_suspend_mx(kmp_info_t *th)
#define KMP_BLOCKING(goal, count)
#define KMP_INIT_BACKOFF(time)
#define __kmp_allocate(size)
volatile int __kmp_hidden_helper_team_done
std::atomic< kmp_int32 > __kmp_unexecuted_hidden_helper_tasks
#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid)
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag)
bool __kmp_wpolicy_passive
#define KMP_BARRIER_SWITCHING
void __kmp_resume_64(int target_gtid, kmp_flag_64< C, S > *flag)
void __kmp_resume_32(int target_gtid, kmp_flag_32< C, S > *flag)
int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32< C, S > *flag, int final_spin, int *thread_finished, kmp_int32 is_constrained)
void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64< C, S > *flag)
int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64< C, S > *flag, int final_spin, int *thread_finished, kmp_int32 is_constrained)
#define KMP_BARRIER_STATE_BUMP
void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64< C, S > *flag)
static int __kmp_gtid_from_thread(const kmp_info_t *thr)
void __kmp_suspend_initialize_thread(kmp_info_t *th)
void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag, int final_spin)
union KMP_ALIGN_CACHE kmp_info kmp_info_t
#define KMP_DEBUG_ASSERT(cond)
unsigned long long kmp_uint64
#define KMP_FSYNC_RELEASING(obj)
#define KMP_FSYNC_SPIN_ACQUIRED(obj)
#define KMP_FSYNC_SPIN_PREPARE(obj)
#define USE_ITT_BUILD_ARG(x)
#define KMP_FSYNC_SPIN_INIT(obj, spin)
#define KMP_ATOMIC_ADD(p, v)
#define KMP_TEST_THEN_OR32(p, v)
#define KMP_ATOMIC_AND(p, v)
#define KMP_TEST_THEN_ADD4_32(p)
#define KMP_ATOMIC_ST_REL(p, v)
#define KMP_TEST_THEN_AND64(p, v)
#define KMP_TEST_THEN_ADD4_64(p)
#define KMP_ATOMIC_LD_ACQ(p)
#define KMP_TEST_THEN_AND32(p, v)
#define KMP_ATOMIC_DEC(p)
unsigned long kmp_uintptr_t
#define KMP_ATOMIC_OR(p, v)
#define KMP_TEST_THEN_OR64(p, v)
#define KMP_ATOMIC_INC(p)
Functions for collecting statistics.
#define KMP_PUSH_PARTITIONED_TIMER(name)
#define KMP_GET_THREAD_STATE()
#define KMP_POP_PARTITIONED_TIMER()
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n)
#define KMP_SET_THREAD_STATE(state_name)
static void __kmp_null_resume_wrapper(kmp_info_t *thr)
static void __kmp_release_template(C *flag)
static bool __kmp_wait_template(kmp_info_t *this_thr, C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj))
ompt_callbacks_active_t ompt_enabled
ompt_callbacks_internal_t ompt_callbacks
static flag_t tcr(flag_t f)
static flag_t test_then_add4(volatile flag_t *f)
static flag_t test_then_and(volatile flag_t *f, flag_t v)
static flag_t test_then_or(volatile flag_t *f, flag_t v)
static flag_t test_then_add4(volatile flag_t *f)
static flag_t tcr(flag_t f)
static flag_t test_then_and(volatile flag_t *f, flag_t v)
static flag_t test_then_or(volatile flag_t *f, flag_t v)
static flag_t test_then_add4(volatile flag_t *f)
static flag_t test_then_or(volatile flag_t *f, flag_t v)
static flag_t tcr(flag_t f)
static flag_t test_then_and(volatile flag_t *f, flag_t v)
static flag_t test_then_add4(volatile flag_t *f)
static flag_t test_then_or(volatile flag_t *f, flag_t v)
static flag_t test_then_and(volatile flag_t *f, flag_t v)
static flag_t tcr(flag_t f)
KMP_ALIGN_CACHE std::atomic< kmp_int32 > tt_unfinished_threads
KMP_ALIGN_CACHE volatile kmp_uint32 tt_active
ompt_task_info_t ompt_task_info