13#ifndef KMP_WAIT_RELEASE_H
14#define KMP_WAIT_RELEASE_H
139template <
typename PtrType, flag_type FlagType,
bool Sleepable>
/** Repoint this flag at a different location.
 *  @param new_loc pointer that is stored into loc; nothing else is updated.
 */
162 void set(
volatile PtrType *new_loc) {
loc = new_loc; }
171 return traits_type::tcr(*(this->
get())) ==
checker;
182 return traits_type::tcr(*(this->
get())) !=
checker;
187 (
void)traits_type::test_then_add4((
volatile PtrType *)this->
get());
195 return *(this->
get());
197 return traits_type::test_then_or((
volatile PtrType *)this->
get(),
208 traits_type::test_then_and((
volatile PtrType *)this->
get(),
233template <
typename PtrType, flag_type FlagType,
bool Sleepable>
236 std::atomic<PtrType> *
loc;
/** @result the flag location pointer currently held in loc (not the value
 *  stored at that location).
 */
251 std::atomic<PtrType> *
get() {
return loc; }
/** Repoint this flag at a different atomic location.
 *  @param new_loc pointer that is stored into loc; nothing else is updated.
 */
255 void set(std::atomic<PtrType> *new_loc) {
loc = new_loc; }
/** @result the value currently stored at loc, read with
 *  std::memory_order_acquire.
 */
257 PtrType
load() {
return loc->load(std::memory_order_acquire); }
285 return *(this->
get());
322static void __ompt_implicit_task_end(
kmp_info_t *this_thr,
323 ompt_state_t ompt_state,
325 int ds_tid = this_thr->th.th_info.ds.ds_tid;
326 if (ompt_state == ompt_state_wait_barrier_implicit) {
327 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
329 void *codeptr = NULL;
332 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
337 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
343 int flags = this_thr->th.ompt_thread_info.parallel_flags;
344 flags = (flags & ompt_parallel_league) ? ompt_task_initial
345 : ompt_task_implicit;
347 ompt_scope_end, NULL, tId, 0, ds_tid, flags);
350 this_thr->th.ompt_thread_info.state = ompt_state_idle;
352 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
363template <
class C,
bool final_spin,
bool Cancellable =
false,
364 bool Sleepable =
true>
368#if USE_ITT_BUILD && USE_ITT_NOTIFY
369 volatile void *spin =
flag->get();
373 int tasks_completed =
FALSE;
383 if (
flag->done_check()) {
387 th_gtid = this_thr->th.th_info.ds.ds_gtid;
398 (
"__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid,
flag));
454 ompt_state_t ompt_entry_state;
457 ompt_entry_state = this_thr->th.ompt_thread_info.state;
458 if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
461 if (this_thr->th.th_team)
462 team = this_thr->th.th_team->t.ompt_serialized_team_info;
466 tId = OMPT_CUR_TASK_DATA(this_thr);
469 tId = &(this_thr->th.ompt_thread_info.task_data);
472 this_thr->th.th_task_team == NULL)) {
474 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
487#ifdef KMP_ADJUST_BLOCKTIME
489 (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
494 hibernate = this_thr->th.th_team_bt_intervals;
496 hibernate = this_thr->th.th_team_bt_intervals;
508 KF_TRACE(20, (
"__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
516 hibernate_goal =
KMP_NOW() + this_thr->th.th_team_bt_intervals;
525 while (
flag->notdone_check()) {
528 task_team = this_thr->th.th_task_team;
536 if (task_team != NULL) {
540 this_thr, th_gtid, final_spin,
549 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
551 this_thr->th.th_task_team = NULL;
573 if (this_thr->th.th_stats->isIdle() &&
629#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
630 if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
631 KF_TRACE(50, (
"__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
632 flag->mwait(th_gtid);
635 KF_TRACE(50, (
"__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
640 flag->suspend(th_gtid);
645#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
661 ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
665 __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
666 ompt_exit_state = this_thr->th.ompt_thread_info.state;
669 if (ompt_exit_state == ompt_state_idle) {
670 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
679 this_thr->th.th_stats->resetIdleFlag();
691 if (tasks_completed) {
695 std::atomic<kmp_int32> *unfinished_threads =
705#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
709static inline void __kmp_mwait_template(
int th_gtid,
C *
flag) {
713 KF_TRACE(30, (
"__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
722 volatile void *spin =
flag->get();
725 if (!
flag->done_check()) {
727 th->th.th_active =
FALSE;
728 if (th->th.th_active_in_pool) {
729 th->th.th_active_in_pool =
FALSE;
733 flag->set_sleeping();
734 KF_TRACE(50, (
"__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
736 if (__kmp_umwait_enabled) {
737 __kmp_umonitor(cacheline);
740 if (__kmp_mwait_enabled) {
741 __kmp_mm_monitor(cacheline, 0, 0);
747 if (
flag->done_check())
748 flag->unset_sleeping();
752 th->th.th_sleep_loc_type =
flag->get_type();
754 KF_TRACE(50, (
"__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
756 if (__kmp_umwait_enabled) {
757 __kmp_umwait(1, 100);
760 if (__kmp_mwait_enabled) {
761 __kmp_mm_mwait(0, __kmp_mwait_hints);
764 KF_TRACE(50, (
"__kmp_mwait_template: T#%d mwait done\n", th_gtid));
767 if (
flag->is_sleeping())
768 flag->unset_sleeping();
769 TCW_PTR(th->th.th_sleep_loc, NULL);
773 th->th.th_active =
TRUE;
774 if (
TCR_4(th->th.th_in_pool)) {
776 th->th.th_active_in_pool =
TRUE;
780 KF_TRACE(30, (
"__kmp_mwait_template: T#%d exit\n", th_gtid));
792 KF_TRACE(20, (
"__kmp_release: T#%d releasing flag(%x)\n", gtid,
flag->get()));
796 flag->internal_release();
798 KF_TRACE(100, (
"__kmp_release: T#%d set new spin=%d\n", gtid,
flag->get(),
804 if (
flag->is_any_sleeping()) {
805 for (
unsigned int i = 0;
i <
flag->get_num_waiters(); ++
i) {
809 int wait_gtid = waiter->th.th_info.ds.ds_gtid;
811 KF_TRACE(50, (
"__kmp_release: T#%d waking up thread T#%d since sleep "
813 gtid, wait_gtid,
flag->get()));
814 flag->resume(wait_gtid);
821template <
bool Cancellable,
bool Sleepable>
831#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
/** Thin forwarder: hands th_gtid and this flag to __kmp_mwait_32.
 *  @param th_gtid passed through unchanged as the first argument.
 */
832 void mwait(
int th_gtid) { __kmp_mwait_32(th_gtid,
this); }
839 this_thr, gtid,
this, final_spin,
845 return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
848 return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
855template <
bool Cancellable,
bool Sleepable>
867#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
/** Thin forwarder: hands th_gtid and this flag to __kmp_mwait_64.
 *  @param th_gtid passed through unchanged as the first argument.
 */
868 void mwait(
int th_gtid) { __kmp_mwait_64(th_gtid,
this); }
875 this_thr, gtid,
this, final_spin,
881 return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
884 return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
891template <
bool Cancellable,
bool Sleepable>
902 std::atomic<bool> *
loc)
/** Thin forwarder: hands th_gtid and this flag to __kmp_atomic_mwait_64.
 *  @param th_gtid passed through unchanged as the first argument.
 */
905 void mwait(
int th_gtid) { __kmp_atomic_mwait_64(th_gtid,
this); }
911 this_thr, gtid,
this, final_spin,
938 unsigned char &byteref(
volatile kmp_uint64 *
loc,
size_t offset) {
954 flag_switch(false), bt(bar_t),
960 return byteref(&old_loc, offset) ==
checker;
967 if (byteref(
get(), offset) != 1 && !flag_switch)
969 else if (flag_switch) {
980 byteref(
get(), offset) = 1;
983 byteref(&
mask, offset) = 1;
989 __kmp_wait_template<kmp_flag_oncore, TRUE>(
992 __kmp_wait_template<kmp_flag_oncore, FALSE>(
997#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
/** Thin forwarder: hands th_gtid and this flag to __kmp_mwait_oncore.
 *  @param th_gtid passed through unchanged as the first argument.
 */
998 void mwait(
int th_gtid) { __kmp_mwait_oncore(th_gtid,
this); }
1006 this_thr, gtid,
this, final_spin,
1008 if (ompd_state & OMPD_ENABLE_BP)
1013 this_thr, gtid,
this, final_spin,
1023 void *
flag =
CCAST(
void *, thr->th.th_sleep_loc);
1042 KF_TRACE(100, (
"__kmp_null_resume_wrapper: flag type %d is unset\n",
type));
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
kmp_atomic_flag_64(std::atomic< kmp_uint64 > *p, kmp_uint64 c, std::atomic< bool > *loc)
bool wait(kmp_info_t *this_thr, int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj))
kmp_atomic_flag_64(std::atomic< kmp_uint64 > *p, kmp_uint64 c)
void suspend(int th_gtid)
kmp_atomic_flag_64(std::atomic< kmp_uint64 > *p)
kmp_atomic_flag_64(std::atomic< kmp_uint64 > *p, kmp_info_t *thr)
kmp_flag_32(std::atomic< kmp_uint32 > *p, kmp_uint32 c)
bool wait(kmp_info_t *this_thr, int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj))
kmp_flag_32(std::atomic< kmp_uint32 > *p, kmp_info_t *thr)
kmp_flag_32(std::atomic< kmp_uint32 > *p)
void suspend(int th_gtid)
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
bool wait(kmp_info_t *this_thr, int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj))
kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
kmp_flag_64(volatile kmp_uint64 *p)
kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
void suspend(int th_gtid)
kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic< bool > *loc)
Base class for wait/release atomic flag.
std::atomic< PtrType > * loc
Pointer to flag location to wait on.
bool is_sleeping_val(PtrType old_loc)
bool is_sleeping()
Test whether there are threads sleeping on the flag.
kmp_flag_atomic(std::atomic< PtrType > *p, PtrType c)
kmp_flag_atomic(std::atomic< PtrType > *p)
kmp_flag_atomic(std::atomic< PtrType > *p, kmp_info_t *thr)
bool done_check_val(PtrType old_loc)
kmp_flag_atomic(std::atomic< PtrType > *p, PtrType c, std::atomic< bool > *sloc)
void set(std::atomic< PtrType > *new_loc)
PtrType checker
Flag == checker means it has been released.
std::atomic< PtrType > * get()
flag_traits< FlagType > traits_type
Base class for wait/release volatile flag.
kmp_flag_native(volatile PtrType *p, PtrType c)
kmp_flag_native(volatile PtrType *p)
PtrType checker
When flag==checker, it has been released.
void set(volatile PtrType *new_loc)
flag_traits< FlagType > traits_type
bool is_sleeping_val(PtrType old_loc)
virtual bool notdone_check()
virtual bool done_check_val(PtrType old_loc)
virtual ~kmp_flag_native()
bool is_sleeping()
Test whether there are threads sleeping on the flag.
virtual bool done_check()
kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic< bool > *sloc)
bool done_check() override
void wait(kmp_info_t *this_thr, int final_spin)
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained)
void suspend(int th_gtid)
virtual ~kmp_flag_oncore() override
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t, kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
bool done_check_val(kmp_uint64 old_loc) override
kmp_flag_oncore(volatile kmp_uint64 *p)
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
enum barrier_type get_bt()
bool notdone_check() override
Base class for all flags.
flag_properties t
"Type" of the flag in loc
kmp_flag(std::atomic< bool > *sloc)
kmp_uint32 num_waiting_threads
Number of threads sleeping on this thread.
kmp_info_t * waiting_threads[1]
Threads sleeping on this thread.
kmp_uint32 get_num_waiters()
flag_traits< FlagType > traits_type
enum barrier_type get_bt()
kmp_info_t * get_waiter(kmp_uint32 i)
Parameter i: index into waiting_threads.
std::atomic< bool > * sleepLoc
void set_waiter(kmp_info_t *thr)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int mask
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync p
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
kmp_global_t __kmp_global
#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time)
kmp_pause_status_t __kmp_pause_status
#define KMP_MAX_BLOCKTIME
#define KMP_NOT_SAFE_TO_REAP
void __kmp_unlock_suspend_mx(kmp_info_t *th)
int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, int *thread_finished, kmp_int32 is_constrained)
kmp_tasking_mode_t __kmp_tasking_mode
void __kmp_abort_thread(void)
volatile int __kmp_init_gtid
void __kmp_suspend_64(int th_gtid, kmp_flag_64< C, S > *flag)
@ atomic_flag64
atomic 64 bit flags
@ flag_oncore
special 64-bit flag for on-core barrier (hierarchical)
@ flag32
atomic 32 bit flags
#define KMP_BARRIER_SLEEP_STATE
void __kmp_suspend_32(int th_gtid, kmp_flag_32< C, S > *flag)
void __kmp_hidden_helper_worker_thread_wait()
int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64< C, S > *flag, int final_spin, int *thread_finished, kmp_int32 is_constrained)
std::atomic< int > __kmp_thread_pool_active_nth
#define KMP_MASTER_TID(tid)
#define KMP_TASKING_ENABLED(task_team)
kmp_info_t ** __kmp_threads
#define KMP_BARRIER_SWITCH_TO_OWN_FLAG
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag)
#define KMP_INIT_YIELD(count)
void __kmp_lock_suspend_mx(kmp_info_t *th)
#define KMP_BLOCKING(goal, count)
#define KMP_INIT_BACKOFF(time)
#define __kmp_allocate(size)
volatile int __kmp_hidden_helper_team_done
std::atomic< kmp_int32 > __kmp_unexecuted_hidden_helper_tasks
#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid)
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag)
bool __kmp_wpolicy_passive
#define KMP_BARRIER_SWITCHING
void __kmp_resume_64(int target_gtid, kmp_flag_64< C, S > *flag)
void __kmp_resume_32(int target_gtid, kmp_flag_32< C, S > *flag)
int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32< C, S > *flag, int final_spin, int *thread_finished, kmp_int32 is_constrained)
void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64< C, S > *flag)
int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64< C, S > *flag, int final_spin, int *thread_finished, kmp_int32 is_constrained)
#define KMP_BARRIER_STATE_BUMP
void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64< C, S > *flag)
static int __kmp_gtid_from_thread(const kmp_info_t *thr)
void __kmp_suspend_initialize_thread(kmp_info_t *th)
void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag, int final_spin)
union KMP_ALIGN_CACHE kmp_info kmp_info_t
#define KMP_DEBUG_ASSERT(cond)
unsigned long long kmp_uint64
#define KMP_FSYNC_RELEASING(obj)
#define KMP_FSYNC_SPIN_ACQUIRED(obj)
#define KMP_FSYNC_SPIN_PREPARE(obj)
#define USE_ITT_BUILD_ARG(x)
#define KMP_FSYNC_SPIN_INIT(obj, spin)
#define KMP_ATOMIC_ADD(p, v)
#define KMP_TEST_THEN_OR32(p, v)
#define KMP_ATOMIC_AND(p, v)
#define KMP_TEST_THEN_ADD4_32(p)
#define KMP_ATOMIC_ST_REL(p, v)
#define KMP_TEST_THEN_AND64(p, v)
#define KMP_TEST_THEN_ADD4_64(p)
#define KMP_ATOMIC_LD_ACQ(p)
#define KMP_TEST_THEN_AND32(p, v)
#define KMP_ATOMIC_DEC(p)
unsigned long kmp_uintptr_t
#define KMP_ATOMIC_OR(p, v)
#define KMP_TEST_THEN_OR64(p, v)
#define KMP_ATOMIC_INC(p)
Functions for collecting statistics.
#define KMP_PUSH_PARTITIONED_TIMER(name)
#define KMP_GET_THREAD_STATE()
#define KMP_POP_PARTITIONED_TIMER()
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n)
#define KMP_SET_THREAD_STATE(state_name)
static void __kmp_null_resume_wrapper(kmp_info_t *thr)
static void __kmp_release_template(C *flag)
static bool __kmp_wait_template(kmp_info_t *this_thr, C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj))
ompt_callbacks_active_t ompt_enabled
ompt_callbacks_internal_t ompt_callbacks
static flag_t tcr(flag_t f)
static flag_t test_then_add4(volatile flag_t *f)
static flag_t test_then_and(volatile flag_t *f, flag_t v)
static flag_t test_then_or(volatile flag_t *f, flag_t v)
static flag_t test_then_add4(volatile flag_t *f)
static flag_t tcr(flag_t f)
static flag_t test_then_and(volatile flag_t *f, flag_t v)
static flag_t test_then_or(volatile flag_t *f, flag_t v)
static flag_t test_then_add4(volatile flag_t *f)
static flag_t test_then_or(volatile flag_t *f, flag_t v)
static flag_t tcr(flag_t f)
static flag_t test_then_and(volatile flag_t *f, flag_t v)
static flag_t test_then_add4(volatile flag_t *f)
static flag_t test_then_or(volatile flag_t *f, flag_t v)
static flag_t test_then_and(volatile flag_t *f, flag_t v)
static flag_t tcr(flag_t f)
KMP_ALIGN_CACHE std::atomic< kmp_int32 > tt_unfinished_threads
KMP_ALIGN_CACHE volatile kmp_uint32 tt_active
ompt_task_info_t ompt_task_info