39#if OMP_PROFILING_SUPPORT
40#include "llvm/Support/TimeProfiler.h"
41static char *ProfileTraceFile =
nullptr;
45#define KMP_USE_PRCTL 0
61#if defined(KMP_GOMP_COMPAT)
74#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
91#if KMP_AFFINITY_SUPPORTED
92static void __kmp_partition_places(
kmp_team_t *team,
93 int update_master_only = 0);
101#ifdef USE_LOAD_BALANCE
102static int __kmp_load_balance_nproc(
kmp_root_t *root,
int set_nproc);
107static int __kmp_unregister_root_other_thread(
int gtid);
129 (
"*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
142 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using TDATA\n"));
147 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using keyed TLS\n"));
150 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using internal alg.\n"));
152 stack_addr = (
char *)&stack_data;
172 stack_size = (size_t)
TCR_PTR(thr->th.th_info.ds.ds_stacksize);
173 stack_base = (
char *)
TCR_PTR(thr->th.th_info.ds.ds_stackbase);
177 if (stack_addr <= stack_base) {
178 size_t stack_diff = stack_base - stack_addr;
180 if (stack_diff <= stack_size) {
196 (
"*** __kmp_get_global_thread_id: internal alg. failed to find "
197 "thread, using TLS\n"));
214 if (!
TCR_4(other_threads[
i]->th.th_info.ds.ds_stackgrow)) {
218 stack_base = (
char *)other_threads[
i]->th.th_info.ds.ds_stackbase;
219 if (stack_addr > stack_base) {
220 TCW_PTR(other_threads[
i]->th.th_info.ds.ds_stackbase, stack_addr);
221 TCW_PTR(other_threads[
i]->th.th_info.ds.ds_stacksize,
222 other_threads[
i]->th.th_info.ds.ds_stacksize + stack_addr -
225 TCW_PTR(other_threads[
i]->th.th_info.ds.ds_stacksize,
226 stack_base - stack_addr);
231 char *stack_end = (
char *)other_threads[
i]->th.th_info.ds.ds_stackbase;
232 char *stack_beg = stack_end - other_threads[
i]->th.th_info.ds.ds_stacksize;
234 other_threads[
i]->th.th_info.ds.ds_stacksize,
235 "th_%d stack (refinement)",
i);
248 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id_reg: using TDATA\n"));
253 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
257 (
"*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
264 (
"__kmp_get_global_thread_id_reg: Encountered new root thread. "
265 "Registering a new gtid.\n"));
285 char *stack_beg = NULL;
286 char *stack_end = NULL;
289 KA_TRACE(10, (
"__kmp_check_stack_overlap: called\n"));
291 stack_end = (
char *)th->th.th_info.ds.ds_stackbase;
292 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
298 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
299 "th_%s stack (%s)",
"mon",
300 (th->th.th_info.ds.ds_stackgrow) ?
"initial" :
"actual");
303 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
304 "th_%d stack (%s)", gtid,
305 (th->th.th_info.ds.ds_stackgrow) ?
"initial" :
"actual");
314 (
"__kmp_check_stack_overlap: performing extensive checking\n"));
315 if (stack_beg == NULL) {
316 stack_end = (
char *)th->th.th_info.ds.ds_stackbase;
317 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
323 if (f_th && f_th != th) {
324 char *other_stack_end =
325 (
char *)
TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
326 char *other_stack_beg =
327 other_stack_end - (size_t)
TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
328 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
329 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
334 -1, other_stack_beg, other_stack_end,
335 (
size_t)
TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
344 KA_TRACE(10, (
"__kmp_check_stack_overlap: returning\n"));
350 static int done =
FALSE;
357#define MAX_MESSAGE 512
360 char const *format, ...) {
364 va_start(ap, format);
365 KMP_SNPRINTF(buffer,
sizeof(buffer),
"OMP storage map: %p %p%8lu %s\n", p1,
366 p2, (
unsigned long)
size, format);
369#if KMP_PRINT_DATA_PLACEMENT
372 if (p1 <= p2 && (
char *)p2 - (
char *)p1 ==
size) {
374 node = __kmp_get_host_node(p1);
380 int localProc = __kmp_get_cpu_from_gtid(gtid);
384 p1 = (
void *)((
size_t)p1 & ~((size_t)page_size - 1));
385 p2 = (
void *)(((
size_t)p2 - 1) & ~((
size_t)page_size - 1));
399 (
char *)p1 += page_size;
400 }
while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
406 (
char *)p1 + (page_size - 1),
407 __kmp_get_host_node(p1));
410 (
char *)p2 + (page_size - 1),
411 __kmp_get_host_node(p2));
433 va_start(ap, format);
435 KMP_SNPRINTF(buffer,
sizeof(buffer),
"OMP warning: %s\n", format);
510#if KMP_FAST_REDUCTION_BARRIER
522 int team_id,
int num_thr) {
530 "%s_%d.t_bar", header, team_id);
540 "%s_%d.t_bar[forkjoin]", header, team_id);
542#if KMP_FAST_REDUCTION_BARRIER
546 "%s_%d.t_bar[reduction]", header, team_id);
550 -1, &team->
t.t_dispatch[0], &team->
t.t_dispatch[num_thr],
551 sizeof(
kmp_disp_t) * num_thr,
"%s_%d.t_dispatch", header, team_id);
554 -1, &team->
t.t_threads[0], &team->
t.t_threads[num_thr],
555 sizeof(
kmp_info_t *) * num_thr,
"%s_%d.t_threads", header, team_id);
558 &team->
t.t_disp_buffer[num_disp_buff],
560 "%s_%d.t_disp_buffer", header, team_id);
571#if ENABLE_LIBOMPTARGET
572static void __kmp_init_omptarget() {
573 __kmp_init_target_task();
582BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
587 case DLL_PROCESS_ATTACH:
588 KA_TRACE(10, (
"DllMain: PROCESS_ATTACH\n"));
592 case DLL_PROCESS_DETACH:
606 if (lpReserved == NULL)
611 case DLL_THREAD_ATTACH:
612 KA_TRACE(10, (
"DllMain: THREAD_ATTACH\n"));
618 case DLL_THREAD_DETACH:
633 int gtid = *gtid_ref;
634#ifdef BUILD_PARALLEL_ORDERED
640#if KMP_USE_DYNAMIC_LOCK
646#ifdef BUILD_PARALLEL_ORDERED
647 if (!team->
t.t_serialized) {
658 int gtid = *gtid_ref;
659#ifdef BUILD_PARALLEL_ORDERED
668#ifdef BUILD_PARALLEL_ORDERED
669 if (!team->
t.t_serialized) {
674 team->
t.t_ordered.dt.t_value = ((tid + 1) % team->
t.t_nproc);
694 team = th->th.th_team;
697 th->th.th_ident = id_ref;
699 if (team->
t.t_serialized) {
702 kmp_int32 old_this = th->th.th_local.this_construct;
704 ++th->th.th_local.this_construct;
708 if (team->
t.t_construct == old_this) {
710 th->th.th_local.this_construct);
713 if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
715 team->
t.t_active_level == 1) {
717 __kmp_itt_metadata_single(id_ref);
731 __kmp_itt_single_start(gtid);
739 __kmp_itt_single_end(gtid);
752 int master_tid,
int set_nthreads,
758 kmp_info_t *this_thr = parent_team->
t.t_threads[master_tid];
762 new_nthreads = set_nthreads;
766#ifdef USE_LOAD_BALANCE
767 else if (
__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
768 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
769 if (new_nthreads == 1) {
770 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d load balance reduced "
771 "reservation to 1 thread\n",
775 if (new_nthreads < set_nthreads) {
776 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d load balance reduced "
777 "reservation to %d threads\n",
778 master_tid, new_nthreads));
784 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
785 if (new_nthreads <= 1) {
786 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d thread limit reduced "
787 "reservation to 1 thread\n",
791 if (new_nthreads < set_nthreads) {
792 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d thread limit reduced "
793 "reservation to %d threads\n",
794 master_tid, new_nthreads));
796 new_nthreads = set_nthreads;
799 if (set_nthreads > 2) {
801 new_nthreads = (new_nthreads % set_nthreads) + 1;
802 if (new_nthreads == 1) {
803 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d dynamic random reduced "
804 "reservation to 1 thread\n",
808 if (new_nthreads < set_nthreads) {
809 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d dynamic random reduced "
810 "reservation to %d threads\n",
811 master_tid, new_nthreads));
820 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
823 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
824 if (tl_nthreads <= 0) {
832 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
835 if (tl_nthreads == 1) {
836 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
837 "reduced reservation to 1 thread\n",
841 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
842 "reservation to %d threads\n",
843 master_tid, tl_nthreads));
844 new_nthreads = tl_nthreads;
848 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
849 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
850 if (cg_nthreads + new_nthreads -
851 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
853 int tl_nthreads = max_cg_threads - cg_nthreads +
854 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
855 if (tl_nthreads <= 0) {
863 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
866 if (tl_nthreads == 1) {
867 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
868 "reduced reservation to 1 thread\n",
872 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
873 "reservation to %d threads\n",
874 master_tid, tl_nthreads));
875 new_nthreads = tl_nthreads;
892 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
895 int slotsRequired =
__kmp_nth + new_nthreads -
896 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
899 if (slotsAdded < slotsRequired) {
901 new_nthreads -= (slotsRequired - slotsAdded);
909 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
914 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
922 if (new_nthreads == 1) {
924 (
"__kmp_reserve_threads: T#%d serializing team after reclaiming "
925 "dead roots and rechecking; requested %d threads\n",
928 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d allocating %d threads; requested"
941 int fork_teams_workers) {
945 KA_TRACE(10, (
"__kmp_fork_team_threads: new_nprocs = %d\n", team->
t.t_nproc));
950 master_th->th.th_info.ds.ds_tid = 0;
951 master_th->th.th_team = team;
952 master_th->th.th_team_nproc = team->
t.t_nproc;
953 master_th->th.th_team_master = master_th;
954 master_th->th.th_team_serialized =
FALSE;
955 master_th->th.th_dispatch = &team->
t.t_dispatch[0];
958#if KMP_NESTED_HOT_TEAMS
960 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
963 int level = team->
t.t_active_level - 1;
964 if (master_th->th.th_teams_microtask) {
965 if (master_th->th.th_teams_size.nteams > 1) {
970 master_th->th.th_teams_level == team->
t.t_level) {
975 if (
level < __kmp_hot_teams_max_level) {
976 if (hot_teams[
level].hot_team) {
982 hot_teams[
level].hot_team = team;
983 hot_teams[
level].hot_team_nth = team->
t.t_nproc;
990 use_hot_team = team == root->r.r_hot_team;
995 team->
t.t_threads[0] = master_th;
999 for (
i = 1;
i < team->
t.t_nproc;
i++) {
1003 team->
t.t_threads[
i] = thr;
1007 KA_TRACE(20, (
"__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
1008 "T#%d(%d:%d) join =%llu, plain=%llu\n",
1013 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1014 thr->th.th_teams_level = master_th->th.th_teams_level;
1015 thr->th.th_teams_size = master_th->th.th_teams_size;
1020 balign[
b].
bb.b_arrived = team->
t.t_bar[
b].b_arrived;
1023 balign[
b].
bb.b_worker_arrived = team->
t.t_bar[
b].b_team_arrived;
1029#if KMP_AFFINITY_SUPPORTED
1033 if (!fork_teams_workers) {
1034 __kmp_partition_places(team);
1038 if (team->
t.t_nproc > 1 &&
1040 team->
t.b->update_num_threads(team->
t.t_nproc);
1051 (
"__kmp_fork_team_threads: Primary T#%d pushing task_team %p / team "
1052 "%p, new task_team %p / team %p\n",
1054 team->
t.t_parent, team->
t.t_task_team[master_th->th.th_task_state],
1059 master_th->th.th_task_state);
1063 if (team->
t.t_nproc > 1) {
1065 team->
t.t_threads[1]->th.th_task_state == 1);
1067 team->
t.t_threads[1]->th.th_task_state);
1069 master_th->th.th_task_state = 0;
1074 master_th->th.th_task_state);
1076 master_th->th.th_task_state = 0;
1081 for (
i = 0;
i < team->
t.t_nproc;
i++) {
1083 if (thr->th.th_prev_num_threads != team->
t.t_nproc ||
1084 thr->th.th_prev_level != team->
t.t_level) {
1085 team->
t.t_display_affinity = 1;
1094#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1099 if (__kmp_inherit_fp_control) {
1104 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1105 __kmp_store_mxcsr(&mxcsr);
1106 mxcsr &= KMP_X86_MXCSR_MASK;
1132 if (__kmp_inherit_fp_control && team->
t.t_fp_control_saved) {
1137 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1138 __kmp_store_mxcsr(&mxcsr);
1139 mxcsr &= KMP_X86_MXCSR_MASK;
1141 if (team->
t.t_x87_fpu_control_word != x87_fpu_control_word) {
1142 __kmp_clear_x87_fpu_status_word();
1143 __kmp_load_x87_fpu_control_word(&team->
t.t_x87_fpu_control_word);
1146 if (team->
t.t_mxcsr != mxcsr) {
1147 __kmp_load_mxcsr(&team->
t.t_mxcsr);
1152#define propagateFPControl(x) ((void)0)
1153#define updateHWFPControl(x) ((void)0)
1165 KC_TRACE(10, (
"__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1177 serial_team = this_thr->th.th_serial_team;
1184 if (this_thr->th.th_current_task->td_icvs.proc_bind ==
proc_bind_false) {
1189 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1195 this_thr->th.th_set_nproc = 0;
1198 ompt_data_t ompt_parallel_data = ompt_data_none;
1199 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1201 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1204 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1212 &ompt_parallel_data, team_size,
1213 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1218 if (this_thr->th.th_team != serial_team) {
1220 int level = this_thr->th.th_team->t.t_level;
1222 if (serial_team->
t.t_serialized) {
1234 proc_bind, &this_thr->th.th_current_task->td_icvs,
1240 new_team->
t.t_threads[0] = this_thr;
1241 new_team->
t.t_parent = this_thr->th.th_team;
1242 serial_team = new_team;
1243 this_thr->th.th_serial_team = serial_team;
1247 (
"__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1248 global_tid, serial_team));
1256 (
"__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1257 global_tid, serial_team));
1264 serial_team->
t.t_ident =
loc;
1265 serial_team->
t.t_serialized = 1;
1266 serial_team->
t.t_nproc = 1;
1267 serial_team->
t.t_parent = this_thr->th.th_team;
1269 serial_team->
t.t_primary_task_state = this_thr->th.th_task_state;
1270 serial_team->
t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1271 this_thr->th.th_team = serial_team;
1272 serial_team->
t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1274 KF_TRACE(10, (
"__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1275 this_thr->th.th_current_task));
1276 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1277 this_thr->th.th_current_task->td_flags.executing = 0;
1284 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1285 &this_thr->th.th_current_task->td_parent->td_icvs);
1290 this_thr->th.th_current_task->td_icvs.nproc =
1296 this_thr->th.th_current_task->td_icvs.proc_bind =
1303 this_thr->th.th_info.ds.ds_tid = 0;
1306 this_thr->th.th_team_nproc = 1;
1307 this_thr->th.th_team_master = this_thr;
1308 this_thr->th.th_team_serialized = 1;
1309 this_thr->th.th_task_team = NULL;
1310 this_thr->th.th_task_state = 0;
1312 serial_team->
t.t_level = serial_team->
t.t_parent->t.t_level + 1;
1313 serial_team->
t.t_active_level = serial_team->
t.t_parent->t.t_active_level;
1314 serial_team->
t.t_def_allocator = this_thr->th.th_def_allocator;
1320 if (!serial_team->
t.t_dispatch->th_disp_buffer) {
1321 serial_team->
t.t_dispatch->th_disp_buffer =
1325 this_thr->th.th_dispatch = serial_team->
t.t_dispatch;
1335 ++serial_team->
t.t_serialized;
1336 this_thr->th.th_team_serialized = serial_team->
t.t_serialized;
1339 int level = this_thr->th.th_team->t.t_level;
1343 this_thr->th.th_current_task->td_icvs.nproc =
1346 serial_team->
t.t_level++;
1347 KF_TRACE(10, (
"__kmpc_serialized_parallel: T#%d increasing nesting level "
1348 "of serial team %p to %d\n",
1349 global_tid, serial_team, serial_team->
t.t_level));
1357 disp_buffer->next = serial_team->
t.t_dispatch->th_disp_buffer;
1358 serial_team->
t.t_dispatch->th_disp_buffer = disp_buffer;
1360 this_thr->th.th_dispatch = serial_team->
t.t_dispatch;
1372 if (this_thr->th.th_prev_level != serial_team->
t.t_level ||
1373 this_thr->th.th_prev_num_threads != 1) {
1376 this_thr->th.th_prev_level = serial_team->
t.t_level;
1377 this_thr->th.th_prev_num_threads = 1;
1384 serial_team->
t.ompt_team_info.master_return_address = codeptr;
1386 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1387 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1392 &ompt_parallel_data, codeptr);
1400 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1402 ompt_task_implicit);
1403 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1408 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1409 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1419 return (master_th->th.th_teams_microtask && ap &&
1427 return ((ap == NULL && active_level == 0) ||
1428 (ap && teams_level > 0 && teams_level ==
level));
1440 ompt_data_t ompt_parallel_data,
void *return_address,
1446 parent_team->
t.t_ident =
loc;
1448 parent_team->
t.t_argc = argc;
1449 argv = (
void **)parent_team->
t.t_argv;
1450 for (
i = argc - 1;
i >= 0; --
i) {
1454 if (parent_team == master_th->th.th_serial_team) {
1462 parent_team->
t.t_serialized--;
1472 void **exit_frame_p;
1473 ompt_data_t *implicit_task_data;
1478 &ompt_parallel_data, return_address);
1485 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1489 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
1490 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1494 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1496 exit_frame_p = &dummy;
1502 parent_team->
t.t_serialized--;
1517 *exit_frame_p = NULL;
1518 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1521 ompt_scope_end, NULL, implicit_task_data, 1,
1522 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1524 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1528 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1529 OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
1531 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1538 parent_team->
t.t_invoke = invoker;
1540 parent_team->
t.t_active_level++;
1541 parent_team->
t.t_level++;
1542 parent_team->
t.t_def_allocator = master_th->th.th_def_allocator;
1549 master_th->th.th_teams_size.nth = parent_team->
t.t_nproc;
1561 if (master_set_numthreads) {
1562 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1564 kmp_info_t **other_threads = parent_team->
t.t_threads;
1567 int old_proc = master_th->th.th_teams_size.nth;
1572 parent_team->
t.t_nproc = master_set_numthreads;
1573 for (
i = 0;
i < master_set_numthreads; ++
i) {
1574 other_threads[
i]->th.th_team_nproc = master_set_numthreads;
1578 master_th->th.th_set_nproc = 0;
1583 int nth = __kmp_omp_num_threads(
loc);
1585 master_set_numthreads = nth;
1594 if (master_th->th.th_current_task->td_icvs.proc_bind ==
proc_bind_false) {
1599 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1607 master_th->th.th_current_task->td_icvs.proc_bind)) {
1614 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1615 kmp_info_t **other_threads = parent_team->
t.t_threads;
1616 for (
i = 0;
i < master_th->th.th_team_nproc; ++
i) {
1617 other_threads[
i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1623#if USE_ITT_BUILD && USE_ITT_NOTIFY
1624 if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
1626 __kmp_forkjoin_frames_mode == 3 &&
1627 parent_team->
t.t_active_level == 1
1628 && master_th->th.th_teams_size.nteams == 1) {
1630 master_th->th.th_frame_time = tmp_time;
1631 parent_team->
t.t_region_time = tmp_time;
1633 if (__itt_stack_caller_create_ptr) {
1636 parent_team->
t.t_stack_id = __kmp_itt_stack_caller_create();
1639#if KMP_AFFINITY_SUPPORTED
1640 __kmp_partition_places(parent_team);
1643 KF_TRACE(10, (
"__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
1644 "master_th=%p, gtid=%d\n",
1645 root, parent_team, master_th, gtid));
1647 KF_TRACE(10, (
"__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
1648 "master_th=%p, gtid=%d\n",
1649 root, parent_team, master_th, gtid));
1655 KA_TRACE(20, (
"__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
1656 parent_team->
t.t_id, parent_team->
t.t_pkfn));
1658 if (!parent_team->
t.t_invoke(gtid)) {
1659 KMP_ASSERT2(0,
"cannot invoke microtask for PRIMARY thread");
1661 KA_TRACE(20, (
"__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
1662 parent_team->
t.t_id, parent_team->
t.t_pkfn));
1665 KA_TRACE(20, (
"__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
1676 ompt_data_t *ompt_parallel_data,
void **return_address,
1677 ompt_data_t **parent_task_data,
1685#if KMP_OS_LINUX && \
1686 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1694 20, (
"__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
1699 master_th->th.th_serial_team->t.t_pkfn =
microtask;
1704 master_th->th.th_serial_team->t.t_ident =
loc;
1707 master_th->th.th_serial_team->t.t_level--;
1712 void **exit_frame_p;
1718 ompt_parallel_data, *return_address);
1722 task_info = OMPT_CUR_TASK_INFO(master_th);
1723 exit_frame_p = &(task_info->
frame.exit_frame.ptr);
1727 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1729 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1733 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1735 exit_frame_p = &dummy;
1752 *exit_frame_p = NULL;
1755 ompt_scope_end, NULL, &(task_info->
task_data), 1,
1756 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1758 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1762 ompt_parallel_data, *parent_task_data,
1763 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1765 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1770 team = master_th->th.th_team;
1772 team->
t.t_invoke = invoker;
1774 team->
t.t_argc = argc;
1775 argv = (
void **)team->
t.t_argv;
1776 for (
i = argc - 1;
i >= 0; --
i)
1788 ompt_scope_end, NULL, &(task_info->
task_data), 0,
1789 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1793 ompt_parallel_data, *parent_task_data,
1797 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1802 for (
i = argc - 1;
i >= 0; --
i)
1808 void **exit_frame_p;
1811 ompt_data_t *implicit_task_data;
1815 ompt_parallel_data, *return_address);
1818 task_info = OMPT_CUR_TASK_INFO(master_th);
1819 exit_frame_p = &(task_info->
frame.exit_frame.ptr);
1822 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1825 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1827 ompt_task_implicit);
1832 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1834 exit_frame_p = &dummy;
1851 *exit_frame_p = NULL;
1854 ompt_scope_end, NULL, &(task_info->
task_data), 1,
1855 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1858 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1862 ompt_parallel_data, *parent_task_data,
1863 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1865 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1883 KA_TRACE(20, (
"__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1887 "__kmp_serial_fork_call: unknown fork_context parameter");
1890 KA_TRACE(20, (
"__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1904 int master_this_cons;
1911 int master_set_numthreads;
1912 int task_thread_limit = 0;
1916#if KMP_NESTED_HOT_TEAMS
1917 kmp_hot_team_ptr_t **p_hot_teams;
1923 KA_TRACE(20, (
"__kmp_fork_call: enter T#%d\n", gtid));
1945 parent_team = master_th->th.th_team;
1946 master_tid = master_th->th.th_info.ds.ds_tid;
1947 master_this_cons = master_th->th.th_local.this_construct;
1948 root = master_th->th.th_root;
1949 master_active = root->r.r_active;
1950 master_set_numthreads = master_th->th.th_set_nproc;
1952 master_th->th.th_current_task->td_icvs.task_thread_limit;
1955 ompt_data_t ompt_parallel_data = ompt_data_none;
1956 ompt_data_t *parent_task_data;
1957 ompt_frame_t *ompt_frame;
1958 void *return_address = NULL;
1963 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1971 level = parent_team->
t.t_level;
1973 active_level = parent_team->
t.t_active_level;
1975 teams_level = master_th->th.th_teams_level;
1976#if KMP_NESTED_HOT_TEAMS
1977 p_hot_teams = &master_th->th.th_hot_teams;
1978 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1980 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1981 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1983 (*p_hot_teams)[0].hot_team_nth = 1;
1990 int team_size = master_set_numthreads
1991 ? master_set_numthreads
1995 ? ompt_parallel_league
1996 : ompt_parallel_team);
1998 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
2001 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2005 master_th->th.th_ident =
loc;
2011 master_set_numthreads,
level,
2013 ompt_parallel_data, return_address,
2027 if ((!enter_teams &&
2028 (parent_team->
t.t_active_level >=
2029 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
2031 KC_TRACE(10, (
"__kmp_fork_call: T#%d serializing team\n", gtid));
2034 nthreads = master_set_numthreads
2035 ? master_set_numthreads
2040 nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
2053 nthreads, enter_teams);
2054 if (nthreads == 1) {
2065 master_th->th.th_set_nproc = 0;
2067 if (nthreads == 1) {
2069 invoker, master_th, parent_team,
2071 &ompt_parallel_data, &return_address,
2079 KF_TRACE(10, (
"__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
2080 "curtask=%p, curtask_max_aclevel=%d\n",
2081 parent_team->
t.t_active_level, master_th,
2082 master_th->th.th_current_task,
2083 master_th->th.th_current_task->td_icvs.max_active_levels));
2087 master_th->th.th_current_task->td_flags.executing = 0;
2089 if (!master_th->th.th_teams_microtask ||
level > teams_level) {
2095 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2107 if (master_th->th.th_current_task->td_icvs.proc_bind ==
proc_bind_false) {
2113 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2116 if (master_th->th.th_teams_microtask &&
2126 master_th->th.th_current_task->td_icvs.proc_bind)) {
2129 if (!master_th->th.th_teams_microtask ||
2140 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2141 new_icvs.
next = NULL;
2142 if (nthreads_icv > 0) {
2143 new_icvs.
nproc = nthreads_icv;
2150 KF_TRACE(10, (
"__kmp_fork_call: before __kmp_allocate_team\n"));
2155 proc_bind, &new_icvs,
2161 KF_TRACE(10, (
"__kmp_fork_call: before __kmp_allocate_team\n"));
2167 &master_th->th.th_current_task->td_icvs,
2171 &master_th->th.th_current_task->td_icvs);
2174 10, (
"__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2188 if (!master_th->th.th_teams_microtask ||
level > teams_level) {
2189 int new_level = parent_team->
t.t_level + 1;
2191 new_level = parent_team->
t.t_active_level + 1;
2195 int new_level = parent_team->
t.t_level;
2197 new_level = parent_team->
t.t_active_level;
2210 if (ompd_state & OMPD_ENABLE_BP)
2211 ompd_bp_parallel_begin();
2216 (
"__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2217 gtid, parent_team->
t.t_id, team->
t.t_master_tid, team->
t.t_id,
2220 (team->
t.t_master_tid == 0 &&
2221 (team->
t.t_parent == root->r.r_root_team ||
2222 team->
t.t_parent->t.t_serialized)));
2226 argv = (
void **)team->
t.t_argv;
2228 for (
i = argc - 1;
i >= 0; --
i) {
2234 for (
i = 0;
i < argc; ++
i) {
2242 if (!root->r.r_active)
2243 root->r.r_active =
TRUE;
2247 &master_th->th.th_current_task->td_icvs,
loc);
2250 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2256 if (team->
t.t_active_level == 1
2257 && !master_th->th.th_teams_microtask) {
2259 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2260 (__kmp_forkjoin_frames_mode == 3 ||
2261 __kmp_forkjoin_frames_mode == 1)) {
2263 if (__itt_get_timestamp_ptr)
2264 tmp_time = __itt_get_timestamp();
2266 master_th->th.th_frame_time = tmp_time;
2267 if (__kmp_forkjoin_frames_mode == 3)
2268 team->
t.t_region_time = tmp_time;
2272 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2273 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2275 __kmp_itt_region_forking(gtid, team->
t.t_nproc, 0);
2284 (
"__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2285 root, team, master_th, gtid));
2288 if (__itt_stack_caller_create_ptr) {
2292 team->
t.t_stack_id = __kmp_itt_stack_caller_create();
2293 }
else if (parent_team->
t.t_serialized) {
2299 parent_team->
t.t_stack_id = __kmp_itt_stack_caller_create();
2308 KF_TRACE(10, (
"__kmp_internal_fork : after : root=%p, team=%p, "
2309 "master_th=%p, gtid=%d\n",
2310 root, team, master_th, gtid));
2314 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
2319 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2320 team->
t.t_id, team->
t.t_pkfn));
2323#if KMP_STATS_ENABLED
2331 if (!team->
t.t_invoke(gtid)) {
2332 KMP_ASSERT2(0,
"cannot invoke microtask for PRIMARY thread");
2335#if KMP_STATS_ENABLED
2342 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2343 team->
t.t_id, team->
t.t_pkfn));
2346 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
2349 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2357static inline void __kmp_join_restore_state(
kmp_info_t *thread,
2360 thread->th.ompt_thread_info.state =
2361 ((team->
t.t_serialized) ? ompt_state_work_serial
2362 : ompt_state_work_parallel);
2365static inline void __kmp_join_ompt(
int gtid,
kmp_info_t *thread,
2366 kmp_team_t *team, ompt_data_t *parallel_data,
2367 int flags,
void *codeptr) {
2371 parallel_data, &(task_info->
task_data), flags, codeptr);
2374 task_info->
frame.enter_frame = ompt_data_none;
2375 __kmp_join_restore_state(thread, team);
2393 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2397 root = master_th->th.th_root;
2398 team = master_th->th.th_team;
2399 parent_team = team->
t.t_parent;
2401 master_th->th.th_ident =
loc;
2404 void *team_microtask = (
void *)team->
t.t_pkfn;
2410 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2416 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2417 "th_task_team = %p\n",
2419 team->
t.t_task_team[master_th->th.th_task_state],
2420 master_th->th.th_task_team));
2425 if (team->
t.t_serialized) {
2426 if (master_th->th.th_teams_microtask) {
2428 int level = team->
t.t_level;
2429 int tlevel = master_th->th.th_teams_level;
2430 if (
level == tlevel) {
2434 }
else if (
level == tlevel + 1) {
2438 team->
t.t_serialized++;
2448 __kmp_join_restore_state(master_th, parent_team);
2455 master_active = team->
t.t_master_active;
2462 if (__itt_stack_caller_create_ptr) {
2465 __kmp_itt_stack_caller_destroy((__itt_caller)team->
t.t_stack_id);
2466 team->
t.t_stack_id = NULL;
2470 master_th->th.th_task_state =
2473 if (__itt_stack_caller_create_ptr && parent_team->
t.t_serialized) {
2478 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->
t.t_stack_id);
2479 parent_team->
t.t_stack_id = NULL;
2487 ompt_data_t *parallel_data = &(team->
t.ompt_team_info.parallel_data);
2488 void *codeptr = team->
t.ompt_team_info.master_return_address;
2493 if (team->
t.t_active_level == 1 &&
2494 (!master_th->th.th_teams_microtask ||
2495 master_th->th.th_teams_size.nteams == 1)) {
2496 master_th->th.th_ident =
loc;
2499 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2500 __kmp_forkjoin_frames_mode == 3)
2501 __kmp_itt_frame_submit(gtid, team->
t.t_region_time,
2502 master_th->th.th_frame_time, 0,
loc,
2503 master_th->th.th_team_nproc, 1);
2504 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2505 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2506 __kmp_itt_region_joined(gtid);
2510#if KMP_AFFINITY_SUPPORTED
2513 master_th->th.th_first_place = team->
t.t_first_place;
2514 master_th->th.th_last_place = team->
t.t_last_place;
2518 if (master_th->th.th_teams_microtask && !exit_teams &&
2520 team->
t.t_level == master_th->th.th_teams_level + 1) {
2525 ompt_data_t ompt_parallel_data = ompt_data_none;
2529 int ompt_team_size = team->
t.t_nproc;
2531 ompt_scope_end, NULL, &(task_info->
task_data), ompt_team_size,
2532 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2534 task_info->
frame.exit_frame = ompt_data_none;
2536 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2542 team->
t.t_active_level--;
2549 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2550 int old_num = master_th->th.th_team_nproc;
2551 int new_num = master_th->th.th_teams_size.nth;
2553 team->
t.t_nproc = new_num;
2554 for (
int i = 0;
i < old_num; ++
i) {
2555 other_threads[
i]->th.th_team_nproc = new_num;
2558 for (
int i = old_num;
i < new_num; ++
i) {
2563 balign[
b].
bb.b_arrived = team->
t.t_bar[
b].b_arrived;
2566 balign[
b].
bb.b_worker_arrived = team->
t.t_bar[
b].b_team_arrived;
2571 other_threads[
i]->th.th_task_state = master_th->th.th_task_state;
2578 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2579 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2587 master_th->th.th_info.ds.ds_tid = team->
t.t_master_tid;
2588 master_th->th.th_local.this_construct = team->
t.t_master_this_cons;
2590 master_th->th.th_dispatch = &parent_team->
t.t_dispatch[team->
t.t_master_tid];
2597 if (!master_th->th.th_teams_microtask ||
2598 team->
t.t_level > master_th->th.th_teams_level) {
2610 : ompt_task_implicit;
2611 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->
t.t_nproc;
2613 ompt_scope_end, NULL, &(task_info->
task_data), ompt_team_size,
2614 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2616 task_info->
frame.exit_frame = ompt_data_none;
2621 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2625 master_th->th.th_def_allocator = team->
t.t_def_allocator;
2628 if (ompd_state & OMPD_ENABLE_BP)
2629 ompd_bp_parallel_end();
2633 if (root->r.r_active != master_active)
2634 root->r.r_active = master_active;
2644 master_th->th.th_team = parent_team;
2645 master_th->th.th_team_nproc = parent_team->
t.t_nproc;
2646 master_th->th.th_team_master = parent_team->
t.t_threads[0];
2647 master_th->th.th_team_serialized = parent_team->
t.t_serialized;
2650 if (parent_team->
t.t_serialized &&
2651 parent_team != master_th->th.th_serial_team &&
2652 parent_team != root->r.r_root_team) {
2655 master_th->th.th_serial_team = parent_team;
2661 team->
t.t_primary_task_state == 1);
2662 master_th->th.th_task_state = (
kmp_uint8)team->
t.t_primary_task_state;
2665 master_th->th.th_task_team =
2666 parent_team->
t.t_task_team[master_th->th.th_task_state];
2668 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2676 master_th->th.th_current_task->td_flags.executing = 1;
2680#if KMP_AFFINITY_SUPPORTED
2681 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2689 : ompt_parallel_team);
2691 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2697 KA_TRACE(20, (
"__kmp_join_call: exit T#%d\n", gtid));
2704 if (thread->th.th_team != thread->th.th_serial_team) {
2707 if (thread->th.th_team->t.t_serialized > 1) {
2710 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2713 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2714 thread->th.th_team->t.t_serialized) {
2723 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2727 control->
next = thread->th.th_team->t.t_control_stack_top;
2728 thread->th.th_team->t.t_control_stack_top = control;
2738 KF_TRACE(10, (
"__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2748 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2758 root = thread->th.th_root;
2760 (root->r.r_hot_team->t.t_nproc > new_nth)
2762 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2774 for (
f = new_nth;
f < hot_team->
t.t_nproc;
f++) {
2779 hot_team->
t.t_threads[
f]->th.th_task_team = NULL;
2782 hot_team->
t.t_threads[
f] = NULL;
2784 hot_team->
t.t_nproc = new_nth;
2785#if KMP_NESTED_HOT_TEAMS
2786 if (thread->th.th_hot_teams) {
2788 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2793 hot_team->
t.b->update_num_threads(new_nth);
2800 for (
f = 0;
f < new_nth;
f++) {
2802 hot_team->
t.t_threads[
f]->th.th_team_nproc = new_nth;
2805 hot_team->
t.t_size_changed = -1;
2813 KF_TRACE(10, (
"__kmp_set_max_active_levels: new max_active_levels for thread "
2815 gtid, max_active_levels));
2819 if (max_active_levels < 0) {
2820 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2825 KF_TRACE(10, (
"__kmp_set_max_active_levels: the call is ignored: new "
2826 "max_active_levels for thread %d = (%d)\n",
2827 gtid, max_active_levels));
2835 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2843 KF_TRACE(10, (
"__kmp_set_max_active_levels: after validation: new "
2844 "max_active_levels for thread %d = (%d)\n",
2845 gtid, max_active_levels));
2858 KF_TRACE(10, (
"__kmp_get_max_active_levels: thread %d\n", gtid));
2863 KF_TRACE(10, (
"__kmp_get_max_active_levels: thread %d, curtask=%p, "
2864 "curtask_maxaclevel=%d\n",
2865 gtid, thread->th.th_current_task,
2866 thread->th.th_current_task->td_icvs.max_active_levels));
2867 return thread->th.th_current_task->td_icvs.max_active_levels;
2892 KF_TRACE(10, (
"__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2893 gtid, (
int)kind, chunk));
2907 KMP_HNT(DefaultScheduleKindUsed,
"static, no chunk"),
2921 thread->th.th_current_task->td_icvs.sched.r_sched_type =
kmp_sch_static;
2923 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2929 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2934 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2939 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2948 KF_TRACE(10, (
"__kmp_get_schedule: thread %d\n", gtid));
2953 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2979#if KMP_STATIC_STEAL_ENABLED
2981 *kind = kmp_sched_static_steal;
2985 KMP_FATAL(UnknownSchedulingType, th_type);
2989 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
2998 KF_TRACE(10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid,
level));
3007 team = thr->th.th_team;
3008 ii = team->
t.t_level;
3012 if (thr->th.th_teams_microtask) {
3014 int tlevel = thr->th.th_teams_level;
3031 dd = team->
t.t_serialized;
3034 for (dd = team->
t.t_serialized; (dd > 0) && (
ii >
level); dd--,
ii--) {
3036 if ((team->
t.t_serialized) && (!dd)) {
3037 team = team->
t.t_parent;
3041 team = team->
t.t_parent;
3042 dd = team->
t.t_serialized;
3047 return (dd > 1) ? (0) : (team->
t.t_master_tid);
3056 KF_TRACE(10, (
"__kmp_get_team_size: thread %d %d\n", gtid,
level));
3065 team = thr->th.th_team;
3066 ii = team->
t.t_level;
3070 if (thr->th.th_teams_microtask) {
3072 int tlevel = thr->th.th_teams_level;
3087 for (dd = team->
t.t_serialized; (dd > 0) && (
ii >
level); dd--,
ii--) {
3089 if (team->
t.t_serialized && (!dd)) {
3090 team = team->
t.t_parent;
3094 team = team->
t.t_parent;
3099 return team->
t.t_nproc;
3141 if (!realloc || argc > team->
t.t_max_argc) {
3143 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3144 "current entries=%d\n",
3145 team->
t.t_id, argc, (realloc) ? team->
t.t_max_argc : 0));
3147 if (realloc && team->
t.t_argv != &team->
t.t_inline_argv[0])
3153 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d "
3155 team->
t.t_id, team->
t.t_max_argc));
3156 team->
t.t_argv = &team->
t.t_inline_argv[0];
3159 -1, &team->
t.t_inline_argv[0],
3169 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3171 team->
t.t_id, team->
t.t_max_argc));
3176 &team->
t.t_argv[team->
t.t_max_argc],
3177 sizeof(
void *) * team->
t.t_max_argc,
3178 "team_%d.t_argv", team->
t.t_id);
3191 team->
t.t_dispatch =
3193 team->
t.t_implicit_task_taskdata =
3195 team->
t.t_max_nproc = max_nth;
3198 for (
i = 0;
i < num_disp_buff; ++
i) {
3199 team->
t.t_disp_buffer[
i].buffer_index =
i;
3200 team->
t.t_disp_buffer[
i].doacross_buf_idx =
i;
3207 for (
i = 0;
i < team->
t.t_max_nproc; ++
i) {
3208 if (team->
t.t_dispatch[
i].th_disp_buffer != NULL) {
3210 team->
t.t_dispatch[
i].th_disp_buffer = NULL;
3213#if KMP_USE_HIER_SCHED
3220 team->
t.t_threads = NULL;
3221 team->
t.t_disp_buffer = NULL;
3222 team->
t.t_dispatch = NULL;
3223 team->
t.t_implicit_task_taskdata = 0;
3281 copy_icvs(&gx_icvs, &team->
t.t_threads[0]->th.th_current_task->td_icvs);
3282 gx_icvs.
next = NULL;
3291 int hot_team_max_nth;
3300 root->r.r_begin =
FALSE;
3301 root->r.r_active =
FALSE;
3302 root->r.r_in_parallel = 0;
3304#if KMP_AFFINITY_SUPPORTED
3305 root->r.r_affinity_assigned =
FALSE;
3310 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3329 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3331 root->r.r_root_team = root_team;
3332 root_team->
t.t_control_stack_top = NULL;
3335 root_team->
t.t_threads[0] = NULL;
3336 root_team->
t.t_nproc = 1;
3337 root_team->
t.t_serialized = 1;
3339 root_team->
t.t_sched.sched = r_sched.
sched;
3342 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3347 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3360 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3362 root->r.r_hot_team = hot_team;
3363 root_team->
t.t_control_stack_top = NULL;
3366 hot_team->
t.t_parent = root_team;
3369 hot_team_max_nth = hot_team->
t.t_max_nproc;
3370 for (
f = 0;
f < hot_team_max_nth; ++
f) {
3371 hot_team->
t.t_threads[
f] = NULL;
3373 hot_team->
t.t_nproc = 1;
3375 hot_team->
t.t_sched.sched = r_sched.
sched;
3376 hot_team->
t.t_size_changed = 0;
3381typedef struct kmp_team_list_item {
3383 struct kmp_team_list_item *next;
3384} kmp_team_list_item_t;
3385typedef kmp_team_list_item_t *kmp_team_list_t;
3387static void __kmp_print_structure_team_accum(
3388 kmp_team_list_t list,
3404 __kmp_print_structure_team_accum(list, team->
t.t_parent);
3405 __kmp_print_structure_team_accum(list, team->
t.t_next_pool);
3409 while (l->next != NULL && l->entry != team) {
3412 if (l->next != NULL) {
3418 while (l->next != NULL && l->entry->t.t_id <= team->
t.t_id) {
3425 sizeof(kmp_team_list_item_t));
3432static void __kmp_print_structure_team(
char const *title,
kmp_team_p const *team
3443static void __kmp_print_structure_thread(
char const *title,
3446 if (thread != NULL) {
3455 kmp_team_list_t list;
3463 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3464 "Table\n------------------------------\n");
3480 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3486 if (thread != NULL) {
3489 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3490 __kmp_print_structure_team(
" Serial Team: ",
3491 thread->th.th_serial_team);
3492 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3493 __kmp_print_structure_thread(
" Primary: ",
3494 thread->th.th_team_master);
3495 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3496 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3497 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3498 __kmp_print_structure_thread(
" Next in pool: ",
3499 thread->th.th_next_pool);
3501 __kmp_print_structure_team_accum(list, thread->th.th_team);
3502 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3510 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3518 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3519 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3520 __kmp_print_structure_thread(
" Uber Thread: ",
3521 root->r.r_uber_thread);
3526 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3527 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3534 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3536 while (list->next != NULL) {
3540 __kmp_print_structure_team(
" Parent Team: ", team->
t.t_parent);
3543 __kmp_printf(
" Levels of serial: %2d\n", team->
t.t_serialized);
3545 for (
i = 0;
i < team->
t.t_nproc; ++
i) {
3547 __kmp_print_structure_thread(
"", team->
t.t_threads[
i]);
3549 __kmp_print_structure_team(
" Next in pool: ", team->
t.t_next_pool);
3555 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3557 __kmp_print_structure_thread(
"Thread pool: ",
3559 __kmp_print_structure_team(
"Team pool: ",
3564 while (list != NULL) {
3565 kmp_team_list_item_t *item = list;
3577 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3578 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3579 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3580 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3581 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3582 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3583 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3584 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3585 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3586 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3587 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
3592 unsigned x = thread->th.th_x;
3593 unsigned short r = (
unsigned short)(x >> 16);
3595 thread->th.th_x = x * thread->th.th_a + 1;
3597 KA_TRACE(30, (
"__kmp_get_random: THREAD: %d, RETURN: %u\n",
3598 thread->th.th_info.ds.ds_tid,
r));
3605 unsigned seed = thread->th.th_info.ds.ds_tid;
3609 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3611 (
"__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3617static int __kmp_reclaim_dead_roots(
void) {
3625 r += __kmp_unregister_root_other_thread(
i);
3652 int minimumRequiredCapacity;
3661#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3664 added = __kmp_reclaim_dead_roots();
3706 }
while (newCapacity < minimumRequiredCapacity);
3750 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3804 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3805 "hidden helper thread: T#%d\n",
3819 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3841#ifdef KMP_ADJUST_BLOCKTIME
3846 __kmp_zero_bt =
TRUE;
3857#if KMP_STATS_ENABLED
3859 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3860 __kmp_stats_thread_ptr->startLife();
3867 if (root->r.r_uber_thread) {
3868 root_thread = root->r.r_uber_thread;
3874 root_thread->th.th_info.ds.ds_gtid = gtid;
3876 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3878 root_thread->th.th_root = root;
3883 __kmp_initialize_fast_memory(root_thread);
3894 if (!root_thread->th.th_serial_team) {
3896 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3905 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3906 root_thread->th.th_serial_team));
3911 root->r.r_root_team->t.t_threads[0] = root_thread;
3912 root->r.r_hot_team->t.t_threads[0] = root_thread;
3913 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3915 root_thread->th.th_serial_team->t.t_serialized = 0;
3916 root->r.r_uber_thread = root_thread;
3926 __kmp_itt_thread_name(gtid);
3929#ifdef KMP_TDATA_GTID
3935 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3945 root_thread->th.th_bar[
b].bb.b_worker_arrived = 0;
3952#if KMP_AFFINITY_SUPPORTED
3953 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3954 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3955 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3956 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3959 root_thread->th.th_prev_level = 0;
3960 root_thread->th.th_prev_num_threads = 1;
3966 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3967 " cg_nthreads init to 1\n",
3970 root_thread->th.th_cg_roots = tmp;
3979 ompt_set_thread_state(root_thread, ompt_state_overhead);
3985 ompt_data_t *task_data;
3986 ompt_data_t *parallel_data;
3991 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3994 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3998 if (ompd_state & OMPD_ENABLE_BP)
3999 ompd_bp_thread_begin();
4008#if KMP_NESTED_HOT_TEAMS
4010 const int max_level) {
4012 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4013 if (!hot_teams || !hot_teams[
level].hot_team) {
4018 nth = hot_teams[
level].hot_team_nth;
4020 if (
level < max_level - 1) {
4021 for (
i = 0;
i < nth; ++
i) {
4023 n += __kmp_free_hot_teams(root, th,
level + 1, max_level);
4024 if (
i > 0 && th->th.th_hot_teams) {
4026 th->th.th_hot_teams = NULL;
4040 int n = hot_team->
t.t_nproc;
4045 root->r.r_root_team = NULL;
4046 root->r.r_hot_team = NULL;
4050#if KMP_NESTED_HOT_TEAMS
4051 if (__kmp_hot_teams_max_level >
4053 for (
i = 0;
i < hot_team->
t.t_nproc; ++
i) {
4055 if (__kmp_hot_teams_max_level > 1) {
4056 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4058 if (th->th.th_hot_teams) {
4060 th->th.th_hot_teams = NULL;
4079 (LPVOID) & (root->r.r_uber_thread->th),
4080 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4085 if (ompd_state & OMPD_ENABLE_BP)
4086 ompd_bp_thread_end();
4090 ompt_data_t *task_data;
4091 ompt_data_t *parallel_data;
4096 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4100 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4106 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4107 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4109 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4110 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4114 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4116 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4117 root->r.r_uber_thread->th.th_cg_roots = NULL;
4123 root->r.r_uber_thread = NULL;
4125 root->r.r_begin =
FALSE;
4131 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4137 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4161 thread->th.ompt_thread_info.state = ompt_state_undefined;
4170 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4179static int __kmp_unregister_root_other_thread(
int gtid) {
4183 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4191 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4197void __kmp_task_info() {
4202 kmp_team_t *steam = this_thr->th.th_serial_team;
4206 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4208 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4209 team->
t.t_implicit_task_taskdata[tid].td_parent);
4217 int tid,
int gtid) {
4234 this_thr->th.th_info.ds.ds_tid = tid;
4235 this_thr->th.th_set_nproc = 0;
4243#if KMP_AFFINITY_SUPPORTED
4244 this_thr->th.th_new_place = this_thr->th.th_current_place;
4246 this_thr->th.th_root = master->th.th_root;
4249 this_thr->th.th_team_nproc = team->
t.t_nproc;
4250 this_thr->th.th_team_master = master;
4251 this_thr->th.th_team_serialized = team->
t.t_serialized;
4255 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4256 tid, gtid, this_thr, this_thr->th.th_current_task));
4261 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4262 tid, gtid, this_thr, this_thr->th.th_current_task));
4267 this_thr->th.th_dispatch = &team->
t.t_dispatch[tid];
4269 this_thr->th.th_local.this_construct = 0;
4271 if (!this_thr->th.th_pri_common) {
4272 this_thr->th.th_pri_common =
4276 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4277 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4279 this_thr->th.th_pri_head = NULL;
4282 if (this_thr != master &&
4283 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4290 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4291 " on node %p of thread %p to %d\n",
4297 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4299 this_thr->th.th_cg_roots->cg_nthreads++;
4300 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4301 " node %p of thread %p to %d\n",
4302 this_thr, this_thr->th.th_cg_roots,
4303 this_thr->th.th_cg_roots->cg_root,
4304 this_thr->th.th_cg_roots->cg_nthreads));
4305 this_thr->th.th_current_task->td_icvs.thread_limit =
4306 this_thr->th.th_cg_roots->cg_thread_limit;
4311 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4316 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4317 team->
t.t_max_nproc));
4335 "th_%d.th_dispatch.th_disp_buffer "
4336 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4337 gtid, team->
t.t_id, gtid);
4350 this_thr->th.th_next_pool = NULL;
4371#if !KMP_NESTED_HOT_TEAMS
4388 if (new_thr->th.th_active_in_pool ==
TRUE) {
4391 new_thr->th.th_active_in_pool =
FALSE;
4395 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4402 new_thr->th.th_info.ds.ds_gtid);
4407 new_thr->th.th_task_state = 0;
4415#ifdef KMP_ADJUST_BLOCKTIME
4420 __kmp_zero_bt =
TRUE;
4434 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4448 if (!
TCR_4(__kmp_init_monitor)) {
4450 if (!
TCR_4(__kmp_init_monitor)) {
4451 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4452 TCW_4(__kmp_init_monitor, 1);
4454 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4465 while (
TCR_4(__kmp_init_monitor) < 2) {
4468 KF_TRACE(10, (
"after monitor thread has started\n"));
4497#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4500 __itt_suppress_mark_range(
4501 __itt_suppress_range, __itt_suppress_threading_errors,
4502 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4503 __itt_suppress_mark_range(
4504 __itt_suppress_range, __itt_suppress_threading_errors,
4505 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4507 __itt_suppress_mark_range(
4508 __itt_suppress_range, __itt_suppress_threading_errors,
4509 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4511 __itt_suppress_mark_range(__itt_suppress_range,
4512 __itt_suppress_threading_errors,
4513 &new_thr->th.th_suspend_init_count,
4514 sizeof(new_thr->th.th_suspend_init_count));
4517 __itt_suppress_mark_range(__itt_suppress_range,
4518 __itt_suppress_threading_errors,
4520 sizeof(new_thr->th.th_bar[0].bb.b_go));
4521 __itt_suppress_mark_range(__itt_suppress_range,
4522 __itt_suppress_threading_errors,
4524 sizeof(new_thr->th.th_bar[1].bb.b_go));
4525 __itt_suppress_mark_range(__itt_suppress_range,
4526 __itt_suppress_threading_errors,
4528 sizeof(new_thr->th.th_bar[2].bb.b_go));
4537 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4538 new_thr->th.th_serial_team = serial_team =
4547 serial_team->
t.t_serialized = 0;
4549 serial_team->
t.t_threads[0] = new_thr;
4551 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4558 __kmp_initialize_fast_memory(new_thr);
4570 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4577 balign[
b].
bb.team = NULL;
4579 balign[
b].
bb.use_oncore_barrier = 0;
4582 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4585 new_thr->th.th_spin_here =
FALSE;
4586 new_thr->th.th_next_waiting = 0;
4588 new_thr->th.th_blocking =
false;
4591#if KMP_AFFINITY_SUPPORTED
4592 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4593 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4594 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4595 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4598 new_thr->th.th_prev_level = 0;
4599 new_thr->th.th_prev_num_threads = 1;
4602 new_thr->th.th_active_in_pool =
FALSE;
4623#ifdef KMP_ADJUST_BLOCKTIME
4628 __kmp_zero_bt =
TRUE;
4633#if KMP_AFFINITY_SUPPORTED
4635 __kmp_affinity_set_init_mask(new_gtid,
FALSE);
4640 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4643 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4659 KF_TRACE(10, (
"__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4660 team->
t.t_threads[0], team));
4668 copy_icvs(&team->
t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4670 KF_TRACE(10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4671 team->
t.t_threads[0], team));
4680 KF_TRACE(10, (
"__kmp_initialize_team: enter: team=%p\n", team));
4688 team->
t.t_master_tid = 0;
4690 team->
t.t_serialized = new_nproc > 1 ? 0 : 1;
4691 team->
t.t_nproc = new_nproc;
4694 team->
t.t_next_pool = NULL;
4699 team->
t.t_invoke = NULL;
4704#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4705 team->
t.t_fp_control_saved =
FALSE;
4706 team->
t.t_x87_fpu_control_word = 0;
4707 team->
t.t_mxcsr = 0;
4710 team->
t.t_construct = 0;
4712 team->
t.t_ordered.dt.t_value = 0;
4713 team->
t.t_master_active =
FALSE;
4716 team->
t.t_copypriv_data = NULL;
4719 team->
t.t_copyin_counter = 0;
4722 team->
t.t_control_stack_top = NULL;
4727 KF_TRACE(10, (
"__kmp_initialize_team: exit: team=%p\n", team));
4730#if KMP_AFFINITY_SUPPORTED
4732 int first,
int last,
int newp) {
4733 th->th.th_first_place = first;
4734 th->th.th_last_place = last;
4735 th->th.th_new_place = newp;
4736 if (newp != th->th.th_current_place) {
4738 team->
t.t_display_affinity = 1;
4740 th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
4741 th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
4749static void __kmp_partition_places(
kmp_team_t *team,
int update_master_only) {
4757 int first_place = master_th->th.th_first_place;
4758 int last_place = master_th->th.th_last_place;
4759 int masters_place = master_th->th.th_current_place;
4760 int num_masks = __kmp_affinity.num_masks;
4761 team->
t.t_first_place = first_place;
4762 team->
t.t_last_place = last_place;
4764 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4765 "bound to place %d partition = [%d,%d]\n",
4767 team->
t.t_id, masters_place, first_place, last_place));
4769 switch (proc_bind) {
4779 int n_th = team->
t.t_nproc;
4780 for (
f = 1;
f < n_th;
f++) {
4783 __kmp_set_thread_place(team, th, first_place, last_place, masters_place);
4785 KA_TRACE(100, (
"__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4786 "partition = [%d,%d]\n",
4788 f, masters_place, first_place, last_place));
4794 int n_th = team->
t.t_nproc;
4796 if (first_place <= last_place) {
4797 n_places = last_place - first_place + 1;
4799 n_places = num_masks - first_place + last_place + 1;
4801 if (n_th <= n_places) {
4802 int place = masters_place;
4803 for (
f = 1;
f < n_th;
f++) {
4807 if (place == last_place) {
4808 place = first_place;
4809 }
else if (place == (num_masks - 1)) {
4814 __kmp_set_thread_place(team, th, first_place, last_place, place);
4816 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4817 "partition = [%d,%d]\n",
4819 team->
t.t_id,
f, place, first_place, last_place));
4822 int S, rem, gap, s_count;
4823 S = n_th / n_places;
4825 rem = n_th - (
S * n_places);
4826 gap = rem > 0 ? n_places / rem : n_places;
4827 int place = masters_place;
4829 for (
f = 0;
f < n_th;
f++) {
4833 __kmp_set_thread_place(team, th, first_place, last_place, place);
4836 if ((s_count ==
S) && rem && (gap_ct == gap)) {
4838 }
else if ((s_count ==
S + 1) && rem && (gap_ct == gap)) {
4840 if (place == last_place) {
4841 place = first_place;
4842 }
else if (place == (num_masks - 1)) {
4850 }
else if (s_count ==
S) {
4851 if (place == last_place) {
4852 place = first_place;
4853 }
else if (place == (num_masks - 1)) {
4863 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4864 "partition = [%d,%d]\n",
4866 th->th.th_new_place, first_place, last_place));
4874 int n_th = team->
t.t_nproc;
4877 if (first_place <= last_place) {
4878 n_places = last_place - first_place + 1;
4880 n_places = num_masks - first_place + last_place + 1;
4882 if (n_th <= n_places) {
4885 if (n_places != num_masks) {
4886 int S = n_places / n_th;
4887 int s_count, rem, gap, gap_ct;
4889 place = masters_place;
4890 rem = n_places - n_th *
S;
4891 gap = rem ? n_th / rem : 1;
4894 if (update_master_only == 1)
4896 for (
f = 0;
f < thidx;
f++) {
4900 int fplace = place, nplace = place;
4902 while (s_count <
S) {
4903 if (place == last_place) {
4904 place = first_place;
4905 }
else if (place == (num_masks - 1)) {
4912 if (rem && (gap_ct == gap)) {
4913 if (place == last_place) {
4914 place = first_place;
4915 }
else if (place == (num_masks - 1)) {
4923 __kmp_set_thread_place(team, th, fplace, place, nplace);
4926 if (place == last_place) {
4927 place = first_place;
4928 }
else if (place == (num_masks - 1)) {
4935 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4936 "partition = [%d,%d], num_masks: %u\n",
4938 f, th->th.th_new_place, th->th.th_first_place,
4939 th->th.th_last_place, num_masks));
4945 double current =
static_cast<double>(masters_place);
4947 (
static_cast<double>(n_places + 1) /
static_cast<double>(n_th));
4952 if (update_master_only == 1)
4954 for (
f = 0;
f < thidx;
f++) {
4955 first =
static_cast<int>(current);
4956 last =
static_cast<int>(current + spacing) - 1;
4958 if (first >= n_places) {
4959 if (masters_place) {
4962 if (first == (masters_place + 1)) {
4966 if (last == masters_place) {
4976 if (last >= n_places) {
4977 last = (n_places - 1);
4987 th = team->
t.t_threads[
f];
4989 __kmp_set_thread_place(team, th, first, last, place);
4991 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4992 "partition = [%d,%d], spacing = %.4f\n",
4994 team->
t.t_id,
f, th->th.th_new_place,
4995 th->th.th_first_place, th->th.th_last_place, spacing));
5001 int S, rem, gap, s_count;
5002 S = n_th / n_places;
5004 rem = n_th - (
S * n_places);
5005 gap = rem > 0 ? n_places / rem : n_places;
5006 int place = masters_place;
5009 if (update_master_only == 1)
5011 for (
f = 0;
f < thidx;
f++) {
5015 __kmp_set_thread_place(team, th, place, place, place);
5018 if ((s_count ==
S) && rem && (gap_ct == gap)) {
5020 }
else if ((s_count ==
S + 1) && rem && (gap_ct == gap)) {
5022 if (place == last_place) {
5023 place = first_place;
5024 }
else if (place == (num_masks - 1)) {
5032 }
else if (s_count ==
S) {
5033 if (place == last_place) {
5034 place = first_place;
5035 }
else if (place == (num_masks - 1)) {
5044 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5045 "partition = [%d,%d]\n",
5047 team->
t.t_id,
f, th->th.th_new_place,
5048 th->th.th_first_place, th->th.th_last_place));
5058 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->
t.t_id));
5068 ompt_data_t ompt_parallel_data,
5076 int use_hot_team = !root->r.r_active;
5078 int do_place_partition = 1;
5080 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
5085#if KMP_NESTED_HOT_TEAMS
5086 kmp_hot_team_ptr_t *hot_teams;
5088 team = master->th.th_team;
5089 level = team->
t.t_active_level;
5090 if (master->th.th_teams_microtask) {
5091 if (master->th.th_teams_size.nteams > 1 &&
5095 master->th.th_teams_level <
5102 if ((master->th.th_teams_size.nteams == 1 &&
5103 master->th.th_teams_level >= team->
t.t_level) ||
5105 do_place_partition = 0;
5107 hot_teams = master->th.th_hot_teams;
5108 if (
level < __kmp_hot_teams_max_level && hot_teams &&
5109 hot_teams[
level].hot_team) {
5121 if (use_hot_team && new_nproc > 1) {
5123#if KMP_NESTED_HOT_TEAMS
5124 team = hot_teams[
level].hot_team;
5126 team = root->r.r_hot_team;
5130 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5131 "task_team[1] = %p before reinit\n",
5132 team->
t.t_task_team[0], team->
t.t_task_team[1]));
5136 if (team->
t.t_nproc != new_nproc &&
5139 int old_nthr = team->
t.t_nproc;
5145 if (do_place_partition == 0)
5150 if (team->
t.t_nproc == new_nproc) {
5151 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5154 if (team->
t.t_size_changed == -1) {
5155 team->
t.t_size_changed = 1;
5166 root->r.r_uber_thread->th.th_ident);
5168 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5169 team->
t.t_threads[0], team));
5172#if KMP_AFFINITY_SUPPORTED
5173 if ((team->
t.t_size_changed == 0) &&
5174 (team->
t.t_proc_bind == new_proc_bind)) {
5176 if (do_place_partition) {
5178 __kmp_partition_places(team, 1);
5181 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5182 "proc_bind = %d, partition = [%d,%d]\n",
5183 team->
t.t_id, new_proc_bind, team->
t.t_first_place,
5184 team->
t.t_last_place));
5186 if (do_place_partition) {
5188 __kmp_partition_places(team);
5194 }
else if (team->
t.t_nproc > new_nproc) {
5196 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
5199 team->
t.t_size_changed = 1;
5208 for (
f = new_nproc;
f < team->
t.t_nproc;
f++) {
5211 th->th.th_task_team = NULL;
5214#if KMP_NESTED_HOT_TEAMS
5215 if (__kmp_hot_teams_mode == 0) {
5219 hot_teams[
level].hot_team_nth = new_nproc;
5222 for (
f = new_nproc;
f < team->
t.t_nproc;
f++) {
5225 team->
t.t_threads[
f] = NULL;
5227#if KMP_NESTED_HOT_TEAMS
5232 for (
f = new_nproc;
f < team->
t.t_nproc; ++
f) {
5244 team->
t.t_nproc = new_nproc;
5248 root->r.r_uber_thread->th.th_ident);
5251 for (
f = 0;
f < new_nproc; ++
f) {
5252 team->
t.t_threads[
f]->th.th_team_nproc = new_nproc;
5257 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5258 team->
t.t_threads[0], team));
5263 for (
f = 0;
f < team->
t.t_nproc;
f++) {
5265 team->
t.t_threads[
f]->th.th_team_nproc ==
5270 if (do_place_partition) {
5272#if KMP_AFFINITY_SUPPORTED
5273 __kmp_partition_places(team);
5279 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5281 int old_nproc = team->
t.t_nproc;
5282 team->
t.t_size_changed = 1;
5284#if KMP_NESTED_HOT_TEAMS
5285 int avail_threads = hot_teams[
level].hot_team_nth;
5286 if (new_nproc < avail_threads)
5287 avail_threads = new_nproc;
5289 for (
f = team->
t.t_nproc;
f < avail_threads; ++
f) {
5295 balign[
b].
bb.b_arrived = team->
t.t_bar[
b].b_arrived;
5298 balign[
b].
bb.b_worker_arrived = team->
t.t_bar[
b].b_team_arrived;
5302 if (hot_teams[
level].hot_team_nth >= new_nproc) {
5306 team->
t.t_nproc = new_nproc;
5310 team->
t.t_nproc = hot_teams[
level].hot_team_nth;
5311 hot_teams[
level].hot_team_nth = new_nproc;
5313 if (team->
t.t_max_nproc < new_nproc) {
5319#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5320 KMP_AFFINITY_SUPPORTED
5326 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5330 for (
f = team->
t.t_nproc;
f < new_nproc;
f++) {
5333 team->
t.t_threads[
f] = new_worker;
5336 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5337 "join=%llu, plain=%llu\n",
5346 balign[
b].
bb.b_arrived = team->
t.t_bar[
b].b_arrived;
5350 balign[
b].
bb.b_worker_arrived = team->
t.t_bar[
b].b_team_arrived;
5356#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5357 KMP_AFFINITY_SUPPORTED
5359 new_temp_affinity.restore();
5361#if KMP_NESTED_HOT_TEAMS
5372 root->r.r_uber_thread->th.th_ident);
5376 for (
f = 0;
f < team->
t.t_nproc; ++
f)
5381 kmp_uint8 old_state = team->
t.t_threads[old_nproc - 1]->th.th_task_state;
5382 for (
f = old_nproc;
f < team->
t.t_nproc; ++
f)
5383 team->
t.t_threads[
f]->th.th_task_state = old_state;
5386 for (
f = 0;
f < team->
t.t_nproc; ++
f) {
5388 team->
t.t_threads[
f]->th.th_team_nproc ==
5393 if (do_place_partition) {
5395#if KMP_AFFINITY_SUPPORTED
5396 __kmp_partition_places(team);
5402 if (master->th.th_teams_microtask) {
5403 for (
f = 1;
f < new_nproc; ++
f) {
5406 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5407 thr->th.th_teams_level = master->th.th_teams_level;
5408 thr->th.th_teams_size = master->th.th_teams_size;
5411#if KMP_NESTED_HOT_TEAMS
5415 for (
f = 1;
f < new_nproc; ++
f) {
5420 balign[
b].
bb.b_arrived = team->
t.t_bar[
b].b_arrived;
5423 balign[
b].
bb.b_worker_arrived = team->
t.t_bar[
b].b_team_arrived;
5436 KF_TRACE(10, (
" hot_team = %p\n", team));
5440 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5441 "task_team[1] = %p after reinit\n",
5442 team->
t.t_task_team[0], team->
t.t_task_team[1]));
5460 if (team->
t.t_max_nproc >= max_nproc) {
5464 if (max_nproc > 1 &&
5474 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5475 "task_team[1] %p to NULL\n",
5476 &team->
t.t_task_team[0], &team->
t.t_task_team[1]));
5477 team->
t.t_task_team[0] = NULL;
5478 team->
t.t_task_team[1] = NULL;
5485 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5492 team->
t.t_bar[
b].b_master_arrived = 0;
5493 team->
t.t_bar[
b].b_team_arrived = 0;
5498 team->
t.t_proc_bind = new_proc_bind;
5500 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5525 team->
t.t_max_nproc = max_nproc;
5526 if (max_nproc > 1 &&
5536 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5539 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5541 &team->
t.t_task_team[0], &team->
t.t_task_team[1]));
5542 team->
t.t_task_team[0] = NULL;
5544 team->
t.t_task_team[1] = NULL;
5553 team->
t.t_argc = argc;
5556 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5563 team->
t.t_bar[
b].b_master_arrived = 0;
5564 team->
t.t_bar[
b].b_team_arrived = 0;
5569 team->
t.t_proc_bind = new_proc_bind;
5573 team->
t.ompt_serialized_team_info = NULL;
5578 KA_TRACE(20, (
"__kmp_allocate_team: done creating a new team %d.\n",
5601 int use_hot_team = team == root->r.r_hot_team;
5602#if KMP_NESTED_HOT_TEAMS
5605 level = team->
t.t_active_level - 1;
5606 if (master->th.th_teams_microtask) {
5607 if (master->th.th_teams_size.nteams > 1) {
5612 master->th.th_teams_level == team->
t.t_level) {
5618 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5620 if (
level < __kmp_hot_teams_max_level) {
5631 team->
t.t_copyin_counter = 0;
5636 if (!use_hot_team) {
5639 for (
f = 1;
f < team->
t.t_nproc; ++
f) {
5642 volatile kmp_uint32 *state = &th->th.th_reap_state;
5653 if (th->th.th_sleep_loc)
5661 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5663 if (task_team != NULL) {
5664 for (
f = 0;
f < team->
t.t_nproc; ++
f) {
5666 team->
t.t_threads[
f]->th.th_task_team = NULL;
5670 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5672#if KMP_NESTED_HOT_TEAMS
5675 team->
t.t_task_team[tt_idx] = NULL;
5681 team->
t.t_parent = NULL;
5682 team->
t.t_level = 0;
5683 team->
t.t_active_level = 0;
5686 for (
f = 1;
f < team->
t.t_nproc; ++
f) {
5698 team->
t.b->go_release();
5700 for (
f = 1;
f < team->
t.t_nproc; ++
f) {
5701 if (team->
t.b->sleep[
f].sleep) {
5703 team->
t.t_threads[
f]->th.th_info.ds.ds_gtid,
5709 for (
int f = 1;
f < team->
t.t_nproc; ++
f) {
5710 while (team->
t.t_threads[
f]->th.th_used_in_team.load() != 0)
5716 for (
f = 1;
f < team->
t.t_nproc; ++
f) {
5717 team->
t.t_threads[
f] = NULL;
5720 if (team->
t.t_max_nproc > 1 &&
5732 team->
t.t_threads[1]->th.th_cg_roots);
5733 if (team->
t.t_threads[1]->th.th_cg_roots->cg_root == team->
t.t_threads[1]) {
5735 for (
f = 1;
f < team->
t.t_nproc; ++
f) {
5738 thr->th.th_cg_roots->cg_root == thr);
5741 thr->th.th_cg_roots = tmp->
up;
5742 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5743 " up to node %p. cg_nthreads was %d\n",
5744 thr, tmp, thr->th.th_cg_roots, tmp->
cg_nthreads));
5750 if (thr->th.th_cg_roots)
5751 thr->th.th_current_task->td_icvs.thread_limit =
5752 thr->th.th_cg_roots->cg_thread_limit;
5774 if (team->
t.t_argv != &team->
t.t_inline_argv[0])
5811 KA_TRACE(20, (
"__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5823 balign[
b].
bb.team = NULL;
5824 balign[
b].
bb.leaf_kids = 0;
5826 this_th->th.th_task_state = 0;
5830 TCW_PTR(this_th->th.th_team, NULL);
5831 TCW_PTR(this_th->th.th_root, NULL);
5832 TCW_PTR(this_th->th.th_dispatch, NULL);
5834 while (this_th->th.th_cg_roots) {
5835 this_th->th.th_cg_roots->cg_nthreads--;
5836 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5837 " %p of thread %p to %d\n",
5838 this_th, this_th->th.th_cg_roots,
5839 this_th->th.th_cg_roots->cg_root,
5840 this_th->th.th_cg_roots->cg_nthreads));
5842 if (tmp->
cg_root == this_th) {
5845 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5846 this_th->th.th_cg_roots = tmp->
up;
5852 this_th->th.th_cg_roots = NULL;
5863 this_th->th.th_current_task = NULL;
5867 gtid = this_th->th.th_info.ds.ds_gtid;
5885 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5886 scan = &((*scan)->th.th_next_pool))
5891 TCW_PTR(this_th->th.th_next_pool, *scan);
5894 (this_th->th.th_info.ds.ds_gtid <
5895 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5899 if (this_th->th.th_active ==
TRUE) {
5901 this_th->th.th_active_in_pool =
TRUE;
5912#ifdef KMP_ADJUST_BLOCKTIME
5918 __kmp_zero_bt =
FALSE;
5929#if OMP_PROFILING_SUPPORT
5930 ProfileTraceFile = getenv(
"LIBOMPTARGET_PROFILE");
5932 if (ProfileTraceFile)
5933 llvm::timeTraceProfilerInitialize(500 ,
"libomptarget");
5936 int gtid = this_thr->th.th_info.ds.ds_gtid;
5941 KA_TRACE(10, (
"__kmp_launch_thread: T#%d start\n", gtid));
5948 if (ompd_state & OMPD_ENABLE_BP)
5949 ompd_bp_thread_begin();
5953 ompt_data_t *thread_data =
nullptr;
5955 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5956 *thread_data = ompt_data_none;
5958 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5959 this_thr->th.ompt_thread_info.wait_id = 0;
5961 this_thr->th.ompt_thread_info.parallel_flags = 0;
5964 ompt_thread_worker, thread_data);
5966 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5976 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
5983 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5987 pteam = &this_thr->th.th_team;
5995 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5997 (*pteam)->t.t_pkfn));
6003 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6007 rc = (*pteam)->t.t_invoke(gtid);
6011 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6013 (*pteam)->t.t_pkfn));
6020 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6029 if (ompd_state & OMPD_ENABLE_BP)
6030 ompd_bp_thread_end();
6035 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6039 this_thr->th.th_task_team = NULL;
6043 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
6046#if OMP_PROFILING_SUPPORT
6047 llvm::timeTraceProfilerFinishThread();
6059 KA_TRACE(30, (
"__kmp_internal_end_dest: T#%d\n", gtid));
6066#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6077 KA_TRACE(30, (
"__kmp_internal_end_atexit\n"));
6114 gtid = thread->th.th_info.ds.ds_gtid;
6120 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
6149 if (thread->th.th_active_in_pool) {
6150 thread->th.th_active_in_pool =
FALSE;
6160 __kmp_free_fast_memory(thread);
6171#ifdef KMP_ADJUST_BLOCKTIME
6177 __kmp_zero_bt =
FALSE;
6184 if (thread->th.th_cons) {
6186 thread->th.th_cons = NULL;
6190 if (thread->th.th_pri_common != NULL) {
6192 thread->th.th_pri_common = NULL;
6196 if (thread->th.th_local.bget_data != NULL) {
6201#if KMP_AFFINITY_SUPPORTED
6202 if (thread->th.th_affin_mask != NULL) {
6203 KMP_CPU_FREE(thread->th.th_affin_mask);
6204 thread->th.th_affin_mask = NULL;
6208#if KMP_USE_HIER_SCHED
6209 if (thread->th.th_hier_bar_data != NULL) {
6211 thread->th.th_hier_bar_data = NULL;
6216 thread->th.th_serial_team = NULL;
6225 if (__kmp_itt_region_domains.count > 0) {
6226 for (
int i = 0;
i < KMP_MAX_FRAME_DOMAINS; ++
i) {
6227 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[
i];
6229 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6235 if (__kmp_itt_barrier_domains.count > 0) {
6236 for (
int i = 0;
i < KMP_MAX_FRAME_DOMAINS; ++
i) {
6237 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[
i];
6239 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6259 __kmp_reclaim_dead_roots();
6283 if (
TCR_4(__kmp_init_monitor)) {
6285 TCW_4(__kmp_init_monitor, 0);
6288 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6313 thread->th.th_next_pool = NULL;
6314 thread->th.th_in_pool =
FALSE;
6325 team->
t.t_next_pool = NULL;
6352 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6361 if (
TCR_4(__kmp_init_monitor)) {
6363 TCW_4(__kmp_init_monitor, 0);
6366 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6386 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6391 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6410 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6412 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6413 "already shutdown\n"));
6416 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6417 "registered, or system shutdown\n"));
6420 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6430 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6437 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6445#ifdef DUMP_DEBUG_ON_EXIT
6461 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6484 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6486#ifdef DUMP_DEBUG_ON_EXIT
6509 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6514 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6534 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6536 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6537 "already shutdown\n"));
6540 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6541 "registered, or system shutdown\n"));
6544 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6554 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6558 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6564 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6571 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6581 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6590 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6613 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n",
i));
6627 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6629#ifdef DUMP_DEBUG_ON_EXIT
6649#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6657#if defined(KMP_USE_SHM)
6658bool __kmp_shm_available =
false;
6659bool __kmp_tmp_available =
false;
6661char *temp_reg_status_file_name =
nullptr;
6672#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6681 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n",
name,
6688#if defined(KMP_USE_SHM)
6689 char *shm_name =
nullptr;
6690 char *data1 =
nullptr;
6692 if (__kmp_shm_available) {
6695 int shm_preexist = 0;
6696 fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6697 if ((fd1 == -1) && (errno == EEXIST)) {
6700 fd1 = shm_open(shm_name, O_RDWR, 0600);
6703 __kmp_shm_available =
false;
6708 if (__kmp_shm_available && shm_preexist == 0) {
6709 if (ftruncate(fd1, SHM_SIZE) == -1) {
6710 KMP_WARNING(FunctionError,
"Can't set size of SHM");
6711 __kmp_shm_available =
false;
6714 if (__kmp_shm_available) {
6715 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6717 if (data1 == MAP_FAILED) {
6719 __kmp_shm_available =
false;
6722 if (__kmp_shm_available) {
6723 if (shm_preexist == 0) {
6728 munmap(data1, SHM_SIZE);
6733 if (!__kmp_shm_available)
6735 if (!__kmp_shm_available && __kmp_tmp_available) {
6743 int tmp_preexist = 0;
6744 fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6745 if ((fd1 == -1) && (errno == EEXIST)) {
6748 fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
6751 __kmp_tmp_available =
false;
6756 if (__kmp_tmp_available && tmp_preexist == 0) {
6758 if (ftruncate(fd1, SHM_SIZE) == -1) {
6759 KMP_WARNING(FunctionError,
"Can't set size of /tmp file");
6760 __kmp_tmp_available =
false;
6763 if (__kmp_tmp_available) {
6764 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6766 if (data1 == MAP_FAILED) {
6768 __kmp_tmp_available =
false;
6771 if (__kmp_tmp_available) {
6772 if (tmp_preexist == 0) {
6777 munmap(data1, SHM_SIZE);
6782 if (!__kmp_shm_available && !__kmp_tmp_available) {
6803 char *flag_addr_str = NULL;
6804 char *flag_val_str = NULL;
6805 char const *file_name = NULL;
6810 unsigned long *flag_addr = 0;
6811 unsigned long flag_val = 0;
6814 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6832 file_name =
"unknown library";
6849#if defined(KMP_USE_SHM)
6850 if (__kmp_shm_available) {
6851 shm_unlink(shm_name);
6852 }
else if (__kmp_tmp_available) {
6853 unlink(temp_reg_status_file_name);
6869#if defined(KMP_USE_SHM)
6883#if defined(KMP_USE_SHM)
6884 char *shm_name =
nullptr;
6886 if (__kmp_shm_available) {
6888 fd1 = shm_open(shm_name, O_RDONLY, 0600);
6890 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6891 if (data1 != MAP_FAILED) {
6893 munmap(data1, SHM_SIZE);
6897 }
else if (__kmp_tmp_available) {
6898 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6900 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6901 if (data1 != MAP_FAILED) {
6903 munmap(data1, SHM_SIZE);
6918#if defined(KMP_USE_SHM)
6919 if (__kmp_shm_available) {
6920 shm_unlink(shm_name);
6921 }
else if (__kmp_tmp_available) {
6922 unlink(temp_reg_status_file_name);
6931#if defined(KMP_USE_SHM)
6934 if (temp_reg_status_file_name)
6950#if KMP_MIC_SUPPORTED
6952static void __kmp_check_mic_type() {
6953 kmp_cpuid_t cpuid_state = {0};
6954 kmp_cpuid_t *cs_p = &cpuid_state;
6955 __kmp_x86_cpuid(1, 0, cs_p);
6957 if ((cs_p->eax & 0xff0) == 0xB10) {
6958 __kmp_mic_type = mic2;
6959 }
else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6960 __kmp_mic_type = mic3;
6962 __kmp_mic_type = non_mic;
6969static void __kmp_user_level_mwait_init() {
6970 struct kmp_cpuid
buf;
6971 __kmp_x86_cpuid(7, 0, &
buf);
6972 __kmp_waitpkg_enabled = ((
buf.ecx >> 5) & 1);
6973 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6974 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6975 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
6976 __kmp_umwait_enabled));
6979#ifndef AT_INTELPHIUSERMWAIT
6982#define AT_INTELPHIUSERMWAIT 10000
6988unsigned long getauxval(
unsigned long) {
return 0; }
6990static void __kmp_user_level_mwait_init() {
6995 if (__kmp_mic_type == mic3) {
6996 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6997 if ((
res & 0x1) || __kmp_user_level_mwait) {
6998 __kmp_mwait_enabled =
TRUE;
6999 if (__kmp_user_level_mwait) {
7003 __kmp_mwait_enabled =
FALSE;
7006 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7007 "__kmp_mwait_enabled = %d\n",
7008 __kmp_mic_type, __kmp_mwait_enabled));
7016 KA_TRACE(10, (
"__kmp_do_serial_initialize: enter\n"));
7034#if ENABLE_LIBOMPTARGET
7036 __kmp_init_omptarget();
7051 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7058#if KMP_USE_ADAPTIVE_LOCKS
7059#if KMP_DEBUG_ADAPTIVE_LOCKS
7060 __kmp_init_speculative_stats();
7063#if KMP_STATS_ENABLED
7093#if KMP_MIC_SUPPORTED
7094 __kmp_check_mic_type();
7123 __kmp_monitor_wakeups =
7125 __kmp_bt_intervals =
7138#if KMP_FAST_REDUCTION_BARRIER
7139#define kmp_reduction_barrier_gather_bb ((int)1)
7140#define kmp_reduction_barrier_release_bb ((int)1)
7141#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7142#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7149#if KMP_FAST_REDUCTION_BARRIER
7159#if KMP_FAST_REDUCTION_BARRIER
7160#undef kmp_reduction_barrier_release_pat
7161#undef kmp_reduction_barrier_gather_pat
7162#undef kmp_reduction_barrier_release_bb
7163#undef kmp_reduction_barrier_gather_bb
7165#if KMP_MIC_SUPPORTED
7166 if (__kmp_mic_type == mic2) {
7174#if KMP_FAST_REDUCTION_BARRIER
7175 if (__kmp_mic_type == mic2) {
7199#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7200 __kmp_user_level_mwait_init();
7251 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7264#if !KMP_DYNAMIC_LIB || \
7265 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7278#if KMP_HANDLE_SIGNALS
7284 __kmp_install_signals(
FALSE);
7287 __kmp_install_signals(
TRUE);
7314 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
7332 int prev_dflt_team_nth;
7338 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7350#if KMP_AFFINITY_SUPPORTED
7353 __kmp_affinity_initialize(__kmp_affinity);
7372#ifdef KMP_DFLT_NTH_CORES
7375 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7376 "__kmp_ncores (%d)\n",
7381 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7382 "__kmp_avail_proc(%d)\n",
7410 if (thread->th.th_current_task->td_icvs.nproc != 0)
7418 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7421#ifdef KMP_ADJUST_BLOCKTIME
7426 __kmp_zero_bt =
TRUE;
7434 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
7466 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
7480 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7483#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7486 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7487 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7488 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7492#if KMP_HANDLE_SIGNALS
7494 __kmp_install_signals(
TRUE);
7500#if defined(USE_LOAD_BALANCE)
7518 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7539#if KMP_AFFINITY_SUPPORTED
7543 if (!__kmp_hh_affinity.flags.initialized)
7544 __kmp_affinity_initialize(__kmp_hh_affinity);
7575 this_thr->th.th_local.this_construct = 0;
7609 if (__itt_stack_caller_create_ptr) {
7611 if (team->
t.t_stack_id != NULL) {
7612 __kmp_itt_stack_callee_enter((__itt_caller)team->
t.t_stack_id);
7615 __kmp_itt_stack_callee_enter(
7616 (__itt_caller)team->
t.t_parent->t.t_stack_id);
7620#if INCLUDE_SSC_MARKS
7621 SSC_MARK_INVOKING();
7626 void **exit_frame_p;
7627 ompt_data_t *my_task_data;
7628 ompt_data_t *my_parallel_data;
7632 exit_frame_p = &(team->
t.t_implicit_task_taskdata[tid]
7633 .ompt_task_info.frame.exit_frame.ptr);
7635 exit_frame_p = &dummy;
7639 &(team->
t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7640 my_parallel_data = &(team->
t.ompt_team_info.parallel_data);
7642 ompt_team_size = team->
t.t_nproc;
7644 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7650#if KMP_STATS_ENABLED
7652 if (previous_state == stats_state_e::TEAMS_REGION) {
7661 tid, (
int)team->
t.t_argc, (
void **)team->
t.t_argv
7668 *exit_frame_p = NULL;
7669 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7672#if KMP_STATS_ENABLED
7673 if (previous_state == stats_state_e::TEAMS_REGION) {
7680 if (__itt_stack_caller_create_ptr) {
7682 if (team->
t.t_stack_id != NULL) {
7683 __kmp_itt_stack_callee_leave((__itt_caller)team->
t.t_stack_id);
7686 __kmp_itt_stack_callee_leave(
7687 (__itt_caller)team->
t.t_parent->t.t_stack_id);
7701 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7704 KA_TRACE(20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7713 KA_TRACE(100, (
"__kmp_teams_master: Thread %p created node %p and init"
7714 " cg_nthreads to 1\n",
7716 tmp->
up = thr->th.th_cg_roots;
7717 thr->th.th_cg_roots = tmp;
7721#if INCLUDE_SSC_MARKS
7727#if INCLUDE_SSC_MARKS
7731 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7732 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7755 ompt_data_t *task_data =
7756 &team->
t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7757 ompt_data_t *parallel_data = &team->
t.ompt_team_info.parallel_data;
7760 ompt_scope_begin, parallel_data, task_data, team->
t.t_nproc, tid,
7762 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7767 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7781 if (num_threads > 0)
7782 thr->th.th_set_nproc = num_threads;
7795 if (num_threads == 0) {
7807 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7808 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7813 if (num_threads == 0) {
7817 if (num_threads < 0) {
7824 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7831 if (new_threads == 0) {
7834 if (new_threads != num_threads) {
7838 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7842 num_threads = new_threads;
7845 thr->th.th_teams_size.nth = num_threads;
7853 if (num_teams < 0) {
7860 if (num_teams == 0) {
7878 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7886 int num_teams_ub,
int num_threads) {
7892 if (num_teams_lb > num_teams_ub) {
7899 if (num_teams_lb == 0 && num_teams_ub > 0)
7900 num_teams_lb = num_teams_ub;
7902 if (num_teams_lb == 0 && num_teams_ub == 0) {
7913 }
else if (num_teams_lb == num_teams_ub) {
7914 num_teams = num_teams_ub;
7916 if (num_threads <= 0) {
7918 num_teams = num_teams_lb;
7920 num_teams = num_teams_ub;
7926 if (num_teams < num_teams_lb) {
7927 num_teams = num_teams_lb;
7928 }
else if (num_teams > num_teams_ub) {
7929 num_teams = num_teams_ub;
7935 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7943 thr->th.th_set_proc_bind = proc_bind;
7960 team->
t.t_construct = 0;
7961 team->
t.t_ordered.dt.t_value =
7966 if (team->
t.t_max_nproc > 1) {
7969 team->
t.t_disp_buffer[
i].buffer_index =
i;
7970 team->
t.t_disp_buffer[
i].doacross_buf_idx =
i;
7973 team->
t.t_disp_buffer[0].buffer_index = 0;
7974 team->
t.t_disp_buffer[0].doacross_buf_idx = 0;
7981 for (
f = 0;
f < team->
t.t_nproc;
f++) {
7983 team->
t.t_threads[
f]->th.th_team_nproc == team->
t.t_nproc);
8004 __kmp_printf(
"GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8006 __kmp_printf(
"__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8007 "team->t.t_nproc=%d\n",
8019 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
8020 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8021 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8022 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8024 void *codeptr = NULL;
8028 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8032 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8037 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8043 ompt_scope_end, NULL, task_data, 0, ds_tid,
8044 ompt_task_implicit);
8055#ifdef USE_LOAD_BALANCE
8059static int __kmp_active_hot_team_nproc(
kmp_root_t *root) {
8064 if (root->r.r_active) {
8067 hot_team = root->r.r_hot_team;
8069 return hot_team->
t.t_nproc - 1;
8074 for (
i = 1;
i < hot_team->
t.t_nproc;
i++) {
8075 if (hot_team->
t.t_threads[
i]->th.th_active) {
8084static int __kmp_load_balance_nproc(
kmp_root_t *root,
int set_nproc) {
8087 int hot_team_active;
8088 int team_curr_active;
8091 KB_TRACE(20, (
"__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
8095 ->th.th_current_task->td_icvs.dynamic ==
TRUE);
8098 if (set_nproc == 1) {
8099 KB_TRACE(20, (
"__kmp_load_balance_nproc: serial execution.\n"));
8109 hot_team_active = __kmp_active_hot_team_nproc(root);
8110 team_curr_active = pool_active + hot_team_active + 1;
8114 KB_TRACE(30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d "
8115 "hot team active = %d\n",
8116 system_active, pool_active, hot_team_active));
8118 if (system_active < 0) {
8123 KMP_WARNING(CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit");
8127 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8128 if (retval > set_nproc) {
8135 KB_TRACE(20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
8143 if (system_active < team_curr_active) {
8144 system_active = team_curr_active;
8147 if (retval > set_nproc) {
8154 KB_TRACE(20, (
"__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8166 KA_TRACE(10, (
"__kmp_cleanup: enter\n"));
8169#if KMP_HANDLE_SIGNALS
8170 __kmp_remove_signals();
8176#if KMP_AFFINITY_SUPPORTED
8177 __kmp_affinity_uninitialize();
8183 KA_TRACE(10, (
"__kmp_cleanup: go serial cleanup\n"));
8214#if KMP_USE_DYNAMIC_LOCK
8215 __kmp_cleanup_indirect_user_locks();
8222 ompd_env_block = NULL;
8223 ompd_env_block_size = 0;
8227#if KMP_AFFINITY_SUPPORTED
8229 __kmp_cpuinfo_file = NULL;
8232#if KMP_USE_ADAPTIVE_LOCKS
8233#if KMP_DEBUG_ADAPTIVE_LOCKS
8234 __kmp_print_speculative_stats();
8252#if KMP_USE_HIER_SCHED
8256#if KMP_STATS_ENABLED
8260 KA_TRACE(10, (
"__kmp_cleanup: exit\n"));
8268 if ((env = getenv(
"KMP_IGNORE_MPPBEG")) != NULL) {
8279 if ((env = getenv(
"KMP_IGNORE_MPPEND")) != NULL) {
8297 if (root->r.r_begin)
8300 if (root->r.r_begin) {
8305 root->r.r_begin =
TRUE;
8322 root = thread->th.th_root;
8324 KA_TRACE(20, (
"__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8326 if (root->r.r_in_parallel) {
8334 thread->th.th_set_nproc = 0;
8338 thread->th.th_set_nproc = 0;
8343 thread->th.th_set_nproc = 0;
8359 if (arg & (0x1000 - 1)) {
8360 arg &= ~(0x1000 - 1);
8410 teams_serialized = 0;
8411 if (thr->th.th_teams_microtask) {
8413 int tlevel = thr->th.th_teams_level;
8414 int ii = team->
t.t_level;
8415 teams_serialized = team->
t.t_serialized;
8416 int level = tlevel + 1;
8419 for (teams_serialized = team->
t.t_serialized;
8420 (teams_serialized > 0) && (
ii >
level); teams_serialized--,
ii--) {
8422 if (team->
t.t_serialized && (!teams_serialized)) {
8423 team = team->
t.t_parent;
8427 team = team->
t.t_parent;
8440 if (serialized > 1) {
8443 return team->
t.t_master_tid;
8453 if (serialized > 1) {
8456 return team->
t.t_parent->t.t_nproc;
8503#if KMP_AFFINITY_SUPPORTED
8504 {
'A',
"thread_affinity",
's'},
8506 {
't',
"team_num",
'd'},
8507 {
'T',
"num_teams",
'd'},
8508 {
'L',
"nesting_level",
'd'},
8509 {
'n',
"thread_num",
'd'},
8510 {
'N',
"num_threads",
'd'},
8511 {
'a',
"ancestor_tnum",
'd'},
8513 {
'P',
"process_id",
'd'},
8514 {
'i',
"native_thread_id",
'd'}};
8520 int rc, format_index, field_value;
8521 const char *width_left, *width_right;
8522 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8523 static const int FORMAT_SIZE = 20;
8524 char format[FORMAT_SIZE] = {0};
8525 char absolute_short_name = 0;
8550 right_justify =
false;
8552 right_justify =
true;
8556 width_left = width_right = NULL;
8557 if (**ptr >=
'0' && **ptr <=
'9') {
8565 format[format_index++] =
'%';
8567 format[format_index++] =
'-';
8569 format[format_index++] =
'0';
8570 if (width_left && width_right) {
8574 while (
i < 8 && width_left < width_right) {
8575 format[format_index++] = *width_left;
8583 found_valid_name =
false;
8584 parse_long_name = (**ptr ==
'{');
8585 if (parse_long_name)
8593 if (parse_long_name) {
8595 if (strncmp(*ptr, long_name,
length) == 0) {
8596 found_valid_name =
true;
8599 }
else if (**ptr == short_name) {
8600 found_valid_name =
true;
8603 if (found_valid_name) {
8604 format[format_index++] = field_format;
8605 format[format_index++] =
'\0';
8606 absolute_short_name = short_name;
8610 if (parse_long_name) {
8612 absolute_short_name = 0;
8620 switch (absolute_short_name) {
8653#if KMP_AFFINITY_SUPPORTED
8657 __kmp_affinity_str_buf_mask(&
buf, th->th.th_affin_mask);
8667 if (parse_long_name) {
8688 const char *parse_ptr;
8705 if (parse_ptr == NULL || *parse_ptr ==
'\0') {
8710 while (*parse_ptr !=
'\0') {
8712 if (*parse_ptr ==
'%') {
8739 int blocktime = arg;
8758 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8760 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8761 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8770 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8771 "bt_intervals=%d, monitor_updates=%d\n",
8773 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8774 __kmp_monitor_wakeups));
8776 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8778 thread->th.th_team->t.t_id, tid, blocktime));
8799 void *reduce_data,
void (*reduce_func)(
void *lhs_data,
void *rhs_data),
8817#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8819 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8820#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8826 if (team_size == 1) {
8834#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8835 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
8836 KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM
8838#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8839 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || \
8840 KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8842 int teamsize_cutoff = 4;
8844#if KMP_MIC_SUPPORTED
8845 if (__kmp_mic_type != non_mic) {
8846 teamsize_cutoff = 8;
8850 if (tree_available) {
8851 if (team_size <= teamsize_cutoff) {
8852 if (atomic_available) {
8856 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8858 }
else if (atomic_available) {
8862#error "Unknown or unsupported OS"
8867#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
8868 KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
8870#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8871 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS || \
8872 KMP_OS_WASI || KMP_OS_AIX
8876 if (atomic_available) {
8877 if (num_vars <= 2) {
8885 if (atomic_available && (num_vars <= 3)) {
8887 }
else if (tree_available) {
8888 if ((reduce_size > (9 *
sizeof(
kmp_real64))) &&
8889 (reduce_size < (2000 *
sizeof(
kmp_real64)))) {
8890 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8895#error "Unknown or unsupported OS"
8899#error "Unknown or unsupported architecture"
8912 int atomic_available, tree_available;
8921 if (!atomic_available) {
8929 if (!tree_available) {
8933#if KMP_FAST_REDUCTION_BARRIER
8934 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8943 retval = forced_retval;
8946 KA_TRACE(10, (
"reduction method selected=%08x\n", retval));
8948#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8949#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
9051 for (
int f = 1;
f < old_nthreads; ++
f) {
9054 if (team->
t.t_threads[
f]->th.th_used_in_team.load() == 0) {
9060 if (team->
t.t_threads[
f]->th.th_used_in_team.load() == 3) {
9061 while (team->
t.t_threads[
f]->th.th_used_in_team.load() == 3)
9067 team->
t.t_threads[
f]->th.th_used_in_team.store(2);
9071 team->
t.b->go_release();
9077 int count = old_nthreads - 1;
9079 count = old_nthreads - 1;
9080 for (
int f = 1;
f < old_nthreads; ++
f) {
9081 if (other_threads[
f]->th.th_used_in_team.load() != 0) {
9084 void *, other_threads[
f]->th.th_sleep_loc);
9094 team->
t.b->update_num_threads(new_nthreads);
9095 team->
t.b->go_reset();
9106 for (
int f = 1;
f < new_nthreads; ++
f) {
9118 int count = new_nthreads - 1;
9120 count = new_nthreads - 1;
9121 for (
int f = 1;
f < new_nthreads; ++
f) {
9122 if (team->
t.t_threads[
f]->th.th_used_in_team.load() == 1) {
9142std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9144void __kmp_hidden_helper_wrapper_fn(
int *gtid,
int *, ...) {
9161 for (
int i = 1;
i < __kmp_hit_hidden_helper_threads_num; ++
i) {
9207 for (
int i = 0;
i < levels; ++
i)
9229 loc++, hw_level++) {
9238 int upper_levels = 1;
9274#if !KMP_STATS_ENABLED
9281#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
A simple pure header implementation of VLA that aims to replace uses of actual VLA,...
static void deallocate(distributedBarrier *db)
static distributedBarrier * allocate(int nThreads)
bool is_sleeping()
Test whether there are threads sleeping on the flag.
int get_level(kmp_hw_t type) const
int get_count(int level) const
int get_ratio(int level) const
@ KMP_IDENT_AUTOPAR
Entry point generated by auto-parallelization.
KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid)
KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs, kmpc_micro microtask,...)
KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid)
sched_type
Describes the loop schedule to be used for a parallel for loop.
KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid)
@ kmp_sch_static
static unspecialized
@ kmp_sch_guided_chunked
guided unspecialized
@ kmp_sch_dynamic_chunked
@ kmp_sch_guided_analytical_chunked
@ kmp_sch_static_balanced
@ kmp_sch_guided_iterative_chunked
__itt_string_handle * name
void const char const char int ITT_FORMAT __itt_group_sync s
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t count
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t length
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark S
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id tail
void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team)
void __kmp_finish_implicit_task(kmp_info_t *this_thr)
volatile kmp_team_t * __kmp_team_pool
#define get__dynamic_2(xteam, xtid)
kmp_bar_pat_e __kmp_barrier_release_pat_dflt
int __kmp_generate_warnings
kmp_proc_bind_t __kmp_teams_proc_bind
#define KMP_INTERNAL_MALLOC(sz)
#define KMP_DEFAULT_CHUNK
kmp_bootstrap_lock_t __kmp_initz_lock
#define KMP_MAX_STKPADDING
int __kmp_display_env_verbose
kmp_global_t __kmp_global
void __kmp_init_target_mem()
void __kmp_hidden_helper_worker_thread_signal()
void __kmp_common_initialize(void)
void __kmp_release_64(kmp_flag_64<> *flag)
kmp_pause_status_t __kmp_pause_status
#define KMP_MAX_BLOCKTIME
kmp_lock_t __kmp_debug_lock
void __kmp_read_system_time(double *delta)
kmp_bootstrap_lock_t __kmp_tp_cached_lock
void __kmp_reap_task_teams(void)
kmp_int32 __kmp_use_yield
int __kmp_dflt_team_nth_ub
void __kmp_hidden_helper_threads_initz_wait()
#define KMP_INTERNAL_REALLOC(p, sz)
#define get__nproc_2(xteam, xtid)
void __kmp_wait_to_unref_task_teams(void)
struct KMP_ALIGN_CACHE dispatch_private_info dispatch_private_info_t
#define __kmp_assign_root_init_mask()
int __kmp_dflt_max_active_levels
#define KMP_NOT_SAFE_TO_REAP
void __kmp_unlock_suspend_mx(kmp_info_t *th)
kmp_bar_pat_e __kmp_barrier_gather_pat_dflt
#define KMP_HIDDEN_HELPER_TEAM(team)
static kmp_team_t * __kmp_team_from_gtid(int gtid)
void __kmp_do_initialize_hidden_helper_threads()
kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier]
kmp_tasking_mode_t __kmp_tasking_mode
char * __kmp_affinity_format
volatile kmp_info_t * __kmp_thread_pool
volatile int __kmp_init_gtid
omp_allocator_handle_t __kmp_def_allocator
static void __kmp_resume_if_hard_paused()
#define get__max_active_levels(xthread)
kmp_nested_proc_bind_t __kmp_nested_proc_bind
void __kmp_free_implicit_task(kmp_info_t *this_thr)
void __kmp_hidden_helper_main_thread_release()
fork_context_e
Tell the fork call which compiler generated the fork call, and therefore how to deal with the call.
@ fork_context_gnu
Called from GNU generated code, so must not invoke the microtask internally.
@ fork_context_intel
Called from Intel generated code.
void __kmp_suspend_initialize(void)
kmp_nested_nthreads_t __kmp_nested_nth
#define KMP_GTID_SHUTDOWN
void __kmp_internal_end_dtor(void)
volatile int __kmp_all_nth
#define set__nproc(xthread, xval)
int __kmp_is_address_mapped(void *addr)
kmp_lock_t __kmp_global_lock
union KMP_ALIGN_CACHE kmp_root kmp_root_t
int __kmp_adjust_gtid_mode
#define __kmp_entry_gtid()
kmp_old_threads_list_t * __kmp_old_threads_list
volatile int __kmp_init_common
static int __kmp_tid_from_gtid(int gtid)
static bool KMP_UBER_GTID(int gtid)
kmp_int32 __kmp_use_yield_exp_set
volatile int __kmp_init_hidden_helper
#define KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, thr)
int __kmp_gtid_get_specific(void)
volatile int __kmp_init_middle
void __kmp_hidden_helper_threads_deinitz_wait()
static kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind)
#define KMP_NESTED_HOT_TEAMS
#define KMP_CHECK_UPDATE(a, b)
int __kmp_storage_map_verbose
int __kmp_allThreadsSpecified
enum sched_type __kmp_static
#define KMP_INITIAL_GTID(gtid)
int PACKED_REDUCTION_METHOD_T
std::atomic< int > __kmp_thread_pool_active_nth
#define KMP_MASTER_TID(tid)
int __kmp_duplicate_library_ok
volatile int __kmp_need_register_serial
kmp_bootstrap_lock_t __kmp_forkjoin_lock
kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier]
static kmp_info_t * __kmp_entry_thread()
void __kmp_init_memkind()
void __kmp_hidden_helper_main_thread_wait()
#define KMP_GEN_TEAM_ID()
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task)
kmp_int32 __kmp_default_device
#define get__sched_2(xteam, xtid)
void __kmp_cleanup_threadprivate_caches()
static void copy_icvs(kmp_internal_control_t *dst, kmp_internal_control_t *src)
kmp_bootstrap_lock_t __kmp_exit_lock
kmp_info_t ** __kmp_threads
void __kmp_hidden_helper_initz_release()
enum sched_type __kmp_sched
#define KMP_BARRIER_PARENT_FLAG
void __kmp_suspend_uninitialize_thread(kmp_info_t *th)
void __kmp_finalize_bget(kmp_info_t *th)
#define KMP_BARRIER_SWITCH_TO_OWN_FLAG
static void __kmp_reset_root_init_mask(int gtid)
kmp_uint32 __kmp_barrier_gather_bb_dflt
kmp_uint32 __kmp_barrier_release_bb_dflt
int __kmp_dispatch_num_buffers
#define SCHEDULE_WITHOUT_MODIFIERS(s)
#define set__max_active_levels(xthread, xval)
#define __kmp_get_team_num_threads(gtid)
#define KMP_MIN_MALLOC_ARGV_ENTRIES
#define KMP_MASTER_GTID(gtid)
void __kmp_lock_suspend_mx(kmp_info_t *th)
int __kmp_nesting_mode_nlevels
int(* launch_t)(int gtid)
void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size)
int * __kmp_nesting_nth_level
volatile int __kmp_init_parallel
kmp_queuing_lock_t __kmp_dispatch_lock
#define KMP_DEFAULT_BLOCKTIME
#define set__blocktime_team(xteam, xtid, xval)
#define __kmp_allocate(size)
enum kmp_sched kmp_sched_t
enum library_type __kmp_library
#define USE_NESTED_HOT_ARG(x)
int __kmp_env_consistency_check
#define bs_reduction_barrier
void __kmp_runtime_destroy(void)
union KMP_ALIGN_CACHE kmp_desc kmp_desc_t
static void __kmp_sched_apply_mods_intkind(kmp_sched_t kind, enum sched_type *internal_kind)
volatile int __kmp_hidden_helper_team_done
static void __kmp_sched_apply_mods_stdkind(kmp_sched_t *kind, enum sched_type internal_kind)
#define KMP_INIT_BARRIER_STATE
size_t __kmp_sys_min_stksize
#define set__bt_set_team(xteam, xtid, xval)
kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier]
#define KMP_BARRIER_NOT_WAITING
#define KMP_INTERNAL_FREE(p)
int __kmp_threads_capacity
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team, int tid)
static int __kmp_gtid_from_tid(int tid, const kmp_team_t *team)
void __kmp_push_task_team_node(kmp_info_t *thread, kmp_team_t *team)
void __kmp_threadprivate_resize_cache(int newCapacity)
void __kmp_runtime_initialize(void)
volatile int __kmp_init_hidden_helper_threads
void __kmp_common_destroy_gtid(int gtid)
int __kmp_try_suspend_mx(kmp_info_t *th)
int __kmp_display_affinity
enum sched_type __kmp_guided
void __kmp_resume_32(int target_gtid, kmp_flag_32< C, S > *flag)
#define KMP_INLINE_ARGV_ENTRIES
#define SCHEDULE_GET_MODIFIERS(s)
PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method
#define __kmp_page_allocate(size)
void __kmp_initialize_bget(kmp_info_t *th)
int __kmp_teams_thread_limit
void __kmp_cleanup_hierarchy()
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr)
void __kmp_gtid_set_specific(int gtid)
void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64< C, S > *flag)
static int __kmp_gtid_from_thread(const kmp_info_t *thr)
#define KMP_MIN_BLOCKTIME
#define SCHEDULE_SET_MODIFIERS(s, m)
void __kmp_suspend_initialize_thread(kmp_info_t *th)
volatile int __kmp_init_serial
@ reduction_method_not_defined
#define KMP_CHECK_UPDATE_SYNC(a, b)
int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc, void *argv[])
#define KMP_MAX_ACTIVE_LEVELS_LIMIT
static void __kmp_type_convert(T1 src, T2 *dest)
void __kmp_fini_memkind()
kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier]
void __kmp_reap_worker(kmp_info_t *th)
void __kmp_hidden_helper_threads_deinitz_release()
void __kmp_expand_host_name(char *buffer, size_t size)
union KMP_ALIGN_CACHE kmp_info kmp_info_t
enum sched_type __kmp_sch_map[]
void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team, int wait=1)
#define __kmp_thread_free(th, ptr)
kmp_topology_t * __kmp_topology
kmp_atomic_lock_t __kmp_atomic_lock_8c
kmp_atomic_lock_t __kmp_atomic_lock_8r
kmp_atomic_lock_t __kmp_atomic_lock_4i
KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 kmp_int16
kmp_atomic_lock_t __kmp_atomic_lock_20c
kmp_atomic_lock_t __kmp_atomic_lock_16c
kmp_atomic_lock_t __kmp_atomic_lock_2i
kmp_atomic_lock_t __kmp_atomic_lock_32c
kmp_atomic_lock_t __kmp_atomic_lock_8i
kmp_atomic_lock_t __kmp_atomic_lock
kmp_atomic_lock_t __kmp_atomic_lock_10r
KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 kmp_int8
kmp_atomic_lock_t __kmp_atomic_lock_1i
kmp_atomic_lock_t __kmp_atomic_lock_16r
kmp_atomic_lock_t __kmp_atomic_lock_4r
static void __kmp_init_atomic_lock(kmp_atomic_lock_t *lck)
void __kmp_print_structure(void)
void __kmp_dump_debug_buffer(void)
#define KMP_BUILD_ASSERT(expr)
#define KMP_DEBUG_ASSERT(cond)
#define KMP_ASSERT2(cond, msg)
unsigned long long kmp_uint64
kmp_hier_sched_env_t __kmp_hier_scheds
void __kmp_dispatch_free_hierarchies(kmp_team_t *team)
void __kmp_env_free(char const **value)
char * __kmp_env_get(char const *name)
void __kmp_env_set(char const *name, char const *value, int overwrite)
void __kmp_env_unset(char const *name)
void __kmp_push_sync(int gtid, enum cons_type ct, ident_t const *ident, kmp_user_lock_p lck)
void __kmp_push_parallel(int gtid, ident_t const *ident)
void __kmp_check_workshare(int gtid, enum cons_type ct, ident_t const *ident)
void __kmp_push_workshare(int gtid, enum cons_type ct, ident_t const *ident)
enum cons_type __kmp_pop_workshare(int gtid, enum cons_type ct, ident_t const *ident)
void __kmp_pop_sync(int gtid, enum cons_type ct, ident_t const *ident)
struct cons_header * __kmp_allocate_cons_stack(int gtid)
void __kmp_pop_parallel(int gtid, ident_t const *ident)
void __kmp_free_cons_stack(void *ptr)
static volatile kmp_i18n_cat_status_t status
static void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, va_list ap)
void __kmp_i18n_dump_catalog(kmp_str_buf_t *buffer)
void __kmp_fatal(kmp_msg_t message,...)
void __kmp_i18n_catclose()
kmp_bootstrap_lock_t __kmp_stdio_lock
void __kmp_fprintf(enum kmp_io stream, char const *format,...)
void __kmp_vprintf(enum kmp_io out_stream, char const *format, va_list ap)
void __kmp_printf(char const *format,...)
void __kmp_printf_no_lock(char const *format,...)
void __kmp_close_console(void)
#define USE_ITT_BUILD_ARG(x)
void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck)
void __kmp_cleanup_user_locks(void)
void __kmp_validate_locks(void)
static void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck)
static int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid)
static void __kmp_init_lock(kmp_lock_t *lck)
static int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck)
static void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid)
static void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck)
void(* microtask_t)(int *gtid, int *npr,...)
#define TCW_SYNC_PTR(a, b)
#define KMP_ATOMIC_ST_REL(p, v)
#define KMP_CACHE_PREFETCH(ADDR)
#define KMP_ATOMIC_LD_ACQ(p)
bool __kmp_atomic_compare_store_acq(std::atomic< T > *p, T expected, T desired)
#define KMP_FALLTHROUGH()
#define KMP_ATOMIC_DEC(p)
#define KMP_GET_PAGE_SIZE()
#define KMP_ATOMIC_LD_RLX(p)
#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv)
#define KMP_WEAK_ATTRIBUTE_EXTERNAL
#define KMP_ATOMIC_INC(p)
int __kmp_pause_resource(kmp_pause_status_t level)
void __kmp_warn(char const *format,...)
kmp_info_t * __kmp_hidden_helper_main_thread
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk)
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc)
static void __kmp_fini_allocator()
static void __kmp_init_allocator()
void __kmp_aux_set_defaults(char const *str, size_t len)
static kmp_team_t * __kmp_aux_get_team_info(int &teams_serialized)
static int __kmp_expand_threads(int nNeed)
void __kmp_teams_master(int gtid)
static void __kmp_itthash_clean(kmp_info_t *th)
#define propagateFPControl(x)
void __kmp_itt_init_ittlib()
void __kmp_infinite_loop(void)
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb, int num_teams_ub, int num_threads)
int __kmp_aux_get_num_teams()
kmp_info_t * __kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, int new_tid)
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team)
static long __kmp_registration_flag
int __kmp_get_max_active_levels(int gtid)
void __kmp_aux_set_library(enum library_type arg)
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size, char const *format,...)
unsigned short __kmp_get_random(kmp_info_t *thread)
int __kmp_register_root(int initial_thread)
static void __kmp_internal_end(void)
void __kmp_free_team(kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master))
void __kmp_set_max_active_levels(int gtid, int max_active_levels)
void __kmp_abort_thread(void)
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc)
void __kmp_internal_end_atexit(void)
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, kmp_info_t *master_th, int master_gtid, int fork_teams_workers)
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind)
kmp_team_t * __kmp_reap_team(kmp_team_t *team)
void __kmp_exit_single(int gtid)
void __kmp_check_stack_overlap(kmp_info_t *th)
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, int num_threads)
int __kmp_get_team_size(int gtid, int level)
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
static void __kmp_do_middle_initialize(void)
int __kmp_get_max_teams(void)
static void __kmp_free_team_arrays(kmp_team_t *team)
static void __kmp_initialize_root(kmp_root_t *root)
static void __kmp_reinitialize_team(kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc)
int __kmp_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context, kmp_int32 argc, microtask_t microtask, launch_t invoker, kmp_va_list ap)
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref)
void * __kmp_launch_thread(kmp_info_t *this_thr)
void __kmp_set_teams_thread_limit(int limit)
static int __kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context, kmp_int32 argc, microtask_t microtask, launch_t invoker, kmp_info_t *master_th, kmp_team_t *parent_team, kmp_va_list ap)
void __kmp_join_barrier(int gtid)
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team)
void __kmp_init_random(kmp_info_t *thread)
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams, int num_threads)
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads)
void __kmp_user_set_library(enum library_type arg)
#define updateHWFPControl(x)
#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED
void __kmp_internal_end_dest(void *specific_gtid)
int __kmp_aux_get_team_num()
void __kmp_set_num_threads(int new_nth, int gtid)
void __kmp_internal_end_thread(int gtid_req)
static bool __kmp_is_fork_in_teams(kmp_info_t *master_th, microtask_t microtask, int level, int teams_level, kmp_va_list ap)
PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void(*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck)
void __kmp_hidden_helper_threads_initz_routine()
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws)
static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid, int gtid)
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team)
void __kmp_join_call(ident_t *loc, int gtid, int exit_teams)
static int __kmp_reset_root(int gtid, kmp_root_t *root)
int __kmp_get_ancestor_thread_num(int gtid, int level)
void __kmp_itt_fini_ittlib()
void __kmp_omp_display_env(int verbose)
void __kmp_middle_initialize(void)
void __kmp_unregister_root_current_thread(int gtid)
static void __kmp_reap_thread(kmp_info_t *thread, int is_root)
static const unsigned __kmp_primes[]
int __kmp_get_teams_thread_limit(void)
#define FAST_REDUCTION_TREE_METHOD_GENERATED
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref)
kmp_r_sched_t __kmp_get_schedule_global()
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team)
static kmp_internal_control_t __kmp_get_global_icvs(void)
void __kmp_parallel_initialize(void)
void __kmp_set_nesting_mode_threads()
void __kmp_unregister_library(void)
char const __kmp_version_omp_api[]
static char * __kmp_registration_str
int __kmp_ignore_mppbeg(void)
kmp_int32 __kmp_enable_hidden_helper
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team)
void __kmp_aux_set_stacksize(size_t arg)
void __kmp_internal_end_library(int gtid_req)
size_t __kmp_aux_capture_affinity(int gtid, const char *format, kmp_str_buf_t *buffer)
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, int new_nthreads)
kmp_info_t * __kmp_thread_pool_insert_pt
int __kmp_omp_debug_struct_info
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid)
void __kmp_aux_display_affinity(int gtid, const char *format)
void __kmp_init_nesting_mode()
std::atomic< kmp_int32 > __kmp_unexecuted_hidden_helper_tasks
void __kmp_register_library_startup(void)
void __kmp_free_thread(kmp_info_t *this_th)
int __kmp_invoke_task_func(int gtid)
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk)
void __kmp_abort_process()
static const kmp_affinity_format_field_t __kmp_affinity_format_table[]
void __kmp_set_num_teams(int num_teams)
kmp_info_t ** __kmp_hidden_helper_threads
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc)
void __kmp_save_internal_controls(kmp_info_t *thread)
int __kmp_invoke_teams_master(int gtid)
void __kmp_hidden_helper_initialize()
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads)
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth)
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, int master_tid, int set_nthreads, int enter_teams)
void __kmp_serial_initialize(void)
static bool __kmp_is_entering_teams(int active_level, int level, int teams_level, kmp_va_list ap)
void __kmp_resume_if_soft_paused()
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
int __kmp_get_global_thread_id()
void __kmp_internal_begin(void)
static char * __kmp_reg_status_name()
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team, int team_id, int num_thr)
static void __kmp_do_serial_initialize(void)
void __kmp_fork_barrier(int gtid, int tid)
int __kmp_get_global_thread_id_reg()
kmp_int32 __kmp_hidden_helper_threads_num
kmp_team_t * __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, kmp_proc_bind_t new_proc_bind, kmp_internal_control_t *new_icvs, int argc USE_NESTED_HOT_ARG(kmp_info_t *master))
int __kmp_ignore_mppend(void)
kmp_int32 __kmp_get_reduce_method(void)
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th, const char **ptr, kmp_str_buf_t *field_buffer)
static int __kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team, kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root, enum fork_context_e call_context, microtask_t microtask, launch_t invoker, int master_set_numthreads, int level, kmp_va_list ap)
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid)
#define KMP_STRCPY_S(dst, bsz, src)
int __kmp_default_tp_capacity(int req_nproc, int max_nth, int all_threads_specified)
int __kmp_initial_threads_capacity(int req_nproc)
void __kmp_env_initialize(char const *string)
void __kmp_display_env_impl(int display_env, int display_env_verbose)
void __kmp_stats_init(void)
void __kmp_stats_fini(void)
Functions for collecting statistics.
#define KMP_COUNT_VALUE(n, v)
#define KMP_PUSH_PARTITIONED_TIMER(name)
#define KMP_GET_THREAD_STATE()
#define KMP_POP_PARTITIONED_TIMER()
#define KMP_INIT_PARTITIONED_TIMERS(name)
#define KMP_SET_THREAD_STATE_BLOCK(state_name)
#define KMP_TIME_PARTITIONED_BLOCK(name)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n)
#define KMP_SET_THREAD_STATE(state_name)
void __kmp_str_split(char *str, char delim, char **head, char **tail)
void __kmp_str_buf_clear(kmp_str_buf_t *buffer)
void __kmp_str_buf_free(kmp_str_buf_t *buffer)
char * __kmp_str_format(char const *format,...)
int __kmp_str_match_true(char const *data)
void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, size_t len)
void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src)
int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format,...)
int __kmp_str_match_false(char const *data)
#define __kmp_str_buf_init(b)
void __kmp_print_version_1(void)
void __kmp_print_version_2(void)
#define KMP_VERSION_PREFIX
char const __kmp_version_alt_comp[]
char const __kmp_version_lock[]
static void __kmp_null_resume_wrapper(kmp_info_t *thr)
__attribute__((noinline))
void microtask(int *global_tid, int *bound_tid)
ompt_callbacks_active_t ompt_enabled
ompt_callbacks_internal_t ompt_callbacks
#define OMPT_GET_FRAME_ADDRESS(level)
void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, ompt_data_t *ompt_pid, void *codeptr)
int __ompt_get_task_info_internal(int ancestor_level, int *type, ompt_data_t **task_data, ompt_frame_t **task_frame, ompt_data_t **parallel_data, int *thread_num)
void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int on_heap, bool always)
ompt_task_info_t * __ompt_get_task_info_object(int depth)
void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid)
void __ompt_lw_taskteam_unlink(kmp_info_t *thr)
ompt_data_t * __ompt_get_thread_data_internal()
kmp_int32 tt_found_proxy_tasks
kmp_int32 tt_hidden_helper_task_encountered
kmp_int32 cg_thread_limit
void(* th_dxo_fcn)(int *gtid, int *cid, ident_t *)
kmp_int32 th_doacross_buf_idx
dispatch_private_info_t * th_dispatch_pr_current
dispatch_private_info_t * th_disp_buffer
void(* th_deo_fcn)(int *gtid, int *cid, ident_t *)
dispatch_shared_info_t * th_dispatch_sh_current
kmp_proc_bind_t proc_bind
struct kmp_internal_control * next
kmp_proc_bind_t * bind_types
struct kmp_old_threads_list_t * next
ompt_task_info_t ompt_task_info
enum sched_type r_sched_type
void __kmp_reap_monitor(kmp_info_t *th)
void __kmp_register_atfork(void)
void __kmp_free_handle(kmp_thread_t tHandle)
int __kmp_get_load_balance(int max)
int __kmp_still_running(kmp_info_t *th)
void __kmp_initialize_system_tick(void)
int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val)