LLVM OpenMP 19.0.0git
kmp_runtime.cpp
1/*
2 * kmp_runtime.cpp -- KPTS runtime support library
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp.h"
14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
17#include "kmp_error.h"
18#include "kmp_i18n.h"
19#include "kmp_io.h"
20#include "kmp_itt.h"
21#include "kmp_settings.h"
22#include "kmp_stats.h"
23#include "kmp_str.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
27#include "kmp_utils.h"
28#if KMP_USE_HIER_SCHED
29#include "kmp_dispatch_hier.h"
30#endif
31
32#if OMPT_SUPPORT
33#include "ompt-specific.h"
34#endif
35#if OMPD_SUPPORT
36#include "ompd-specific.h"
37#endif
38
39#if OMP_PROFILING_SUPPORT
40#include "llvm/Support/TimeProfiler.h"
41static char *ProfileTraceFile = nullptr;
42#endif
43
44/* these are temporary issues to be dealt with */
45#define KMP_USE_PRCTL 0
46
47#if KMP_OS_WINDOWS
48#include <process.h>
49#endif
50
51#ifndef KMP_USE_SHM
52// Windows and WASI do not need these include files as they don't use shared
53// memory.
54#else
55#include <sys/mman.h>
56#include <sys/stat.h>
57#include <fcntl.h>
58#define SHM_SIZE 1024
59#endif
60
61#if defined(KMP_GOMP_COMPAT)
62char const __kmp_version_alt_comp[] =
63 KMP_VERSION_PREFIX "alternative compiler support: yes";
64#endif /* defined(KMP_GOMP_COMPAT) */
65
66char const __kmp_version_omp_api[] =
67 KMP_VERSION_PREFIX "API version: 5.0 (201611)";
68
69#ifdef KMP_DEBUG
70char const __kmp_version_lock[] =
71 KMP_VERSION_PREFIX "lock type: run time selectable";
72#endif /* KMP_DEBUG */
73
74#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
75
76/* ------------------------------------------------------------------------ */
77
78#if KMP_USE_MONITOR
79kmp_info_t __kmp_monitor;
80#endif
81
82/* Forward declarations */
83
84void __kmp_cleanup(void);
85
86static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
87 int gtid);
88static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
89 kmp_internal_control_t *new_icvs,
90 ident_t *loc);
91#if KMP_AFFINITY_SUPPORTED
92static void __kmp_partition_places(kmp_team_t *team,
93 int update_master_only = 0);
94#endif
95static void __kmp_do_serial_initialize(void);
96void __kmp_fork_barrier(int gtid, int tid);
97void __kmp_join_barrier(int gtid);
98void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
99 kmp_internal_control_t *new_icvs, ident_t *loc);
100
101#ifdef USE_LOAD_BALANCE
102static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
103#endif
104
105static int __kmp_expand_threads(int nNeed);
106#if KMP_OS_WINDOWS
107static int __kmp_unregister_root_other_thread(int gtid);
108#endif
109static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
110static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
111
112void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
113 int new_nthreads);
114void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
115
116/* Calculate the identifier of the current thread */
117/* fast (and somewhat portable) way to get unique identifier of executing
118 thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
119int __kmp_get_global_thread_id() {
120 int i;
121 kmp_info_t **other_threads;
122 size_t stack_data;
123 char *stack_addr;
124 size_t stack_size;
125 char *stack_base;
126
127 KA_TRACE(
128 1000,
129 ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
130 __kmp_nth, __kmp_all_nth));
131
132 /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
133 a parallel region, made it return KMP_GTID_DNE to force serial_initialize
134 by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
135 __kmp_init_gtid for this to work. */
136
137 if (!TCR_4(__kmp_init_gtid))
138 return KMP_GTID_DNE;
139
140#ifdef KMP_TDATA_GTID
141 if (TCR_4(__kmp_gtid_mode) >= 3) {
142 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
143 return __kmp_gtid;
144 }
145#endif
146 if (TCR_4(__kmp_gtid_mode) >= 2) {
147 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
148 return __kmp_gtid_get_specific();
149 }
150 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));
151
152 stack_addr = (char *)&stack_data;
153 other_threads = __kmp_threads;
154
155 /* ATT: The code below is a source of potential bugs due to unsynchronized
156 access to __kmp_threads array. For example:
157 1. Current thread loads other_threads[i] to thr and checks it, it is
158 non-NULL.
159 2. Current thread is suspended by OS.
160 3. Another thread unregisters and finishes (debug versions of free()
161 may fill memory with something like 0xEF).
162 4. Current thread is resumed.
163 5. Current thread reads junk from *thr.
164 TODO: Fix it. --ln */
165
166 for (i = 0; i < __kmp_threads_capacity; i++) {
167
168 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
169 if (!thr)
170 continue;
171
172 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
173 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
174
175 /* stack grows down -- search through all of the active threads */
176
177 if (stack_addr <= stack_base) {
178 size_t stack_diff = stack_base - stack_addr;
179
180 if (stack_diff <= stack_size) {
181 /* The only way we can be closer than the allocated */
182 /* stack size is if we are running on this thread. */
183 // __kmp_gtid_get_specific can return negative value because this
184 // function can be called by thread destructor. However, before the
185 // thread destructor is called, the value of the corresponding
186 // thread-specific data will be reset to NULL.
187 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == KMP_GTID_DNE ||
188 __kmp_gtid_get_specific() == i);
189 return i;
190 }
191 }
192 }
193
194 /* get specific to try and determine our gtid */
195 KA_TRACE(1000,
196 ("*** __kmp_get_global_thread_id: internal alg. failed to find "
197 "thread, using TLS\n"));
198 i = __kmp_gtid_get_specific();
199
200 /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
201
202 /* if we haven't been assigned a gtid, then return code */
203 if (i < 0)
204 return i;
205
206 // other_threads[i] can be nullptr at this point because the corresponding
207 // thread could have already been destructed. It can happen when this function
208 // is called in end library routine.
209 if (!TCR_SYNC_PTR(other_threads[i]))
210 return i;
211
212 /* dynamically updated stack window for uber threads to avoid get_specific
213 call */
214 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
215 KMP_FATAL(StackOverflow, i);
216 }
217
218 stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
219 if (stack_addr > stack_base) {
220 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
221 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
222 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
223 stack_base);
224 } else {
225 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
226 stack_base - stack_addr);
227 }
228
229 /* Reprint stack bounds for ubermaster since they have been refined */
230 if (__kmp_storage_map) {
231 char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
232 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
233 __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
234 other_threads[i]->th.th_info.ds.ds_stacksize,
235 "th_%d stack (refinement)", i);
236 }
237 return i;
238}
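// --------------------------------------------------------------------------
// Illustrative sketch (not part of kmp_runtime.cpp): the stack-membership
// test used by the internal algorithm above, reduced to a standalone helper.
// The names stack_descriptor_t and stack_contains are hypothetical; the real
// code reads the descriptors from __kmp_threads[i]->th.th_info.ds under the
// TCR_* macros.
#include <cstddef>

struct stack_descriptor_t {
  char *stack_base;  // highest address of the stack (stacks grow down)
  size_t stack_size; // usable bytes below stack_base
};

// Returns true when 'addr' (e.g. the address of a local variable in the
// calling thread) lies within the described stack, i.e. the caller is
// currently running on that thread's stack.
static bool stack_contains(const stack_descriptor_t &d, const char *addr) {
  if (addr > d.stack_base)
    return false;                  // above the top of this stack
  size_t diff = (size_t)(d.stack_base - addr);
  return diff <= d.stack_size;     // within the allocated extent
}
// --------------------------------------------------------------------------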
239
240int __kmp_get_global_thread_id_reg() {
241 int gtid;
242
243 if (!__kmp_init_serial) {
244 gtid = KMP_GTID_DNE;
245 } else
246#ifdef KMP_TDATA_GTID
247 if (TCR_4(__kmp_gtid_mode) >= 3) {
248 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
249 gtid = __kmp_gtid;
250 } else
251#endif
252 if (TCR_4(__kmp_gtid_mode) >= 2) {
253 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
254 gtid = __kmp_gtid_get_specific();
255 } else {
256 KA_TRACE(1000,
257 ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
258 gtid = __kmp_get_global_thread_id();
259 }
260
261 /* we must be a new uber master sibling thread */
262 if (gtid == KMP_GTID_DNE) {
263 KA_TRACE(10,
264 ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
265 "Registering a new gtid.\n"));
266 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
267 if (!__kmp_init_serial) {
268 __kmp_do_serial_initialize();
269 gtid = __kmp_gtid_get_specific();
270 } else {
271 gtid = __kmp_register_root(FALSE);
272 }
273 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
274 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
275 }
276
277 KMP_DEBUG_ASSERT(gtid >= 0);
278
279 return gtid;
280}
281
282/* caller must hold forkjoin_lock */
283void __kmp_check_stack_overlap(kmp_info_t *th) {
284 int f;
285 char *stack_beg = NULL;
286 char *stack_end = NULL;
287 int gtid;
288
289 KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
290 if (__kmp_storage_map) {
291 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
292 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
293
294 gtid = __kmp_gtid_from_thread(th);
295
296 if (gtid == KMP_GTID_MONITOR) {
297 __kmp_print_storage_map_gtid(
298 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
299 "th_%s stack (%s)", "mon",
300 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
301 } else {
302 __kmp_print_storage_map_gtid(
303 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
304 "th_%d stack (%s)", gtid,
305 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
306 }
307 }
308
309 /* No point in checking ubermaster threads since they use refinement and
310 * cannot overlap */
311 gtid = __kmp_gtid_from_thread(th);
312 if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
313 KA_TRACE(10,
314 ("__kmp_check_stack_overlap: performing extensive checking\n"));
315 if (stack_beg == NULL) {
316 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
317 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
318 }
319
320 for (f = 0; f < __kmp_threads_capacity; f++) {
321 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
322
323 if (f_th && f_th != th) {
324 char *other_stack_end =
325 (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
326 char *other_stack_beg =
327 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
328 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
329 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
330
331 /* Print the other stack values before the abort */
332 if (__kmp_storage_map)
333 __kmp_print_storage_map_gtid(
334 -1, other_stack_beg, other_stack_end,
335 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
336 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));
337
338 __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
339 __kmp_msg_null);
340 }
341 }
342 }
343 }
344 KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
345}
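// --------------------------------------------------------------------------
// Illustrative sketch (not part of kmp_runtime.cpp): the endpoint comparison
// performed in the loop above, written as a standalone predicate.
// 'ranges_overlap' is a hypothetical name; the runtime does the same test
// inline on the [stack_beg, stack_end) ranges of the two threads.
static bool ranges_overlap(const char *beg, const char *end,
                           const char *other_beg, const char *other_end) {
  // True when either endpoint of [beg, end) falls strictly inside
  // (other_beg, other_end), i.e. the two thread stacks share address space.
  return (beg > other_beg && beg < other_end) ||
         (end > other_beg && end < other_end);
}
// --------------------------------------------------------------------------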
346
347/* ------------------------------------------------------------------------ */
348
349void __kmp_infinite_loop(void) {
350 static int done = FALSE;
351
352 while (!done) {
353 KMP_YIELD(TRUE);
354 }
355}
356
357#define MAX_MESSAGE 512
358
359void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
360 char const *format, ...) {
361 char buffer[MAX_MESSAGE];
362 va_list ap;
363
364 va_start(ap, format);
365 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
366 p2, (unsigned long)size, format);
367 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
368 __kmp_vprintf(kmp_err, buffer, ap);
369#if KMP_PRINT_DATA_PLACEMENT
370 int node;
371 if (gtid >= 0) {
372 if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
373 if (__kmp_storage_map_verbose) {
374 node = __kmp_get_host_node(p1);
375 if (node < 0) /* doesn't work, so don't try this next time */
376 __kmp_storage_map_verbose = FALSE;
377 else {
378 char *last;
379 int lastNode;
380 int localProc = __kmp_get_cpu_from_gtid(gtid);
381
382 const int page_size = KMP_GET_PAGE_SIZE();
383
384 p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
385 p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
386 if (localProc >= 0)
387 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
388 localProc >> 1);
389 else
390 __kmp_printf_no_lock(" GTID %d\n", gtid);
391#if KMP_USE_PRCTL
392 /* The more elaborate format is disabled for now because of the prctl
393 * hanging bug. */
394 do {
395 last = p1;
396 lastNode = node;
397 /* This loop collates adjacent pages with the same host node. */
398 do {
399 (char *)p1 += page_size;
400 } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
401 __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
402 lastNode);
403 } while (p1 <= p2);
404#else
405 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
406 (char *)p1 + (page_size - 1),
407 __kmp_get_host_node(p1));
408 if (p1 < p2) {
409 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
410 (char *)p2 + (page_size - 1),
411 __kmp_get_host_node(p2));
412 }
413#endif
414 }
415 }
416 } else
417 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));
418 }
419#endif /* KMP_PRINT_DATA_PLACEMENT */
420 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
421
422 va_end(ap);
423}
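// --------------------------------------------------------------------------
// Illustrative sketch (not part of kmp_runtime.cpp): the two-stage varargs
// formatting used above -- splice the caller's format string into a fixed
// prefix, then forward the caller's va_list in a single vfprintf call.
// 'log_with_prefix' is a hypothetical name.
#include <cstdarg>
#include <cstdio>

static void log_with_prefix(const char *format, ...) {
  char buffer[512];
  va_list ap;
  va_start(ap, format);
  // Stage 1: embed the user-supplied format inside the prefix string.
  snprintf(buffer, sizeof(buffer), "OMP storage map: %s\n", format);
  // Stage 2: expand the user's arguments against the combined format.
  vfprintf(stderr, buffer, ap);
  va_end(ap);
}
// --------------------------------------------------------------------------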
424
425void __kmp_warn(char const *format, ...) {
426 char buffer[MAX_MESSAGE];
427 va_list ap;
428
429 if (__kmp_generate_warnings == kmp_warnings_off) {
430 return;
431 }
432
433 va_start(ap, format);
434
435 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
436 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
437 __kmp_vprintf(kmp_err, buffer, ap);
438 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
439
440 va_end(ap);
441}
442
443void __kmp_abort_process() {
444 // Later threads may stall here, but that's ok because abort() will kill them.
445 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
446
447 if (__kmp_debug_buf) {
448 __kmp_dump_debug_buffer();
449 }
450
451#if KMP_OS_WINDOWS
452 // Let other threads know of abnormal termination and prevent deadlock
453 // if abort happened during library initialization or shutdown
454 __kmp_global.g.g_abort = SIGABRT;
455
456 /* On Windows* OS by default abort() causes pop-up error box, which stalls
457 nightly testing. Unfortunately, we cannot reliably suppress pop-up error
458 boxes. _set_abort_behavior() works well, but this function is not
459 available in VS7 (this is not problem for DLL, but it is a problem for
460 static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
461 help, at least in some versions of MS C RTL.
462
463 It seems following sequence is the only way to simulate abort() and
464 avoid pop-up error box. */
465 raise(SIGABRT);
466 _exit(3); // Just in case, if signal ignored, exit anyway.
467#else
468 __kmp_unregister_library();
469 abort();
470#endif
471
472 __kmp_infinite_loop();
473 __kmp_release_bootstrap_lock(&__kmp_exit_lock);
474
475} // __kmp_abort_process
476
477void __kmp_abort_thread(void) {
478 // TODO: Eliminate g_abort global variable and this function.
479 // In case of abort just call abort(), it will kill all the threads.
480 __kmp_infinite_loop();
481} // __kmp_abort_thread
482
483/* Print out the storage map for the major kmp_info_t thread data structures
484 that are allocated together. */
485
486static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
487 __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
488 gtid);
489
490 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
491 sizeof(kmp_desc_t), "th_%d.th_info", gtid);
492
493 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
494 sizeof(kmp_local_t), "th_%d.th_local", gtid);
495
496 __kmp_print_storage_map_gtid(
497 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
498 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);
499
500 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
501 &thr->th.th_bar[bs_plain_barrier + 1],
502 sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
503 gtid);
504
505 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
506 &thr->th.th_bar[bs_forkjoin_barrier + 1],
507 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
508 gtid);
509
510#if KMP_FAST_REDUCTION_BARRIER
511 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
512 &thr->th.th_bar[bs_reduction_barrier + 1],
513 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
514 gtid);
515#endif // KMP_FAST_REDUCTION_BARRIER
516}
517
518/* Print out the storage map for the major kmp_team_t team data structures
519 that are allocated together. */
520
521static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
522 int team_id, int num_thr) {
523 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
524 __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
525 header, team_id);
526
527 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
528 &team->t.t_bar[bs_last_barrier],
529 sizeof(kmp_balign_team_t) * bs_last_barrier,
530 "%s_%d.t_bar", header, team_id);
531
532 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
533 &team->t.t_bar[bs_plain_barrier + 1],
534 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
535 header, team_id);
536
537 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
538 &team->t.t_bar[bs_forkjoin_barrier + 1],
539 sizeof(kmp_balign_team_t),
540 "%s_%d.t_bar[forkjoin]", header, team_id);
541
542#if KMP_FAST_REDUCTION_BARRIER
543 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
544 &team->t.t_bar[bs_reduction_barrier + 1],
545 sizeof(kmp_balign_team_t),
546 "%s_%d.t_bar[reduction]", header, team_id);
547#endif // KMP_FAST_REDUCTION_BARRIER
548
549 __kmp_print_storage_map_gtid(
550 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
551 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);
552
553 __kmp_print_storage_map_gtid(
554 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
555 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);
556
557 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
558 &team->t.t_disp_buffer[num_disp_buff],
559 sizeof(dispatch_shared_info_t) * num_disp_buff,
560 "%s_%d.t_disp_buffer", header, team_id);
561}
562
563static void __kmp_init_allocator() {
564 __kmp_init_memkind();
565 __kmp_init_target_mem();
566}
567static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
568
569/* ------------------------------------------------------------------------ */
570
571#if ENABLE_LIBOMPTARGET
572static void __kmp_init_omptarget() {
573 __kmp_init_target_task();
574}
575#endif
576
577/* ------------------------------------------------------------------------ */
578
579#if KMP_DYNAMIC_LIB
580#if KMP_OS_WINDOWS
581
582BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
583 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
584
585 switch (fdwReason) {
586
587 case DLL_PROCESS_ATTACH:
588 KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
589
590 return TRUE;
591
592 case DLL_PROCESS_DETACH:
593 KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
594
595 // According to Windows* documentation for DllMain entry point:
596 // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
597 // lpReserved == NULL when FreeLibrary() is called,
598 // lpReserved != NULL when the process is terminated.
599 // When FreeLibrary() is called, worker threads remain alive. So the
600 // runtime's state is consistent and executing proper shutdown is OK.
601 // When the process is terminated, worker threads have exited or been
602 // forcefully terminated by the OS and only the shutdown thread remains.
603 // This can leave the runtime in an inconsistent state.
604 // Hence, only attempt proper cleanup when FreeLibrary() is called.
605 // Otherwise, rely on OS to reclaim resources.
606 if (lpReserved == NULL)
607 __kmp_internal_end_library(__kmp_gtid_get_specific());
608
609 return TRUE;
610
611 case DLL_THREAD_ATTACH:
612 KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
613
614 /* if we want to register new siblings all the time here call
615 * __kmp_get_gtid(); */
616 return TRUE;
617
618 case DLL_THREAD_DETACH:
619 KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
620
621 __kmp_internal_end_thread(__kmp_gtid_get_specific());
622 return TRUE;
623 }
624
625 return TRUE;
626}
627
628#endif /* KMP_OS_WINDOWS */
629#endif /* KMP_DYNAMIC_LIB */
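// --------------------------------------------------------------------------
// Illustrative sketch (not part of kmp_runtime.cpp): the lpReserved check
// described in the DllMain comment above, in a minimal handler of the same
// shape. 'ExampleDllMain' and 'example_library_shutdown' are hypothetical
// names, kept distinct from the real entry point.
#ifdef _WIN32
#include <windows.h>

static void example_library_shutdown(void) { /* orderly cleanup goes here */ }

BOOL WINAPI ExampleDllMain(HINSTANCE, DWORD reason, LPVOID lpReserved) {
  if (reason == DLL_PROCESS_DETACH) {
    // lpReserved == NULL: FreeLibrary() was called; worker threads are still
    // alive, so running a proper shutdown is safe.
    // lpReserved != NULL: the process is terminating; threads may already be
    // gone, so leave resource reclamation to the OS.
    if (lpReserved == NULL)
      example_library_shutdown();
  }
  return TRUE;
}
#endif
// --------------------------------------------------------------------------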
630
631/* __kmp_parallel_deo -- Wait until it's our turn. */
632void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
633 int gtid = *gtid_ref;
634#ifdef BUILD_PARALLEL_ORDERED
635 kmp_team_t *team = __kmp_team_from_gtid(gtid);
636#endif /* BUILD_PARALLEL_ORDERED */
637
638 if (__kmp_env_consistency_check) {
639 if (__kmp_threads[gtid]->th.th_root->r.r_active)
640#if KMP_USE_DYNAMIC_LOCK
641 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
642#else
643 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
644#endif
645 }
646#ifdef BUILD_PARALLEL_ORDERED
647 if (!team->t.t_serialized) {
648 KMP_MB();
649 KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
650 NULL);
651 KMP_MB();
652 }
653#endif /* BUILD_PARALLEL_ORDERED */
654}
655
656/* __kmp_parallel_dxo -- Signal the next task. */
657void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
658 int gtid = *gtid_ref;
659#ifdef BUILD_PARALLEL_ORDERED
660 int tid = __kmp_tid_from_gtid(gtid);
661 kmp_team_t *team = __kmp_team_from_gtid(gtid);
662#endif /* BUILD_PARALLEL_ORDERED */
663
664 if (__kmp_env_consistency_check) {
665 if (__kmp_threads[gtid]->th.th_root->r.r_active)
666 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
667 }
668#ifdef BUILD_PARALLEL_ORDERED
669 if (!team->t.t_serialized) {
670 KMP_MB(); /* Flush all pending memory write invalidates. */
671
672 /* use the tid of the next thread in this team */
673 /* TODO replace with general release procedure */
674 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
675
676 KMP_MB(); /* Flush all pending memory write invalidates. */
677 }
678#endif /* BUILD_PARALLEL_ORDERED */
679}
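// --------------------------------------------------------------------------
// Illustrative sketch (not part of kmp_runtime.cpp): the turn-taking protocol
// behind __kmp_parallel_deo/__kmp_parallel_dxo, using a std::atomic turn
// counter in place of KMP_WAIT/KMP_MB. 'ordered_turn_t', 'ordered_enter' and
// 'ordered_exit' are hypothetical names.
#include <atomic>

struct ordered_turn_t {
  std::atomic<int> turn{0}; // tid whose ordered section may run next
};

// Entry to an ordered section by the thread with team index 'tid'.
static void ordered_enter(ordered_turn_t &o, int tid) {
  while (o.turn.load(std::memory_order_acquire) != tid) {
    // spin; the runtime uses KMP_WAIT, which also yields and backs off
  }
}

// Exit: pass the turn to the next thread of a team with 'nproc' members.
static void ordered_exit(ordered_turn_t &o, int tid, int nproc) {
  o.turn.store((tid + 1) % nproc, std::memory_order_release);
}
// --------------------------------------------------------------------------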
680
681/* ------------------------------------------------------------------------ */
682/* The BARRIER for a SINGLE process section is always explicit */
683
684int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
685 int status;
686 kmp_info_t *th;
687 kmp_team_t *team;
688
689 if (!TCR_4(__kmp_init_parallel))
690 __kmp_parallel_initialize();
691 __kmp_resume_if_soft_paused();
692
693 th = __kmp_threads[gtid];
694 team = th->th.th_team;
695 status = 0;
696
697 th->th.th_ident = id_ref;
698
699 if (team->t.t_serialized) {
700 status = 1;
701 } else {
702 kmp_int32 old_this = th->th.th_local.this_construct;
703
704 ++th->th.th_local.this_construct;
705 /* try to set team count to thread count--success means thread got the
706 single block */
707 /* TODO: Should this be acquire or release? */
708 if (team->t.t_construct == old_this) {
709 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
710 th->th.th_local.this_construct);
711 }
712#if USE_ITT_BUILD
713 if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
714 KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
715 team->t.t_active_level == 1) {
716 // Only report metadata by primary thread of active team at level 1
717 __kmp_itt_metadata_single(id_ref);
718 }
719#endif /* USE_ITT_BUILD */
720 }
721
722 if (__kmp_env_consistency_check) {
723 if (status && push_ws) {
724 __kmp_push_workshare(gtid, ct_psingle, id_ref);
725 } else {
726 __kmp_check_workshare(gtid, ct_psingle, id_ref);
727 }
728 }
729#if USE_ITT_BUILD
730 if (status) {
731 __kmp_itt_single_start(gtid);
732 }
733#endif /* USE_ITT_BUILD */
734 return status;
735}
736
737void __kmp_exit_single(int gtid) {
738#if USE_ITT_BUILD
739 __kmp_itt_single_end(gtid);
740#endif /* USE_ITT_BUILD */
741 if (__kmp_env_consistency_check)
742 __kmp_pop_workshare(gtid, ct_psingle, NULL);
743}
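// --------------------------------------------------------------------------
// Illustrative sketch (not part of kmp_runtime.cpp): the "first thread wins"
// arbitration used by __kmp_enter_single, with a std::atomic standing in for
// __kmp_atomic_compare_store_acq. Every thread advances its private count of
// single constructs seen; the one that advances the shared team counter
// executes the single block. 'try_claim_single' is a hypothetical name.
#include <atomic>

// Returns true for exactly one of the threads that call this with the same
// starting value of 'my_construct_count'.
static bool try_claim_single(std::atomic<int> &team_construct,
                             int &my_construct_count) {
  int expected = my_construct_count; // value before this single construct
  ++my_construct_count;              // every thread bumps its local count
  return team_construct.compare_exchange_strong(
      expected, my_construct_count, std::memory_order_acquire);
}
// --------------------------------------------------------------------------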
744
745/* determine if we can go parallel or must use a serialized parallel region and
746 * how many threads we can use
747 * set_nproc is the number of threads requested for the team
748 * returns 0 if we should serialize or only use one thread,
749 * otherwise the number of threads to use
750 * The forkjoin lock is held by the caller. */
751static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
752 int master_tid, int set_nthreads,
753 int enter_teams) {
754 int capacity;
755 int new_nthreads;
756 KMP_DEBUG_ASSERT(__kmp_init_serial);
757 KMP_DEBUG_ASSERT(root && parent_team);
758 kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
759
760 // If dyn-var is set, dynamically adjust the number of desired threads,
761 // according to the method specified by dynamic_mode.
762 new_nthreads = set_nthreads;
763 if (!get__dynamic_2(parent_team, master_tid)) {
764 ;
765 }
766#ifdef USE_LOAD_BALANCE
767 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
768 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
769 if (new_nthreads == 1) {
770 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
771 "reservation to 1 thread\n",
772 master_tid));
773 return 1;
774 }
775 if (new_nthreads < set_nthreads) {
776 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
777 "reservation to %d threads\n",
778 master_tid, new_nthreads));
779 }
780 }
781#endif /* USE_LOAD_BALANCE */
782 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
783 new_nthreads = __kmp_avail_proc - __kmp_nth +
784 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
785 if (new_nthreads <= 1) {
786 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
787 "reservation to 1 thread\n",
788 master_tid));
789 return 1;
790 }
791 if (new_nthreads < set_nthreads) {
792 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
793 "reservation to %d threads\n",
794 master_tid, new_nthreads));
795 } else {
796 new_nthreads = set_nthreads;
797 }
798 } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
799 if (set_nthreads > 2) {
800 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
801 new_nthreads = (new_nthreads % set_nthreads) + 1;
802 if (new_nthreads == 1) {
803 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
804 "reservation to 1 thread\n",
805 master_tid));
806 return 1;
807 }
808 if (new_nthreads < set_nthreads) {
809 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
810 "reservation to %d threads\n",
811 master_tid, new_nthreads));
812 }
813 }
814 } else {
815 KMP_ASSERT(0);
816 }
817
818 // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
819 if (__kmp_nth + new_nthreads -
820 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
821 __kmp_max_nth) {
822 int tl_nthreads = __kmp_max_nth - __kmp_nth +
823 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
824 if (tl_nthreads <= 0) {
825 tl_nthreads = 1;
826 }
827
828 // If dyn-var is false, emit a 1-time warning.
829 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
830 __kmp_reserve_warn = 1;
831 __kmp_msg(kmp_ms_warning,
832 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
833 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
834 }
835 if (tl_nthreads == 1) {
836 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
837 "reduced reservation to 1 thread\n",
838 master_tid));
839 return 1;
840 }
841 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
842 "reservation to %d threads\n",
843 master_tid, tl_nthreads));
844 new_nthreads = tl_nthreads;
845 }
846
847 // Respect OMP_THREAD_LIMIT
848 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
849 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
850 if (cg_nthreads + new_nthreads -
851 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
852 max_cg_threads) {
853 int tl_nthreads = max_cg_threads - cg_nthreads +
854 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
855 if (tl_nthreads <= 0) {
856 tl_nthreads = 1;
857 }
858
859 // If dyn-var is false, emit a 1-time warning.
860 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
861 __kmp_reserve_warn = 1;
862 __kmp_msg(kmp_ms_warning,
863 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
864 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
865 }
866 if (tl_nthreads == 1) {
867 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
868 "reduced reservation to 1 thread\n",
869 master_tid));
870 return 1;
871 }
872 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
873 "reservation to %d threads\n",
874 master_tid, tl_nthreads));
875 new_nthreads = tl_nthreads;
876 }
877
878 // Check if the threads array is large enough, or needs expanding.
879 // See comment in __kmp_register_root() about the adjustment if
880 // __kmp_threads[0] == NULL.
881 capacity = __kmp_threads_capacity;
882 if (TCR_PTR(__kmp_threads[0]) == NULL) {
883 --capacity;
884 }
885 // If it is not for initializing the hidden helper team, we need to take
886 // __kmp_hidden_helper_threads_num out of the capacity because it is included
887 // in __kmp_threads_capacity.
888 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
889 capacity -= __kmp_hidden_helper_threads_num;
890 }
891 if (__kmp_nth + new_nthreads -
892 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
893 capacity) {
894 // Expand the threads array.
895 int slotsRequired = __kmp_nth + new_nthreads -
896 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
897 capacity;
898 int slotsAdded = __kmp_expand_threads(slotsRequired);
899 if (slotsAdded < slotsRequired) {
900 // The threads array was not expanded enough.
901 new_nthreads -= (slotsRequired - slotsAdded);
902 KMP_ASSERT(new_nthreads >= 1);
903
904 // If dyn-var is false, emit a 1-time warning.
905 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
906 __kmp_reserve_warn = 1;
907 if (__kmp_tp_cached) {
908 __kmp_msg(kmp_ms_warning,
909 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
910 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
911 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
912 } else {
913 __kmp_msg(kmp_ms_warning,
914 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
915 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
916 }
917 }
918 }
919 }
920
921#ifdef KMP_DEBUG
922 if (new_nthreads == 1) {
923 KC_TRACE(10,
924 ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
925 "dead roots and rechecking; requested %d threads\n",
926 __kmp_get_gtid(), set_nthreads));
927 } else {
928 KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
929 " %d threads\n",
930 __kmp_get_gtid(), new_nthreads, set_nthreads));
931 }
932#endif // KMP_DEBUG
933 return new_nthreads;
934}
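// --------------------------------------------------------------------------
// Illustrative sketch (not part of kmp_runtime.cpp): the clamping arithmetic
// applied above for KMP_DEVICE_THREAD_LIMIT / OMP_THREAD_LIMIT, as a
// standalone helper. 'clamp_new_threads' is hypothetical; 'existing' plays
// the role of __kmp_nth (or cg_nthreads), and 'reusable' the role of the
// root->r.r_active ? 1 : hot-team-size term.
static int clamp_new_threads(int requested, int existing, int reusable,
                             int limit) {
  // Threads that would exist after the fork: those already running, minus
  // the ones the new team can reuse, plus the ones being requested.
  if (existing + requested - reusable <= limit)
    return requested;                 // fits under the limit, keep the request
  int allowed = limit - existing + reusable;
  return allowed > 0 ? allowed : 1;   // never go below a serial team of one
}

// Example: 4 threads already exist, 1 is reusable, the limit is 8, and 6 are
// requested -> 4 + 6 - 1 = 9 > 8, so the reservation is trimmed:
// clamp_new_threads(6, 4, 1, 8) == 5.
// --------------------------------------------------------------------------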
935
936/* Allocate threads from the thread pool and assign them to the new team. We are
937 assured that there are enough threads available, because we checked on that
938 earlier within critical section forkjoin */
939static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
940 kmp_info_t *master_th, int master_gtid,
941 int fork_teams_workers) {
942 int i;
943 int use_hot_team;
944
945 KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
946 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
947 KMP_MB();
948
949 /* first, let's setup the primary thread */
950 master_th->th.th_info.ds.ds_tid = 0;
951 master_th->th.th_team = team;
952 master_th->th.th_team_nproc = team->t.t_nproc;
953 master_th->th.th_team_master = master_th;
954 master_th->th.th_team_serialized = FALSE;
955 master_th->th.th_dispatch = &team->t.t_dispatch[0];
956
957/* make sure we are not the optimized hot team */
958#if KMP_NESTED_HOT_TEAMS
959 use_hot_team = 0;
960 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
961 if (hot_teams) { // hot teams array is not allocated if
962 // KMP_HOT_TEAMS_MAX_LEVEL=0
963 int level = team->t.t_active_level - 1; // index in array of hot teams
964 if (master_th->th.th_teams_microtask) { // are we inside the teams?
965 if (master_th->th.th_teams_size.nteams > 1) {
966 ++level; // level was not increased in teams construct for
967 // team_of_masters
968 }
969 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
970 master_th->th.th_teams_level == team->t.t_level) {
971 ++level; // level was not increased in teams construct for
972 // team_of_workers before the parallel
973 } // team->t.t_level will be increased inside parallel
974 }
975 if (level < __kmp_hot_teams_max_level) {
976 if (hot_teams[level].hot_team) {
977 // hot team has already been allocated for given level
978 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
979 use_hot_team = 1; // the team is ready to use
980 } else {
981 use_hot_team = 0; // AC: threads are not allocated yet
982 hot_teams[level].hot_team = team; // remember new hot team
983 hot_teams[level].hot_team_nth = team->t.t_nproc;
984 }
985 } else {
986 use_hot_team = 0;
987 }
988 }
989#else
990 use_hot_team = team == root->r.r_hot_team;
991#endif
992 if (!use_hot_team) {
993
994 /* install the primary thread */
995 team->t.t_threads[0] = master_th;
996 __kmp_initialize_info(master_th, team, 0, master_gtid);
997
998 /* now, install the worker threads */
999 for (i = 1; i < team->t.t_nproc; i++) {
1000
1001 /* fork or reallocate a new thread and install it in team */
1002 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
1003 team->t.t_threads[i] = thr;
1004 KMP_DEBUG_ASSERT(thr);
1005 KMP_DEBUG_ASSERT(thr->th.th_team == team);
1006 /* align team and thread arrived states */
1007 KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
1008 "T#%d(%d:%d) join =%llu, plain=%llu\n",
1009 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
1010 __kmp_gtid_from_tid(i, team), team->t.t_id, i,
1011 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
1012 team->t.t_bar[bs_plain_barrier].b_arrived));
1013 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1014 thr->th.th_teams_level = master_th->th.th_teams_level;
1015 thr->th.th_teams_size = master_th->th.th_teams_size;
1016 { // Initialize threads' barrier data.
1017 int b;
1018 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1019 for (b = 0; b < bs_last_barrier; ++b) {
1020 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1021 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
1022#if USE_DEBUGGER
1023 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1024#endif
1025 }
1026 }
1027 }
1028
1029#if KMP_AFFINITY_SUPPORTED
1030 // Do not partition the places list for teams construct workers who
1031 // haven't actually been forked to do real work yet. This partitioning
1032 // will take place in the parallel region nested within the teams construct.
1033 if (!fork_teams_workers) {
1034 __kmp_partition_places(team);
1035 }
1036#endif
1037
1038 if (team->t.t_nproc > 1 &&
1039 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1040 team->t.b->update_num_threads(team->t.t_nproc);
1041 __kmp_add_threads_to_team(team, team->t.t_nproc);
1042 }
1043 }
1044
1045 // Take care of primary thread's task state
1046 if (__kmp_tasking_mode != tskm_immediate_exec) {
1047 if (use_hot_team) {
1048 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
1049 KA_TRACE(
1050 20,
1051 ("__kmp_fork_team_threads: Primary T#%d pushing task_team %p / team "
1052 "%p, new task_team %p / team %p\n",
1053 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
1054 team->t.t_parent, team->t.t_task_team[master_th->th.th_task_state],
1055 team));
1056
1057 // Store primary thread's current task state on new team
1058 KMP_CHECK_UPDATE(team->t.t_primary_task_state,
1059 master_th->th.th_task_state);
1060
1061 // Restore primary thread's task state to hot team's state
1062 // by using thread 1's task state
1063 if (team->t.t_nproc > 1) {
1064 KMP_DEBUG_ASSERT(team->t.t_threads[1]->th.th_task_state == 0 ||
1065 team->t.t_threads[1]->th.th_task_state == 1);
1066 KMP_CHECK_UPDATE(master_th->th.th_task_state,
1067 team->t.t_threads[1]->th.th_task_state);
1068 } else {
1069 master_th->th.th_task_state = 0;
1070 }
1071 } else {
1072 // Store primary thread's current task_state on new team
1073 KMP_CHECK_UPDATE(team->t.t_primary_task_state,
1074 master_th->th.th_task_state);
1075 // Are not using hot team, so set task state to 0.
1076 master_th->th.th_task_state = 0;
1077 }
1078 }
1079
1080 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1081 for (i = 0; i < team->t.t_nproc; i++) {
1082 kmp_info_t *thr = team->t.t_threads[i];
1083 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1084 thr->th.th_prev_level != team->t.t_level) {
1085 team->t.t_display_affinity = 1;
1086 break;
1087 }
1088 }
1089 }
1090
1091 KMP_MB();
1092}
1093
1094#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1095// Propagate any changes to the floating point control registers out to the team
1096// We try to avoid unnecessary writes to the relevant cache line in the team
1097// structure, so we don't make changes unless they are needed.
1098inline static void propagateFPControl(kmp_team_t *team) {
1099 if (__kmp_inherit_fp_control) {
1100 kmp_int16 x87_fpu_control_word;
1101 kmp_uint32 mxcsr;
1102
1103 // Get primary thread's values of FPU control flags (both X87 and vector)
1104 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1105 __kmp_store_mxcsr(&mxcsr);
1106 mxcsr &= KMP_X86_MXCSR_MASK;
1107
1108 // There is no point looking at t_fp_control_saved here.
1109 // If it is TRUE, we still have to update the values if they are different
1110 // from those we now have. If it is FALSE we didn't save anything yet, but
1111 // our objective is the same. We have to ensure that the values in the team
1112 // are the same as those we have.
1113 // So, this code achieves what we need whether or not t_fp_control_saved is
1114 // true. By checking whether the value needs updating we avoid unnecessary
1115 // writes that would put the cache-line into a written state, causing all
1116 // threads in the team to have to read it again.
1117 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1118 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1119 // Although we don't use this value, other code in the runtime wants to know
1120 // whether it should restore them. So we must ensure it is correct.
1121 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1122 } else {
1123 // Similarly here. Don't write to this cache-line in the team structure
1124 // unless we have to.
1125 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
1126 }
1127}
1128
1129// Do the opposite, setting the hardware registers to the updated values from
1130// the team.
1131inline static void updateHWFPControl(kmp_team_t *team) {
1132 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1133 // Only reset the fp control regs if they have been changed in the team.
1134 // the parallel region that we are exiting.
1135 kmp_int16 x87_fpu_control_word;
1136 kmp_uint32 mxcsr;
1137 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1138 __kmp_store_mxcsr(&mxcsr);
1139 mxcsr &= KMP_X86_MXCSR_MASK;
1140
1141 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1142 __kmp_clear_x87_fpu_status_word();
1143 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1144 }
1145
1146 if (team->t.t_mxcsr != mxcsr) {
1147 __kmp_load_mxcsr(&team->t.t_mxcsr);
1148 }
1149 }
1150}
1151#else
1152#define propagateFPControl(x) ((void)0)
1153#define updateHWFPControl(x) ((void)0)
1154#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
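// --------------------------------------------------------------------------
// Illustrative sketch (not part of kmp_runtime.cpp): the check-before-write
// idiom behind KMP_CHECK_UPDATE, shown as a hypothetical template helper.
// Skipping the store when the value is unchanged keeps the team's cache line
// in a shared (unmodified) state, so the other threads that read it are not
// forced to re-fetch it.
template <typename T>
static inline void check_update(T &dst, const T &src) {
  if (dst != src) // only dirty the cache line when the value really changed
    dst = src;
}
// --------------------------------------------------------------------------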
1155
1156static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1157 int realloc); // forward declaration
1158
1159/* Run a parallel region that has been serialized, so runs only in a team of the
1160 single primary thread. */
1161void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1162 kmp_info_t *this_thr;
1163 kmp_team_t *serial_team;
1164
1165 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1166
1167 /* Skip all this code for autopar serialized loops since it results in
1168 unacceptable overhead */
1169 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1170 return;
1171
1172 if (!TCR_4(__kmp_init_parallel))
1173 __kmp_parallel_initialize();
1174 __kmp_resume_if_soft_paused();
1175
1176 this_thr = __kmp_threads[global_tid];
1177 serial_team = this_thr->th.th_serial_team;
1178
1179 /* utilize the serialized team held by this thread */
1180 KMP_DEBUG_ASSERT(serial_team);
1181 KMP_MB();
1182
1183 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1184 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1185 proc_bind = proc_bind_false;
1186 } else if (proc_bind == proc_bind_default) {
1187 // No proc_bind clause was specified, so use the current value
1188 // of proc-bind-var for this parallel region.
1189 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1190 }
1191 // Reset for next parallel region
1192 this_thr->th.th_set_proc_bind = proc_bind_default;
1193
1194 // Reset num_threads for next parallel region
1195 this_thr->th.th_set_nproc = 0;
1196
1197#if OMPT_SUPPORT
1198 ompt_data_t ompt_parallel_data = ompt_data_none;
1199 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1200 if (ompt_enabled.enabled &&
1201 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1202
1203 ompt_task_info_t *parent_task_info;
1204 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1205
1206 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1207 if (ompt_enabled.ompt_callback_parallel_begin) {
1208 int team_size = 1;
1209
1210 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1211 &(parent_task_info->task_data), &(parent_task_info->frame),
1212 &ompt_parallel_data, team_size,
1213 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1214 }
1215 }
1216#endif // OMPT_SUPPORT
1217
1218 if (this_thr->th.th_team != serial_team) {
1219 // Nested level will be an index in the nested nthreads array
1220 int level = this_thr->th.th_team->t.t_level;
1221
1222 if (serial_team->t.t_serialized) {
1223 /* this serial team was already used
1224 TODO increase performance by making these locks more specific */
1225 kmp_team_t *new_team;
1226
1227 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1228
1229 new_team =
1230 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1231#if OMPT_SUPPORT
1232 ompt_parallel_data,
1233#endif
1234 proc_bind, &this_thr->th.th_current_task->td_icvs,
1235 0 USE_NESTED_HOT_ARG(NULL));
1236 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1237 KMP_ASSERT(new_team);
1238
1239 /* setup new serialized team and install it */
1240 new_team->t.t_threads[0] = this_thr;
1241 new_team->t.t_parent = this_thr->th.th_team;
1242 serial_team = new_team;
1243 this_thr->th.th_serial_team = serial_team;
1244
1245 KF_TRACE(
1246 10,
1247 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1248 global_tid, serial_team));
1249
1250 /* TODO the above breaks the requirement that if we run out of resources,
1251 then we can still guarantee that serialized teams are ok, since we may
1252 need to allocate a new one */
1253 } else {
1254 KF_TRACE(
1255 10,
1256 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1257 global_tid, serial_team));
1258 }
1259
1260 /* we have to initialize this serial team */
1261 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1262 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1263 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1264 serial_team->t.t_ident = loc;
1265 serial_team->t.t_serialized = 1;
1266 serial_team->t.t_nproc = 1;
1267 serial_team->t.t_parent = this_thr->th.th_team;
1268 // Save previous team's task state on serial team structure
1269 serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
1270 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1271 this_thr->th.th_team = serial_team;
1272 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1273
1274 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1275 this_thr->th.th_current_task));
1276 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1277 this_thr->th.th_current_task->td_flags.executing = 0;
1278
1279 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1280
1281 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1282 implicit task for each serialized task represented by
1283 team->t.t_serialized? */
1284 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1285 &this_thr->th.th_current_task->td_parent->td_icvs);
1286
1287 // Thread value exists in the nested nthreads array for the next nested
1288 // level
1289 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1290 this_thr->th.th_current_task->td_icvs.nproc =
1291 __kmp_nested_nth.nth[level + 1];
1292 }
1293
1294 if (__kmp_nested_proc_bind.used &&
1295 (level + 1 < __kmp_nested_proc_bind.used)) {
1296 this_thr->th.th_current_task->td_icvs.proc_bind =
1297 __kmp_nested_proc_bind.bind_types[level + 1];
1298 }
1299
1300#if USE_DEBUGGER
1301 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
1302#endif
1303 this_thr->th.th_info.ds.ds_tid = 0;
1304
1305 /* set thread cache values */
1306 this_thr->th.th_team_nproc = 1;
1307 this_thr->th.th_team_master = this_thr;
1308 this_thr->th.th_team_serialized = 1;
1309 this_thr->th.th_task_team = NULL;
1310 this_thr->th.th_task_state = 0;
1311
1312 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1313 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1314 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
1315
1316 propagateFPControl(serial_team);
1317
1318 /* check if we need to allocate dispatch buffers stack */
1319 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1320 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1321 serial_team->t.t_dispatch->th_disp_buffer =
1322 (dispatch_private_info_t *)__kmp_allocate(
1323 sizeof(dispatch_private_info_t));
1324 }
1325 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1326
1327 KMP_MB();
1328
1329 } else {
1330 /* this serialized team is already being used,
1331 * that's fine, just add another nested level */
1332 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1333 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1334 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1335 ++serial_team->t.t_serialized;
1336 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1337
1338 // Nested level will be an index in the nested nthreads array
1339 int level = this_thr->th.th_team->t.t_level;
1340 // Thread value exists in the nested nthreads array for the next nested
1341 // level
1342 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1343 this_thr->th.th_current_task->td_icvs.nproc =
1344 __kmp_nested_nth.nth[level + 1];
1345 }
1346 serial_team->t.t_level++;
1347 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1348 "of serial team %p to %d\n",
1349 global_tid, serial_team, serial_team->t.t_level));
1350
1351 /* allocate/push dispatch buffers stack */
1352 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1353 {
1354 dispatch_private_info_t *disp_buffer =
1355 (dispatch_private_info_t *)__kmp_allocate(
1356 sizeof(dispatch_private_info_t));
1357 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1358 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1359 }
1360 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1361
1362 /* allocate/push task team stack */
1363 __kmp_push_task_team_node(this_thr, serial_team);
1364
1365 KMP_MB();
1366 }
1367 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1368
1369 // Perform the display affinity functionality for
1370 // serialized parallel regions
1371 if (__kmp_display_affinity) {
1372 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1373 this_thr->th.th_prev_num_threads != 1) {
1374 // NULL means use the affinity-format-var ICV
1375 __kmp_aux_display_affinity(global_tid, NULL);
1376 this_thr->th.th_prev_level = serial_team->t.t_level;
1377 this_thr->th.th_prev_num_threads = 1;
1378 }
1379 }
1380
1381 if (__kmp_env_consistency_check)
1382 __kmp_push_parallel(global_tid, NULL);
1383#if OMPT_SUPPORT
1384 serial_team->t.ompt_team_info.master_return_address = codeptr;
1385 if (ompt_enabled.enabled &&
1386 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1387 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1388 OMPT_GET_FRAME_ADDRESS(0);
1389
1390 ompt_lw_taskteam_t lw_taskteam;
1391 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1392 &ompt_parallel_data, codeptr);
1393
1394 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1395 // don't use lw_taskteam after linking. content was swapped
1396
1397 /* OMPT implicit task begin */
1398 if (ompt_enabled.ompt_callback_implicit_task) {
1399 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1400 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1401 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
1402 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
1403 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1404 __kmp_tid_from_gtid(global_tid);
1405 }
1406
1407 /* OMPT state */
1408 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1409 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1410 OMPT_GET_FRAME_ADDRESS(0);
1411 }
1412#endif
1413}
1414
1415// Test if this fork is for a team closely nested in a teams construct
1416static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
1417 microtask_t microtask, int level,
1418 int teams_level, kmp_va_list ap) {
1419 return (master_th->th.th_teams_microtask && ap &&
1420 microtask != (microtask_t)__kmp_teams_master && level == teams_level);
1421}
1422
1423// Test if this fork is for the teams construct, i.e. to form the outer league
1424// of teams
1425static inline bool __kmp_is_entering_teams(int active_level, int level,
1426 int teams_level, kmp_va_list ap) {
1427 return ((ap == NULL && active_level == 0) ||
1428 (ap && teams_level > 0 && teams_level == level));
1429}
1430
1431// AC: This is start of parallel that is nested inside teams construct.
1432// The team is actual (hot), all workers are ready at the fork barrier.
1433// No lock needed to initialize the team a bit, then free workers.
1434static inline int
1435__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
1436 kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
1437 enum fork_context_e call_context, microtask_t microtask,
1438 launch_t invoker, int master_set_numthreads, int level,
1439#if OMPT_SUPPORT
1440 ompt_data_t ompt_parallel_data, void *return_address,
1441#endif
1442 kmp_va_list ap) {
1443 void **argv;
1444 int i;
1445
1446 parent_team->t.t_ident = loc;
1447 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1448 parent_team->t.t_argc = argc;
1449 argv = (void **)parent_team->t.t_argv;
1450 for (i = argc - 1; i >= 0; --i) {
1451 *argv++ = va_arg(kmp_va_deref(ap), void *);
1452 }
1453 // Increment our nested depth levels, but not increase the serialization
1454 if (parent_team == master_th->th.th_serial_team) {
1455 // AC: we are in serialized parallel
1456 __kmpc_serialized_parallel(loc, gtid);
1457 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1458
1459 if (call_context == fork_context_gnu) {
1460 // AC: need to decrement t_serialized for enquiry functions to work
1461 // correctly, will restore at join time
1462 parent_team->t.t_serialized--;
1463 return TRUE;
1464 }
1465
1466#if OMPD_SUPPORT
1467 parent_team->t.t_pkfn = microtask;
1468#endif
1469
1470#if OMPT_SUPPORT
1471 void *dummy;
1472 void **exit_frame_p;
1473 ompt_data_t *implicit_task_data;
1474 ompt_lw_taskteam_t lw_taskteam;
1475
1476 if (ompt_enabled.enabled) {
1477 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1478 &ompt_parallel_data, return_address);
1479 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1480
1481 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1482 // Don't use lw_taskteam after linking. Content was swapped.
1483
1484 /* OMPT implicit task begin */
1485 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1486 if (ompt_enabled.ompt_callback_implicit_task) {
1487 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1488 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1489 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
1490 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1491 }
1492
1493 /* OMPT state */
1494 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1495 } else {
1496 exit_frame_p = &dummy;
1497 }
1498#endif
1499
1500 // AC: need to decrement t_serialized for enquiry functions to work
1501 // correctly, will restore at join time
1502 parent_team->t.t_serialized--;
1503
1504 {
1505 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1506 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1507 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1508#if OMPT_SUPPORT
1509 ,
1510 exit_frame_p
1511#endif
1512 );
1513 }
1514
1515#if OMPT_SUPPORT
1516 if (ompt_enabled.enabled) {
1517 *exit_frame_p = NULL;
1518 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1519 if (ompt_enabled.ompt_callback_implicit_task) {
1520 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1521 ompt_scope_end, NULL, implicit_task_data, 1,
1522 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1523 }
1524 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1525 __ompt_lw_taskteam_unlink(master_th);
1526 if (ompt_enabled.ompt_callback_parallel_end) {
1527 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1528 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1529 OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
1530 }
1531 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1532 }
1533#endif
1534 return TRUE;
1535 }
1536
1537 parent_team->t.t_pkfn = microtask;
1538 parent_team->t.t_invoke = invoker;
1539 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1540 parent_team->t.t_active_level++;
1541 parent_team->t.t_level++;
1542 parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
1543
1544 // If the threads allocated to the team are less than the thread limit, update
1545 // the thread limit here. th_teams_size.nth is specific to this team nested
1546 // in a teams construct, the team is fully created, and we're about to do
1547 // the actual fork. Best to do this here so that the subsequent uses below
1548 // and in the join have the correct value.
1549 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
1550
1551#if OMPT_SUPPORT
1552 if (ompt_enabled.enabled) {
1553 ompt_lw_taskteam_t lw_taskteam;
1554 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
1555 return_address);
1556 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
1557 }
1558#endif
1559
1560 /* Change number of threads in the team if requested */
1561 if (master_set_numthreads) { // The parallel has num_threads clause
1562 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1563 // AC: only can reduce number of threads dynamically, can't increase
1564 kmp_info_t **other_threads = parent_team->t.t_threads;
1565 // NOTE: if using distributed barrier, we need to run this code block
1566 // even when the team size appears not to have changed from the max.
1567 int old_proc = master_th->th.th_teams_size.nth;
1568 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1569 __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
1570 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1571 }
1572 parent_team->t.t_nproc = master_set_numthreads;
1573 for (i = 0; i < master_set_numthreads; ++i) {
1574 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1575 }
1576 }
1577 // Keep extra threads hot in the team for possible next parallels
1578 master_th->th.th_set_nproc = 0;
1579 }
1580
1581#if USE_DEBUGGER
1582 if (__kmp_debugging) { // Let debugger override number of threads.
1583 int nth = __kmp_omp_num_threads(loc);
1584 if (nth > 0) { // 0 means debugger doesn't want to change num threads
1585 master_set_numthreads = nth;
1586 }
1587 }
1588#endif
1589
1590 // Figure out the proc_bind policy for the nested parallel within teams
1591 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1592 // proc_bind_default means don't update
1593 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1594 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1595 proc_bind = proc_bind_false;
1596 } else {
1597 // No proc_bind clause specified; use current proc-bind-var
1598 if (proc_bind == proc_bind_default) {
1599 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1600 }
1601 /* else: The proc_bind policy was specified explicitly on parallel clause.
1602 This overrides proc-bind-var for this parallel region, but does not
1603 change proc-bind-var. */
1604 // Figure the value of proc-bind-var for the child threads.
1605 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1606 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1607 master_th->th.th_current_task->td_icvs.proc_bind)) {
1608 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1609 }
1610 }
1611 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
1612 // Need to change the bind-var ICV to correct value for each implicit task
1613 if (proc_bind_icv != proc_bind_default &&
1614 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1615 kmp_info_t **other_threads = parent_team->t.t_threads;
1616 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1617 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1618 }
1619 }
1620 // Reset for next parallel region
1621 master_th->th.th_set_proc_bind = proc_bind_default;
1622
1623#if USE_ITT_BUILD && USE_ITT_NOTIFY
1624 if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
1625 KMP_ITT_DEBUG) &&
1626 __kmp_forkjoin_frames_mode == 3 &&
1627 parent_team->t.t_active_level == 1 // only report frames at level 1
1628 && master_th->th.th_teams_size.nteams == 1) {
1629 kmp_uint64 tmp_time = __itt_get_timestamp();
1630 master_th->th.th_frame_time = tmp_time;
1631 parent_team->t.t_region_time = tmp_time;
1632 }
1633 if (__itt_stack_caller_create_ptr) {
1634 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1635 // create new stack stitching id before entering fork barrier
1636 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1637 }
1638#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1639#if KMP_AFFINITY_SUPPORTED
1640 __kmp_partition_places(parent_team);
1641#endif
1642
1643 KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
1644 "master_th=%p, gtid=%d\n",
1645 root, parent_team, master_th, gtid));
1646 __kmp_internal_fork(loc, gtid, parent_team);
1647 KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
1648 "master_th=%p, gtid=%d\n",
1649 root, parent_team, master_th, gtid));
1650
1651 if (call_context == fork_context_gnu)
1652 return TRUE;
1653
1654 /* Invoke microtask for PRIMARY thread */
1655 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
1656 parent_team->t.t_id, parent_team->t.t_pkfn));
1657
1658 if (!parent_team->t.t_invoke(gtid)) {
1659 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
1660 }
1661 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
1662 parent_team->t.t_id, parent_team->t.t_pkfn));
1663 KMP_MB(); /* Flush all pending memory write invalidates. */
1664
1665 KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
1666
1667 return TRUE;
1668}
1669
1670// Create a serialized parallel region
1671static inline int
1672__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
1673 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1674 kmp_info_t *master_th, kmp_team_t *parent_team,
1675#if OMPT_SUPPORT
1676 ompt_data_t *ompt_parallel_data, void **return_address,
1677 ompt_data_t **parent_task_data,
1678#endif
1679 kmp_va_list ap) {
1680 kmp_team_t *team;
1681 int i;
1682 void **argv;
1683
1684/* josh todo: hypothetical question: what do we do for OS X*? */
1685#if KMP_OS_LINUX && \
1686 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1687 SimpleVLA<void *> args(argc);
1688#else
1689 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1690#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1691 KMP_ARCH_AARCH64) */
1692
1693 KA_TRACE(
1694 20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
1695
1696 __kmpc_serialized_parallel(loc, gtid);
1697
1698#if OMPD_SUPPORT
1699 master_th->th.th_serial_team->t.t_pkfn = microtask;
1700#endif
1701
1702 if (call_context == fork_context_intel) {
1703 /* TODO this sucks, use the compiler itself to pass args! :) */
1704 master_th->th.th_serial_team->t.t_ident = loc;
1705 if (!ap) {
1706 // revert change made in __kmpc_serialized_parallel()
1707 master_th->th.th_serial_team->t.t_level--;
1708// Get args from parent team for teams construct
1709
1710#if OMPT_SUPPORT
1711 void *dummy;
1712 void **exit_frame_p;
1713 ompt_task_info_t *task_info;
1714 ompt_lw_taskteam_t lw_taskteam;
1715
1716 if (ompt_enabled.enabled) {
1717 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1718 ompt_parallel_data, *return_address);
1719
1720 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1721 // don't use lw_taskteam after linking. content was swapped
1722 task_info = OMPT_CUR_TASK_INFO(master_th);
1723 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1724 if (ompt_enabled.ompt_callback_implicit_task) {
1725 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1726 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1727 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1728 &(task_info->task_data), 1,
1729 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1730 }
1731
1732 /* OMPT state */
1733 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1734 } else {
1735 exit_frame_p = &dummy;
1736 }
1737#endif
1738
1739 {
1740 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1741 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1742 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1743#if OMPT_SUPPORT
1744 ,
1745 exit_frame_p
1746#endif
1747 );
1748 }
1749
1750#if OMPT_SUPPORT
1751 if (ompt_enabled.enabled) {
1752 *exit_frame_p = NULL;
1753 if (ompt_enabled.ompt_callback_implicit_task) {
1754 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1755 ompt_scope_end, NULL, &(task_info->task_data), 1,
1756 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1757 }
1758 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1759 __ompt_lw_taskteam_unlink(master_th);
1760 if (ompt_enabled.ompt_callback_parallel_end) {
1761 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1762 ompt_parallel_data, *parent_task_data,
1763 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1764 }
1765 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1766 }
1767#endif
1768 } else if (microtask == (microtask_t)__kmp_teams_master) {
1769 KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
1770 team = master_th->th.th_team;
1771 // team->t.t_pkfn = microtask;
1772 team->t.t_invoke = invoker;
1773 __kmp_alloc_argv_entries(argc, team, TRUE);
1774 team->t.t_argc = argc;
1775 argv = (void **)team->t.t_argv;
1776 for (i = argc - 1; i >= 0; --i)
1777 *argv++ = va_arg(kmp_va_deref(ap), void *);
1778 // AC: revert change made in __kmpc_serialized_parallel()
1779 // because initial code in teams should have level=0
1780 team->t.t_level--;
1781 // AC: call special invoker for outer "parallel" of teams construct
1782 invoker(gtid);
1783#if OMPT_SUPPORT
1784 if (ompt_enabled.enabled) {
1785 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
1786 if (ompt_enabled.ompt_callback_implicit_task) {
1787 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1788 ompt_scope_end, NULL, &(task_info->task_data), 0,
1789 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1790 }
1791 if (ompt_enabled.ompt_callback_parallel_end) {
1792 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1793 ompt_parallel_data, *parent_task_data,
1794 OMPT_INVOKER(call_context) | ompt_parallel_league,
1795 *return_address);
1796 }
1797 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1798 }
1799#endif
1800 } else {
1801 argv = args;
1802 for (i = argc - 1; i >= 0; --i)
1803 *argv++ = va_arg(kmp_va_deref(ap), void *);
1804 KMP_MB();
1805
1806#if OMPT_SUPPORT
1807 void *dummy;
1808 void **exit_frame_p;
1809 ompt_task_info_t *task_info;
1810 ompt_lw_taskteam_t lw_taskteam;
1811 ompt_data_t *implicit_task_data;
1812
1813 if (ompt_enabled.enabled) {
1814 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1815 ompt_parallel_data, *return_address);
1816 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1817 // don't use lw_taskteam after linking. content was swapped
1818 task_info = OMPT_CUR_TASK_INFO(master_th);
1819 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1820
1821 /* OMPT implicit task begin */
1822 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1823 if (ompt_enabled.ompt_callback_implicit_task) {
1824 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1825 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1826 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1827 ompt_task_implicit);
1828 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1829 }
1830
1831 /* OMPT state */
1832 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1833 } else {
1834 exit_frame_p = &dummy;
1835 }
1836#endif
1837
1838 {
1839 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1840 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1841 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1842#if OMPT_SUPPORT
1843 ,
1844 exit_frame_p
1845#endif
1846 );
1847 }
1848
1849#if OMPT_SUPPORT
1850 if (ompt_enabled.enabled) {
1851 *exit_frame_p = NULL;
1852 if (ompt_enabled.ompt_callback_implicit_task) {
1853 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1854 ompt_scope_end, NULL, &(task_info->task_data), 1,
1855 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1856 }
1857
1858 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1859 __ompt_lw_taskteam_unlink(master_th);
1860 if (ompt_enabled.ompt_callback_parallel_end) {
1861 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1862 ompt_parallel_data, *parent_task_data,
1863 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1864 }
1865 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1866 }
1867#endif
1868 }
1869 } else if (call_context == fork_context_gnu) {
1870#if OMPT_SUPPORT
1871 if (ompt_enabled.enabled) {
1872 ompt_lw_taskteam_t lwt;
1873 __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
1874 *return_address);
1875
1876 lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
1877 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1878 }
1879// don't use lw_taskteam after linking. content was swapped
1880#endif
1881
1882 // we were called from GNU native code
1883 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1884 return FALSE;
1885 } else {
1886 KMP_ASSERT2(call_context < fork_context_last,
1887 "__kmp_serial_fork_call: unknown fork_context parameter");
1888 }
1889
1890 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1891 KMP_MB();
1892 return FALSE;
1893}
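// Illustrative sketch (excluded from compilation with #if 0): the most common
// way user code reaches the serialized path above is a nested parallel region
// that exceeds max-active-levels, so the encountering thread runs the region
// as a team of one. Standard OpenMP API only; the exact libomp entry points
// involved are assumed from the surrounding code, not asserted.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(1); // only one level may be active
  #pragma omp parallel num_threads(2)
  {
    // This inner fork exceeds max-active-levels, so it is serialized.
    #pragma omp parallel num_threads(2)
    {
      printf("level=%d active=%d team=%d\n", omp_get_level(),
             omp_get_active_level(), omp_get_num_threads()); // team=1 expected
    }
  }
  return 0;
}
#endif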
1894
1895/* most of the work for a fork */
1896/* return true if we really went parallel, false if serialized */
1897int __kmp_fork_call(ident_t *loc, int gtid,
1898 enum fork_context_e call_context, // Intel, GNU, ...
1899 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1900 kmp_va_list ap) {
1901 void **argv;
1902 int i;
1903 int master_tid;
1904 int master_this_cons;
1905 kmp_team_t *team;
1906 kmp_team_t *parent_team;
1907 kmp_info_t *master_th;
1908 kmp_root_t *root;
1909 int nthreads;
1910 int master_active;
1911 int master_set_numthreads;
1912 int task_thread_limit = 0;
1913 int level;
1914 int active_level;
1915 int teams_level;
1916#if KMP_NESTED_HOT_TEAMS
1917 kmp_hot_team_ptr_t **p_hot_teams;
1918#endif
1919 { // KMP_TIME_BLOCK
1921 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
1922
1923 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1924 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1925 /* Some systems prefer the stack for the root thread(s) to start with */
1926 /* some gap from the parent stack to prevent false sharing. */
1927 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1928 /* These 2 lines below are so this does not get optimized out */
1929 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1930 __kmp_stkpadding += (short)((kmp_int64)dummy);
1931 }
1932
1933 /* initialize if needed */
1934 KMP_DEBUG_ASSERT(
1935 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1939
1940 /* setup current data */
1941 // AC: potentially unsafe, not in sync with library shutdown,
1942 // __kmp_threads can be freed
1943 master_th = __kmp_threads[gtid];
1944
1945 parent_team = master_th->th.th_team;
1946 master_tid = master_th->th.th_info.ds.ds_tid;
1947 master_this_cons = master_th->th.th_local.this_construct;
1948 root = master_th->th.th_root;
1949 master_active = root->r.r_active;
1950 master_set_numthreads = master_th->th.th_set_nproc;
1951 task_thread_limit =
1952 master_th->th.th_current_task->td_icvs.task_thread_limit;
1953
1954#if OMPT_SUPPORT
1955 ompt_data_t ompt_parallel_data = ompt_data_none;
1956 ompt_data_t *parent_task_data;
1957 ompt_frame_t *ompt_frame;
1958 void *return_address = NULL;
1959
1960 if (ompt_enabled.enabled) {
1961 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1962 NULL, NULL);
1963 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1964 }
1965#endif
1966
1967 // Assign affinity to root thread if it hasn't happened yet
1968 __kmp_assign_root_init_mask();
1969
1970 // Nested level will be an index in the nested nthreads array
1971 level = parent_team->t.t_level;
1972 // used to launch non-serial teams even if nested is not allowed
1973 active_level = parent_team->t.t_active_level;
1974 // needed to check nesting inside the teams
1975 teams_level = master_th->th.th_teams_level;
1976#if KMP_NESTED_HOT_TEAMS
1977 p_hot_teams = &master_th->th.th_hot_teams;
1978 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1979 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1980 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1981 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1982 // it is either actual or not needed (when active_level > 0)
1983 (*p_hot_teams)[0].hot_team_nth = 1;
1984 }
1985#endif
1986
1987#if OMPT_SUPPORT
1988 if (ompt_enabled.enabled) {
1989 if (ompt_enabled.ompt_callback_parallel_begin) {
1990 int team_size = master_set_numthreads
1991 ? master_set_numthreads
1992 : get__nproc_2(parent_team, master_tid);
1993 int flags = OMPT_INVOKER(call_context) |
1994 ((microtask == (microtask_t)__kmp_teams_master)
1995 ? ompt_parallel_league
1996 : ompt_parallel_team);
1997 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1998 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
1999 return_address);
2000 }
2001 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2002 }
2003#endif
2004
2005 master_th->th.th_ident = loc;
2006
2007 // Parallel closely nested in teams construct:
2008 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
2009 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
2010 call_context, microtask, invoker,
2011 master_set_numthreads, level,
2012#if OMPT_SUPPORT
2013 ompt_parallel_data, return_address,
2014#endif
2015 ap);
2016 } // End parallel closely nested in teams construct
2017
2018 // Need this to happen before we determine the number of threads, not while
2019 // we are allocating the team
2020 //__kmp_push_current_task_to_thread(master_th, parent_team, 0);
2021
2022 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(parent_team, master_th);
2023
2024 // Determine the number of threads
2025 int enter_teams =
2026 __kmp_is_entering_teams(active_level, level, teams_level, ap);
2027 if ((!enter_teams &&
2028 (parent_team->t.t_active_level >=
2029 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
2030 (__kmp_library == library_serial)) {
2031 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
2032 nthreads = 1;
2033 } else {
2034 nthreads = master_set_numthreads
2035 ? master_set_numthreads
2036 // TODO: get nproc directly from current task
2037 : get__nproc_2(parent_team, master_tid);
2038 // Use the thread_limit set for the current target task if exists, else go
2039 // with the deduced nthreads
2040 nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
2041 ? task_thread_limit
2042 : nthreads;
2043 // Check if we need to take forkjoin lock? (no need for serialized
2044 // parallel out of teams construct).
2045 if (nthreads > 1) {
2046 /* determine how many new threads we can use */
2047 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2048 /* AC: If we execute teams from parallel region (on host), then teams
2049 should be created but each can only have 1 thread if nesting is
2050 disabled. If teams called from serial region, then teams and their
2051 threads should be created regardless of the nesting setting. */
2052 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
2053 nthreads, enter_teams);
2054 if (nthreads == 1) {
2055 // Free lock for single thread execution here; for multi-thread
2056 // execution it will be freed later after team of threads created
2057 // and initialized
2058 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2059 }
2060 }
2061 }
2062 KMP_DEBUG_ASSERT(nthreads > 0);
2063
2064 // If we temporarily changed the set number of threads then restore it now
2065 master_th->th.th_set_nproc = 0;
2066
2067 if (nthreads == 1) {
2068 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2069 invoker, master_th, parent_team,
2070#if OMPT_SUPPORT
2071 &ompt_parallel_data, &return_address,
2072 &parent_task_data,
2073#endif
2074 ap);
2075 } // if (nthreads == 1)
2076
2077 // GEH: only modify the executing flag in the case when not serialized
2078 // serialized case is handled in kmpc_serialized_parallel
2079 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
2080 "curtask=%p, curtask_max_aclevel=%d\n",
2081 parent_team->t.t_active_level, master_th,
2082 master_th->th.th_current_task,
2083 master_th->th.th_current_task->td_icvs.max_active_levels));
2084 // TODO: GEH - cannot do this assertion because root thread not set up as
2085 // executing
2086 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
2087 master_th->th.th_current_task->td_flags.executing = 0;
2088
2089 if (!master_th->th.th_teams_microtask || level > teams_level) {
2090 /* Increment our nested depth level */
2091 KMP_ATOMIC_INC(&root->r.r_in_parallel);
2092 }
2093
2094 // See if we need to make a copy of the ICVs.
2095 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2096 if ((level + 1 < __kmp_nested_nth.used) &&
2097 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
2098 nthreads_icv = __kmp_nested_nth.nth[level + 1];
2099 } else {
2100 nthreads_icv = 0; // don't update
2101 }
2102
2103 // Figure out the proc_bind_policy for the new team.
2104 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2105 // proc_bind_default means don't update
2106 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2107 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2108 proc_bind = proc_bind_false;
2109 } else {
2110 // No proc_bind clause specified; use current proc-bind-var for this
2111 // parallel region
2112 if (proc_bind == proc_bind_default) {
2113 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2114 }
2115 // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
2116 if (master_th->th.th_teams_microtask &&
2117 microtask == (microtask_t)__kmp_teams_master) {
2118 proc_bind = __kmp_teams_proc_bind;
2119 }
2120 /* else: The proc_bind policy was specified explicitly on parallel clause.
2121 This overrides proc-bind-var for this parallel region, but does not
2122 change proc-bind-var. */
2123 // Figure the value of proc-bind-var for the child threads.
2124 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2125 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2126 master_th->th.th_current_task->td_icvs.proc_bind)) {
2127 // Do not modify the proc bind icv for the two teams construct forks
2128 // They just let the proc bind icv pass through
2129 if (!master_th->th.th_teams_microtask ||
2130 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2131 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2132 }
2133 }
2134
2135 // Reset for next parallel region
2136 master_th->th.th_set_proc_bind = proc_bind_default;
2137
2138 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2139 kmp_internal_control_t new_icvs;
2140 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2141 new_icvs.next = NULL;
2142 if (nthreads_icv > 0) {
2143 new_icvs.nproc = nthreads_icv;
2144 }
2145 if (proc_bind_icv != proc_bind_default) {
2146 new_icvs.proc_bind = proc_bind_icv;
2147 }
2148
2149 /* allocate a new parallel team */
2150 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2151 team = __kmp_allocate_team(root, nthreads, nthreads,
2152#if OMPT_SUPPORT
2153 ompt_parallel_data,
2154#endif
2155 proc_bind, &new_icvs,
2156 argc USE_NESTED_HOT_ARG(master_th));
2157 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2158 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2159 } else {
2160 /* allocate a new parallel team */
2161 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2162 team = __kmp_allocate_team(root, nthreads, nthreads,
2163#if OMPT_SUPPORT
2164 ompt_parallel_data,
2165#endif
2166 proc_bind,
2167 &master_th->th.th_current_task->td_icvs,
2168 argc USE_NESTED_HOT_ARG(master_th));
2169 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2170 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2171 &master_th->th.th_current_task->td_icvs);
2172 }
2173 KF_TRACE(
2174 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2175
2176 /* setup the new team */
2177 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2178 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2179 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2180 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2181 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2182#if OMPT_SUPPORT
2183 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2184 return_address);
2185#endif
2186 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2187 // TODO: parent_team->t.t_level == INT_MAX ???
2188 if (!master_th->th.th_teams_microtask || level > teams_level) {
2189 int new_level = parent_team->t.t_level + 1;
2190 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2191 new_level = parent_team->t.t_active_level + 1;
2192 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2193 } else {
2194 // AC: Do not increase parallel level at start of the teams construct
2195 int new_level = parent_team->t.t_level;
2196 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2197 new_level = parent_team->t.t_active_level;
2198 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2199 }
2200 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2201 // set primary thread's schedule as new run-time schedule
2202 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2203
2204 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2205 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2206
2207 // Update the floating point rounding in the team if required.
2208 propagateFPControl(team);
2209#if OMPD_SUPPORT
2210 if (ompd_state & OMPD_ENABLE_BP)
2211 ompd_bp_parallel_begin();
2212#endif
2213
2214 KA_TRACE(
2215 20,
2216 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2217 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2218 team->t.t_nproc));
2219 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2220 (team->t.t_master_tid == 0 &&
2221 (team->t.t_parent == root->r.r_root_team ||
2222 team->t.t_parent->t.t_serialized)));
2223 KMP_MB();
2224
2225 /* now, setup the arguments */
2226 argv = (void **)team->t.t_argv;
2227 if (ap) {
2228 for (i = argc - 1; i >= 0; --i) {
2229 void *new_argv = va_arg(kmp_va_deref(ap), void *);
2230 KMP_CHECK_UPDATE(*argv, new_argv);
2231 argv++;
2232 }
2233 } else {
2234 for (i = 0; i < argc; ++i) {
2235 // Get args from parent team for teams construct
2236 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2237 }
2238 }
2239
2240 /* now actually fork the threads */
2241 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2242 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2243 root->r.r_active = TRUE;
2244
2245 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2246 __kmp_setup_icv_copy(team, nthreads,
2247 &master_th->th.th_current_task->td_icvs, loc);
2248
2249#if OMPT_SUPPORT
2250 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2251#endif
2252
2254
2255#if USE_ITT_BUILD
2256 if (team->t.t_active_level == 1 // only report frames at level 1
2257 && !master_th->th.th_teams_microtask) { // not in teams construct
2258#if USE_ITT_NOTIFY
2259 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2260 (__kmp_forkjoin_frames_mode == 3 ||
2261 __kmp_forkjoin_frames_mode == 1)) {
2262 kmp_uint64 tmp_time = 0;
2263 if (__itt_get_timestamp_ptr)
2264 tmp_time = __itt_get_timestamp();
2265 // Internal fork - report frame begin
2266 master_th->th.th_frame_time = tmp_time;
2267 if (__kmp_forkjoin_frames_mode == 3)
2268 team->t.t_region_time = tmp_time;
2269 } else
2270// only one notification scheme (either "submit" or "forking/joined", not both)
2271#endif /* USE_ITT_NOTIFY */
2272 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2273 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2274 // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
2275 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2276 }
2277 }
2278#endif /* USE_ITT_BUILD */
2279
2280 /* now go on and do the work */
2281 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2282 KMP_MB();
2283 KF_TRACE(10,
2284 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2285 root, team, master_th, gtid));
2286
2287#if USE_ITT_BUILD
2288 if (__itt_stack_caller_create_ptr) {
2289 // create new stack stitching id before entering fork barrier
2290 if (!enter_teams) {
2291 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2292 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2293 } else if (parent_team->t.t_serialized) {
2294 // keep stack stitching id in the serialized parent_team;
2295 // current team will be used for parallel inside the teams;
2296 // if parent_team is active, then it already keeps stack stitching id
2297 // for the league of teams
2298 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2299 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2300 }
2301 }
2302#endif /* USE_ITT_BUILD */
2303
2304 // AC: skip __kmp_internal_fork at teams construct, let only primary
2305 // threads execute
2306 if (ap) {
2307 __kmp_internal_fork(loc, gtid, team);
2308 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2309 "master_th=%p, gtid=%d\n",
2310 root, team, master_th, gtid));
2311 }
2312
2313 if (call_context == fork_context_gnu) {
2314 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2315 return TRUE;
2316 }
2317
2318 /* Invoke microtask for PRIMARY thread */
2319 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2320 team->t.t_id, team->t.t_pkfn));
2321 } // END of timer KMP_fork_call block
2322
2323#if KMP_STATS_ENABLED
2324 // If beginning a teams construct, then change thread state
2325 stats_state_e previous_state = KMP_GET_THREAD_STATE();
2326 if (!ap) {
2327 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2328 }
2329#endif
2330
2331 if (!team->t.t_invoke(gtid)) {
2332 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
2333 }
2334
2335#if KMP_STATS_ENABLED
2336 // If was beginning of a teams construct, then reset thread state
2337 if (!ap) {
2338 KMP_SET_THREAD_STATE(previous_state);
2339 }
2340#endif
2341
2342 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2343 team->t.t_id, team->t.t_pkfn));
2344 KMP_MB(); /* Flush all pending memory write invalidates. */
2345
2346 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2347#if OMPT_SUPPORT
2348 if (ompt_enabled.enabled) {
2349 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2350 }
2351#endif
2352
2353 return TRUE;
2354}
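// Illustrative sketch (excluded from compilation with #if 0): the user-visible
// knobs consumed by the fork logic above -- a num_threads clause feeding the
// "determine the number of threads" step and a proc_bind clause that overrides
// proc-bind-var for just this region. Printed values depend on the machine and
// environment, so none are asserted. Build with an OpenMP flag, e.g. -fopenmp.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  #pragma omp parallel num_threads(4) proc_bind(spread)
  {
    #pragma omp single
    printf("forked a team of %d threads, bind policy %d\n",
           omp_get_num_threads(), (int)omp_get_proc_bind());
  }
  return 0;
}
#endif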
2355
2356#if OMPT_SUPPORT
2357static inline void __kmp_join_restore_state(kmp_info_t *thread,
2358 kmp_team_t *team) {
2359 // restore state outside the region
2360 thread->th.ompt_thread_info.state =
2361 ((team->t.t_serialized) ? ompt_state_work_serial
2362 : ompt_state_work_parallel);
2363}
2364
2365static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2366 kmp_team_t *team, ompt_data_t *parallel_data,
2367 int flags, void *codeptr) {
2368 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2369 if (ompt_enabled.ompt_callback_parallel_end) {
2370 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2371 parallel_data, &(task_info->task_data), flags, codeptr);
2372 }
2373
2374 task_info->frame.enter_frame = ompt_data_none;
2375 __kmp_join_restore_state(thread, team);
2376}
2377#endif
2378
2379void __kmp_join_call(ident_t *loc, int gtid
2380#if OMPT_SUPPORT
2381 ,
2382 enum fork_context_e fork_context
2383#endif
2384 ,
2385 int exit_teams) {
2387 kmp_team_t *team;
2388 kmp_team_t *parent_team;
2389 kmp_info_t *master_th;
2390 kmp_root_t *root;
2391 int master_active;
2392
2393 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
2394
2395 /* setup current data */
2396 master_th = __kmp_threads[gtid];
2397 root = master_th->th.th_root;
2398 team = master_th->th.th_team;
2399 parent_team = team->t.t_parent;
2400
2401 master_th->th.th_ident = loc;
2402
2403#if OMPT_SUPPORT
2404 void *team_microtask = (void *)team->t.t_pkfn;
2405 // For GOMP interface with serialized parallel, need the
2406 // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
2407 // and end-parallel events.
2408 if (ompt_enabled.enabled &&
2409 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2410 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2411 }
2412#endif
2413
2414#if KMP_DEBUG
2415 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2416 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2417 "th_task_team = %p\n",
2418 __kmp_gtid_from_thread(master_th), team,
2419 team->t.t_task_team[master_th->th.th_task_state],
2420 master_th->th.th_task_team));
2421 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, master_th);
2422 }
2423#endif
2424
2425 if (team->t.t_serialized) {
2426 if (master_th->th.th_teams_microtask) {
2427 // We are in teams construct
2428 int level = team->t.t_level;
2429 int tlevel = master_th->th.th_teams_level;
2430 if (level == tlevel) {
2431 // AC: we haven't incremented it earlier at start of teams construct,
2432 // so do it here - at the end of teams construct
2433 team->t.t_level++;
2434 } else if (level == tlevel + 1) {
2435 // AC: we are exiting parallel inside teams, need to increment
2436 // serialization in order to restore it in the next call to
2437 // __kmpc_end_serialized_parallel
2438 team->t.t_serialized++;
2439 }
2440 }
2441 __kmpc_end_serialized_parallel(loc, gtid);
2442
2443#if OMPT_SUPPORT
2444 if (ompt_enabled.enabled) {
2445 if (fork_context == fork_context_gnu) {
2446 __ompt_lw_taskteam_unlink(master_th);
2447 }
2448 __kmp_join_restore_state(master_th, parent_team);
2449 }
2450#endif
2451
2452 return;
2453 }
2454
2455 master_active = team->t.t_master_active;
2456
2457 if (!exit_teams) {
2458 // AC: No barrier for internal teams at exit from teams construct.
2459 // But there is barrier for external team (league).
2460 __kmp_internal_join(loc, gtid, team);
2461#if USE_ITT_BUILD
2462 if (__itt_stack_caller_create_ptr) {
2463 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2464 // destroy the stack stitching id after join barrier
2465 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2466 team->t.t_stack_id = NULL;
2467 }
2468#endif
2469 } else {
2470 master_th->th.th_task_state =
2471 0; // AC: no tasking in teams (out of any parallel)
2472#if USE_ITT_BUILD
2473 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2474 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2475 // destroy the stack stitching id on exit from the teams construct
2476 // if parent_team is active, then the id will be destroyed later on
2477 // by master of the league of teams
2478 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2479 parent_team->t.t_stack_id = NULL;
2480 }
2481#endif
2482 }
2483
2484 KMP_MB();
2485
2486#if OMPT_SUPPORT
2487 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2488 void *codeptr = team->t.ompt_team_info.master_return_address;
2489#endif
2490
2491#if USE_ITT_BUILD
2492 // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
2493 if (team->t.t_active_level == 1 &&
2494 (!master_th->th.th_teams_microtask || /* not in teams construct */
2495 master_th->th.th_teams_size.nteams == 1)) {
2496 master_th->th.th_ident = loc;
2497 // only one notification scheme (either "submit" or "forking/joined", not
2498 // both)
2499 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2500 __kmp_forkjoin_frames_mode == 3)
2501 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2502 master_th->th.th_frame_time, 0, loc,
2503 master_th->th.th_team_nproc, 1);
2504 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2505 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2506 __kmp_itt_region_joined(gtid);
2507 } // active_level == 1
2508#endif /* USE_ITT_BUILD */
2509
2510#if KMP_AFFINITY_SUPPORTED
2511 if (!exit_teams) {
2512 // Restore master thread's partition.
2513 master_th->th.th_first_place = team->t.t_first_place;
2514 master_th->th.th_last_place = team->t.t_last_place;
2515 }
2516#endif // KMP_AFFINITY_SUPPORTED
2517
2518 if (master_th->th.th_teams_microtask && !exit_teams &&
2519 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2520 team->t.t_level == master_th->th.th_teams_level + 1) {
2521// AC: We need to leave the team structure intact at the end of parallel
2522// inside the teams construct, so that at the next parallel same (hot) team
2523// works, only adjust nesting levels
2524#if OMPT_SUPPORT
2525 ompt_data_t ompt_parallel_data = ompt_data_none;
2526 if (ompt_enabled.enabled) {
2527 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2528 if (ompt_enabled.ompt_callback_implicit_task) {
2529 int ompt_team_size = team->t.t_nproc;
2530 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2531 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2532 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2533 }
2534 task_info->frame.exit_frame = ompt_data_none;
2535 task_info->task_data = ompt_data_none;
2536 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2537 __ompt_lw_taskteam_unlink(master_th);
2538 }
2539#endif
2540 /* Decrement our nested depth level */
2541 team->t.t_level--;
2542 team->t.t_active_level--;
2543 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2544
2545 // Restore number of threads in the team if needed. This code relies on
2546 // the proper adjustment of th_teams_size.nth after the fork in
2547 // __kmp_teams_master on each teams primary thread in the case that
2548 // __kmp_reserve_threads reduced it.
2549 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2550 int old_num = master_th->th.th_team_nproc;
2551 int new_num = master_th->th.th_teams_size.nth;
2552 kmp_info_t **other_threads = team->t.t_threads;
2553 team->t.t_nproc = new_num;
2554 for (int i = 0; i < old_num; ++i) {
2555 other_threads[i]->th.th_team_nproc = new_num;
2556 }
2557 // Adjust states of non-used threads of the team
2558 for (int i = old_num; i < new_num; ++i) {
2559 // Re-initialize thread's barrier data.
2560 KMP_DEBUG_ASSERT(other_threads[i]);
2561 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2562 for (int b = 0; b < bs_last_barrier; ++b) {
2563 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2564 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2565#if USE_DEBUGGER
2566 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2567#endif
2568 }
2569 if (__kmp_tasking_mode != tskm_immediate_exec) {
2570 // Synchronize thread's task state
2571 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2572 }
2573 }
2574 }
2575
2576#if OMPT_SUPPORT
2577 if (ompt_enabled.enabled) {
2578 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2579 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2580 }
2581#endif
2582
2583 return;
2584 }
2585
2586 /* do cleanup and restore the parent team */
2587 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2588 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2589
2590 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2591
2592 /* jc: The following lock has instructions with REL and ACQ semantics,
2593 separating the parallel user code called in this parallel region
2594 from the serial user code called after this function returns. */
2595 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2596
2597 if (!master_th->th.th_teams_microtask ||
2598 team->t.t_level > master_th->th.th_teams_level) {
2599 /* Decrement our nested depth level */
2600 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2601 }
2602 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2603
2604#if OMPT_SUPPORT
2605 if (ompt_enabled.enabled) {
2606 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2607 if (ompt_enabled.ompt_callback_implicit_task) {
2608 int flags = (team_microtask == (void *)__kmp_teams_master)
2609 ? ompt_task_initial
2610 : ompt_task_implicit;
2611 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2612 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2613 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2614 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2615 }
2616 task_info->frame.exit_frame = ompt_data_none;
2617 task_info->task_data = ompt_data_none;
2618 }
2619#endif
2620
2621 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2622 master_th, team));
2623 __kmp_pop_current_task_from_thread(master_th);
2624
2625 master_th->th.th_def_allocator = team->t.t_def_allocator;
2626
2627#if OMPD_SUPPORT
2628 if (ompd_state & OMPD_ENABLE_BP)
2629 ompd_bp_parallel_end();
2630#endif
2631 updateHWFPControl(team);
2632
2633 if (root->r.r_active != master_active)
2634 root->r.r_active = master_active;
2635
2636 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2637 master_th)); // this will free worker threads
2638
2639 /* this race was fun to find. make sure the following is in the critical
2640 region otherwise assertions may fail occasionally since the old team may be
2641 reallocated and the hierarchy appears inconsistent. it is actually safe to
2642 run and won't cause any bugs, but will cause those assertion failures. it's
2643 only one deref&assign so might as well put this in the critical region */
2644 master_th->th.th_team = parent_team;
2645 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2646 master_th->th.th_team_master = parent_team->t.t_threads[0];
2647 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2648
2649 /* restore serialized team, if need be */
2650 if (parent_team->t.t_serialized &&
2651 parent_team != master_th->th.th_serial_team &&
2652 parent_team != root->r.r_root_team) {
2653 __kmp_free_team(root,
2654 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2655 master_th->th.th_serial_team = parent_team;
2656 }
2657
2658 if (__kmp_tasking_mode != tskm_immediate_exec) {
2659 // Restore primary thread's task state from team structure
2660 KMP_DEBUG_ASSERT(team->t.t_primary_task_state == 0 ||
2661 team->t.t_primary_task_state == 1);
2662 master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;
2663
2664 // Copy the task team from the parent team to the primary thread
2665 master_th->th.th_task_team =
2666 parent_team->t.t_task_team[master_th->th.th_task_state];
2667 KA_TRACE(20,
2668 ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2669 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2670 parent_team));
2671 }
2672
2673 // TODO: GEH - cannot do this assertion because root thread not set up as
2674 // executing
2675 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2676 master_th->th.th_current_task->td_flags.executing = 1;
2677
2678 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2679
2680#if KMP_AFFINITY_SUPPORTED
2681 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2683 }
2684#endif
2685#if OMPT_SUPPORT
2686 int flags =
2687 OMPT_INVOKER(fork_context) |
2688 ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
2689 : ompt_parallel_team);
2690 if (ompt_enabled.enabled) {
2691 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2692 codeptr);
2693 }
2694#endif
2695
2696 KMP_MB();
2697 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2698}
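// Illustrative sketch (excluded from compilation with #if 0): much of the
// fork/join bookkeeping above distinguishes the teams construct (league) from
// an ordinary parallel region (team). A host teams region with a nested
// parallel exercises both paths; assumes a compiler and runtime with
// OpenMP 5.x host teams support.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  #pragma omp teams num_teams(2) thread_limit(4)
  {
    #pragma omp parallel num_threads(2)
    {
      #pragma omp single
      printf("team %d of %d runs %d threads\n", omp_get_team_num(),
             omp_get_num_teams(), omp_get_num_threads());
    }
  }
  return 0;
}
#endif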
2699
2700/* Check whether we should push an internal control record onto the
2701 serial team stack. If so, do it. */
2702void __kmp_save_internal_controls(kmp_info_t *thread) {
2703
2704 if (thread->th.th_team != thread->th.th_serial_team) {
2705 return;
2706 }
2707 if (thread->th.th_team->t.t_serialized > 1) {
2708 int push = 0;
2709
2710 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2711 push = 1;
2712 } else {
2713 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2714 thread->th.th_team->t.t_serialized) {
2715 push = 1;
2716 }
2717 }
2718 if (push) { /* push a record on the serial team's stack */
2719 kmp_internal_control_t *control =
2720 (kmp_internal_control_t *)__kmp_allocate(
2721 sizeof(kmp_internal_control_t));
2722
2723 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2724
2725 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2726
2727 control->next = thread->th.th_team->t.t_control_stack_top;
2728 thread->th.th_team->t.t_control_stack_top = control;
2729 }
2730 }
2731}
2732
2733/* Changes set_nproc */
2734void __kmp_set_num_threads(int new_nth, int gtid) {
2735 kmp_info_t *thread;
2736 kmp_root_t *root;
2737
2738 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2740
2741 if (new_nth < 1)
2742 new_nth = 1;
2743 else if (new_nth > __kmp_max_nth)
2744 new_nth = __kmp_max_nth;
2745
2746 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2747 thread = __kmp_threads[gtid];
2748 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2749 return; // nothing to do
2750
2751 __kmp_save_internal_controls(thread);
2752
2753 set__nproc(thread, new_nth);
2754
2755 // If this omp_set_num_threads() call will cause the hot team size to be
2756 // reduced (in the absence of a num_threads clause), then reduce it now,
2757 // rather than waiting for the next parallel region.
2758 root = thread->th.th_root;
2759 if (__kmp_init_parallel && (!root->r.r_active) &&
2760 (root->r.r_hot_team->t.t_nproc > new_nth)
2761#if KMP_NESTED_HOT_TEAMS
2762 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2763#endif
2764 ) {
2765 kmp_team_t *hot_team = root->r.r_hot_team;
2766 int f;
2767
2768 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2769
2770 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2771 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2772 }
2773 // Release the extra threads we don't need any more.
2774 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2775 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2776 if (__kmp_tasking_mode != tskm_immediate_exec) {
2777 // When decreasing team size, threads no longer in the team should unref
2778 // task team.
2779 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2780 }
2781 __kmp_free_thread(hot_team->t.t_threads[f]);
2782 hot_team->t.t_threads[f] = NULL;
2783 }
2784 hot_team->t.t_nproc = new_nth;
2785#if KMP_NESTED_HOT_TEAMS
2786 if (thread->th.th_hot_teams) {
2787 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2788 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2789 }
2790#endif
2791
2792 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2793 hot_team->t.b->update_num_threads(new_nth);
2794 __kmp_add_threads_to_team(hot_team, new_nth);
2795 }
2796
2797 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2798
2799 // Update the t_nproc field in the threads that are still active.
2800 for (f = 0; f < new_nth; f++) {
2801 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2802 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2803 }
2804 // Special flag in case omp_set_num_threads() call
2805 hot_team->t.t_size_changed = -1;
2806 }
2807}
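// Illustrative sketch (excluded from compilation with #if 0): the standard API
// call that reaches this routine is omp_set_num_threads() (assumed entry-point
// mapping). As the code above notes, shrinking the request while the root is
// inactive can shrink the hot team immediately rather than at the next fork.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_num_threads(4); // updates the calling thread's nproc ICV
  #pragma omp parallel
  {
    #pragma omp single
    printf("first region:  %d threads\n", omp_get_num_threads());
  }
  omp_set_num_threads(2); // may release extra hot-team threads right away
  #pragma omp parallel
  {
    #pragma omp single
    printf("second region: %d threads\n", omp_get_num_threads());
  }
  return 0;
}
#endif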
2808
2809/* Changes max_active_levels */
2810void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2811 kmp_info_t *thread;
2812
2813 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2814 "%d = (%d)\n",
2815 gtid, max_active_levels));
2817
2818 // validate max_active_levels
2819 if (max_active_levels < 0) {
2820 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2821 // We ignore this call if the user has specified a negative value.
2822 // The current setting won't be changed. The last valid setting will be
2823 // used. A warning will be issued (if warnings are allowed as controlled by
2824 // the KMP_WARNINGS env var).
2825 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2826 "max_active_levels for thread %d = (%d)\n",
2827 gtid, max_active_levels));
2828 return;
2829 }
2830 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2831 // it's OK, the max_active_levels is within the valid range: [ 0;
2832 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2833 // We allow a zero value. (implementation defined behavior)
2834 } else {
2835 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2836 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2837 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2838 // Current upper limit is MAX_INT. (implementation defined behavior)
2839 // If the input exceeds the upper limit, we correct the input to be the
2840 // upper limit. (implementation defined behavior)
2841 // Actually, the flow should never get here until we use MAX_INT limit.
2842 }
2843 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2844 "max_active_levels for thread %d = (%d)\n",
2845 gtid, max_active_levels));
2846
2847 thread = __kmp_threads[gtid];
2848
2849 __kmp_save_internal_controls(thread);
2850
2851 set__max_active_levels(thread, max_active_levels);
2852}
2853
2854/* Gets max_active_levels */
2855int __kmp_get_max_active_levels(int gtid) {
2856 kmp_info_t *thread;
2857
2858 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2860
2861 thread = __kmp_threads[gtid];
2862 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2863 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2864 "curtask_maxaclevel=%d\n",
2865 gtid, thread->th.th_current_task,
2866 thread->th.th_current_task->td_icvs.max_active_levels));
2867 return thread->th.th_current_task->td_icvs.max_active_levels;
2868}
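// Illustrative sketch (excluded from compilation with #if 0): reading and
// writing the max-active-levels ICV through the standard API that is assumed
// to map onto the two routines above.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(2); // permit two nested active levels
  printf("max-active-levels = %d\n", omp_get_max_active_levels());
  #pragma omp parallel num_threads(2)
  {
    #pragma omp parallel num_threads(2)
    {
      #pragma omp single
      printf("active level %d, team of %d\n", omp_get_active_level(),
             omp_get_num_threads());
    }
  }
  return 0;
}
#endif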
2869
2870// nteams-var per-device ICV
2871void __kmp_set_num_teams(int num_teams) {
2872 if (num_teams > 0)
2873 __kmp_nteams = num_teams;
2874}
2876// teams-thread-limit-var per-device ICV
2877void __kmp_set_teams_thread_limit(int limit) {
2878 if (limit > 0)
2879 __kmp_teams_thread_limit = limit;
2880}
2882
2883KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
2884KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
2885
2886/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2887void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2888 kmp_info_t *thread;
2889 kmp_sched_t orig_kind;
2890 // kmp_team_t *team;
2891
2892 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2893 gtid, (int)kind, chunk));
2895
2896 // Check if the kind parameter is valid, correct if needed.
2897 // Valid parameters should fit in one of two intervals - standard or extended:
2898 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2899 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2900 orig_kind = kind;
2901 kind = __kmp_sched_without_mods(kind);
2902
2903 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2904 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2905 // TODO: Hint needs attention in case we change the default schedule.
2906 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2907 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2908 __kmp_msg_null);
2909 kind = kmp_sched_default;
2910 chunk = 0; // ignore chunk value in case of bad kind
2911 }
2912
2913 thread = __kmp_threads[gtid];
2914
2915 __kmp_save_internal_controls(thread);
2916
2917 if (kind < kmp_sched_upper_std) {
2918 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2919 // differentiate static chunked vs. unchunked: chunk should be invalid to
2920 // indicate unchunked schedule (which is the default)
2921 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2922 } else {
2923 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2924 __kmp_sch_map[kind - kmp_sched_lower - 1];
2925 }
2926 } else {
2927 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2928 // kmp_sched_lower - 2 ];
2929 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2930 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2931 kmp_sched_lower - 2];
2932 }
2933 __kmp_sched_apply_mods_intkind(
2934 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2935 if (kind == kmp_sched_auto || chunk < 1) {
2936 // ignore parameter chunk for schedule auto
2937 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2938 } else {
2939 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2940 }
2941}
2942
2943/* Gets def_sched_var ICV values */
2944void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2945 kmp_info_t *thread;
2946 enum sched_type th_type;
2947
2948 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2950
2951 thread = __kmp_threads[gtid];
2952
2953 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2954 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
2955 case kmp_sch_static:
2956 case kmp_sch_static_greedy:
2957 case kmp_sch_static_balanced:
2958 *kind = kmp_sched_static;
2959 __kmp_sched_apply_mods_stdkind(kind, th_type);
2960 *chunk = 0; // chunk was not set, try to show this fact via zero value
2961 return;
2962 case kmp_sch_static_chunked:
2963 *kind = kmp_sched_static;
2964 break;
2965 case kmp_sch_dynamic_chunked:
2966 *kind = kmp_sched_dynamic;
2967 break;
2968 case kmp_sch_guided_chunked:
2969 case kmp_sch_guided_iterative_chunked:
2970 case kmp_sch_guided_analytical_chunked:
2971 *kind = kmp_sched_guided;
2972 break;
2973 case kmp_sch_auto:
2974 *kind = kmp_sched_auto;
2975 break;
2976 case kmp_sch_trapezoidal:
2977 *kind = kmp_sched_trapezoidal;
2978 break;
2979#if KMP_STATIC_STEAL_ENABLED
2980 case kmp_sch_static_steal:
2981 *kind = kmp_sched_static_steal;
2982 break;
2983#endif
2984 default:
2985 KMP_FATAL(UnknownSchedulingType, th_type);
2986 }
2987
2988 __kmp_sched_apply_mods_stdkind(kind, th_type);
2989 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
2990}
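// Illustrative sketch (excluded from compilation with #if 0): the run-sched-var
// ICV manipulated above is what schedule(runtime) loops consult. Shown with the
// standard omp_set_schedule()/omp_get_schedule() API.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_sched_t kind;
  int chunk;

  omp_set_schedule(omp_sched_dynamic, 4); // run-sched-var := (dynamic, 4)
  omp_get_schedule(&kind, &chunk);
  printf("run-sched-var = (%d, %d)\n", (int)kind, chunk);

  #pragma omp parallel for schedule(runtime) // consults run-sched-var
  for (int i = 0; i < 16; ++i) {
    printf("iteration %2d on thread %d\n", i, omp_get_thread_num());
  }
  return 0;
}
#endif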
2991
2992int __kmp_get_ancestor_thread_num(int gtid, int level) {
2993
2994 int ii, dd;
2995 kmp_team_t *team;
2996 kmp_info_t *thr;
2997
2998 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3000
3001 // validate level
3002 if (level == 0)
3003 return 0;
3004 if (level < 0)
3005 return -1;
3006 thr = __kmp_threads[gtid];
3007 team = thr->th.th_team;
3008 ii = team->t.t_level;
3009 if (level > ii)
3010 return -1;
3011
3012 if (thr->th.th_teams_microtask) {
3013 // AC: we are in teams region where multiple nested teams have same level
3014 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3015 if (level <=
3016 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3017 KMP_DEBUG_ASSERT(ii >= tlevel);
3018 // AC: As we need to pass by the teams league, we need to artificially
3019 // increase ii
3020 if (ii == tlevel) {
3021 ii += 2; // three teams have same level
3022 } else {
3023 ii++; // two teams have same level
3024 }
3025 }
3026 }
3027
3028 if (ii == level)
3029 return __kmp_tid_from_gtid(gtid);
3030
3031 dd = team->t.t_serialized;
3032 level++;
3033 while (ii > level) {
3034 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3035 }
3036 if ((team->t.t_serialized) && (!dd)) {
3037 team = team->t.t_parent;
3038 continue;
3039 }
3040 if (ii > level) {
3041 team = team->t.t_parent;
3042 dd = team->t.t_serialized;
3043 ii--;
3044 }
3045 }
3046
3047 return (dd > 1) ? (0) : (team->t.t_master_tid);
3048}
3049
3050int __kmp_get_team_size(int gtid, int level) {
3051
3052 int ii, dd;
3053 kmp_team_t *team;
3054 kmp_info_t *thr;
3055
3056 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3058
3059 // validate level
3060 if (level == 0)
3061 return 1;
3062 if (level < 0)
3063 return -1;
3064 thr = __kmp_threads[gtid];
3065 team = thr->th.th_team;
3066 ii = team->t.t_level;
3067 if (level > ii)
3068 return -1;
3069
3070 if (thr->th.th_teams_microtask) {
3071 // AC: we are in teams region where multiple nested teams have same level
3072 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3073 if (level <=
3074 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3075 KMP_DEBUG_ASSERT(ii >= tlevel);
3076 // AC: As we need to pass by the teams league, we need to artificially
3077 // increase ii
3078 if (ii == tlevel) {
3079 ii += 2; // three teams have same level
3080 } else {
3081 ii++; // two teams have same level
3082 }
3083 }
3084 }
3085
3086 while (ii > level) {
3087 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3088 }
3089 if (team->t.t_serialized && (!dd)) {
3090 team = team->t.t_parent;
3091 continue;
3092 }
3093 if (ii > level) {
3094 team = team->t.t_parent;
3095 ii--;
3096 }
3097 }
3098
3099 return team->t.t_nproc;
3100}
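// Illustrative sketch (excluded from compilation with #if 0): the two queries
// above are assumed to back omp_get_ancestor_thread_num() and
// omp_get_team_size(). With two active nested levels, level 1 describes the
// outer team and level 2 the inner one.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(2);
  #pragma omp parallel num_threads(3)
  {
    #pragma omp parallel num_threads(2)
    {
      #pragma omp critical
      printf("outer tid %d of %d, inner tid %d of %d\n",
             omp_get_ancestor_thread_num(1), omp_get_team_size(1),
             omp_get_ancestor_thread_num(2), omp_get_team_size(2));
    }
  }
  return 0;
}
#endif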
3101
3102kmp_r_sched_t __kmp_get_schedule_global() {
3103 // This routine was created because the pairs (__kmp_sched, __kmp_chunk) and
3104 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
3105 // independently. So one can get the updated schedule here.
3106
3107 kmp_r_sched_t r_sched;
3108
3109 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
3110 // __kmp_guided. __kmp_sched should keep original value, so that user can set
3111 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
3112 // different roots (even in OMP 2.5)
3113 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3114 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3115 if (s == kmp_sch_static) {
3116 // replace STATIC with more detailed schedule (balanced or greedy)
3117 r_sched.r_sched_type = __kmp_static;
3118 } else if (s == kmp_sch_guided_chunked) {
3119 // replace GUIDED with more detailed schedule (iterative or analytical)
3120 r_sched.r_sched_type = __kmp_guided;
3121 } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
3122 r_sched.r_sched_type = __kmp_sched;
3123 }
3124 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3125
3126 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3127 // __kmp_chunk may be wrong here (if it was not ever set)
3128 r_sched.chunk = KMP_DEFAULT_CHUNK;
3129 } else {
3130 r_sched.chunk = __kmp_chunk;
3131 }
3132
3133 return r_sched;
3134}
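// Illustrative sketch (excluded from compilation with #if 0): the globals
// combined above are normally seeded from the environment (e.g. OMP_SCHEDULE;
// the parsing itself lives elsewhere, in kmp_settings.cpp, which is an
// assumption about file layout). A schedule(runtime) loop then picks up the
// result at run time.
#if 0
// Build normally, then run with e.g.:  OMP_SCHEDULE="guided,8" ./a.out
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_sched_t kind;
  int chunk;
  omp_get_schedule(&kind, &chunk); // reflects OMP_SCHEDULE at program start
  printf("run-sched-var = (%d, %d)\n", (int)kind, chunk);

  #pragma omp parallel for schedule(runtime)
  for (int i = 0; i < 32; ++i) {
    // iterations are distributed according to the environment's schedule
  }
  return 0;
}
#endif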
3135
3136 /* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
3137 at least argc number of *t_argv entries for the requested team. */
3138static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3139
3140 KMP_DEBUG_ASSERT(team);
3141 if (!realloc || argc > team->t.t_max_argc) {
3142
3143 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3144 "current entries=%d\n",
3145 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3146 /* if previously allocated heap space for args, free them */
3147 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3148 __kmp_free((void *)team->t.t_argv);
3149
3150 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3151 /* use unused space in the cache line for arguments */
3152 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3153 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3154 "argv entries\n",
3155 team->t.t_id, team->t.t_max_argc));
3156 team->t.t_argv = &team->t.t_inline_argv[0];
3157 if (__kmp_storage_map) {
3159 -1, &team->t.t_inline_argv[0],
3160 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3161 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3162 team->t.t_id);
3163 }
3164 } else {
3165 /* allocate space for arguments in the heap */
3166 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3167 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3168 : 2 * argc;
3169 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3170 "argv entries\n",
3171 team->t.t_id, team->t.t_max_argc));
3172 team->t.t_argv =
3173 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3174 if (__kmp_storage_map) {
3175 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3176 &team->t.t_argv[team->t.t_max_argc],
3177 sizeof(void *) * team->t.t_max_argc,
3178 "team_%d.t_argv", team->t.t_id);
3179 }
3180 }
3181 }
3182}
3183
3184static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3185 int i;
3186 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3187 team->t.t_threads =
3188 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3189 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3190 sizeof(dispatch_shared_info_t) * num_disp_buff);
3191 team->t.t_dispatch =
3192 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3193 team->t.t_implicit_task_taskdata =
3194 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3195 team->t.t_max_nproc = max_nth;
3196
3197 /* setup dispatch buffers */
3198 for (i = 0; i < num_disp_buff; ++i) {
3199 team->t.t_disp_buffer[i].buffer_index = i;
3200 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3201 }
3202}
3203
3204static void __kmp_free_team_arrays(kmp_team_t *team) {
3205 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3206 int i;
3207 for (i = 0; i < team->t.t_max_nproc; ++i) {
3208 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3209 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3210 team->t.t_dispatch[i].th_disp_buffer = NULL;
3211 }
3212 }
3213#if KMP_USE_HIER_SCHED
3214 __kmp_dispatch_free_hierarchies(team);
3215#endif
3216 __kmp_free(team->t.t_threads);
3217 __kmp_free(team->t.t_disp_buffer);
3218 __kmp_free(team->t.t_dispatch);
3219 __kmp_free(team->t.t_implicit_task_taskdata);
3220 team->t.t_threads = NULL;
3221 team->t.t_disp_buffer = NULL;
3222 team->t.t_dispatch = NULL;
3223 team->t.t_implicit_task_taskdata = 0;
3224}
3225
3226static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3227 kmp_info_t **oldThreads = team->t.t_threads;
3228
3229 __kmp_free(team->t.t_disp_buffer);
3230 __kmp_free(team->t.t_dispatch);
3231 __kmp_free(team->t.t_implicit_task_taskdata);
3232 __kmp_allocate_team_arrays(team, max_nth);
3233
3234 KMP_MEMCPY(team->t.t_threads, oldThreads,
3235 team->t.t_nproc * sizeof(kmp_info_t *));
3236
3237 __kmp_free(oldThreads);
3238}
3239
3240static kmp_internal_control_t __kmp_get_global_icvs(void) {
3241
3242 kmp_r_sched_t r_sched =
3243 __kmp_get_schedule_global(); // get current state of scheduling globals
3244
3246
3247 kmp_internal_control_t g_icvs = {
3248 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3249 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3250 // adjustment of threads (per thread)
3251 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3252 // whether blocktime is explicitly set
3253 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
3254#if KMP_USE_MONITOR
3255 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3256// intervals
3257#endif
3258 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3259 // next parallel region (per thread)
3260 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3261 __kmp_cg_max_nth, // int thread_limit;
3262 __kmp_task_max_nth, // int task_thread_limit; // to set the thread_limit
3263 // on task. This is used in the case of target thread_limit
3264 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3265 // for max_active_levels
3266 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3267 // {sched,chunk} pair
3268 __kmp_nested_proc_bind.bind_types[0],
3269 __kmp_default_device,
3270 NULL // struct kmp_internal_control *next;
3271 };
3272
3273 return g_icvs;
3274}
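// Illustrative sketch (excluded from compilation with #if 0): several of the
// ICV fields initialized above are observable through the standard API, and
// the corresponding environment variables (OMP_NUM_THREADS, OMP_DYNAMIC,
// OMP_MAX_ACTIVE_LEVELS, OMP_THREAD_LIMIT, KMP_BLOCKTIME) are the usual ways
// to seed them; treat that mapping as a summary, not an exhaustive list.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  printf("nthreads-var          = %d\n", omp_get_max_threads());
  printf("dyn-var               = %d\n", omp_get_dynamic());
  printf("max-active-levels-var = %d\n", omp_get_max_active_levels());
  printf("thread-limit-var      = %d\n", omp_get_thread_limit());
  return 0;
}
#endif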
3275
3276static kmp_internal_control_t __kmp_get_x_global_icvs(kmp_team_t *team) {
3277
3278 kmp_internal_control_t gx_icvs;
3279 gx_icvs.serial_nesting_level =
3280 0; // probably =team->t.t_serial like in save_inter_controls
3281 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3282 gx_icvs.next = NULL;
3283
3284 return gx_icvs;
3285}
3286
3287static void __kmp_initialize_root(kmp_root_t *root) {
3288 int f;
3289 kmp_team_t *root_team;
3290 kmp_team_t *hot_team;
3291 int hot_team_max_nth;
3292 kmp_r_sched_t r_sched =
3293 __kmp_get_schedule_global(); // get current state of scheduling globals
3294 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3295 KMP_DEBUG_ASSERT(root);
3296 KMP_ASSERT(!root->r.r_begin);
3297
3298 /* setup the root state structure */
3299 __kmp_init_lock(&root->r.r_begin_lock);
3300 root->r.r_begin = FALSE;
3301 root->r.r_active = FALSE;
3302 root->r.r_in_parallel = 0;
3303 root->r.r_blocktime = __kmp_dflt_blocktime;
3304#if KMP_AFFINITY_SUPPORTED
3305 root->r.r_affinity_assigned = FALSE;
3306#endif
3307
3308 /* setup the root team for this task */
3309 /* allocate the root team structure */
3310 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
3311
3312 root_team =
3313 __kmp_allocate_team(root,
3314 1, // new_nproc
3315 1, // max_nproc
3316#if OMPT_SUPPORT
3317 ompt_data_none, // root parallel id
3318#endif
3319 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3320 0 // argc
3321 USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3322 );
3323#if USE_DEBUGGER
3324 // Non-NULL value should be assigned to make the debugger display the root
3325 // team.
3326 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3327#endif
3328
3329 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
3330
3331 root->r.r_root_team = root_team;
3332 root_team->t.t_control_stack_top = NULL;
3333
3334 /* initialize root team */
3335 root_team->t.t_threads[0] = NULL;
3336 root_team->t.t_nproc = 1;
3337 root_team->t.t_serialized = 1;
3338 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3339 root_team->t.t_sched.sched = r_sched.sched;
3340 KA_TRACE(
3341 20,
3342 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3343 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3344
3345 /* setup the hot team for this task */
3346 /* allocate the hot team structure */
3347 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
3348
3349 hot_team =
3350 __kmp_allocate_team(root,
3351 1, // new_nproc
3352 __kmp_dflt_team_nth_ub * 2, // max_nproc
3353#if OMPT_SUPPORT
3354 ompt_data_none, // root parallel id
3355#endif
3356 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3357 0 // argc
3358 USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3359 );
3360 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3361
3362 root->r.r_hot_team = hot_team;
3363 root_team->t.t_control_stack_top = NULL;
3364
3365 /* first-time initialization */
3366 hot_team->t.t_parent = root_team;
3367
3368 /* initialize hot team */
3369 hot_team_max_nth = hot_team->t.t_max_nproc;
3370 for (f = 0; f < hot_team_max_nth; ++f) {
3371 hot_team->t.t_threads[f] = NULL;
3372 }
3373 hot_team->t.t_nproc = 1;
3374 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3375 hot_team->t.t_sched.sched = r_sched.sched;
3376 hot_team->t.t_size_changed = 0;
3377}
3378
3379#ifdef KMP_DEBUG
3380
3381typedef struct kmp_team_list_item {
3382 kmp_team_p const *entry;
3383 struct kmp_team_list_item *next;
3384} kmp_team_list_item_t;
3385typedef kmp_team_list_item_t *kmp_team_list_t;
3386
3387static void __kmp_print_structure_team_accum( // Add team to list of teams.
3388 kmp_team_list_t list, // List of teams.
3389 kmp_team_p const *team // Team to add.
3390) {
3391
3392 // List must terminate with item where both entry and next are NULL.
3393 // Team is added to the list only once.
3394 // List is sorted in ascending order by team id.
3395 // Team id is *not* a key.
3396
3397 kmp_team_list_t l;
3398
3399 KMP_DEBUG_ASSERT(list != NULL);
3400 if (team == NULL) {
3401 return;
3402 }
3403
3404 __kmp_print_structure_team_accum(list, team->t.t_parent);
3405 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3406
3407 // Search list for the team.
3408 l = list;
3409 while (l->next != NULL && l->entry != team) {
3410 l = l->next;
3411 }
3412 if (l->next != NULL) {
3413 return; // Team has been added before, exit.
3414 }
3415
3416 // Team is not found. Search list again for insertion point.
3417 l = list;
3418 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3419 l = l->next;
3420 }
3421
3422 // Insert team.
3423 {
3424 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3425 sizeof(kmp_team_list_item_t));
3426 *item = *l;
3427 l->entry = team;
3428 l->next = item;
3429 }
3430}
3431
3432static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
3433
3434) {
3435 __kmp_printf("%s", title);
3436 if (team != NULL) {
3437 __kmp_printf("%2x %p\n", team->t.t_id, team);
3438 } else {
3439 __kmp_printf(" - (nil)\n");
3440 }
3441}
3442
3443static void __kmp_print_structure_thread(char const *title,
3444 kmp_info_p const *thread) {
3445 __kmp_printf("%s", title);
3446 if (thread != NULL) {
3447 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3448 } else {
3449 __kmp_printf(" - (nil)\n");
3450 }
3451}
3452
3453void __kmp_print_structure(void) {
3454
3455 kmp_team_list_t list;
3456
3457 // Initialize list of teams.
3458 list =
3459 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3460 list->entry = NULL;
3461 list->next = NULL;
3462
3463 __kmp_printf("\n------------------------------\nGlobal Thread "
3464 "Table\n------------------------------\n");
3465 {
3466 int gtid;
3467 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3468 __kmp_printf("%2d", gtid);
3469 if (__kmp_threads != NULL) {
3470 __kmp_printf(" %p", __kmp_threads[gtid]);
3471 }
3472 if (__kmp_root != NULL) {
3473 __kmp_printf(" %p", __kmp_root[gtid]);
3474 }
3475 __kmp_printf("\n");
3476 }
3477 }
3478
3479 // Print out __kmp_threads array.
3480 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3481 "----------\n");
3482 if (__kmp_threads != NULL) {
3483 int gtid;
3484 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3485 kmp_info_t const *thread = __kmp_threads[gtid];
3486 if (thread != NULL) {
3487 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3488 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3489 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3490 __kmp_print_structure_team(" Serial Team: ",
3491 thread->th.th_serial_team);
3492 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3493 __kmp_print_structure_thread(" Primary: ",
3494 thread->th.th_team_master);
3495 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3496 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
3497 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3498 __kmp_print_structure_thread(" Next in pool: ",
3499 thread->th.th_next_pool);
3500 __kmp_printf("\n");
3501 __kmp_print_structure_team_accum(list, thread->th.th_team);
3502 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3503 }
3504 }
3505 } else {
3506 __kmp_printf("Threads array is not allocated.\n");
3507 }
3508
3509 // Print out __kmp_root array.
3510 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3511 "--------\n");
3512 if (__kmp_root != NULL) {
3513 int gtid;
3514 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3515 kmp_root_t const *root = __kmp_root[gtid];
3516 if (root != NULL) {
3517 __kmp_printf("GTID %2d %p:\n", gtid, root);
3518 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3519 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3520 __kmp_print_structure_thread(" Uber Thread: ",
3521 root->r.r_uber_thread);
3522 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3523 __kmp_printf(" In Parallel: %2d\n",
3524 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3525 __kmp_printf("\n");
3526 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3527 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3528 }
3529 }
3530 } else {
3531 __kmp_printf("Ubers array is not allocated.\n");
3532 }
3533
3534 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3535 "--------\n");
3536 while (list->next != NULL) {
3537 kmp_team_p const *team = list->entry;
3538 int i;
3539 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3540 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3541 __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);
3542 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3543 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3544 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3545 for (i = 0; i < team->t.t_nproc; ++i) {
3546 __kmp_printf(" Thread %2d: ", i);
3547 __kmp_print_structure_thread("", team->t.t_threads[i]);
3548 }
3549 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3550 __kmp_printf("\n");
3551 list = list->next;
3552 }
3553
3554 // Print out __kmp_thread_pool and __kmp_team_pool.
3555 __kmp_printf("\n------------------------------\nPools\n----------------------"
3556 "--------\n");
3557 __kmp_print_structure_thread("Thread pool: ",
3558 CCAST(kmp_info_t *, __kmp_thread_pool));
3559 __kmp_print_structure_team("Team pool: ",
3560 CCAST(kmp_team_t *, __kmp_team_pool));
3561 __kmp_printf("\n");
3562
3563 // Free team list.
3564 while (list != NULL) {
3565 kmp_team_list_item_t *item = list;
3566 list = list->next;
3567 KMP_INTERNAL_FREE(item);
3568 }
3569}
3570
3571#endif
3572
3573//---------------------------------------------------------------------------
3574// Stuff for per-thread fast random number generator
3575// Table of primes
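// Each thread uses one of these prime constants, selected by its tid modulo
// the table size, as the multiplier 'a' of its private linear congruential
// generator x' = x * a + 1 (see __kmp_init_random and __kmp_get_random below).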
3576static const unsigned __kmp_primes[] = {
3577 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3578 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3579 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3580 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3581 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3582 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3583 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3584 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3585 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3586 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3587 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
3588
3589//---------------------------------------------------------------------------
3590// __kmp_get_random: Get a random number using a linear congruential method.
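// The thread-private state th_x is advanced as x = x * a + 1 (wrapping mod
// 2^32) and the high 16 bits of the previous state are returned; the low-order
// bits of a power-of-two-modulus LCG are far less random, so they are dropped.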
3591unsigned short __kmp_get_random(kmp_info_t *thread) {
3592 unsigned x = thread->th.th_x;
3593 unsigned short r = (unsigned short)(x >> 16);
3594
3595 thread->th.th_x = x * thread->th.th_a + 1;
3596
3597 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3598 thread->th.th_info.ds.ds_tid, r));
3599
3600 return r;
3601}
3602//--------------------------------------------------------
3603// __kmp_init_random: Initialize a random number generator
3604void __kmp_init_random(kmp_info_t *thread) {
3605 unsigned seed = thread->th.th_info.ds.ds_tid;
3606
3607 thread->th.th_a =
3608 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3609 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3610 KA_TRACE(30,
3611 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3612}
3613
3614#if KMP_OS_WINDOWS
3615/* reclaim array entries for root threads that are already dead, returns number
3616 * reclaimed */
3617static int __kmp_reclaim_dead_roots(void) {
3618 int i, r = 0;
3619
3620 for (i = 0; i < __kmp_threads_capacity; ++i) {
3621 if (KMP_UBER_GTID(i) &&
3623 !__kmp_root[i]
3624 ->r.r_active) { // AC: reclaim only roots died in non-active state
3625 r += __kmp_unregister_root_other_thread(i);
3626 }
3627 }
3628 return r;
3629}
3630#endif
3631
3632/* This function attempts to create free entries in __kmp_threads and
3633 __kmp_root, and returns the number of free entries generated.
3634
3635 For Windows* OS static library, the first mechanism used is to reclaim array
3636 entries for root threads that are already dead.
3637
3638 On all platforms, expansion is attempted on the arrays __kmp_threads and
3639 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3640 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3641 threadprivate cache array has been created. Synchronization with
3642 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3643
3644 After any dead root reclamation, if the clipping value allows array expansion
3645 to result in the generation of a total of nNeed free slots, the function does
3646 that expansion. If not, nothing is done beyond the possible initial root
3647 thread reclamation.
3648
3649 If any argument is negative, the behavior is undefined. */
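/* As a rough example: with __kmp_threads_capacity == 32 and nNeed == 5, the
   doubling loop below grows the capacity to 64 (clipped to __kmp_sys_max_nth
   if necessary), a combined __kmp_threads/__kmp_root block of that size is
   allocated, the old entries are copied over, and 32 newly created free slots
   are reported to the caller. */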
3650static int __kmp_expand_threads(int nNeed) {
3651 int added = 0;
3652 int minimumRequiredCapacity;
3653 int newCapacity;
3654 kmp_info_t **newThreads;
3655 kmp_root_t **newRoot;
3656
3657 // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3658 // resizing __kmp_threads does not need additional protection if foreign
3659 // threads are present
3660
3661#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3662 /* only for Windows static library */
3663 /* reclaim array entries for root threads that are already dead */
3664 added = __kmp_reclaim_dead_roots();
3665
3666 if (nNeed) {
3667 nNeed -= added;
3668 if (nNeed < 0)
3669 nNeed = 0;
3670 }
3671#endif
3672 if (nNeed <= 0)
3673 return added;
3674
3675 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3676 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3677 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3678 // > __kmp_max_nth in one of two ways:
3679 //
3680 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3681 // may not be reused by another thread, so we may need to increase
3682 // __kmp_threads_capacity to __kmp_max_nth + 1.
3683 //
3684 // 2) New foreign root(s) are encountered. We always register new foreign
3685 // roots. This may cause a smaller # of threads to be allocated at
3686 // subsequent parallel regions, but the worker threads hang around (and
3687 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3688 //
3689 // Anyway, that is the reason for moving the check to see if
3690 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3691 // instead of having it performed here. -BB
3692
3693 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3694
3695 /* compute expansion headroom to check if we can expand */
3696 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3697 /* possible expansion too small -- give up */
3698 return added;
3699 }
3700 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3701
3702 newCapacity = __kmp_threads_capacity;
3703 do {
3704 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3705 : __kmp_sys_max_nth;
3706 } while (newCapacity < minimumRequiredCapacity);
3707 newThreads = (kmp_info_t **)__kmp_allocate(
3708 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3709 newRoot =
3710 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3711 KMP_MEMCPY(newThreads, __kmp_threads,
3712 __kmp_threads_capacity * sizeof(kmp_info_t *));
3713 KMP_MEMCPY(newRoot, __kmp_root,
3714 __kmp_threads_capacity * sizeof(kmp_root_t *));
3715 // Put old __kmp_threads array on a list. Any ongoing references to the old
3716 // array will remain valid. This list is cleaned up at library shutdown.
3717 kmp_old_threads_list_t *node =
3718 (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
3719 node->threads = __kmp_threads;
3720 node->next = __kmp_old_threads_list;
3721 __kmp_old_threads_list = node;
3722
3723 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3724 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3725 added += newCapacity - __kmp_threads_capacity;
3726 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3727
3728 if (newCapacity > __kmp_tp_capacity) {
3729 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3730 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3731 __kmp_threadprivate_resize_cache(newCapacity);
3732 } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3733 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3734 }
3735 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3736 }
3737
3738 return added;
3739}
3740
3741/* Register the current thread as a root thread and obtain our gtid. We must
3742 have the __kmp_initz_lock held at this point. Argument TRUE only if are the
3743 thread that calls from __kmp_do_serial_initialize() */
3744int __kmp_register_root(int initial_thread) {
3745 kmp_info_t *root_thread;
3746 kmp_root_t *root;
3747 int gtid;
3748 int capacity;
3749 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3750 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3751 KMP_MB();
3752
3753 /* 2007-03-02:
3754 If initial thread did not invoke OpenMP RTL yet, and this thread is not an
3755 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3756 work as expected -- it may return false (that means there is at least one
3757 empty slot in __kmp_threads array), but it is possible the only free slot
3758 is #0, which is reserved for initial thread and so cannot be used for this
3759 one. The following code works around this bug.
3760
3761 However, right solution seems to be not reserving slot #0 for initial
3762 thread because:
3763 (1) there is no magic in slot #0,
3764 (2) we cannot detect initial thread reliably (the first thread which does
3765 serial initialization may not be a real initial thread).
3766 */
3767 capacity = __kmp_threads_capacity;
3768 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3769 --capacity;
3770 }
3771
3772 // If it is not for initializing the hidden helper team, we need to take
3773 // __kmp_hidden_helper_threads_num out of the capacity because it is included
3774 // in __kmp_threads_capacity.
3775 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3776 capacity -= __kmp_hidden_helper_threads_num;
3777 }
3778
3779 /* see if there are too many threads */
3780 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3781 if (__kmp_tp_cached) {
3782 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3783 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3784 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3785 } else {
3786 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3787 __kmp_msg_null);
3788 }
3789 }
3790
3791 // When hidden helper task is enabled, __kmp_threads is organized as follows:
3792 // 0: initial thread, also a regular OpenMP thread.
3793 // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads.
3794 // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for
3795 // regular OpenMP threads.
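// For example, with __kmp_hidden_helper_threads_num == 8, gtid 0 is the
// initial thread, gtids 1..8 are reserved for hidden helper threads, and
// regular OpenMP threads are registered at gtid 9 and above.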
3796 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3797 // Find an available thread slot for hidden helper thread. Slots for hidden
3798 // helper threads start from 1 to __kmp_hidden_helper_threads_num.
3799 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3800 gtid <= __kmp_hidden_helper_threads_num;
3801 gtid++)
3802 ;
3804 KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
3805 "hidden helper thread: T#%d\n",
3806 gtid));
3807 } else {
3808 /* find an available thread slot */
3809 // Don't reassign the zero slot since we need that to only be used by
3810 // initial thread. Slots for hidden helper threads should also be skipped.
3811 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3812 gtid = 0;
3813 } else {
3814 for (gtid = __kmp_hidden_helper_threads_num + 1;
3815 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3816 ;
3817 }
3818 KA_TRACE(
3819 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3820 KMP_ASSERT(gtid < __kmp_threads_capacity);
3821 }
3822
3823 /* update global accounting */
3824 __kmp_all_nth++;
3825 TCW_4(__kmp_nth, __kmp_nth + 1);
3826
3827 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3828 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3829 if (__kmp_adjust_gtid_mode) {
3830 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3831 if (TCR_4(__kmp_gtid_mode) != 2) {
3832 TCW_4(__kmp_gtid_mode, 2);
3833 }
3834 } else {
3835 if (TCR_4(__kmp_gtid_mode) != 1) {
3836 TCW_4(__kmp_gtid_mode, 1);
3837 }
3838 }
3839 }
3840
3841#ifdef KMP_ADJUST_BLOCKTIME
3842 /* Adjust blocktime to zero if necessary */
3843 /* Middle initialization might not have occurred yet */
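 /* i.e. if the user did not set KMP_BLOCKTIME and the process is now
    oversubscribed (__kmp_nth > __kmp_avail_proc), switch to a zero blocktime
    so idle threads go to sleep immediately instead of spin-waiting. */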
3844 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3845 if (__kmp_nth > __kmp_avail_proc) {
3846 __kmp_zero_bt = TRUE;
3847 }
3848 }
3849#endif /* KMP_ADJUST_BLOCKTIME */
3850
3851 /* setup this new hierarchy */
3852 if (!(root = __kmp_root[gtid])) {
3853 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3854 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3855 }
3856
3857#if KMP_STATS_ENABLED
3858 // Initialize stats as soon as possible (right after gtid assignment).
3859 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3860 __kmp_stats_thread_ptr->startLife();
3861 KMP_SET_THREAD_STATE(SERIAL_REGION);
3862 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3863#endif
3865
3866 /* setup new root thread structure */
3867 if (root->r.r_uber_thread) {
3868 root_thread = root->r.r_uber_thread;
3869 } else {
3870 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3871 if (__kmp_storage_map) {
3872 __kmp_print_thread_storage_map(root_thread, gtid);
3873 }
3874 root_thread->th.th_info.ds.ds_gtid = gtid;
3875#if OMPT_SUPPORT
3876 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3877#endif
3878 root_thread->th.th_root = root;
3879 if (__kmp_env_consistency_check) {
3880 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3881 }
3882#if USE_FAST_MEMORY
3883 __kmp_initialize_fast_memory(root_thread);
3884#endif /* USE_FAST_MEMORY */
3885
3886#if KMP_USE_BGET
3887 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3888 __kmp_initialize_bget(root_thread);
3889#endif
3890 __kmp_init_random(root_thread); // Initialize random number generator
3891 }
3892
3893 /* setup the serial team held in reserve by the root thread */
3894 if (!root_thread->th.th_serial_team) {
3895 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3896 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3897 root_thread->th.th_serial_team = __kmp_allocate_team(
3898 root, 1, 1,
3899#if OMPT_SUPPORT
3900 ompt_data_none, // root parallel id
3901#endif
3902 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3903 }
3904 KMP_ASSERT(root_thread->th.th_serial_team);
3905 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3906 root_thread->th.th_serial_team));
3907
3908 /* drop root_thread into place */
3909 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3910
3911 root->r.r_root_team->t.t_threads[0] = root_thread;
3912 root->r.r_hot_team->t.t_threads[0] = root_thread;
3913 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3914 // AC: the team created in reserve, not for execution (it is unused for now).
3915 root_thread->th.th_serial_team->t.t_serialized = 0;
3916 root->r.r_uber_thread = root_thread;
3917
3918 /* initialize the thread, get it ready to go */
3919 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3920 TCW_4(__kmp_init_gtid, TRUE);
3921
3922 /* prepare the primary thread for get_gtid() */
3923 __kmp_gtid_set_specific(gtid);
3924
3925#if USE_ITT_BUILD
3926 __kmp_itt_thread_name(gtid);
3927#endif /* USE_ITT_BUILD */
3928
3929#ifdef KMP_TDATA_GTID
3930 __kmp_gtid = gtid;
3931#endif
3932 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3933 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3934
3935 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3936 "plain=%u\n",
3937 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3938 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3939 KMP_INIT_BARRIER_STATE));
3940 { // Initialize barrier data.
3941 int b;
3942 for (b = 0; b < bs_last_barrier; ++b) {
3943 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3944#if USE_DEBUGGER
3945 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3946#endif
3947 }
3948 }
3949 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3950 KMP_INIT_BARRIER_STATE);
3951
3952#if KMP_AFFINITY_SUPPORTED
3953 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3954 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3955 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3956 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3957#endif /* KMP_AFFINITY_SUPPORTED */
3958 root_thread->th.th_def_allocator = __kmp_def_allocator;
3959 root_thread->th.th_prev_level = 0;
3960 root_thread->th.th_prev_num_threads = 1;
3961
3962 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
3963 tmp->cg_root = root_thread;
3964 tmp->cg_thread_limit = __kmp_cg_max_nth;
3965 tmp->cg_nthreads = 1;
3966 KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
3967 " cg_nthreads init to 1\n",
3968 root_thread, tmp));
3969 tmp->up = NULL;
3970 root_thread->th.th_cg_roots = tmp;
3971
3973
3974#if OMPT_SUPPORT
3975 if (!initial_thread && ompt_enabled.enabled) {
3976
3977 kmp_info_t *root_thread = ompt_get_thread();
3978
3979 ompt_set_thread_state(root_thread, ompt_state_overhead);
3980
3981 if (ompt_enabled.ompt_callback_thread_begin) {
3982 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3983 ompt_thread_initial, __ompt_get_thread_data_internal());
3984 }
3985 ompt_data_t *task_data;
3986 ompt_data_t *parallel_data;
3987 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
3988 NULL);
3989 if (ompt_enabled.ompt_callback_implicit_task) {
3990 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3991 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3992 }
3993
3994 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3995 }
3996#endif
3997#if OMPD_SUPPORT
3998 if (ompd_state & OMPD_ENABLE_BP)
3999 ompd_bp_thread_begin();
4000#endif
4001
4002 KMP_MB();
4003 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4004
4005 return gtid;
4006}
4007
4008#if KMP_NESTED_HOT_TEAMS
4009static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
4010 const int max_level) {
4011 int i, n, nth;
4012 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4013 if (!hot_teams || !hot_teams[level].hot_team) {
4014 return 0;
4015 }
4016 KMP_DEBUG_ASSERT(level < max_level);
4017 kmp_team_t *team = hot_teams[level].hot_team;
4018 nth = hot_teams[level].hot_team_nth;
4019 n = nth - 1; // primary thread is not freed
4020 if (level < max_level - 1) {
4021 for (i = 0; i < nth; ++i) {
4022 kmp_info_t *th = team->t.t_threads[i];
4023 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4024 if (i > 0 && th->th.th_hot_teams) {
4025 __kmp_free(th->th.th_hot_teams);
4026 th->th.th_hot_teams = NULL;
4027 }
4028 }
4029 }
4030 __kmp_free_team(root, team, NULL);
4031 return n;
4032}
4033#endif
4034
4035// Resets a root thread and clears its root and hot teams.
4036// Returns the number of __kmp_threads entries directly and indirectly freed.
4037static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4038 kmp_team_t *root_team = root->r.r_root_team;
4039 kmp_team_t *hot_team = root->r.r_hot_team;
4040 int n = hot_team->t.t_nproc;
4041 int i;
4042
4043 KMP_DEBUG_ASSERT(!root->r.r_active);
4044
4045 root->r.r_root_team = NULL;
4046 root->r.r_hot_team = NULL;
4047 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
4048 // before call to __kmp_free_team().
4049 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4050#if KMP_NESTED_HOT_TEAMS
4051 if (__kmp_hot_teams_max_level >
4052 0) { // need to free nested hot teams and their threads if any
4053 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4054 kmp_info_t *th = hot_team->t.t_threads[i];
4055 if (__kmp_hot_teams_max_level > 1) {
4056 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4057 }
4058 if (th->th.th_hot_teams) {
4059 __kmp_free(th->th.th_hot_teams);
4060 th->th.th_hot_teams = NULL;
4061 }
4062 }
4063 }
4064#endif
4065 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4066
4067 // Before we can reap the thread, we need to make certain that all other
4068 // threads in the teams that had this root as ancestor have stopped trying to
4069 // steal tasks.
4070 if (__kmp_tasking_mode != tskm_immediate_exec) {
4071 __kmp_wait_to_unref_task_teams();
4072 }
4073
4074#if KMP_OS_WINDOWS
4075 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
4076 KA_TRACE(
4077 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4078 "\n",
4079 (LPVOID) & (root->r.r_uber_thread->th),
4080 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4081 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4082#endif /* KMP_OS_WINDOWS */
4083
4084#if OMPD_SUPPORT
4085 if (ompd_state & OMPD_ENABLE_BP)
4086 ompd_bp_thread_end();
4087#endif
4088
4089#if OMPT_SUPPORT
4090 ompt_data_t *task_data;
4091 ompt_data_t *parallel_data;
4092 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
4093 NULL);
4094 if (ompt_enabled.ompt_callback_implicit_task) {
4095 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4096 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4097 }
4098 if (ompt_enabled.ompt_callback_thread_end) {
4099 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4100 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4101 }
4102#endif
4103
4104 TCW_4(__kmp_nth,
4105 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
4106 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4107 KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4108 " to %d\n",
4109 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4110 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4111 if (i == 1) {
4112 // need to free contention group structure
4113 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4114 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4115 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4116 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4117 root->r.r_uber_thread->th.th_cg_roots = NULL;
4118 }
4119 __kmp_reap_thread(root->r.r_uber_thread, 1);
4120
4121 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it
4122 // instead of freeing.
4123 root->r.r_uber_thread = NULL;
4124 /* mark root as no longer in use */
4125 root->r.r_begin = FALSE;
4126
4127 return n;
4128}
4129
4130void __kmp_unregister_root_current_thread(int gtid) {
4131 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4132 /* this lock should be ok, since unregister_root_current_thread is never
4133 called during an abort, only during a normal close. furthermore, if you
4134 have the forkjoin lock, you should never try to get the initz lock */
4135 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4136 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4137 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
4138 "exiting T#%d\n",
4139 gtid));
4140 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4141 return;
4142 }
4143 kmp_root_t *root = __kmp_root[gtid];
4144
4145 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4146 KMP_ASSERT(KMP_UBER_GTID(gtid));
4147 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4148 KMP_ASSERT(root->r.r_active == FALSE);
4149
4150 KMP_MB();
4151
4152 kmp_info_t *thread = __kmp_threads[gtid];
4153 kmp_team_t *team = thread->th.th_team;
4154 kmp_task_team_t *task_team = thread->th.th_task_team;
4155
4156 // we need to wait for the proxy tasks before finishing the thread
4157 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4158 task_team->tt.tt_hidden_helper_task_encountered)) {
4159#if OMPT_SUPPORT
4160 // the runtime is shutting down so we won't report any events
4161 thread->th.ompt_thread_info.state = ompt_state_undefined;
4162#endif
4163 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4164 }
4165
4166 __kmp_reset_root(gtid, root);
4167
4168 KMP_MB();
4169 KC_TRACE(10,
4170 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4171
4172 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4173}
4174
4175#if KMP_OS_WINDOWS
4176/* __kmp_forkjoin_lock must be already held
4177 Unregisters a root thread that is not the current thread. Returns the number
4178 of __kmp_threads entries freed as a result. */
4179static int __kmp_unregister_root_other_thread(int gtid) {
4180 kmp_root_t *root = __kmp_root[gtid];
4181 int r;
4182
4183 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4184 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4185 KMP_ASSERT(KMP_UBER_GTID(gtid));
4186 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4187 KMP_ASSERT(root->r.r_active == FALSE);
4188
4189 r = __kmp_reset_root(gtid, root);
4190 KC_TRACE(10,
4191 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4192 return r;
4193}
4194#endif
4195
4196#if KMP_DEBUG
4197void __kmp_task_info() {
4198
4199 kmp_int32 gtid = __kmp_entry_gtid();
4200 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4201 kmp_info_t *this_thr = __kmp_threads[gtid];
4202 kmp_team_t *steam = this_thr->th.th_serial_team;
4203 kmp_team_t *team = this_thr->th.th_team;
4204
4206 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4207 "ptask=%p\n",
4208 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4209 team->t.t_implicit_task_taskdata[tid].td_parent);
4210}
4211#endif // KMP_DEBUG
4212
4213/* TODO optimize with one big memclr, take out what isn't needed, split
4214 responsibility to workers as much as possible, and delay initialization of
4215 features as much as possible */
4216static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4217 int tid, int gtid) {
4218 /* this_thr->th.th_info.ds.ds_gtid is setup in
4219 kmp_allocate_thread/create_worker.
4220 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4221 KMP_DEBUG_ASSERT(this_thr != NULL);
4222 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4223 KMP_DEBUG_ASSERT(team);
4224 KMP_DEBUG_ASSERT(team->t.t_threads);
4225 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4226 kmp_info_t *master = team->t.t_threads[0];
4227 KMP_DEBUG_ASSERT(master);
4228 KMP_DEBUG_ASSERT(master->th.th_root);
4229
4230 KMP_MB();
4231
4232 TCW_SYNC_PTR(this_thr->th.th_team, team);
4233
4234 this_thr->th.th_info.ds.ds_tid = tid;
4235 this_thr->th.th_set_nproc = 0;
4236 if (__kmp_tasking_mode != tskm_immediate_exec)
4237 // When tasking is possible, threads are not safe to reap until they are
4238 // done tasking; this will be set when tasking code is exited in wait
4239 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4240 else // no tasking --> always safe to reap
4241 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4242 this_thr->th.th_set_proc_bind = proc_bind_default;
4243#if KMP_AFFINITY_SUPPORTED
4244 this_thr->th.th_new_place = this_thr->th.th_current_place;
4245#endif
4246 this_thr->th.th_root = master->th.th_root;
4247
4248 /* setup the thread's cache of the team structure */
4249 this_thr->th.th_team_nproc = team->t.t_nproc;
4250 this_thr->th.th_team_master = master;
4251 this_thr->th.th_team_serialized = team->t.t_serialized;
4252
4253 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4254
4255 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4256 tid, gtid, this_thr, this_thr->th.th_current_task));
4257
4258 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4259 team, tid, TRUE);
4260
4261 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4262 tid, gtid, this_thr, this_thr->th.th_current_task));
4263 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4264 // __kmp_initialize_team()?
4265
4266 /* TODO no worksharing in speculative threads */
4267 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4268
4269 this_thr->th.th_local.this_construct = 0;
4270
4271 if (!this_thr->th.th_pri_common) {
4272 this_thr->th.th_pri_common =
4273 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4274 if (__kmp_storage_map) {
4275 __kmp_print_storage_map_gtid(
4276 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4277 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4278 }
4279 this_thr->th.th_pri_head = NULL;
4280 }
4281
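// A contention group (CG) is the set of threads that share one thread-limit
// ICV; each kmp_cg_root_t node carries the CG's root thread, its thread
// limit, and a cg_nthreads reference count so the node can be freed once the
// last member leaves (see the decrement/free logic just below).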
4282 if (this_thr != master && // Primary thread's CG root is initialized elsewhere
4283 this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
4284 // Make new thread's CG root same as primary thread's
4285 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4286 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4287 if (tmp) {
4288 // worker changes CG, need to check if old CG should be freed
4289 int i = tmp->cg_nthreads--;
4290 KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
4291 " on node %p of thread %p to %d\n",
4292 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4293 if (i == 1) {
4294 __kmp_free(tmp); // last thread left CG --> free it
4295 }
4296 }
4297 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4298 // Increment new thread's CG root's counter to add the new thread
4299 this_thr->th.th_cg_roots->cg_nthreads++;
4300 KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
4301 " node %p of thread %p to %d\n",
4302 this_thr, this_thr->th.th_cg_roots,
4303 this_thr->th.th_cg_roots->cg_root,
4304 this_thr->th.th_cg_roots->cg_nthreads));
4305 this_thr->th.th_current_task->td_icvs.thread_limit =
4306 this_thr->th.th_cg_roots->cg_thread_limit;
4307 }
4308
4309 /* Initialize dynamic dispatch */
4310 {
4311 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4312 // Use team max_nproc since this will never change for the team.
4313 size_t disp_size =
4314 sizeof(dispatch_private_info_t) *
4315 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
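// A serialized team (t_max_nproc == 1) needs only a single buffer; otherwise
// a small ring of __kmp_dispatch_num_buffers buffers is kept per thread so a
// new dynamically scheduled loop can start before every thread has finished
// with the previous one.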
4316 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4317 team->t.t_max_nproc));
4318 KMP_ASSERT(dispatch);
4319 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4320 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4321
4322 dispatch->th_disp_index = 0;
4323 dispatch->th_doacross_buf_idx = 0;
4324 if (!dispatch->th_disp_buffer) {
4325 dispatch->th_disp_buffer =
4326 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4327
4328 if (__kmp_storage_map) {
4329 __kmp_print_storage_map_gtid(
4330 gtid, &dispatch->th_disp_buffer[0],
4331 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4332 ? 1
4333 : __kmp_dispatch_num_buffers],
4334 disp_size,
4335 "th_%d.th_dispatch.th_disp_buffer "
4336 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4337 gtid, team->t.t_id, gtid);
4338 }
4339 } else {
4340 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4341 }
4342
4343 dispatch->th_dispatch_pr_current = 0;
4344 dispatch->th_dispatch_sh_current = 0;
4345
4346 dispatch->th_deo_fcn = 0; /* ORDERED */
4347 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4348 }
4349
4350 this_thr->th.th_next_pool = NULL;
4351
4352 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4353 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4354
4355 KMP_MB();
4356}
4357
4358/* allocate a new thread for the requesting team. this is only called from
4359 within a forkjoin critical section. we will first try to get an available
4360 thread from the thread pool. if none is available, we will fork a new one
4361 assuming we are able to create a new one. this should be assured, as the
4362 caller should check on this first. */
4363kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4364 int new_tid) {
4365 kmp_team_t *serial_team;
4366 kmp_info_t *new_thr;
4367 int new_gtid;
4368
4369 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4370 KMP_DEBUG_ASSERT(root && team);
4371#if !KMP_NESTED_HOT_TEAMS
4372 KMP_DEBUG_ASSERT(new_tid < team->t.t_max_nproc);
4373#endif
4374 KMP_MB();
4375
4376 /* first, try to get one from the thread pool unless allocating thread is
4377 * the main hidden helper thread. The hidden helper team should always
4378 * allocate new OS threads. */
4379 if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {
4380 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4381 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4382 if (new_thr == __kmp_thread_pool_insert_pt) {
4383 __kmp_thread_pool_insert_pt = NULL;
4384 }
4385 TCW_4(new_thr->th.th_in_pool, FALSE);
4386 __kmp_suspend_initialize_thread(new_thr);
4387 __kmp_lock_suspend_mx(new_thr);
4388 if (new_thr->th.th_active_in_pool == TRUE) {
4389 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4390 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4391 new_thr->th.th_active_in_pool = FALSE;
4392 }
4393 __kmp_unlock_suspend_mx(new_thr);
4394
4395 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4396 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4397 KMP_ASSERT(!new_thr->th.th_team);
4399
4400 /* setup the thread structure */
4401 __kmp_initialize_info(new_thr, team, new_tid,
4402 new_thr->th.th_info.ds.ds_gtid);
4403 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4404
4405 TCW_4(__kmp_nth, __kmp_nth + 1);
4406
4407 new_thr->th.th_task_state = 0;
4408
4409 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4410 // Make sure pool thread has transitioned to waiting on own thread struct
4411 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4412 // Thread activated in __kmp_allocate_team when increasing team size
4413 }
4414
4415#ifdef KMP_ADJUST_BLOCKTIME
4416 /* Adjust blocktime back to zero if necessary */
4417 /* Middle initialization might not have occurred yet */
4418 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4419 if (__kmp_nth > __kmp_avail_proc) {
4420 __kmp_zero_bt = TRUE;
4421 }
4422 }
4423#endif /* KMP_ADJUST_BLOCKTIME */
4424
4425#if KMP_DEBUG
4426 // If thread entered pool via __kmp_free_thread, wait_flag should !=
4427 // KMP_BARRIER_PARENT_FLAG.
4428 int b;
4429 kmp_balign_t *balign = new_thr->th.th_bar;
4430 for (b = 0; b < bs_last_barrier; ++b)
4431 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4432#endif
4433
4434 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4435 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4436
4437 KMP_MB();
4438 return new_thr;
4439 }
4440
4441 /* no, we'll fork a new one */
4442 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4443 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4444
4445#if KMP_USE_MONITOR
4446 // If this is the first worker thread the RTL is creating, then also
4447 // launch the monitor thread. We try to do this as early as possible.
4448 if (!TCR_4(__kmp_init_monitor)) {
4449 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4450 if (!TCR_4(__kmp_init_monitor)) {
4451 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4452 TCW_4(__kmp_init_monitor, 1);
4453 __kmp_create_monitor(&__kmp_monitor);
4454 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4455#if KMP_OS_WINDOWS
4456 // AC: wait until monitor has started. This is a fix for CQ232808.
4457 // The reason is that if the library is loaded/unloaded in a loop with
4458 // small (parallel) work in between, then there is high probability that
4459 // monitor thread started after the library shutdown. At shutdown it is
4460 // too late to cope with the problem, because when the primary thread is
4461 // in DllMain (process detach) the monitor has no chances to start (it is
4462 // blocked), and primary thread has no means to inform the monitor that
4463 // the library has gone, because all the memory which the monitor can
4464 // access is going to be released/reset.
4465 while (TCR_4(__kmp_init_monitor) < 2) {
4466 KMP_YIELD(TRUE);
4467 }
4468 KF_TRACE(10, ("after monitor thread has started\n"));
4469#endif
4470 }
4471 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4472 }
4473#endif
4474
4475 KMP_MB();
4476
4477 {
4478 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4479 ? 1
4480 : __kmp_hidden_helper_threads_num + 1;
4481
4482 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4483 ++new_gtid) {
4484 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4485 }
4486
4487 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4488 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4489 }
4490 }
4491
4492 /* allocate space for it. */
4493 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4494
4495 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4496
4497#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4498 // suppress race conditions detection on synchronization flags in debug mode
4499 // this helps to analyze library internals eliminating false positives
4500 __itt_suppress_mark_range(
4501 __itt_suppress_range, __itt_suppress_threading_errors,
4502 &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4503 __itt_suppress_mark_range(
4504 __itt_suppress_range, __itt_suppress_threading_errors,
4505 &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4506#if KMP_OS_WINDOWS
4507 __itt_suppress_mark_range(
4508 __itt_suppress_range, __itt_suppress_threading_errors,
4509 &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4510#else
4511 __itt_suppress_mark_range(__itt_suppress_range,
4512 __itt_suppress_threading_errors,
4513 &new_thr->th.th_suspend_init_count,
4514 sizeof(new_thr->th.th_suspend_init_count));
4515#endif
4516 // TODO: check if we need to also suppress b_arrived flags
4517 __itt_suppress_mark_range(__itt_suppress_range,
4518 __itt_suppress_threading_errors,
4519 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4520 sizeof(new_thr->th.th_bar[0].bb.b_go));
4521 __itt_suppress_mark_range(__itt_suppress_range,
4522 __itt_suppress_threading_errors,
4523 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4524 sizeof(new_thr->th.th_bar[1].bb.b_go));
4525 __itt_suppress_mark_range(__itt_suppress_range,
4526 __itt_suppress_threading_errors,
4527 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4528 sizeof(new_thr->th.th_bar[2].bb.b_go));
4529#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
4530 if (__kmp_storage_map) {
4531 __kmp_print_thread_storage_map(new_thr, new_gtid);
4532 }
4533
4534 // add the reserve serialized team, initialized from the team's primary thread
4535 {
4536 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4537 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4538 new_thr->th.th_serial_team = serial_team =
4539 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4540#if OMPT_SUPPORT
4541 ompt_data_none, // root parallel id
4542#endif
4543 proc_bind_default, &r_icvs,
4544 0 USE_NESTED_HOT_ARG(NULL));
4545 }
4546 KMP_ASSERT(serial_team);
4547 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4548 // execution (it is unused for now).
4549 serial_team->t.t_threads[0] = new_thr;
4550 KF_TRACE(10,
4551 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4552 new_thr));
4553
4554 /* setup the thread structures */
4555 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4556
4557#if USE_FAST_MEMORY
4558 __kmp_initialize_fast_memory(new_thr);
4559#endif /* USE_FAST_MEMORY */
4560
4561#if KMP_USE_BGET
4562 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4563 __kmp_initialize_bget(new_thr);
4564#endif
4565
4566 __kmp_init_random(new_thr); // Initialize random number generator
4567
4568 /* Initialize these only once when thread is grabbed for a team allocation */
4569 KA_TRACE(20,
4570 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4571 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4572
4573 int b;
4574 kmp_balign_t *balign = new_thr->th.th_bar;
4575 for (b = 0; b < bs_last_barrier; ++b) {
4576 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4577 balign[b].bb.team = NULL;
4578 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4579 balign[b].bb.use_oncore_barrier = 0;
4580 }
4581
4582 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4583 new_thr->th.th_sleep_loc_type = flag_unset;
4584
4585 new_thr->th.th_spin_here = FALSE;
4586 new_thr->th.th_next_waiting = 0;
4587#if KMP_OS_UNIX
4588 new_thr->th.th_blocking = false;
4589#endif
4590
4591#if KMP_AFFINITY_SUPPORTED
4592 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4593 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4594 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4595 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4596#endif
4597 new_thr->th.th_def_allocator = __kmp_def_allocator;
4598 new_thr->th.th_prev_level = 0;
4599 new_thr->th.th_prev_num_threads = 1;
4600
4601 TCW_4(new_thr->th.th_in_pool, FALSE);
4602 new_thr->th.th_active_in_pool = FALSE;
4603 TCW_4(new_thr->th.th_active, TRUE);
4604
4605 /* adjust the global counters */
4606 __kmp_all_nth++;
4607 __kmp_nth++;
4608
4609 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4610 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4611 if (__kmp_adjust_gtid_mode) {
4612 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4613 if (TCR_4(__kmp_gtid_mode) != 2) {
4614 TCW_4(__kmp_gtid_mode, 2);
4615 }
4616 } else {
4617 if (TCR_4(__kmp_gtid_mode) != 1) {
4618 TCW_4(__kmp_gtid_mode, 1);
4619 }
4620 }
4621 }
4622
4623#ifdef KMP_ADJUST_BLOCKTIME
4624 /* Adjust blocktime back to zero if necessary */
4625 /* Middle initialization might not have occurred yet */
4626 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4627 if (__kmp_nth > __kmp_avail_proc) {
4628 __kmp_zero_bt = TRUE;
4629 }
4630 }
4631#endif /* KMP_ADJUST_BLOCKTIME */
4632
4633#if KMP_AFFINITY_SUPPORTED
4634 // Set the affinity and topology information for new thread
4635 __kmp_affinity_set_init_mask(new_gtid, /*isa_root=*/FALSE);
4636#endif
4637
4638 /* actually fork it and create the new worker thread */
4639 KF_TRACE(
4640 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4641 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4642 KF_TRACE(10,
4643 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4644
4645 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4646 new_gtid));
4647 KMP_MB();
4648 return new_thr;
4649}
4650
4651/* Reinitialize team for reuse.
4652 The hot team code calls this case at every fork barrier, so EPCC barrier
4653 tests are extremely sensitive to changes in it, esp. writes to the team
4654 struct, which cause a cache invalidation in all threads.
4655 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4656static void __kmp_reinitialize_team(kmp_team_t *team,
4657 kmp_internal_control_t *new_icvs,
4658 ident_t *loc) {
4659 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4660 team->t.t_threads[0], team));
4661 KMP_DEBUG_ASSERT(team && new_icvs);
4662 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4663 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4664
4665 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4666 // Copy ICVs to the primary thread's implicit taskdata
4667 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4668 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4669
4670 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4671 team->t.t_threads[0], team));
4672}
4673
4674/* Initialize the team data structure.
4675 This assumes the t_threads and t_max_nproc are already set.
4676 Also, we don't touch the arguments */
4677static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4678 kmp_internal_control_t *new_icvs,
4679 ident_t *loc) {
4680 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4681
4682 /* verify */
4683 KMP_DEBUG_ASSERT(team);
4684 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4685 KMP_DEBUG_ASSERT(team->t.t_threads);
4686 KMP_MB();
4687
4688 team->t.t_master_tid = 0; /* not needed */
4689 /* team->t.t_master_bar; not needed */
4690 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4691 team->t.t_nproc = new_nproc;
4692
4693 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4694 team->t.t_next_pool = NULL;
4695 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4696 * up hot team */
4697
4698 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4699 team->t.t_invoke = NULL; /* not needed */
4700
4701 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4702 team->t.t_sched.sched = new_icvs->sched.sched;
4703
4704#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4705 team->t.t_fp_control_saved = FALSE; /* not needed */
4706 team->t.t_x87_fpu_control_word = 0; /* not needed */
4707 team->t.t_mxcsr = 0; /* not needed */
4708#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4709
4710 team->t.t_construct = 0;
4711
4712 team->t.t_ordered.dt.t_value = 0;
4713 team->t.t_master_active = FALSE;
4714
4715#ifdef KMP_DEBUG
4716 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
4717#endif
4718#if KMP_OS_WINDOWS
4719 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
4720#endif
4721
4722 team->t.t_control_stack_top = NULL;
4723
4724 __kmp_reinitialize_team(team, new_icvs, loc);
4725
4726 KMP_MB();
4727 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
4728}
4729
4730#if KMP_AFFINITY_SUPPORTED
4731static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
4732 int first, int last, int newp) {
4733 th->th.th_first_place = first;
4734 th->th.th_last_place = last;
4735 th->th.th_new_place = newp;
4736 if (newp != th->th.th_current_place) {
4737 if (__kmp_display_affinity && team->t.t_display_affinity != 1)
4738 team->t.t_display_affinity = 1;
4739 // Copy topology information associated with the new place
4740 th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
4741 th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
4742 }
4743}
4744
4745// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4746// It calculates the worker + primary thread's partition based upon the parent
4747// thread's partition, and binds each worker to a thread in their partition.
4748// The primary thread's partition should already include its current binding.
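// In short: proc_bind_primary binds every worker to the primary thread's own
// place, proc_bind_close packs workers onto places adjacent to the primary
// thread, and proc_bind_spread hands each thread its own sub-partition of the
// primary thread's partition, spacing the threads out as evenly as possible.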
4749static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4750 // Do not partition places for the hidden helper team
4751 if (KMP_HIDDEN_HELPER_TEAM(team))
4752 return;
4753 // Copy the primary thread's place partition to the team struct
4754 kmp_info_t *master_th = team->t.t_threads[0];
4755 KMP_DEBUG_ASSERT(master_th != NULL);
4756 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4757 int first_place = master_th->th.th_first_place;
4758 int last_place = master_th->th.th_last_place;
4759 int masters_place = master_th->th.th_current_place;
4760 int num_masks = __kmp_affinity.num_masks;
4761 team->t.t_first_place = first_place;
4762 team->t.t_last_place = last_place;
4763
4764 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4765 "bound to place %d partition = [%d,%d]\n",
4766 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4767 team->t.t_id, masters_place, first_place, last_place));
4768
4769 switch (proc_bind) {
4770
4771 case proc_bind_default:
4772 // Serial teams might have the proc_bind policy set to proc_bind_default.
4773 // Not an issue -- we don't rebind primary thread for any proc_bind policy.
4774 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4775 break;
4776
4777 case proc_bind_primary: {
4778 int f;
4779 int n_th = team->t.t_nproc;
4780 for (f = 1; f < n_th; f++) {
4781 kmp_info_t *th = team->t.t_threads[f];
4782 KMP_DEBUG_ASSERT(th != NULL);
4783 __kmp_set_thread_place(team, th, first_place, last_place, masters_place);
4784
4785 KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4786 "partition = [%d,%d]\n",
4787 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4788 f, masters_place, first_place, last_place));
4789 }
4790 } break;
4791
4792 case proc_bind_close: {
4793 int f;
4794 int n_th = team->t.t_nproc;
4795 int n_places;
4796 if (first_place <= last_place) {
4797 n_places = last_place - first_place + 1;
4798 } else {
4799 n_places = num_masks - first_place + last_place + 1;
4800 }
4801 if (n_th <= n_places) {
4802 int place = masters_place;
4803 for (f = 1; f < n_th; f++) {
4804 kmp_info_t *th = team->t.t_threads[f];
4805 KMP_DEBUG_ASSERT(th != NULL);
4806
4807 if (place == last_place) {
4808 place = first_place;
4809 } else if (place == (num_masks - 1)) {
4810 place = 0;
4811 } else {
4812 place++;
4813 }
4814 __kmp_set_thread_place(team, th, first_place, last_place, place);
4815
4816 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4817 "partition = [%d,%d]\n",
4818 __kmp_gtid_from_thread(team->t.t_threads[f]),
4819 team->t.t_id, f, place, first_place, last_place));
4820 }
4821 } else {
4822 int S, rem, gap, s_count;
4823 S = n_th / n_places;
4824 s_count = 0;
4825 rem = n_th - (S * n_places);
4826 gap = rem > 0 ? n_places / rem : n_places;
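// More threads than places: give each place S = n_th / n_places threads and
// hand out the rem leftover threads one extra per every gap-th place. For
// example, n_th = 10 over n_places = 4 gives S = 2, rem = 2, gap = 2, so the
// places receive 3, 2, 3, 2 threads starting at the primary thread's place.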
4827 int place = masters_place;
4828 int gap_ct = gap;
4829 for (f = 0; f < n_th; f++) {
4830 kmp_info_t *th = team->t.t_threads[f];
4831 KMP_DEBUG_ASSERT(th != NULL);
4832
4833 __kmp_set_thread_place(team, th, first_place, last_place, place);
4834 s_count++;
4835
4836 if ((s_count == S) && rem && (gap_ct == gap)) {
4837 // do nothing, add an extra thread to place on next iteration
4838 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4839 // we added an extra thread to this place; move to next place
4840 if (place == last_place) {
4841 place = first_place;
4842 } else if (place == (num_masks - 1)) {
4843 place = 0;
4844 } else {
4845 place++;
4846 }
4847 s_count = 0;
4848 gap_ct = 1;
4849 rem--;
4850 } else if (s_count == S) { // place full; don't add extra
4851 if (place == last_place) {
4852 place = first_place;
4853 } else if (place == (num_masks - 1)) {
4854 place = 0;
4855 } else {
4856 place++;
4857 }
4858 gap_ct++;
4859 s_count = 0;
4860 }
4861
4862 KA_TRACE(100,
4863 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4864 "partition = [%d,%d]\n",
4865 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4866 th->th.th_new_place, first_place, last_place));
4867 }
4868 KMP_DEBUG_ASSERT(place == masters_place);
4869 }
4870 } break;
4871
4872 case proc_bind_spread: {
4873 int f;
4874 int n_th = team->t.t_nproc;
4875 int n_places;
4876 int thidx;
4877 if (first_place <= last_place) {
4878 n_places = last_place - first_place + 1;
4879 } else {
4880 n_places = num_masks - first_place + last_place + 1;
4881 }
4882 if (n_th <= n_places) {
4883 int place = -1;
4884
4885 if (n_places != num_masks) {
4886 int S = n_places / n_th;
4887 int s_count, rem, gap, gap_ct;
4888
4889 place = masters_place;
4890 rem = n_places - n_th * S;
4891 gap = rem ? n_th / rem : 1;
4892 gap_ct = gap;
4893 thidx = n_th;
4894 if (update_master_only == 1)
4895 thidx = 1;
4896 for (f = 0; f < thidx; f++) {
4897 kmp_info_t *th = team->t.t_threads[f];
4898 KMP_DEBUG_ASSERT(th != NULL);
4899
4900 int fplace = place, nplace = place;
4901 s_count = 1;
4902 while (s_count < S) {
4903 if (place == last_place) {
4904 place = first_place;
4905 } else if (place == (num_masks - 1)) {
4906 place = 0;
4907 } else {
4908 place++;
4909 }
4910 s_count++;
4911 }
4912 if (rem && (gap_ct == gap)) {
4913 if (place == last_place) {
4914 place = first_place;
4915 } else if (place == (num_masks - 1)) {
4916 place = 0;
4917 } else {
4918 place++;
4919 }
4920 rem--;
4921 gap_ct = 0;
4922 }
4923 __kmp_set_thread_place(team, th, fplace, place, nplace);
4924 gap_ct++;
4925
4926 if (place == last_place) {
4927 place = first_place;
4928 } else if (place == (num_masks - 1)) {
4929 place = 0;
4930 } else {
4931 place++;
4932 }
4933
4934 KA_TRACE(100,
4935 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4936 "partition = [%d,%d], num_masks: %u\n",
4937 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4938 f, th->th.th_new_place, th->th.th_first_place,
4939 th->th.th_last_place, num_masks));
4940 }
4941 } else {
4942 /* Having uniform space of available computation places I can create
4943 T partitions of round(P/T) size and put threads into the first
4944 place of each partition. */
4945 double current = static_cast<double>(masters_place);
4946 double spacing =
4947 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
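// Example: masters_place = 0, n_places = 8, n_th = 3 gives spacing = 3.0, so
// the sub-partitions are [0,2], [3,5] and [6,8] (clipped to [6,7]), and each
// thread is bound to the first place of its sub-partition.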
4948 int first, last;
4949 kmp_info_t *th;
4950
4951 thidx = n_th + 1;
4952 if (update_master_only == 1)
4953 thidx = 1;
4954 for (f = 0; f < thidx; f++) {
4955 first = static_cast<int>(current);
4956 last = static_cast<int>(current + spacing) - 1;
4957 KMP_DEBUG_ASSERT(last >= first);
4958 if (first >= n_places) {
4959 if (masters_place) {
4960 first -= n_places;
4961 last -= n_places;
4962 if (first == (masters_place + 1)) {
4963 KMP_DEBUG_ASSERT(f == n_th);
4964 first--;
4965 }
4966 if (last == masters_place) {
4967 KMP_DEBUG_ASSERT(f == (n_th - 1));
4968 last--;
4969 }
4970 } else {
4971 KMP_DEBUG_ASSERT(f == n_th);
4972 first = 0;
4973 last = 0;
4974 }
4975 }
4976 if (last >= n_places) {
4977 last = (n_places - 1);
4978 }
4979 place = first;
4980 current += spacing;
4981 if (f < n_th) {
4982 KMP_DEBUG_ASSERT(0 <= first);
4983 KMP_DEBUG_ASSERT(n_places > first);
4984 KMP_DEBUG_ASSERT(0 <= last);
4985 KMP_DEBUG_ASSERT(n_places > last);
4986 KMP_DEBUG_ASSERT(last_place >= first_place);
4987 th = team->t.t_threads[f];
4988 KMP_DEBUG_ASSERT(th);
4989 __kmp_set_thread_place(team, th, first, last, place);
4990 KA_TRACE(100,
4991 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4992 "partition = [%d,%d], spacing = %.4f\n",
4993 __kmp_gtid_from_thread(team->t.t_threads[f]),
4994 team->t.t_id, f, th->th.th_new_place,
4995 th->th.th_first_place, th->th.th_last_place, spacing));
4996 }
4997 }
4998 }
4999 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5000 } else {
5001 int S, rem, gap, s_count;
5002 S = n_th / n_places;
5003 s_count = 0;
5004 rem = n_th - (S * n_places);
5005 gap = rem > 0 ? n_places / rem : n_places;
5006 int place = masters_place;
5007 int gap_ct = gap;
5008 thidx = n_th;
5009 if (update_master_only == 1)
5010 thidx = 1;
5011 for (f = 0; f < thidx; f++) {
5012 kmp_info_t *th = team->t.t_threads[f];
5013 KMP_DEBUG_ASSERT(th != NULL);
5014
5015 __kmp_set_thread_place(team, th, place, place, place);
5016 s_count++;
5017
5018 if ((s_count == S) && rem && (gap_ct == gap)) {
5019 // do nothing, add an extra thread to place on next iteration
5020 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5021 // we added an extra thread to this place; move on to next place
5022 if (place == last_place) {
5023 place = first_place;
5024 } else if (place == (num_masks - 1)) {
5025 place = 0;
5026 } else {
5027 place++;
5028 }
5029 s_count = 0;
5030 gap_ct = 1;
5031 rem--;
5032 } else if (s_count == S) { // place is full; don't add extra thread
5033 if (place == last_place) {
5034 place = first_place;
5035 } else if (place == (num_masks - 1)) {
5036 place = 0;
5037 } else {
5038 place++;
5039 }
5040 gap_ct++;
5041 s_count = 0;
5042 }
5043
5044 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5045 "partition = [%d,%d]\n",
5046 __kmp_gtid_from_thread(team->t.t_threads[f]),
5047 team->t.t_id, f, th->th.th_new_place,
5048 th->th.th_first_place, th->th.th_last_place));
5049 }
5050 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5051 }
5052 } break;
5053
5054 default:
5055 break;
5056 }
5057
5058 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
5059}
5060
5061#endif // KMP_AFFINITY_SUPPORTED
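
// The uniform-space branch of the spread partitioning above steps a
// floating-point cursor by spacing = (n_places + 1) / n_th and gives each
// thread the place range [floor(cursor), floor(cursor + spacing) - 1]. The
// standalone sketch below reproduces just that arithmetic so the resulting
// partitions can be printed; the example_* name is hypothetical and the
// wrap-around handling is simplified compared to the code above.

#include <cstdio>

static void example_print_spread_partitions(int n_places, int n_th,
                                             int masters_place) {
  double current = static_cast<double>(masters_place);
  double spacing =
      static_cast<double>(n_places + 1) / static_cast<double>(n_th);
  for (int f = 0; f < n_th; ++f) {
    int first = static_cast<int>(current);
    int last = static_cast<int>(current + spacing) - 1;
    if (first >= n_places) { // wrapped past the end of the place list
      first -= n_places;
      last -= n_places;
    }
    if (last >= n_places) // clamp a partition that straddles the end
      last = n_places - 1;
    std::printf("thread %d -> places [%d,%d]\n", f, first, last);
    current += spacing;
  }
}

// For example, example_print_spread_partitions(8, 3, 0) prints the
// partitions [0,2], [3,5], and [6,7].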
5062
5063/* allocate a new team data structure to use. take one off of the free pool if
5064 available */
5065kmp_team_t *
5066__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
5067#if OMPT_SUPPORT
5068 ompt_data_t ompt_parallel_data,
5069#endif
5070 kmp_proc_bind_t new_proc_bind,
5071 kmp_internal_control_t *new_icvs,
5072 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5073 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5074 int f;
5075 kmp_team_t *team;
5076 int use_hot_team = !root->r.r_active;
5077 int level = 0;
5078 int do_place_partition = 1;
5079
5080 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
5081 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5082 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5083 KMP_MB();
5084
5085#if KMP_NESTED_HOT_TEAMS
5086 kmp_hot_team_ptr_t *hot_teams;
5087 if (master) {
5088 team = master->th.th_team;
5089 level = team->t.t_active_level;
5090 if (master->th.th_teams_microtask) { // in teams construct?
5091 if (master->th.th_teams_size.nteams > 1 &&
5092 ( // #teams > 1
5093 team->t.t_pkfn ==
5094 (microtask_t)__kmp_teams_master || // inner fork of the teams
5095 master->th.th_teams_level <
5096 team->t.t_level)) { // or nested parallel inside the teams
5097          ++level; // do not increment if #teams==1, or for the outer fork of
5098          // the teams; increment otherwise
5099 }
5100      // Do not perform the place partition for the inner fork of the teams;
5101      // wait until a nested parallel region is encountered inside the teams construct
5102 if ((master->th.th_teams_size.nteams == 1 &&
5103 master->th.th_teams_level >= team->t.t_level) ||
5104 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5105 do_place_partition = 0;
5106 }
5107 hot_teams = master->th.th_hot_teams;
5108 if (level < __kmp_hot_teams_max_level && hot_teams &&
5109 hot_teams[level].hot_team) {
5110 // hot team has already been allocated for given level
5111 use_hot_team = 1;
5112 } else {
5113 use_hot_team = 0;
5114 }
5115 } else {
5116 // check we won't access uninitialized hot_teams, just in case
5117 KMP_DEBUG_ASSERT(new_nproc == 1);
5118 }
5119#endif
5120 // Optimization to use a "hot" team
5121 if (use_hot_team && new_nproc > 1) {
5122 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5123#if KMP_NESTED_HOT_TEAMS
5124 team = hot_teams[level].hot_team;
5125#else
5126 team = root->r.r_hot_team;
5127#endif
5128#if KMP_DEBUG
5130 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5131 "task_team[1] = %p before reinit\n",
5132 team->t.t_task_team[0], team->t.t_task_team[1]));
5133 }
5134#endif
5135
5136 if (team->t.t_nproc != new_nproc &&
5138 // Distributed barrier may need a resize
5139 int old_nthr = team->t.t_nproc;
5140 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5141 }
5142
5143 // If not doing the place partition, then reset the team's proc bind
5144 // to indicate that partitioning of all threads still needs to take place
5145 if (do_place_partition == 0)
5146 team->t.t_proc_bind = proc_bind_default;
5147 // Has the number of threads changed?
5148 /* Let's assume the most common case is that the number of threads is
5149 unchanged, and put that case first. */
5150 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
5151 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
5152 // This case can mean that omp_set_num_threads() was called and the hot
5153 // team size was already reduced, so we check the special flag
5154 if (team->t.t_size_changed == -1) {
5155 team->t.t_size_changed = 1;
5156 } else {
5157 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5158 }
5159
5160 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5161 kmp_r_sched_t new_sched = new_icvs->sched;
5162 // set primary thread's schedule as new run-time schedule
5163 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5164
5165 __kmp_reinitialize_team(team, new_icvs,
5166 root->r.r_uber_thread->th.th_ident);
5167
5168 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5169 team->t.t_threads[0], team));
5170 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5171
5172#if KMP_AFFINITY_SUPPORTED
5173 if ((team->t.t_size_changed == 0) &&
5174 (team->t.t_proc_bind == new_proc_bind)) {
5175 if (new_proc_bind == proc_bind_spread) {
5176 if (do_place_partition) {
5177 // add flag to update only master for spread
5178 __kmp_partition_places(team, 1);
5179 }
5180 }
5181 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
5182 "proc_bind = %d, partition = [%d,%d]\n",
5183 team->t.t_id, new_proc_bind, team->t.t_first_place,
5184 team->t.t_last_place));
5185 } else {
5186 if (do_place_partition) {
5187 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5188 __kmp_partition_places(team);
5189 }
5190 }
5191#else
5192 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5193#endif /* KMP_AFFINITY_SUPPORTED */
5194 } else if (team->t.t_nproc > new_nproc) {
5195 KA_TRACE(20,
5196 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
5197 new_nproc));
5198
5199 team->t.t_size_changed = 1;
5201 // Barrier size already reduced earlier in this function
5202 // Activate team threads via th_used_in_team
5203 __kmp_add_threads_to_team(team, new_nproc);
5204 }
5205 // When decreasing team size, threads no longer in the team should
5206 // unref task team.
5208 for (f = new_nproc; f < team->t.t_nproc; f++) {
5209 kmp_info_t *th = team->t.t_threads[f];
5210 KMP_DEBUG_ASSERT(th);
5211 th->th.th_task_team = NULL;
5212 }
5213 }
5214#if KMP_NESTED_HOT_TEAMS
5215 if (__kmp_hot_teams_mode == 0) {
5216        // AC: the saved number of threads should correspond to the team's value
5217        // in this mode; it can be bigger in mode 1, when the hot team has threads in reserve
5218 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5219 hot_teams[level].hot_team_nth = new_nproc;
5220#endif // KMP_NESTED_HOT_TEAMS
5221 /* release the extra threads we don't need any more */
5222 for (f = new_nproc; f < team->t.t_nproc; f++) {
5223 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5224 __kmp_free_thread(team->t.t_threads[f]);
5225 team->t.t_threads[f] = NULL;
5226 }
5227#if KMP_NESTED_HOT_TEAMS
5228 } // (__kmp_hot_teams_mode == 0)
5229 else {
5230 // When keeping extra threads in team, switch threads to wait on own
5231 // b_go flag
5232 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5233 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5234 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5235 for (int b = 0; b < bs_last_barrier; ++b) {
5236 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5237 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5238 }
5239 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5240 }
5241 }
5242 }
5243#endif // KMP_NESTED_HOT_TEAMS
5244 team->t.t_nproc = new_nproc;
5245 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5246 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5247 __kmp_reinitialize_team(team, new_icvs,
5248 root->r.r_uber_thread->th.th_ident);
5249
5250 // Update remaining threads
5251 for (f = 0; f < new_nproc; ++f) {
5252 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5253 }
5254
5255 // restore the current task state of the primary thread: should be the
5256 // implicit task
5257 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5258 team->t.t_threads[0], team));
5259
5260 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5261
5262#ifdef KMP_DEBUG
5263 for (f = 0; f < team->t.t_nproc; f++) {
5264 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5265 team->t.t_threads[f]->th.th_team_nproc ==
5266 team->t.t_nproc);
5267 }
5268#endif
5269
5270 if (do_place_partition) {
5271 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5272#if KMP_AFFINITY_SUPPORTED
5273 __kmp_partition_places(team);
5274#endif
5275 }
5276 } else { // team->t.t_nproc < new_nproc
5277
5278 KA_TRACE(20,
5279 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5280 new_nproc));
5281 int old_nproc = team->t.t_nproc; // save old value and use to update only
5282 team->t.t_size_changed = 1;
5283
5284#if KMP_NESTED_HOT_TEAMS
5285 int avail_threads = hot_teams[level].hot_team_nth;
5286 if (new_nproc < avail_threads)
5287 avail_threads = new_nproc;
5288 kmp_info_t **other_threads = team->t.t_threads;
5289 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5290 // Adjust barrier data of reserved threads (if any) of the team
5291 // Other data will be set in __kmp_initialize_info() below.
5292 int b;
5293 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5294 for (b = 0; b < bs_last_barrier; ++b) {
5295 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5296 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5297#if USE_DEBUGGER
5298 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5299#endif
5300 }
5301 }
5302 if (hot_teams[level].hot_team_nth >= new_nproc) {
5303 // we have all needed threads in reserve, no need to allocate any
5304        // this is only possible in mode 1; we cannot have reserved threads in mode 0
5305 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5306 team->t.t_nproc = new_nproc; // just get reserved threads involved
5307 } else {
5308 // We may have some threads in reserve, but not enough;
5309 // get reserved threads involved if any.
5310 team->t.t_nproc = hot_teams[level].hot_team_nth;
5311 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5312#endif // KMP_NESTED_HOT_TEAMS
5313 if (team->t.t_max_nproc < new_nproc) {
5314 /* reallocate larger arrays */
5315 __kmp_reallocate_team_arrays(team, new_nproc);
5316 __kmp_reinitialize_team(team, new_icvs, NULL);
5317 }
5318
5319#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5320 KMP_AFFINITY_SUPPORTED
5321 /* Temporarily set full mask for primary thread before creation of
5322 workers. The reason is that workers inherit the affinity from the
5323 primary thread, so if a lot of workers are created on the single
5324 core quickly, they don't get a chance to set their own affinity for
5325 a long time. */
5326 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5327#endif
5328
5329 /* allocate new threads for the hot team */
5330 for (f = team->t.t_nproc; f < new_nproc; f++) {
5331 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5332 KMP_DEBUG_ASSERT(new_worker);
5333 team->t.t_threads[f] = new_worker;
5334
5335 KA_TRACE(20,
5336                 ("__kmp_allocate_team: team %d init T#%d(%d:%d) arrived: "
5337                  "join=%llu, plain=%llu\n",
5338 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5339 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5340 team->t.t_bar[bs_plain_barrier].b_arrived));
5341
5342 { // Initialize barrier data for new threads.
5343 int b;
5344 kmp_balign_t *balign = new_worker->th.th_bar;
5345 for (b = 0; b < bs_last_barrier; ++b) {
5346 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5347 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5349#if USE_DEBUGGER
5350 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5351#endif
5352 }
5353 }
5354 }
5355
5356#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5357 KMP_AFFINITY_SUPPORTED
5358 /* Restore initial primary thread's affinity mask */
5359 new_temp_affinity.restore();
5360#endif
5361#if KMP_NESTED_HOT_TEAMS
5362 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5363#endif // KMP_NESTED_HOT_TEAMS
5365 // Barrier size already increased earlier in this function
5366 // Activate team threads via th_used_in_team
5367 __kmp_add_threads_to_team(team, new_nproc);
5368 }
5369      /* make sure everyone is synchronized */
5370 // new threads below
5371 __kmp_initialize_team(team, new_nproc, new_icvs,
5372 root->r.r_uber_thread->th.th_ident);
5373
5374 /* reinitialize the threads */
5375 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5376 for (f = 0; f < team->t.t_nproc; ++f)
5377 __kmp_initialize_info(team->t.t_threads[f], team, f,
5378 __kmp_gtid_from_tid(f, team));
5379
5380 // set th_task_state for new threads in hot team with older thread's state
5381 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5382 for (f = old_nproc; f < team->t.t_nproc; ++f)
5383 team->t.t_threads[f]->th.th_task_state = old_state;
5384
5385#ifdef KMP_DEBUG
5386 for (f = 0; f < team->t.t_nproc; ++f) {
5387 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5388 team->t.t_threads[f]->th.th_team_nproc ==
5389 team->t.t_nproc);
5390 }
5391#endif
5392
5393 if (do_place_partition) {
5394 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5395#if KMP_AFFINITY_SUPPORTED
5396 __kmp_partition_places(team);
5397#endif
5398 }
5399 } // Check changes in number of threads
5400
5401 kmp_info_t *master = team->t.t_threads[0];
5402 if (master->th.th_teams_microtask) {
5403 for (f = 1; f < new_nproc; ++f) {
5404 // propagate teams construct specific info to workers
5405 kmp_info_t *thr = team->t.t_threads[f];
5406 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5407 thr->th.th_teams_level = master->th.th_teams_level;
5408 thr->th.th_teams_size = master->th.th_teams_size;
5409 }
5410 }
5411#if KMP_NESTED_HOT_TEAMS
5412 if (level) {
5413 // Sync barrier state for nested hot teams, not needed for outermost hot
5414 // team.
5415 for (f = 1; f < new_nproc; ++f) {
5416 kmp_info_t *thr = team->t.t_threads[f];
5417 int b;
5418 kmp_balign_t *balign = thr->th.th_bar;
5419 for (b = 0; b < bs_last_barrier; ++b) {
5420 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5421 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5422#if USE_DEBUGGER
5423 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5424#endif
5425 }
5426 }
5427 }
5428#endif // KMP_NESTED_HOT_TEAMS
5429
5430 /* reallocate space for arguments if necessary */
5431 __kmp_alloc_argv_entries(argc, team, TRUE);
5432 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5433 // The hot team re-uses the previous task team,
5434 // if untouched during the previous release->gather phase.
5435
5436 KF_TRACE(10, (" hot_team = %p\n", team));
5437
5438#if KMP_DEBUG
5440 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5441 "task_team[1] = %p after reinit\n",
5442 team->t.t_task_team[0], team->t.t_task_team[1]));
5443 }
5444#endif
5445
5446#if OMPT_SUPPORT
5447 __ompt_team_assign_id(team, ompt_parallel_data);
5448#endif
5449
5450 KMP_MB();
5451
5452 return team;
5453 }
5454
5455 /* next, let's try to take one from the team pool */
5456 KMP_MB();
5457 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5458 /* TODO: consider resizing undersized teams instead of reaping them, now
5459 that we have a resizing mechanism */
5460 if (team->t.t_max_nproc >= max_nproc) {
5461 /* take this team from the team pool */
5462 __kmp_team_pool = team->t.t_next_pool;
5463
5464 if (max_nproc > 1 &&
5466 if (!team->t.b) { // Allocate barrier structure
5468 }
5469 }
5470
5471 /* setup the team for fresh use */
5472 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5473
5474 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5475 "task_team[1] %p to NULL\n",
5476 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5477 team->t.t_task_team[0] = NULL;
5478 team->t.t_task_team[1] = NULL;
5479
5480 /* reallocate space for arguments if necessary */
5481 __kmp_alloc_argv_entries(argc, team, TRUE);
5482 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5483
5484 KA_TRACE(
5485 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5487 { // Initialize barrier data.
5488 int b;
5489 for (b = 0; b < bs_last_barrier; ++b) {
5490 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5491#if USE_DEBUGGER
5492 team->t.t_bar[b].b_master_arrived = 0;
5493 team->t.t_bar[b].b_team_arrived = 0;
5494#endif
5495 }
5496 }
5497
5498 team->t.t_proc_bind = new_proc_bind;
5499
5500 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5501 team->t.t_id));
5502
5503#if OMPT_SUPPORT
5504 __ompt_team_assign_id(team, ompt_parallel_data);
5505#endif
5506
5507 KMP_MB();
5508
5509 return team;
5510 }
5511
5512 /* reap team if it is too small, then loop back and check the next one */
5513    // Not sure if this is wise, but it will be redone during the hot-teams
5514    // rewrite.
5515 /* TODO: Use technique to find the right size hot-team, don't reap them */
5516 team = __kmp_reap_team(team);
5517 __kmp_team_pool = team;
5518 }
5519
5520 /* nothing available in the pool, no matter, make a new team! */
5521 KMP_MB();
5522 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5523
5524 /* and set it up */
5525 team->t.t_max_nproc = max_nproc;
5526 if (max_nproc > 1 &&
5528 // Allocate barrier structure
5530 }
5531
5532  /* NOTE well: for some reason, allocating one big buffer and dividing it up
5533     seems to really hurt performance on the P4, so let's not use this approach */
5534 __kmp_allocate_team_arrays(team, max_nproc);
5535
5536 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5537 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5538
5539 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5540 "%p to NULL\n",
5541 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5542 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5543 // memory, no need to duplicate
5544 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5545 // memory, no need to duplicate
5546
5547 if (__kmp_storage_map) {
5548 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5549 }
5550
5551 /* allocate space for arguments */
5552 __kmp_alloc_argv_entries(argc, team, FALSE);
5553 team->t.t_argc = argc;
5554
5555 KA_TRACE(20,
5556 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5558 { // Initialize barrier data.
5559 int b;
5560 for (b = 0; b < bs_last_barrier; ++b) {
5561 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5562#if USE_DEBUGGER
5563 team->t.t_bar[b].b_master_arrived = 0;
5564 team->t.t_bar[b].b_team_arrived = 0;
5565#endif
5566 }
5567 }
5568
5569 team->t.t_proc_bind = new_proc_bind;
5570
5571#if OMPT_SUPPORT
5572 __ompt_team_assign_id(team, ompt_parallel_data);
5573 team->t.ompt_serialized_team_info = NULL;
5574#endif
5575
5576 KMP_MB();
5577
5578 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5579 team->t.t_id));
5580
5581 return team;
5582}
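
// The hot-team path above caches a team across parallel regions and only
// grows or shrinks it when the requested thread count changes. The
// standalone sketch below shows that reuse pattern in isolation; the
// ExampleTeam type and example_get_hot_team name are hypothetical, and the
// bookkeeping (ICVs, barriers, place partitions) is omitted.

#include <cstdio>
#include <vector>

struct ExampleTeam {
  std::vector<int> worker_gtids; // stand-in for the kmp_info_t *t_threads[] array
};

static ExampleTeam *example_get_hot_team(int new_nproc) {
  static ExampleTeam hot_team; // reused across regions, like root->r.r_hot_team
  int old_nproc = static_cast<int>(hot_team.worker_gtids.size());
  if (old_nproc == new_nproc) {
    std::printf("reusing hot team of %d threads unchanged\n", new_nproc);
  } else if (old_nproc > new_nproc) {
    std::printf("shrinking hot team %d -> %d\n", old_nproc, new_nproc);
    hot_team.worker_gtids.resize(new_nproc); // "release" the extra workers
  } else {
    std::printf("growing hot team %d -> %d\n", old_nproc, new_nproc);
    for (int f = old_nproc; f < new_nproc; ++f)
      hot_team.worker_gtids.push_back(f); // "allocate" a new worker
  }
  return &hot_team;
}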
5583
5584/* TODO implement hot-teams at all levels */
5585/* TODO implement lazy thread release on demand (disband request) */
5586
5587/* free the team. return it to the team pool. release all the threads
5588 * associated with it */
5590 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5591 int f;
5592 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5593 team->t.t_id));
5594
5595 /* verify state */
5596 KMP_DEBUG_ASSERT(root);
5597 KMP_DEBUG_ASSERT(team);
5598 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5599 KMP_DEBUG_ASSERT(team->t.t_threads);
5600
5601 int use_hot_team = team == root->r.r_hot_team;
5602#if KMP_NESTED_HOT_TEAMS
5603 int level;
5604 if (master) {
5605 level = team->t.t_active_level - 1;
5606 if (master->th.th_teams_microtask) { // in teams construct?
5607 if (master->th.th_teams_size.nteams > 1) {
5608 ++level; // level was not increased in teams construct for
5609 // team_of_masters
5610 }
5611 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5612 master->th.th_teams_level == team->t.t_level) {
5613 ++level; // level was not increased in teams construct for
5614 // team_of_workers before the parallel
5615 } // team->t.t_level will be increased inside parallel
5616 }
5617#if KMP_DEBUG
5618 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5619#endif
5620 if (level < __kmp_hot_teams_max_level) {
5621 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5622 use_hot_team = 1;
5623 }
5624 }
5625#endif // KMP_NESTED_HOT_TEAMS
5626
5627 /* team is done working */
5628 TCW_SYNC_PTR(team->t.t_pkfn,
5629 NULL); // Important for Debugging Support Library.
5630#if KMP_OS_WINDOWS
5631 team->t.t_copyin_counter = 0; // init counter for possible reuse
5632#endif
5633 // Do not reset pointer to parent team to NULL for hot teams.
5634
5635 /* if we are non-hot team, release our threads */
5636 if (!use_hot_team) {
5638 // Wait for threads to reach reapable state
5639 for (f = 1; f < team->t.t_nproc; ++f) {
5640 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5641 kmp_info_t *th = team->t.t_threads[f];
5642 volatile kmp_uint32 *state = &th->th.th_reap_state;
5643 while (*state != KMP_SAFE_TO_REAP) {
5644#if KMP_OS_WINDOWS
5645 // On Windows a thread can be killed at any time, check this
5646 DWORD ecode;
5647 if (!__kmp_is_thread_alive(th, &ecode)) {
5648 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5649 break;
5650 }
5651#endif
5652 // first check if thread is sleeping
5653 if (th->th.th_sleep_loc)
5655 KMP_CPU_PAUSE();
5656 }
5657 }
5658
5659 // Delete task teams
5660 int tt_idx;
5661 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5662 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5663 if (task_team != NULL) {
5664 for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
5665 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5666 team->t.t_threads[f]->th.th_task_team = NULL;
5667 }
5668 KA_TRACE(
5669 20,
5670 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5671 __kmp_get_gtid(), task_team, team->t.t_id));
5672#if KMP_NESTED_HOT_TEAMS
5673 __kmp_free_task_team(master, task_team);
5674#endif
5675 team->t.t_task_team[tt_idx] = NULL;
5676 }
5677 }
5678 }
5679
5680 // Reset pointer to parent team only for non-hot teams.
5681 team->t.t_parent = NULL;
5682 team->t.t_level = 0;
5683 team->t.t_active_level = 0;
5684
5685 /* free the worker threads */
5686 for (f = 1; f < team->t.t_nproc; ++f) {
5687 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5689 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5690 1, 2);
5691 }
5692 __kmp_free_thread(team->t.t_threads[f]);
5693 }
5694
5696 if (team->t.b) {
5697 // wake up thread at old location
5698 team->t.b->go_release();
5700 for (f = 1; f < team->t.t_nproc; ++f) {
5701 if (team->t.b->sleep[f].sleep) {
5703 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5704 (kmp_atomic_flag_64<> *)NULL);
5705 }
5706 }
5707 }
5708 // Wait for threads to be removed from team
5709 for (int f = 1; f < team->t.t_nproc; ++f) {
5710 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5711 KMP_CPU_PAUSE();
5712 }
5713 }
5714 }
5715
5716 for (f = 1; f < team->t.t_nproc; ++f) {
5717 team->t.t_threads[f] = NULL;
5718 }
5719
5720 if (team->t.t_max_nproc > 1 &&
5723 team->t.b = NULL;
5724 }
5725 /* put the team back in the team pool */
5726 /* TODO limit size of team pool, call reap_team if pool too large */
5727 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5728 __kmp_team_pool = (volatile kmp_team_t *)team;
5729 } else { // Check if team was created for primary threads in teams construct
5730 // See if first worker is a CG root
5731 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5732 team->t.t_threads[1]->th.th_cg_roots);
5733 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5734 // Clean up the CG root nodes on workers so that this team can be re-used
5735 for (f = 1; f < team->t.t_nproc; ++f) {
5736 kmp_info_t *thr = team->t.t_threads[f];
5737 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5738 thr->th.th_cg_roots->cg_root == thr);
5739 // Pop current CG root off list
5740 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5741 thr->th.th_cg_roots = tmp->up;
5742 KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
5743 " up to node %p. cg_nthreads was %d\n",
5744 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5745 int i = tmp->cg_nthreads--;
5746 if (i == 1) {
5747 __kmp_free(tmp); // free CG if we are the last thread in it
5748 }
5749 // Restore current task's thread_limit from CG root
5750 if (thr->th.th_cg_roots)
5751 thr->th.th_current_task->td_icvs.thread_limit =
5752 thr->th.th_cg_roots->cg_thread_limit;
5753 }
5754 }
5755 }
5756
5757 KMP_MB();
5758}
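
// The else-branch above pops one contention-group (CG) root node off each
// worker's th_cg_roots list and frees the node when its thread count drops
// to zero. The sketch below shows just that reference-counted pop;
// ExampleCgRoot and example_pop_cg_root are hypothetical names and std::free
// stands in for __kmp_free.

#include <cstdlib>

struct ExampleCgRoot {
  ExampleCgRoot *up; // next node down the per-thread stack
  int cg_nthreads;   // threads still referencing this node
};

static void example_pop_cg_root(ExampleCgRoot **top) {
  ExampleCgRoot *tmp = *top;
  if (tmp == nullptr)
    return;
  *top = tmp->up;              // pop the node off the stack
  if (--tmp->cg_nthreads == 0) // last thread out frees the node
    std::free(tmp);
}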
5759
5760/* reap the team. destroy it, reclaim all its resources and free its memory */
5762 kmp_team_t *next_pool = team->t.t_next_pool;
5763
5764 KMP_DEBUG_ASSERT(team);
5765 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5766 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5767 KMP_DEBUG_ASSERT(team->t.t_threads);
5768 KMP_DEBUG_ASSERT(team->t.t_argv);
5769
5770 /* TODO clean the threads that are a part of this? */
5771
5772 /* free stuff */
5774 if (team->t.t_argv != &team->t.t_inline_argv[0])
5775 __kmp_free((void *)team->t.t_argv);
5776 __kmp_free(team);
5777
5778 KMP_MB();
5779 return next_pool;
5780}
5781
5782// Free the thread. Don't reap it, just place it on the pool of available
5783// threads.
5784//
5785// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5786// binding for the affinity mechanism to be useful.
5787//
5788// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5789// However, we want to avoid a potential performance problem by always
5790// scanning through the list to find the correct point at which to insert
5791// the thread (potential N**2 behavior). To do this we keep track of the
5792// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5793// With single-level parallelism, threads will always be added to the tail
5794// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5795// parallelism, all bets are off and we may need to scan through the entire
5796// free list.
5797//
5798// This change also has a potentially large performance benefit, for some
5799// applications. Previously, as threads were freed from the hot team, they
5800// would be placed back on the free list in inverse order. If the hot team
5801// grew back to its original size, then the freed thread would be placed
5802// back on the hot team in reverse order. This could cause bad cache
5803// locality problems on programs where the size of the hot team regularly
5804// grew and shrunk.
5805//
5806// Now, for single-level parallelism, the OMP tid is always == gtid.
5808 int gtid;
5809 kmp_info_t **scan;
5810
5811 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5812 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5813
5814 KMP_DEBUG_ASSERT(this_th);
5815
5816  // When moving the thread to the pool, switch it to wait on its own b_go
5817  // flag and clear its (now uninitialized) team pointer to NULL.
5818 int b;
5819 kmp_balign_t *balign = this_th->th.th_bar;
5820 for (b = 0; b < bs_last_barrier; ++b) {
5821 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5822 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5823 balign[b].bb.team = NULL;
5824 balign[b].bb.leaf_kids = 0;
5825 }
5826 this_th->th.th_task_state = 0;
5827 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5828
5829 /* put thread back on the free pool */
5830 TCW_PTR(this_th->th.th_team, NULL);
5831 TCW_PTR(this_th->th.th_root, NULL);
5832 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5833
5834 while (this_th->th.th_cg_roots) {
5835 this_th->th.th_cg_roots->cg_nthreads--;
5836 KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5837 " %p of thread %p to %d\n",
5838 this_th, this_th->th.th_cg_roots,
5839 this_th->th.th_cg_roots->cg_root,
5840 this_th->th.th_cg_roots->cg_nthreads));
5841 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5842 if (tmp->cg_root == this_th) { // Thread is a cg_root
5843 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5844 KA_TRACE(
5845 5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5846 this_th->th.th_cg_roots = tmp->up;
5847 __kmp_free(tmp);
5848 } else { // Worker thread
5849 if (tmp->cg_nthreads == 0) { // last thread leaves contention group
5850 __kmp_free(tmp);
5851 }
5852 this_th->th.th_cg_roots = NULL;
5853 break;
5854 }
5855 }
5856
5857  /* The implicit task assigned to this thread may be used by other threads
5858   * -> multiple threads can share the task data and try to free the task in
5859   * __kmp_reap_thread at exit. This duplicate use of the task data happens
5860   * with higher probability when the hot team is disabled, but it can occur
5861   * even when the hot team is enabled. */
5862 __kmp_free_implicit_task(this_th);
5863 this_th->th.th_current_task = NULL;
5864
5865 // If the __kmp_thread_pool_insert_pt is already past the new insert
5866 // point, then we need to re-scan the entire list.
5867 gtid = this_th->th.th_info.ds.ds_gtid;
5868 if (__kmp_thread_pool_insert_pt != NULL) {
5870 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5872 }
5873 }
5874
5875 // Scan down the list to find the place to insert the thread.
5876 // scan is the address of a link in the list, possibly the address of
5877 // __kmp_thread_pool itself.
5878 //
5879 // In the absence of nested parallelism, the for loop will have 0 iterations.
5880 if (__kmp_thread_pool_insert_pt != NULL) {
5881 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5882 } else {
5883 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5884 }
5885 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5886 scan = &((*scan)->th.th_next_pool))
5887 ;
5888
5889 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5890 // to its address.
5891 TCW_PTR(this_th->th.th_next_pool, *scan);
5892 __kmp_thread_pool_insert_pt = *scan = this_th;
5893 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5894 (this_th->th.th_info.ds.ds_gtid <
5895 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5896 TCW_4(this_th->th.th_in_pool, TRUE);
5898 __kmp_lock_suspend_mx(this_th);
5899 if (this_th->th.th_active == TRUE) {
5901 this_th->th.th_active_in_pool = TRUE;
5902 }
5903#if KMP_DEBUG
5904 else {
5905 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5906 }
5907#endif
5908 __kmp_unlock_suspend_mx(this_th);
5909
5911
5912#ifdef KMP_ADJUST_BLOCKTIME
5913 /* Adjust blocktime back to user setting or default if necessary */
5914 /* Middle initialization might never have occurred */
5915 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5917 if (__kmp_nth <= __kmp_avail_proc) {
5918 __kmp_zero_bt = FALSE;
5919 }
5920 }
5921#endif /* KMP_ADJUST_BLOCKTIME */
5922
5923 KMP_MB();
5924}
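
// Standalone sketch of the sorted free-pool insertion described in the
// comment above: the pool stays ordered by gtid, and a remembered insert
// point lets the common single-level case append near the tail without
// rescanning the whole list. All example_* names are hypothetical; the real
// pool links kmp_info_t structures through th_next_pool.

struct ExamplePoolThread {
  int gtid;
  ExamplePoolThread *next;
};

static ExamplePoolThread *example_pool = nullptr;      // head of the sorted pool
static ExamplePoolThread *example_insert_pt = nullptr; // last insertion point

static void example_pool_insert(ExamplePoolThread *th) {
  ExamplePoolThread **scan;
  // Start at the hint only if it is still before the new element; otherwise
  // fall back to a scan from the head of the list.
  if (example_insert_pt != nullptr && example_insert_pt->gtid < th->gtid)
    scan = &example_insert_pt->next;
  else
    scan = &example_pool;
  while (*scan != nullptr && (*scan)->gtid < th->gtid)
    scan = &(*scan)->next;
  th->next = *scan; // splice in, keeping the list sorted by gtid
  *scan = th;
  example_insert_pt = th; // remember where we inserted for next time
}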
5925
5926/* ------------------------------------------------------------------------ */
5927
5929#if OMP_PROFILING_SUPPORT
5930 ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
5931 // TODO: add a configuration option for time granularity
5932 if (ProfileTraceFile)
5933 llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
5934#endif
5935
5936 int gtid = this_thr->th.th_info.ds.ds_gtid;
5937 /* void *stack_data;*/
5938 kmp_team_t **volatile pteam;
5939
5940 KMP_MB();
5941 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5942
5944 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
5945 }
5946
5947#if OMPD_SUPPORT
5948 if (ompd_state & OMPD_ENABLE_BP)
5949 ompd_bp_thread_begin();
5950#endif
5951
5952#if OMPT_SUPPORT
5953 ompt_data_t *thread_data = nullptr;
5954 if (ompt_enabled.enabled) {
5955 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5956 *thread_data = ompt_data_none;
5957
5958 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5959 this_thr->th.ompt_thread_info.wait_id = 0;
5960 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5961 this_thr->th.ompt_thread_info.parallel_flags = 0;
5962 if (ompt_enabled.ompt_callback_thread_begin) {
5963 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5964 ompt_thread_worker, thread_data);
5965 }
5966 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5967 }
5968#endif
5969
5970 /* This is the place where threads wait for work */
5971 while (!TCR_4(__kmp_global.g.g_done)) {
5972 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5973 KMP_MB();
5974
5975 /* wait for work to do */
5976 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
5977
5978 /* No tid yet since not part of a team */
5980
5981#if OMPT_SUPPORT
5982 if (ompt_enabled.enabled) {
5983 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5984 }
5985#endif
5986
5987 pteam = &this_thr->th.th_team;
5988
5989 /* have we been allocated? */
5990 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5991 /* we were just woken up, so run our new task */
5992 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5993 int rc;
5994 KA_TRACE(20,
5995 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5996 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5997 (*pteam)->t.t_pkfn));
5998
5999 updateHWFPControl(*pteam);
6000
6001#if OMPT_SUPPORT
6002 if (ompt_enabled.enabled) {
6003 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6004 }
6005#endif
6006
6007 rc = (*pteam)->t.t_invoke(gtid);
6008 KMP_ASSERT(rc);
6009
6010 KMP_MB();
6011 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6012 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6013 (*pteam)->t.t_pkfn));
6014 }
6015#if OMPT_SUPPORT
6016 if (ompt_enabled.enabled) {
6017 /* no frame set while outside task */
6018 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6019
6020 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6021 }
6022#endif
6023 /* join barrier after parallel region */
6024 __kmp_join_barrier(gtid);
6025 }
6026 }
6027
6028#if OMPD_SUPPORT
6029 if (ompd_state & OMPD_ENABLE_BP)
6030 ompd_bp_thread_end();
6031#endif
6032
6033#if OMPT_SUPPORT
6034 if (ompt_enabled.ompt_callback_thread_end) {
6035 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6036 }
6037#endif
6038
6039 this_thr->th.th_task_team = NULL;
6040 /* run the destructors for the threadprivate data for this thread */
6042
6043 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
6044 KMP_MB();
6045
6046#if OMP_PROFILING_SUPPORT
6047 llvm::timeTraceProfilerFinishThread();
6048#endif
6049 return this_thr;
6050}
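
// Minimal sketch of the worker life cycle implemented above: park until work
// is published, run it, and loop until a global "done" flag is raised. The
// example_* names are hypothetical; the real runtime parks in the fork
// barrier and is handed a kmp_team_t with a microtask, not a bare function
// pointer, and it blocks instead of spinning.

#include <atomic>
#include <cstdio>

static std::atomic<bool> example_done{false};
static std::atomic<void (*)(int)> example_task{nullptr};

static void example_worker_loop(int gtid) {
  while (!example_done.load(std::memory_order_acquire)) {
    // Atomically claim whatever work was published, if any.
    void (*fn)(int) = example_task.exchange(nullptr, std::memory_order_acq_rel);
    if (fn != nullptr) {
      fn(gtid); // "invoke the microtask"
      std::printf("worker %d finished a task\n", gtid);
    }
    // A production implementation would sleep or wait on a flag here.
  }
}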
6051
6052/* ------------------------------------------------------------------------ */
6053
6054void __kmp_internal_end_dest(void *specific_gtid) {
6055 // Make sure no significant bits are lost
6056 int gtid;
6057 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
6058
6059 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6060  /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
6061 * this is because 0 is reserved for the nothing-stored case */
6062
6064}
6065
6066#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6067
6068__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6070}
6071
6072#endif
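
// The destructor attribute used above is a GCC/Clang extension: the marked
// function runs automatically when the shared library is unloaded (or at
// normal process exit). The isolated example below shows the mechanism with
// hypothetical names; it is not part of the runtime.

#include <cstdio>

__attribute__((constructor)) void example_lib_init(void) {
  std::printf("example library loaded\n");
}

__attribute__((destructor)) void example_lib_fini(void) {
  std::printf("example library unloading, running cleanup\n");
}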
6073
6074/* [Windows] josh: when the atexit handler is called, there may still be more
6075 than one thread alive */
6077 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
6078 /* [Windows]
6079 josh: ideally, we want to completely shutdown the library in this atexit
6080 handler, but stat code that depends on thread specific data for gtid fails
6081 because that data becomes unavailable at some point during the shutdown, so
6082 we call __kmp_internal_end_thread instead. We should eventually remove the
6083 dependency on __kmp_get_specific_gtid in the stat code and use
6084 __kmp_internal_end_library to cleanly shutdown the library.
6085
6086 // TODO: Can some of this comment about GVS be removed?
6087 I suspect that the offending stat code is executed when the calling thread
6088 tries to clean up a dead root thread's data structures, resulting in GVS
6089 code trying to close the GVS structures for that thread, but since the stat
6090 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
6091 the calling thread is cleaning up itself instead of another thread, it gets
6092 confused. This happens because allowing a thread to unregister and clean up
6093 another thread is a recent modification for addressing an issue.
6094 Based on the current design (20050722), a thread may end up
6095 trying to unregister another thread only if thread death does not trigger
6096 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
6097 thread specific data destructor function to detect thread death. For
6098 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
6099 is nothing. Thus, the workaround is applicable only for Windows static
6100 stat library. */
6102#if KMP_OS_WINDOWS
6104#endif
6105}
6106
6107static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6108 // It is assumed __kmp_forkjoin_lock is acquired.
6109
6110 int gtid;
6111
6112 KMP_DEBUG_ASSERT(thread != NULL);
6113
6114 gtid = thread->th.th_info.ds.ds_gtid;
6115
6116 if (!is_root) {
6118 /* Assume the threads are at the fork barrier here */
6119 KA_TRACE(
6120 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
6121 gtid));
6123 while (
6124 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6125 KMP_CPU_PAUSE();
6127 } else {
6128 /* Need release fence here to prevent seg faults for tree forkjoin
6129 barrier (GEH) */
6130 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6131 thread);
6133 }
6134 }
6135
6136 // Terminate OS thread.
6137 __kmp_reap_worker(thread);
6138
6139 // The thread was killed asynchronously. If it was actively
6140 // spinning in the thread pool, decrement the global count.
6141 //
6142 // There is a small timing hole here - if the worker thread was just waking
6143  // up after sleeping in the pool, had reset its th_active_in_pool flag but
6144 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
6145 // the global counter might not get updated.
6146 //
6147 // Currently, this can only happen as the library is unloaded,
6148 // so there are no harmful side effects.
6149 if (thread->th.th_active_in_pool) {
6150 thread->th.th_active_in_pool = FALSE;
6153 }
6154 }
6155
6157
6158// Free the fast memory for tasking
6159#if USE_FAST_MEMORY
6160 __kmp_free_fast_memory(thread);
6161#endif /* USE_FAST_MEMORY */
6162
6164
6165 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6166 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6167
6168 --__kmp_all_nth;
6169 // __kmp_nth was decremented when thread is added to the pool.
6170
6171#ifdef KMP_ADJUST_BLOCKTIME
6172 /* Adjust blocktime back to user setting or default if necessary */
6173 /* Middle initialization might never have occurred */
6174 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6176 if (__kmp_nth <= __kmp_avail_proc) {
6177 __kmp_zero_bt = FALSE;
6178 }
6179 }
6180#endif /* KMP_ADJUST_BLOCKTIME */
6181
6182 /* free the memory being used */
6184 if (thread->th.th_cons) {
6185 __kmp_free_cons_stack(thread->th.th_cons);
6186 thread->th.th_cons = NULL;
6187 }
6188 }
6189
6190 if (thread->th.th_pri_common != NULL) {
6191 __kmp_free(thread->th.th_pri_common);
6192 thread->th.th_pri_common = NULL;
6193 }
6194
6195#if KMP_USE_BGET
6196 if (thread->th.th_local.bget_data != NULL) {
6197 __kmp_finalize_bget(thread);
6198 }
6199#endif
6200
6201#if KMP_AFFINITY_SUPPORTED
6202 if (thread->th.th_affin_mask != NULL) {
6203 KMP_CPU_FREE(thread->th.th_affin_mask);
6204 thread->th.th_affin_mask = NULL;
6205 }
6206#endif /* KMP_AFFINITY_SUPPORTED */
6207
6208#if KMP_USE_HIER_SCHED
6209 if (thread->th.th_hier_bar_data != NULL) {
6210 __kmp_free(thread->th.th_hier_bar_data);
6211 thread->th.th_hier_bar_data = NULL;
6212 }
6213#endif
6214
6215 __kmp_reap_team(thread->th.th_serial_team);
6216 thread->th.th_serial_team = NULL;
6217 __kmp_free(thread);
6218
6219 KMP_MB();
6220
6221} // __kmp_reap_thread
6222
6224#if USE_ITT_NOTIFY
6225 if (__kmp_itt_region_domains.count > 0) {
6226 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6227 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6228 while (bucket) {
6229 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6230 __kmp_thread_free(th, bucket);
6231 bucket = next;
6232 }
6233 }
6234 }
6235 if (__kmp_itt_barrier_domains.count > 0) {
6236 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6237 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6238 while (bucket) {
6239 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6240 __kmp_thread_free(th, bucket);
6241 bucket = next;
6242 }
6243 }
6244 }
6245#endif
6246}
6247
6248static void __kmp_internal_end(void) {
6249 int i;
6250
6251 /* First, unregister the library */
6253
6254#if KMP_OS_WINDOWS
6255 /* In Win static library, we can't tell when a root actually dies, so we
6256 reclaim the data structures for any root threads that have died but not
6257 unregistered themselves, in order to shut down cleanly.
6258 In Win dynamic library we also can't tell when a thread dies. */
6259 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
6260// dead roots
6261#endif
6262
6263 for (i = 0; i < __kmp_threads_capacity; i++)
6264 if (__kmp_root[i])
6265 if (__kmp_root[i]->r.r_active)
6266 break;
6267 KMP_MB(); /* Flush all pending memory write invalidates. */
6268 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6269
6270 if (i < __kmp_threads_capacity) {
6271#if KMP_USE_MONITOR
6272 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
6273 KMP_MB(); /* Flush all pending memory write invalidates. */
6274
6275 // Need to check that monitor was initialized before reaping it. If we are
6276    // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
6277 // __kmp_monitor will appear to contain valid data, but it is only valid in
6278 // the parent process, not the child.
6279 // New behavior (201008): instead of keying off of the flag
6280 // __kmp_init_parallel, the monitor thread creation is keyed off
6281 // of the new flag __kmp_init_monitor.
6282 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6283 if (TCR_4(__kmp_init_monitor)) {
6285 TCW_4(__kmp_init_monitor, 0);
6286 }
6287 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6288 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6289#endif // KMP_USE_MONITOR
6290 } else {
6291/* TODO move this to cleanup code */
6292#ifdef KMP_DEBUG
6293 /* make sure that everything has properly ended */
6294 for (i = 0; i < __kmp_threads_capacity; i++) {
6295 if (__kmp_root[i]) {
6296 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
6297 // there can be uber threads alive here
6298 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
6299 }
6300 }
6301#endif
6302
6303 KMP_MB();
6304
6305 // Reap the worker threads.
6306 // This is valid for now, but be careful if threads are reaped sooner.
6307    while (__kmp_thread_pool != NULL) { // Loop through all the threads in the pool.
6308 // Get the next thread from the pool.
6310 __kmp_thread_pool = thread->th.th_next_pool;
6311 // Reap it.
6312 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6313 thread->th.th_next_pool = NULL;
6314 thread->th.th_in_pool = FALSE;
6315 __kmp_reap_thread(thread, 0);
6316 }
6318
6319 // Reap teams.
6320    while (__kmp_team_pool != NULL) { // Loop through all the teams in the pool.
6321 // Get the next team from the pool.
6323 __kmp_team_pool = team->t.t_next_pool;
6324 // Reap it.
6325 team->t.t_next_pool = NULL;
6326 __kmp_reap_team(team);
6327 }
6328
6330
6331#if KMP_OS_UNIX
6332 // Threads that are not reaped should not access any resources since they
6333 // are going to be deallocated soon, so the shutdown sequence should wait
6334 // until all threads either exit the final spin-waiting loop or begin
6335 // sleeping after the given blocktime.
6336 for (i = 0; i < __kmp_threads_capacity; i++) {
6337 kmp_info_t *thr = __kmp_threads[i];
6338 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6339 KMP_CPU_PAUSE();
6340 }
6341#endif
6342
6343 for (i = 0; i < __kmp_threads_capacity; ++i) {
6344 // TBD: Add some checking...
6345 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
6346 }
6347
6348 /* Make sure all threadprivate destructors get run by joining with all
6349 worker threads before resetting this flag */
6351
6352 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
6353 KMP_MB();
6354
6355#if KMP_USE_MONITOR
6356 // See note above: One of the possible fixes for CQ138434 / CQ140126
6357 //
6358 // FIXME: push both code fragments down and CSE them?
6359 // push them into __kmp_cleanup() ?
6360 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6361 if (TCR_4(__kmp_init_monitor)) {
6363 TCW_4(__kmp_init_monitor, 0);
6364 }
6365 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6366 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6367#endif
6368 } /* else !__kmp_global.t_active */
6370 KMP_MB(); /* Flush all pending memory write invalidates. */
6371
6372 __kmp_cleanup();
6373#if OMPT_SUPPORT
6374 ompt_fini();
6375#endif
6376}
6377
6378void __kmp_internal_end_library(int gtid_req) {
6379 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6380 /* this shouldn't be a race condition because __kmp_internal_end() is the
6381 only place to clear __kmp_serial_init */
6382 /* we'll check this later too, after we get the lock */
6383 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6384 // redundant, because the next check will work in any case.
6385 if (__kmp_global.g.g_abort) {
6386 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
6387 /* TODO abort? */
6388 return;
6389 }
6390 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6391 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
6392 return;
6393 }
6394
6395 // If hidden helper team has been initialized, we need to deinit it
6399 // First release the main thread to let it continue its work
6401 // Wait until the hidden helper team has been destroyed
6403 }
6404
6405 KMP_MB(); /* Flush all pending memory write invalidates. */
6406 /* find out who we are and what we should do */
6407 {
6408 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6409 KA_TRACE(
6410 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6411 if (gtid == KMP_GTID_SHUTDOWN) {
6412 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6413 "already shutdown\n"));
6414 return;
6415 } else if (gtid == KMP_GTID_MONITOR) {
6416 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6417 "registered, or system shutdown\n"));
6418 return;
6419 } else if (gtid == KMP_GTID_DNE) {
6420 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
6421 "shutdown\n"));
6422 /* we don't know who we are, but we may still shutdown the library */
6423 } else if (KMP_UBER_GTID(gtid)) {
6424 /* unregister ourselves as an uber thread. gtid is no longer valid */
6425 if (__kmp_root[gtid]->r.r_active) {
6426 __kmp_global.g.g_abort = -1;
6427 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6429 KA_TRACE(10,
6430 ("__kmp_internal_end_library: root still active, abort T#%d\n",
6431 gtid));
6432 return;
6433 } else {
6435 KA_TRACE(
6436 10,
6437 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6439 }
6440 } else {
6441/* worker threads may call this function through the atexit handler, if they
6442 * call exit() */
6443/* For now, skip the usual subsequent processing and just dump the debug buffer.
6444 TODO: do a thorough shutdown instead */
6445#ifdef DUMP_DEBUG_ON_EXIT
6446 if (__kmp_debug_buf)
6448#endif
6449      // An unregister-library call was added here for the shm-on-Linux case;
6450      // without it, lots of files would be left behind in /dev/shm.
6451      // Clean up the shared memory file before exiting.
6453 return;
6454 }
6455 }
6456 /* synchronize the termination process */
6458
6459 /* have we already finished */
6460 if (__kmp_global.g.g_abort) {
6461 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6462 /* TODO abort? */
6464 return;
6465 }
6466 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6468 return;
6469 }
6470
6471  /* We need this lock to enforce mutual exclusion between this reading of
6472 __kmp_threads_capacity and the writing by __kmp_register_root.
6473 Alternatively, we can use a counter of roots that is atomically updated by
6474 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6475 __kmp_internal_end_*. */
6477
6478 /* now we can safely conduct the actual termination */
6480
6483
6484 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
6485
6486#ifdef DUMP_DEBUG_ON_EXIT
6487 if (__kmp_debug_buf)
6489#endif
6490
6491#if KMP_OS_WINDOWS
6493#endif
6494
6496
6497} // __kmp_internal_end_library
6498
6499void __kmp_internal_end_thread(int gtid_req) {
6500 int i;
6501
6502 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6503 /* this shouldn't be a race condition because __kmp_internal_end() is the
6504 * only place to clear __kmp_serial_init */
6505 /* we'll check this later too, after we get the lock */
6506 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6507 // redundant, because the next check will work in any case.
6508 if (__kmp_global.g.g_abort) {
6509 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6510 /* TODO abort? */
6511 return;
6512 }
6513 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6514 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6515 return;
6516 }
6517
6518 // If hidden helper team has been initialized, we need to deinit it
6522 // First release the main thread to let it continue its work
6524 // Wait until the hidden helper team has been destroyed
6526 }
6527
6528 KMP_MB(); /* Flush all pending memory write invalidates. */
6529
6530 /* find out who we are and what we should do */
6531 {
6532 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6533 KA_TRACE(10,
6534 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6535 if (gtid == KMP_GTID_SHUTDOWN) {
6536 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6537 "already shutdown\n"));
6538 return;
6539 } else if (gtid == KMP_GTID_MONITOR) {
6540 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6541 "registered, or system shutdown\n"));
6542 return;
6543 } else if (gtid == KMP_GTID_DNE) {
6544 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6545 "shutdown\n"));
6546 return;
6547 /* we don't know who we are */
6548 } else if (KMP_UBER_GTID(gtid)) {
6549 /* unregister ourselves as an uber thread. gtid is no longer valid */
6550 if (__kmp_root[gtid]->r.r_active) {
6551 __kmp_global.g.g_abort = -1;
6552 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6553 KA_TRACE(10,
6554 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6555 gtid));
6556 return;
6557 } else {
6558 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6559 gtid));
6561 }
6562 } else {
6563 /* just a worker thread, let's leave */
6564 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6565
6566 if (gtid >= 0) {
6567 __kmp_threads[gtid]->th.th_task_team = NULL;
6568 }
6569
6570 KA_TRACE(10,
6571 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6572 gtid));
6573 return;
6574 }
6575 }
6576#if KMP_DYNAMIC_LIB
6578  // AC: let's not shut down the dynamic library at the exit of an uber thread;
6579  // it is better to shut down later, in the library destructor.
6580 {
6581 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6582 return;
6583 }
6584#endif
6585 /* synchronize the termination process */
6587
6588 /* have we already finished */
6589 if (__kmp_global.g.g_abort) {
6590 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6591 /* TODO abort? */
6593 return;
6594 }
6595 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6597 return;
6598 }
6599
6600  /* We need this lock to enforce mutual exclusion between this reading of
6601 __kmp_threads_capacity and the writing by __kmp_register_root.
6602 Alternatively, we can use a counter of roots that is atomically updated by
6603 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6604 __kmp_internal_end_*. */
6605
6606 /* should we finish the run-time? are all siblings done? */
6608
6609 for (i = 0; i < __kmp_threads_capacity; ++i) {
6610 if (KMP_UBER_GTID(i)) {
6611 KA_TRACE(
6612 10,
6613 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6616 return;
6617 }
6618 }
6619
6620 /* now we can safely conduct the actual termination */
6621
6623
6626
6627 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6628
6629#ifdef DUMP_DEBUG_ON_EXIT
6630 if (__kmp_debug_buf)
6632#endif
6633} // __kmp_internal_end_thread
6634
6635// -----------------------------------------------------------------------------
6636// Library registration stuff.
6637
6639// Random value used to indicate library initialization.
6640static char *__kmp_registration_str = NULL;
6641// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6642
6643static inline char *__kmp_reg_status_name() {
6644/* On RHEL 3u5 if linked statically, getpid() returns different values in
6645 each thread. If registration and unregistration go in different threads
6646 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6647   env var cannot be found, because the name will contain a different pid. */
6648// macOS* complains about name being too long with additional getuid()
6649#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6650 return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
6651 (int)getuid());
6652#else
6653 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6654#endif
6655} // __kmp_reg_status_name
6656
6657#if defined(KMP_USE_SHM)
6658bool __kmp_shm_available = false;
6659bool __kmp_tmp_available = false;
6660// If /dev/shm is not accessible, we will create a temporary file under /tmp.
6661char *temp_reg_status_file_name = nullptr;
6662#endif
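
// Sketch of the registration handshake implemented below, restricted to the
// plain environment-variable fallback: publish a process-unique value without
// overwriting an existing one, read it back, and treat a mismatch as "another
// copy of the runtime registered first". The example_* name, the fixed
// buffers, and the value format are assumptions; the real code also tries
// /dev/shm and /tmp and encodes a timestamp-based flag plus the library file
// name.

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <unistd.h>

static bool example_register_library(void) {
  char name[64], value[64];
  std::snprintf(name, sizeof(name), "__EXAMPLE_REGISTERED_LIB_%d", (int)getpid());
  std::snprintf(value, sizeof(value), "registered-by-%d", (int)getpid());
  setenv(name, value, 0); // do not overwrite an existing registration
  const char *seen = getenv(name);
  if (seen != NULL && std::strcmp(seen, value) == 0)
    return true; // we won the race; this copy is the registered runtime
  return false;  // another copy (alive or dead) holds the registration
}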
6663
6665
6666 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6667 int done = 0;
6668 union {
6669 double dtime;
6670 long ltime;
6671 } time;
6672#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6674#endif
6675 __kmp_read_system_time(&time.dtime);
6676 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6679 __kmp_registration_flag, KMP_LIBRARY_FILE);
6680
6681 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6683
6684 while (!done) {
6685
6686 char *value = NULL; // Actual value of the environment variable.
6687
6688#if defined(KMP_USE_SHM)
6689 char *shm_name = nullptr;
6690 char *data1 = nullptr;
6691 __kmp_shm_available = __kmp_detect_shm();
6692 if (__kmp_shm_available) {
6693 int fd1 = -1;
6694 shm_name = __kmp_str_format("/%s", name);
6695 int shm_preexist = 0;
6696 fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6697 if ((fd1 == -1) && (errno == EEXIST)) {
6698 // file didn't open because it already exists.
6699 // try opening existing file
6700 fd1 = shm_open(shm_name, O_RDWR, 0600);
6701 if (fd1 == -1) { // file didn't open
6702 KMP_WARNING(FunctionError, "Can't open SHM");
6703 __kmp_shm_available = false;
6704 } else { // able to open existing file
6705 shm_preexist = 1;
6706 }
6707 }
6708 if (__kmp_shm_available && shm_preexist == 0) { // SHM created, set size
6709      if (ftruncate(fd1, SHM_SIZE) == -1) { // error occurred setting size
6710 KMP_WARNING(FunctionError, "Can't set size of SHM");
6711 __kmp_shm_available = false;
6712 }
6713 }
6714 if (__kmp_shm_available) { // SHM exists, now map it
6715 data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6716 fd1, 0);
6717 if (data1 == MAP_FAILED) { // failed to map shared memory
6718 KMP_WARNING(FunctionError, "Can't map SHM");
6719 __kmp_shm_available = false;
6720 }
6721 }
6722 if (__kmp_shm_available) { // SHM mapped
6723 if (shm_preexist == 0) { // set data to SHM, set value
6724 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6725 }
6726 // Read value from either what we just wrote or existing file.
6727 value = __kmp_str_format("%s", data1); // read value from SHM
6728 munmap(data1, SHM_SIZE);
6729 }
6730 if (fd1 != -1)
6731 close(fd1);
6732 }
6733 if (!__kmp_shm_available)
6734 __kmp_tmp_available = __kmp_detect_tmp();
6735 if (!__kmp_shm_available && __kmp_tmp_available) {
6736 // SHM failed to work due to an error other than that the file already
6737 // exists. Try to create a temp file under /tmp.
6738 // If /tmp isn't accessible, fall back to using environment variable.
6739 // TODO: /tmp might not always be the temporary directory. For now we will
6740 // not consider TMPDIR.
6741 int fd1 = -1;
6742 temp_reg_status_file_name = __kmp_str_format("/tmp/%s", name);
6743 int tmp_preexist = 0;
6744 fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6745 if ((fd1 == -1) && (errno == EEXIST)) {
6746 // file didn't open because it already exists.
6747 // try opening existing file
6748 fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
6749 if (fd1 == -1) { // file didn't open
6750 KMP_WARNING(FunctionError, "Can't open TEMP");
6751 __kmp_tmp_available = false;
6752 } else {
6753 tmp_preexist = 1;
6754 }
6755 }
6756 if (__kmp_tmp_available && tmp_preexist == 0) {
6757 // we created /tmp file now set size
6758 if (ftruncate(fd1, SHM_SIZE) == -1) { // error occurred setting size
6759 KMP_WARNING(FunctionError, "Can't set size of /tmp file");
6760 __kmp_tmp_available = false;
6761 }
6762 }
6763 if (__kmp_tmp_available) {
6764 data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6765 fd1, 0);
6766 if (data1 == MAP_FAILED) { // failed to map /tmp
6767 KMP_WARNING(FunctionError, "Can't map /tmp");
6768 __kmp_tmp_available = false;
6769 }
6770 }
6771 if (__kmp_tmp_available) {
6772 if (tmp_preexist == 0) { // set data to TMP, set value
6773 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6774 }
6775 // Read value from either what we just wrote or existing file.
6776 value = __kmp_str_format("%s", data1); // read value from /tmp file
6777 munmap(data1, SHM_SIZE);
6778 }
6779 if (fd1 != -1)
6780 close(fd1);
6781 }
6782 if (!__kmp_shm_available && !__kmp_tmp_available) {
6783 // no /dev/shm and no /tmp -- fall back to environment variable
6784 // Set environment variable, but do not overwrite if it exists.
6785 __kmp_env_set(name, __kmp_registration_str, 0);
6786 // read value to see if it got set
6787 value = __kmp_env_get(name);
6788 }
6789#else // Windows and unix with static library
6790 // Set environment variable, but do not overwrite if it exists.
6791 __kmp_env_set(name, __kmp_registration_str, 0);
6792 // read value to see if it got set
6793 value = __kmp_env_get(name);
6794#endif
6795
6796 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6797 done = 1; // Ok, environment variable set successfully, exit the loop.
6798 } else {
6799 // Oops. Write failed. Another copy of the OpenMP RTL is in memory.
6800 // Check whether it is alive or dead.
6801 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6802 char *tail = value;
6803 char *flag_addr_str = NULL;
6804 char *flag_val_str = NULL;
6805 char const *file_name = NULL;
6806 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6807 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6808 file_name = tail;
6809 if (tail != NULL) {
6810 unsigned long *flag_addr = 0;
6811 unsigned long flag_val = 0;
6812 KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
6813 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6814 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6815 // First, check whether environment-encoded address is mapped into
6816 // addr space.
6817 // If so, dereference it to see if it still has the right value.
6818 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6819 neighbor = 1;
6820 } else {
6821 // If not, then we know the other copy of the library is no longer
6822 // running.
6823 neighbor = 2;
6824 }
6825 }
6826 }
6827 switch (neighbor) {
6828 case 0: // Cannot parse environment variable -- neighbor status unknown.
6829 // Assume it is an incompatible format from a future version of the
6830 // library. Assume the other library is alive.
6831 // WARN( ... ); // TODO: Issue a warning.
6832 file_name = "unknown library";
6833 KMP_FALLTHROUGH();
6834 // Attention! Falling through to the next case is intentional.
6835 case 1: { // Neighbor is alive.
6836 // Check it is allowed.
6837 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6838 if (!__kmp_str_match_true(duplicate_ok)) {
6839 // That's not allowed. Issue fatal error.
6840 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6841 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6842 }
6843 KMP_INTERNAL_FREE(duplicate_ok);
6844 __kmp_duplicate_library_ok = 1;
6845 done = 1; // Exit the loop.
6846 } break;
6847 case 2: { // Neighbor is dead.
6848
6849#if defined(KMP_USE_SHM)
6850 if (__kmp_shm_available) { // close shared memory.
6851 shm_unlink(shm_name); // this removes file in /dev/shm
6852 } else if (__kmp_tmp_available) {
6853 unlink(temp_reg_status_file_name); // this removes the temp file
6854 } else {
6855 // Clear the variable and try to register the library again.
6856 __kmp_env_unset(name);
6857 }
6858#else
6859 // Clear the variable and try to register the library again.
6860 __kmp_env_unset(name);
6861#endif
6862 } break;
6863 default: {
6864 KMP_DEBUG_ASSERT(0);
6865 } break;
6866 }
6867 }
6868 KMP_INTERNAL_FREE((void *)value);
6869#if defined(KMP_USE_SHM)
6870 if (shm_name)
6871 KMP_INTERNAL_FREE((void *)shm_name);
6872#endif
6873 } // while
6874 KMP_INTERNAL_FREE((void *)name);
6875
6876} // func __kmp_register_library_startup
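/* Illustrative example of the registration value (all values made up): with
   __kmp_registration_flag == 0xcafe3b7f, the stored string has the shape
       "0x7f1234567890-cafe3b7f-libomp.so"
   i.e. "<address of flag>-<flag value in hex>-<library file>". A second copy
   of the runtime parses it with two '-' splits and treats the previous owner
   as alive only if the encoded address is still mapped and still holds the
   encoded value, as implemented above. */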
6877
6878void __kmp_unregister_library(void) {
6879
6880 char *name = __kmp_reg_status_name();
6881 char *value = NULL;
6882
6883#if defined(KMP_USE_SHM)
6884 char *shm_name = nullptr;
6885 int fd1;
6886 if (__kmp_shm_available) {
6887 shm_name = __kmp_str_format("/%s", name);
6888 fd1 = shm_open(shm_name, O_RDONLY, 0600);
6889 if (fd1 != -1) { // File opened successfully
6890 char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6891 if (data1 != MAP_FAILED) {
6892 value = __kmp_str_format("%s", data1); // read value from SHM
6893 munmap(data1, SHM_SIZE);
6894 }
6895 close(fd1);
6896 }
6897 } else if (__kmp_tmp_available) { // try /tmp
6898 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6899 if (fd1 != -1) { // File opened successfully
6900 char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6901 if (data1 != MAP_FAILED) {
6902 value = __kmp_str_format("%s", data1); // read value from /tmp
6903 munmap(data1, SHM_SIZE);
6904 }
6905 close(fd1);
6906 }
6907 } else { // fall back to the environment variable
6908 value = __kmp_env_get(name);
6909 }
6910#else
6911 value = __kmp_env_get(name);
6912#endif
6913
6916 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6917// Ok, this is our variable. Delete it.
6918#if defined(KMP_USE_SHM)
6919 if (__kmp_shm_available) {
6920 shm_unlink(shm_name); // this removes file in /dev/shm
6921 } else if (__kmp_tmp_available) {
6922 unlink(temp_reg_status_file_name); // this removes the temp file
6923 } else {
6924 __kmp_env_unset(name);
6925 }
6926#else
6927 __kmp_env_unset(name);
6928#endif
6929 }
6930
6931#if defined(KMP_USE_SHM)
6932 if (shm_name)
6933 KMP_INTERNAL_FREE(shm_name);
6934 if (temp_reg_status_file_name)
6935 KMP_INTERNAL_FREE(temp_reg_status_file_name);
6936#endif
6937
6941
6944
6945} // __kmp_unregister_library
6946
6947// End of Library registration stuff.
6948// -----------------------------------------------------------------------------
6949
6950#if KMP_MIC_SUPPORTED
6951
6952static void __kmp_check_mic_type() {
6953 kmp_cpuid_t cpuid_state = {0};
6954 kmp_cpuid_t *cs_p = &cpuid_state;
6955 __kmp_x86_cpuid(1, 0, cs_p);
6956 // We don't support mic1 at the moment
6957 if ((cs_p->eax & 0xff0) == 0xB10) {
6958 __kmp_mic_type = mic2;
6959 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6960 __kmp_mic_type = mic3;
6961 } else {
6962 __kmp_mic_type = non_mic;
6963 }
6964}
6965
6966#endif /* KMP_MIC_SUPPORTED */
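/* Background note (general x86, not taken from this file): CPUID leaf 1
   returns the family/model/stepping signature in EAX; the masks above select
   the model bits that distinguish KNC (signature 0x0B1x -> mic2) from KNL
   (0x5067x -> mic3). A minimal stand-alone check along the same lines:

     kmp_cpuid_t id;
     __kmp_x86_cpuid(1, 0, &id);
     bool is_knl = ((id.eax & 0xf0ff0) == 0x50670); // illustrative only
*/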
6967
6968#if KMP_HAVE_UMWAIT
6969static void __kmp_user_level_mwait_init() {
6970 struct kmp_cpuid buf;
6971 __kmp_x86_cpuid(7, 0, &buf);
6972 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6973 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6974 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6975 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
6976 __kmp_umwait_enabled));
6977}
6978#elif KMP_HAVE_MWAIT
6979#ifndef AT_INTELPHIUSERMWAIT
6980// Spurious, non-existent value that should always fail to return anything.
6981// Will be replaced with the correct value when we know that.
6982#define AT_INTELPHIUSERMWAIT 10000
6983#endif
6984// getauxval() function is available in RHEL7 and SLES12. If a system with an
6985// earlier OS is used to build the RTL, we'll use the following internal
6986// function when the entry is not found.
6987unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
6988unsigned long getauxval(unsigned long) { return 0; }
6989
6990static void __kmp_user_level_mwait_init() {
6991 // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
6992 // use them to find if the user-level mwait is enabled. Otherwise, forcibly
6993 // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
6994 // KMP_USER_LEVEL_MWAIT was set to TRUE.
6995 if (__kmp_mic_type == mic3) {
6996 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6997 if ((res & 0x1) || __kmp_user_level_mwait) {
6998 __kmp_mwait_enabled = TRUE;
6999 if (__kmp_user_level_mwait) {
7000 KMP_INFORM(EnvMwaitWarn);
7001 }
7002 } else {
7003 __kmp_mwait_enabled = FALSE;
7004 }
7005 }
7006 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7007 "__kmp_mwait_enabled = %d\n",
7008 __kmp_mic_type, __kmp_mwait_enabled));
7009}
7010#endif /* KMP_HAVE_UMWAIT */
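// Note (general x86 background, not from this file): CPUID leaf 7, subleaf 0
// reports the WAITPKG feature (UMONITOR/UMWAIT/TPAUSE) in ECX bit 5, which is
// exactly the bit tested by __kmp_user_level_mwait_init() above.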
7011
7012static void __kmp_do_serial_initialize(void) {
7013 int i, gtid;
7014 size_t size;
7015
7016 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
7017
7018 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
7019 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
7020 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
7021 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
7022 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7023
7024#if OMPT_SUPPORT
7025 ompt_pre_init();
7026#endif
7027#if OMPD_SUPPORT
7028 __kmp_env_dump();
7029 ompd_init();
7030#endif
7031
7033
7034#if ENABLE_LIBOMPTARGET
7035 /* Initialize functions from libomptarget */
7036 __kmp_init_omptarget();
7037#endif
7038
7039 /* Initialize internal memory allocator */
7040 __kmp_init_allocator();
7041
7042 /* Register the library startup via an environment variable or via mapped
7043 shared memory file and check to see whether another copy of the library is
7044 already registered. Since forked child process is often terminated, we
7045 postpone the registration till middle initialization in the child */
7046 if (__kmp_need_register_serial)
7047 __kmp_register_library_startup();
7048
7049 /* TODO reinitialization of library */
7050 if (TCR_4(__kmp_global.g.g_done)) {
7051 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
7052 }
7053
7054 __kmp_global.g.g_abort = 0;
7055 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7056
7057/* initialize the locks */
7058#if KMP_USE_ADAPTIVE_LOCKS
7059#if KMP_DEBUG_ADAPTIVE_LOCKS
7060 __kmp_init_speculative_stats();
7061#endif
7062#endif
7063#if KMP_STATS_ENABLED
7064 __kmp_stats_init();
7065#endif
7084#if KMP_USE_MONITOR
7085 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7086#endif
7088
7089 /* conduct initialization and initial setup of configuration */
7090
7092
7093#if KMP_MIC_SUPPORTED
7094 __kmp_check_mic_type();
7095#endif
7096
7097// Some global variable initialization moved here from kmp_env_initialize()
7098#ifdef KMP_DEBUG
7099 kmp_diag = 0;
7100#endif
7102
7103 // From __kmp_init_dflt_team_nth()
7104 /* assume the entire machine will be used */
7108 }
7111 }
7114 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
7117 }
7118
7119 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
7120 // part
7122#if KMP_USE_MONITOR
7123 __kmp_monitor_wakeups =
7124 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7125 __kmp_bt_intervals =
7126 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7127#endif
7128 // From "KMP_LIBRARY" part of __kmp_env_initialize()
7130 // From KMP_SCHEDULE initialization
7132// AC: do not use analytical here, because it is non-monotonous
7133//__kmp_guided = kmp_sch_guided_iterative_chunked;
7134//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
7135// need to repeat assignment
7136// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
7137// bit control and barrier method control parts
7138#if KMP_FAST_REDUCTION_BARRIER
7139#define kmp_reduction_barrier_gather_bb ((int)1)
7140#define kmp_reduction_barrier_release_bb ((int)1)
7141#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7142#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7143#endif // KMP_FAST_REDUCTION_BARRIER
7144 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7149#if KMP_FAST_REDUCTION_BARRIER
7150 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
7151 // lin_64 ): hyper,1
7152 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7153 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7154 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7155 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7156 }
7157#endif // KMP_FAST_REDUCTION_BARRIER
7158 }
7159#if KMP_FAST_REDUCTION_BARRIER
7160#undef kmp_reduction_barrier_release_pat
7161#undef kmp_reduction_barrier_gather_pat
7162#undef kmp_reduction_barrier_release_bb
7163#undef kmp_reduction_barrier_gather_bb
7164#endif // KMP_FAST_REDUCTION_BARRIER
7165#if KMP_MIC_SUPPORTED
7166 if (__kmp_mic_type == mic2) { // KNC
7167 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
7170 1; // forkjoin release
7173 }
7174#if KMP_FAST_REDUCTION_BARRIER
7175 if (__kmp_mic_type == mic2) { // KNC
7178 }
7179#endif // KMP_FAST_REDUCTION_BARRIER
7180#endif // KMP_MIC_SUPPORTED
7181
7182// From KMP_CHECKS initialization
7183#ifdef KMP_DEBUG
7184 __kmp_env_checks = TRUE; /* development versions have the extra checks */
7185#else
7186 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
7187#endif
7188
7189 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
7190 __kmp_foreign_tp = TRUE;
7191
7192 __kmp_global.g.g_dynamic = FALSE;
7193 __kmp_global.g.g_dynamic_mode = dynamic_default;
7194
7196
7198
7199#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7200 __kmp_user_level_mwait_init();
7201#endif
7202// Print all messages in message catalog for testing purposes.
7203#ifdef KMP_DEBUG
7204 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
7206 kmp_str_buf_t buffer;
7207 __kmp_str_buf_init(&buffer);
7208 __kmp_i18n_dump_catalog(&buffer);
7209 __kmp_printf("%s", buffer.str);
7210 __kmp_str_buf_free(&buffer);
7211 }
7213#endif
7214
7217 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
7220
7221 // If the library is shut down properly, both pools must be NULL. Just in
7222 // case, set them to NULL -- some memory may leak, but subsequent code will
7223 // work even if pools are not freed.
7227 __kmp_thread_pool = NULL;
7229 __kmp_team_pool = NULL;
7230
7231 /* Allocate all of the variable sized records */
7232 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
7233 * expandable */
7234 /* Since allocation is cache-aligned, just add extra padding at the end */
7235 size =
7236 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
7237 CACHE_LINE;
7238 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
7239 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
7240 sizeof(kmp_info_t *) * __kmp_threads_capacity);
7241
7242 /* init thread counts */
7243 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7244 0); // Asserts fail if the library is reinitializing and
7245 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
7246 __kmp_all_nth = 0;
7247 __kmp_nth = 0;
7248
7249 /* setup the uber master thread and hierarchy */
7250 gtid = __kmp_register_root(TRUE);
7251 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
7254
7255 KMP_MB(); /* Flush all pending memory write invalidates. */
7256
7258
7259#if KMP_OS_UNIX
7260 /* invoke the child fork handler */
7261 __kmp_register_atfork();
7262#endif
7263
7264#if !KMP_DYNAMIC_LIB || \
7265 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7266 {
7267 /* Invoke the exit handler when the program finishes, only for static
7268 library and macOS* dynamic. For other dynamic libraries, we already
7269 have _fini and DllMain. */
7270 int rc = atexit(__kmp_internal_end_atexit);
7271 if (rc != 0) {
7272 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
7273 __kmp_msg_null);
7274 }
7275 }
7276#endif
7277
7278#if KMP_HANDLE_SIGNALS
7279#if KMP_OS_UNIX
7280 /* NOTE: make sure that this is called before the user installs their own
7281 signal handlers so that the user handlers are called first. this way they
7282 can return false, not call our handler, avoid terminating the library, and
7283 continue execution where they left off. */
7284 __kmp_install_signals(FALSE);
7285#endif /* KMP_OS_UNIX */
7286#if KMP_OS_WINDOWS
7287 __kmp_install_signals(TRUE);
7288#endif /* KMP_OS_WINDOWS */
7289#endif
7290
7291 /* we have finished the serial initialization */
7293
7295
7296 if (__kmp_version) {
7297 __kmp_print_version_1();
7298 }
7299
7300 if (__kmp_settings) {
7301 __kmp_env_print();
7302 }
7303
7306 }
7307
7308#if OMPT_SUPPORT
7309 ompt_post_init();
7310#endif
7311
7312 KMP_MB();
7313
7314 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
7315}
7316
7317void __kmp_serial_initialize(void) {
7318 if (__kmp_init_serial) {
7319 return;
7320 }
7321 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7322 if (__kmp_init_serial) {
7323 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7324 return;
7325 }
7326 __kmp_do_serial_initialize();
7327 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7328}
7329
7330static void __kmp_do_middle_initialize(void) {
7331 int i, j;
7332 int prev_dflt_team_nth;
7333
7334 if (!__kmp_init_serial) {
7336 }
7337
7338 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
7339
7340 if (__kmp_need_register_serial) {
7341 // We are in a forked child process. The registration was skipped during
7342 // serial initialization in the __kmp_atfork_child handler. Do it here.
7343 __kmp_register_library_startup();
7344 }
7345
7346 // Save the previous value for the __kmp_dflt_team_nth so that
7347 // we can avoid some reinitialization if it hasn't changed.
7348 prev_dflt_team_nth = __kmp_dflt_team_nth;
7349
7350#if KMP_AFFINITY_SUPPORTED
7351 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
7352 // number of cores on the machine.
7353 __kmp_affinity_initialize(__kmp_affinity);
7354
7355#endif /* KMP_AFFINITY_SUPPORTED */
7356
7358 if (__kmp_avail_proc == 0) {
7359 __kmp_avail_proc = __kmp_xproc;
7360 }
7361
7362 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
7363 // correct them now
7364 j = 0;
7365 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7368 j++;
7369 }
7370
7371 if (__kmp_dflt_team_nth == 0) {
7372#ifdef KMP_DFLT_NTH_CORES
7373 // Default #threads = #cores
7374 __kmp_dflt_team_nth = __kmp_ncores;
7375 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7376 "__kmp_ncores (%d)\n",
7377 __kmp_dflt_team_nth));
7378#else
7379 // Default #threads = #available OS procs
7380 __kmp_dflt_team_nth = __kmp_avail_proc;
7381 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7382 "__kmp_avail_proc(%d)\n",
7383 __kmp_dflt_team_nth));
7384#endif /* KMP_DFLT_NTH_CORES */
7385 }
7386
7389 }
7392 }
7393
7394 if (__kmp_nesting_mode > 0)
7396
7397 // There's no harm in continuing if the following check fails,
7398 // but it indicates an error in the previous logic.
7400
7401 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7402 // Run through the __kmp_threads array and set the num threads icv for each
7403 // root thread that is currently registered with the RTL (which has not
7404 // already explicitly set its nthreads-var with a call to
7405 // omp_set_num_threads()).
7406 for (i = 0; i < __kmp_threads_capacity; i++) {
7407 kmp_info_t *thread = __kmp_threads[i];
7408 if (thread == NULL)
7409 continue;
7410 if (thread->th.th_current_task->td_icvs.nproc != 0)
7411 continue;
7412
7414 }
7415 }
7416 KA_TRACE(
7417 20,
7418 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7420
7421#ifdef KMP_ADJUST_BLOCKTIME
7422 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
7423 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7426 __kmp_zero_bt = TRUE;
7427 }
7428 }
7429#endif /* KMP_ADJUST_BLOCKTIME */
7430
7431 /* we have finished middle initialization */
7433
7434 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
7435}
7436
7437void __kmp_middle_initialize(void) {
7438 if (__kmp_init_middle) {
7439 return;
7440 }
7441 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7442 if (__kmp_init_middle) {
7443 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7444 return;
7445 }
7446 __kmp_do_middle_initialize();
7447 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7448}
7449
7450void __kmp_parallel_initialize(void) {
7451 int gtid = __kmp_entry_gtid(); // this might be a new root
7452
7453 /* synchronize parallel initialization (for sibling) */
7454 if (TCR_4(__kmp_init_parallel))
7455 return;
7456 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7457 if (TCR_4(__kmp_init_parallel)) {
7458 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7459 return;
7460 }
7461
7462 /* TODO reinitialization after we have already shut down */
7463 if (TCR_4(__kmp_global.g.g_done)) {
7464 KA_TRACE(
7465 10,
7466 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7467 __kmp_infinite_loop();
7468 }
7469
7470 /* jc: The lock __kmp_initz_lock is already held, so calling
7471 __kmp_serial_initialize would cause a deadlock. So we call
7472 __kmp_do_serial_initialize directly. */
7473 if (!__kmp_init_middle) {
7474 __kmp_do_middle_initialize();
7475 }
7478
7479 /* begin initialization */
7480 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
7482
7483#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7484 // Save the FP control regs.
7485 // Worker threads will set theirs to these values at thread startup.
7486 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7487 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7488 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7489#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
7490
7491#if KMP_OS_UNIX
7492#if KMP_HANDLE_SIGNALS
7493 /* must be after __kmp_serial_initialize */
7494 __kmp_install_signals(TRUE);
7495#endif
7496#endif
7497
7499
7500#if defined(USE_LOAD_BALANCE)
7501 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7502 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7503 }
7504#else
7505 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7506 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7507 }
7508#endif
7509
7510 if (__kmp_version) {
7512 }
7513
7514 /* we have finished parallel initialization */
7515 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7516
7517 KMP_MB();
7518 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
7519
7520 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7521}
7522
7523void __kmp_hidden_helper_initialize() {
7524 if (TCR_4(__kmp_init_hidden_helper))
7525 return;
7526
7527 // __kmp_parallel_initialize is required before we initialize hidden helper
7528 if (!TCR_4(__kmp_init_parallel))
7529 __kmp_parallel_initialize();
7530
7531 // Double check. Note that this double check should not be placed before
7532 // __kmp_parallel_initialize as it will cause dead lock.
7536 return;
7537 }
7538
7539#if KMP_AFFINITY_SUPPORTED
7540 // Initialize hidden helper affinity settings.
7541 // The above __kmp_parallel_initialize() will initialize
7542 // regular affinity (and topology) if not already done.
7543 if (!__kmp_hh_affinity.flags.initialized)
7544 __kmp_affinity_initialize(__kmp_hh_affinity);
7545#endif
7546
7547 // Set the count of hidden helper tasks to be executed to zero
7549
7550 // Set the global variable indicating that we're initializing hidden helper
7551 // team/threads
7553
7554 // Platform independent initialization
7556
7557 // Wait here for the finish of initialization of hidden helper teams
7559
7560 // We have finished hidden helper initialization
7562
7564}
7565
7566/* ------------------------------------------------------------------------ */
7567
7568void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7569 kmp_team_t *team) {
7570 kmp_disp_t *dispatch;
7571
7572 KMP_MB();
7573
7574 /* none of the threads have encountered any constructs, yet. */
7575 this_thr->th.th_local.this_construct = 0;
7576#if KMP_CACHE_MANAGE
7577 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7578#endif /* KMP_CACHE_MANAGE */
7579 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7580 KMP_DEBUG_ASSERT(dispatch);
7581 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7582 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
7583 // this_thr->th.th_info.ds.ds_tid ] );
7584
7585 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
7586 dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
7588 __kmp_push_parallel(gtid, team->t.t_ident);
7589
7590 KMP_MB(); /* Flush all pending memory write invalidates. */
7591}
7592
7593void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7594 kmp_team_t *team) {
7596 __kmp_pop_parallel(gtid, team->t.t_ident);
7597
7599}
7600
7601int __kmp_invoke_task_func(int gtid) {
7602 int rc;
7603 int tid = __kmp_tid_from_gtid(gtid);
7604 kmp_info_t *this_thr = __kmp_threads[gtid];
7605 kmp_team_t *team = this_thr->th.th_team;
7606
7607 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7608#if USE_ITT_BUILD
7609 if (__itt_stack_caller_create_ptr) {
7610 // inform ittnotify about entering user's code
7611 if (team->t.t_stack_id != NULL) {
7612 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7613 } else {
7614 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7615 __kmp_itt_stack_callee_enter(
7616 (__itt_caller)team->t.t_parent->t.t_stack_id);
7617 }
7618 }
7619#endif /* USE_ITT_BUILD */
7620#if INCLUDE_SSC_MARKS
7621 SSC_MARK_INVOKING();
7622#endif
7623
7624#if OMPT_SUPPORT
7625 void *dummy;
7626 void **exit_frame_p;
7627 ompt_data_t *my_task_data;
7628 ompt_data_t *my_parallel_data;
7629 int ompt_team_size;
7630
7631 if (ompt_enabled.enabled) {
7632 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7633 .ompt_task_info.frame.exit_frame.ptr);
7634 } else {
7635 exit_frame_p = &dummy;
7636 }
7637
7638 my_task_data =
7639 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7640 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7641 if (ompt_enabled.ompt_callback_implicit_task) {
7642 ompt_team_size = team->t.t_nproc;
7643 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7644 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7645 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7646 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7647 }
7648#endif
7649
7650#if KMP_STATS_ENABLED
7651 stats_state_e previous_state = KMP_GET_THREAD_STATE();
7652 if (previous_state == stats_state_e::TEAMS_REGION) {
7653 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7654 } else {
7655 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7656 }
7657 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7658#endif
7659
7660 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7661 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7662#if OMPT_SUPPORT
7663 ,
7664 exit_frame_p
7665#endif
7666 );
7667#if OMPT_SUPPORT
7668 *exit_frame_p = NULL;
7669 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7670#endif
7671
7672#if KMP_STATS_ENABLED
7673 if (previous_state == stats_state_e::TEAMS_REGION) {
7674 KMP_SET_THREAD_STATE(previous_state);
7675 }
7677#endif
7678
7679#if USE_ITT_BUILD
7680 if (__itt_stack_caller_create_ptr) {
7681 // inform ittnotify about leaving user's code
7682 if (team->t.t_stack_id != NULL) {
7683 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7684 } else {
7685 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7686 __kmp_itt_stack_callee_leave(
7687 (__itt_caller)team->t.t_parent->t.t_stack_id);
7688 }
7689 }
7690#endif /* USE_ITT_BUILD */
7691 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7692
7693 return rc;
7694}
7695
7696void __kmp_teams_master(int gtid) {
7697 // This routine is called by all primary threads in teams construct
7698 kmp_info_t *thr = __kmp_threads[gtid];
7699 kmp_team_t *team = thr->th.th_team;
7700 ident_t *loc = team->t.t_ident;
7701 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7702 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7703 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7704 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7705 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7706
7707 // This thread is a new CG root. Set up the proper variables.
7709 tmp->cg_root = thr; // Make thr the CG root
7710 // Init to thread limit stored when league primary threads were forked
7711 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7712 tmp->cg_nthreads = 1; // Init counter to one active thread, this one
7713 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
7714 " cg_nthreads to 1\n",
7715 thr, tmp));
7716 tmp->up = thr->th.th_cg_roots;
7717 thr->th.th_cg_roots = tmp;
7718
7719// Launch the league of teams now, but do not let workers execute
7720// (they hang on the fork barrier until the next parallel region)
7721#if INCLUDE_SSC_MARKS
7722 SSC_MARK_FORKING();
7723#endif
7724 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7725 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
7727#if INCLUDE_SSC_MARKS
7728 SSC_MARK_JOINING();
7729#endif
7730 // If the team size was reduced from the limit, set it to the new size
7731 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7732 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7733 // AC: last parameter "1" eliminates join barrier which won't work because
7734 // worker threads are in a fork barrier waiting for more parallel regions
7735 __kmp_join_call(loc, gtid
7736#if OMPT_SUPPORT
7737 ,
7739#endif
7740 ,
7741 1);
7742}
7743
7744int __kmp_invoke_teams_master(int gtid) {
7745 kmp_info_t *this_thr = __kmp_threads[gtid];
7746 kmp_team_t *team = this_thr->th.th_team;
7747#if KMP_DEBUG
7748 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7749 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7750 (void *)__kmp_teams_master);
7751#endif
7752 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7753#if OMPT_SUPPORT
7754 int tid = __kmp_tid_from_gtid(gtid);
7755 ompt_data_t *task_data =
7756 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7757 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7758 if (ompt_enabled.ompt_callback_implicit_task) {
7759 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7760 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7761 ompt_task_initial);
7762 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7763 }
7764#endif
7765 __kmp_teams_master(gtid);
7766#if OMPT_SUPPORT
7767 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7768#endif
7769 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7770 return 1;
7771}
7772
7773/* This sets the requested number of threads for the next parallel region
7774 encountered by this team. Since this should be enclosed in the forkjoin
7775 critical section, it should avoid race conditions with asymmetrical nested
7776 parallelism. */
7777
7778void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7779 kmp_info_t *thr = __kmp_threads[gtid];
7780
7781 if (num_threads > 0)
7782 thr->th.th_set_nproc = num_threads;
7783}
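/* Usage note (illustrative): the compiler-facing entry point for the
   num_threads() clause typically routes through this routine, so e.g.
   "#pragma omp parallel num_threads(4)" leaves th_set_nproc == 4 on the
   encountering thread until the next fork consumes and resets it. */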
7784
7785static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
7786 int num_threads) {
7787 KMP_DEBUG_ASSERT(thr);
7788 // Remember the number of threads for inner parallel regions
7789 if (!TCR_4(__kmp_init_middle))
7790 __kmp_middle_initialize(); // get internal globals calculated
7794
7795 if (num_threads == 0) {
7796 if (__kmp_teams_thread_limit > 0) {
7797 num_threads = __kmp_teams_thread_limit;
7798 } else {
7799 num_threads = __kmp_avail_proc / num_teams;
7800 }
7801 // adjust num_threads w/o warning as it is not user setting
7802 // num_threads = min(num_threads, nthreads-var, thread-limit-var)
7803 // no thread_limit clause specified - do not change thread-limit-var ICV
7804 if (num_threads > __kmp_dflt_team_nth) {
7805 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7806 }
7807 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7808 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7809 } // prevent team size to exceed thread-limit-var
7810 if (num_teams * num_threads > __kmp_teams_max_nth) {
7811 num_threads = __kmp_teams_max_nth / num_teams;
7812 }
7813 if (num_threads == 0) {
7814 num_threads = 1;
7815 }
7816 } else {
7817 if (num_threads < 0) {
7818 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
7820 num_threads = 1;
7821 }
7822 // This thread will be the primary thread of the league primary threads
7823 // Store new thread limit; old limit is saved in th_cg_roots list
7824 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7825 // num_threads = min(num_threads, nthreads-var)
7826 if (num_threads > __kmp_dflt_team_nth) {
7827 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7828 }
7829 if (num_teams * num_threads > __kmp_teams_max_nth) {
7830 int new_threads = __kmp_teams_max_nth / num_teams;
7831 if (new_threads == 0) {
7832 new_threads = 1;
7833 }
7834 if (new_threads != num_threads) {
7835 if (!__kmp_reserve_warn) { // user asked for too many threads
7836 __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
7838 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7839 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7840 }
7841 }
7842 num_threads = new_threads;
7843 }
7844 }
7845 thr->th.th_teams_size.nth = num_threads;
7846}
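/* Worked example of the clamping above (illustrative numbers): with
   __kmp_avail_proc = 32, num_teams = 4, no thread_limit clause
   (num_threads == 0), nthreads-var = 6 and __kmp_teams_max_nth = 16:
       start:             num_threads = 32 / 4 = 8
       nthreads-var cap:  8 -> 6
       teams_max_nth cap: 4 * 6 > 16, so 6 -> 16 / 4 = 4
   giving th_teams_size.nth = 4 threads per team. */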
7847
7848/* this sets the requested number of teams for the teams region and/or
7849 the number of threads for the next parallel region encountered */
7850void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7851 int num_threads) {
7852 kmp_info_t *thr = __kmp_threads[gtid];
7853 if (num_teams < 0) {
7854 // OpenMP specification requires requested values to be positive,
7855 // but people can send us any value, so we'd better check
7856 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
7858 num_teams = 1;
7859 }
7860 if (num_teams == 0) {
7861 if (__kmp_nteams > 0) {
7862 num_teams = __kmp_nteams;
7863 } else {
7864 num_teams = 1; // default number of teams is 1.
7865 }
7866 }
7867 if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
7868 if (!__kmp_reserve_warn) {
7871 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7872 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7873 }
7874 num_teams = __kmp_teams_max_nth;
7875 }
7876 // Set number of teams (number of threads in the outer "parallel" of the
7877 // teams)
7878 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7879
7880 __kmp_push_thread_limit(thr, num_teams, num_threads);
7881}
7882
7883/* This sets the requested number of teams for the teams region and/or
7884 the number of threads for the next parallel region encountered */
7885void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
7886 int num_teams_ub, int num_threads) {
7887 kmp_info_t *thr = __kmp_threads[gtid];
7888 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
7889 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
7890 KMP_DEBUG_ASSERT(num_threads >= 0);
7891
7892 if (num_teams_lb > num_teams_ub) {
7893 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7895 }
7896
7897 int num_teams = 1; // default number of teams is 1.
7898
7899 if (num_teams_lb == 0 && num_teams_ub > 0)
7900 num_teams_lb = num_teams_ub;
7901
7902 if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
7903 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7904 if (num_teams > __kmp_teams_max_nth) {
7905 if (!__kmp_reserve_warn) {
7908 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7909 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7910 }
7911 num_teams = __kmp_teams_max_nth;
7912 }
7913 } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
7914 num_teams = num_teams_ub;
7915 } else { // num_teams_lb <= num_teams <= num_teams_ub
7916 if (num_threads <= 0) {
7917 if (num_teams_ub > __kmp_teams_max_nth) {
7918 num_teams = num_teams_lb;
7919 } else {
7920 num_teams = num_teams_ub;
7921 }
7922 } else {
7923 num_teams = (num_threads > __kmp_teams_max_nth)
7924 ? num_teams
7925 : __kmp_teams_max_nth / num_threads;
7926 if (num_teams < num_teams_lb) {
7927 num_teams = num_teams_lb;
7928 } else if (num_teams > num_teams_ub) {
7929 num_teams = num_teams_ub;
7930 }
7931 }
7932 }
7933 // Set number of teams (number of threads in the outer "parallel" of the
7934 // teams)
7935 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7936
7937 __kmp_push_thread_limit(thr, num_teams, num_threads);
7938}
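/* Worked example of the bounds handling above (illustrative numbers): with
   num_teams_lb = 2, num_teams_ub = 8, num_threads = 4 and
   __kmp_teams_max_nth = 16, the final branch computes
   num_teams = 16 / 4 = 4, which already lies within [2, 8], so four teams of
   at most four threads each are requested. */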
7939
7940// Set the proc_bind var to use in the following parallel region.
7941void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7942 kmp_info_t *thr = __kmp_threads[gtid];
7943 thr->th.th_set_proc_bind = proc_bind;
7944}
7945
7946/* Launch the worker threads into the microtask. */
7947
7948void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7949 kmp_info_t *this_thr = __kmp_threads[gtid];
7950
7951#ifdef KMP_DEBUG
7952 int f;
7953#endif /* KMP_DEBUG */
7954
7955 KMP_DEBUG_ASSERT(team);
7956 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7958 KMP_MB(); /* Flush all pending memory write invalidates. */
7959
7960 team->t.t_construct = 0; /* no single directives seen yet */
7961 team->t.t_ordered.dt.t_value =
7962 0; /* thread 0 enters the ordered section first */
7963
7964 /* Reset the identifiers on the dispatch buffer */
7965 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7966 if (team->t.t_max_nproc > 1) {
7967 int i;
7968 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7969 team->t.t_disp_buffer[i].buffer_index = i;
7970 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7971 }
7972 } else {
7973 team->t.t_disp_buffer[0].buffer_index = 0;
7974 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7975 }
7976
7977 KMP_MB(); /* Flush all pending memory write invalidates. */
7978 KMP_ASSERT(this_thr->th.th_team == team);
7979
7980#ifdef KMP_DEBUG
7981 for (f = 0; f < team->t.t_nproc; f++) {
7982 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7983 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7984 }
7985#endif /* KMP_DEBUG */
7986
7987 /* release the worker threads so they may begin working */
7988 __kmp_fork_barrier(gtid, 0);
7989}
7990
7991void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7992 kmp_info_t *this_thr = __kmp_threads[gtid];
7993
7994 KMP_DEBUG_ASSERT(team);
7995 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7997 KMP_MB(); /* Flush all pending memory write invalidates. */
7998
7999 /* Join barrier after fork */
8000
8001#ifdef KMP_DEBUG
8002 if (__kmp_threads[gtid] &&
8003 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8004 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8005 __kmp_threads[gtid]);
8006 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8007 "team->t.t_nproc=%d\n",
8008 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
8009 team->t.t_nproc);
8011 }
8013 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8014#endif /* KMP_DEBUG */
8015
8016 __kmp_join_barrier(gtid); /* wait for everyone */
8017#if OMPT_SUPPORT
8018 if (ompt_enabled.enabled &&
8019 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
8020 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8021 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8022 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8023#if OMPT_OPTIONAL
8024 void *codeptr = NULL;
8025 if (KMP_MASTER_TID(ds_tid) &&
8026 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
8027 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8028 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8029
8030 if (ompt_enabled.ompt_callback_sync_region_wait) {
8031 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8032 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8033 codeptr);
8034 }
8035 if (ompt_enabled.ompt_callback_sync_region) {
8036 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8037 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8038 codeptr);
8039 }
8040#endif
8041 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8042 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8043 ompt_scope_end, NULL, task_data, 0, ds_tid,
8044 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
8045 }
8046 }
8047#endif
8048
8049 KMP_MB(); /* Flush all pending memory write invalidates. */
8050 KMP_ASSERT(this_thr->th.th_team == team);
8051}
8052
8053/* ------------------------------------------------------------------------ */
8054
8055#ifdef USE_LOAD_BALANCE
8056
8057// Return the worker threads actively spinning in the hot team, if we
8058// are at the outermost level of parallelism. Otherwise, return 0.
8059static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8060 int i;
8061 int retval;
8062 kmp_team_t *hot_team;
8063
8064 if (root->r.r_active) {
8065 return 0;
8066 }
8067 hot_team = root->r.r_hot_team;
8069 return hot_team->t.t_nproc - 1; // Don't count primary thread
8070 }
8071
8072 // Skip the primary thread - it is accounted for elsewhere.
8073 retval = 0;
8074 for (i = 1; i < hot_team->t.t_nproc; i++) {
8075 if (hot_team->t.t_threads[i]->th.th_active) {
8076 retval++;
8077 }
8078 }
8079 return retval;
8080}
8081
8082// Perform an automatic adjustment to the number of
8083// threads used by the next parallel region.
8084static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8085 int retval;
8086 int pool_active;
8087 int hot_team_active;
8088 int team_curr_active;
8089 int system_active;
8090
8091 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
8092 set_nproc));
8093 KMP_DEBUG_ASSERT(root);
8094 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8095 ->th.th_current_task->td_icvs.dynamic == TRUE);
8096 KMP_DEBUG_ASSERT(set_nproc > 1);
8097
8098 if (set_nproc == 1) {
8099 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
8100 return 1;
8101 }
8102
8103 // Threads that are active in the thread pool, active in the hot team for this
8104 // particular root (if we are at the outer par level), and the currently
8105 // executing thread (to become the primary thread) are available to add to the
8106 // new team, but are currently contributing to the system load, and must be
8107 // accounted for.
8108 pool_active = __kmp_thread_pool_active_nth;
8109 hot_team_active = __kmp_active_hot_team_nproc(root);
8110 team_curr_active = pool_active + hot_team_active + 1;
8111
8112 // Check the system load.
8113 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8114 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
8115 "hot team active = %d\n",
8116 system_active, pool_active, hot_team_active));
8117
8118 if (system_active < 0) {
8119 // There was an error reading the necessary info from /proc, so use the
8120 // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
8121 // = dynamic_thread_limit, we shouldn't wind up getting back here.
8122 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8123 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
8124
8125 // Make this call behave like the thread limit algorithm.
8126 retval = __kmp_avail_proc - __kmp_nth +
8127 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8128 if (retval > set_nproc) {
8129 retval = set_nproc;
8130 }
8131 if (retval < KMP_MIN_NTH) {
8132 retval = KMP_MIN_NTH;
8133 }
8134
8135 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
8136 retval));
8137 return retval;
8138 }
8139
8140 // There is a slight delay in the load balance algorithm in detecting new
8141 // running procs. The real system load at this instant should be at least as
8142 // large as the #active OMP threads that are available to add to the team.
8143 if (system_active < team_curr_active) {
8144 system_active = team_curr_active;
8145 }
8146 retval = __kmp_avail_proc - system_active + team_curr_active;
8147 if (retval > set_nproc) {
8148 retval = set_nproc;
8149 }
8150 if (retval < KMP_MIN_NTH) {
8151 retval = KMP_MIN_NTH;
8152 }
8153
8154 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8155 return retval;
8156} // __kmp_load_balance_nproc()
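// Worked example of the formula above (illustrative numbers): with
// __kmp_avail_proc = 16, system_active = 10 (already raised to at least
// team_curr_active) and team_curr_active = 3, the suggested team size is
// 16 - 10 + 3 = 9, which is then clamped to [KMP_MIN_NTH, set_nproc].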
8157
8158#endif /* USE_LOAD_BALANCE */
8159
8160/* ------------------------------------------------------------------------ */
8161
8162/* NOTE: this is called with the __kmp_init_lock held */
8163void __kmp_cleanup(void) {
8164 int f;
8165
8166 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
8167
8169#if KMP_HANDLE_SIGNALS
8170 __kmp_remove_signals();
8171#endif
8173 }
8174
8175 if (TCR_4(__kmp_init_middle)) {
8176#if KMP_AFFINITY_SUPPORTED
8177 __kmp_affinity_uninitialize();
8178#endif /* KMP_AFFINITY_SUPPORTED */
8181 }
8182
8183 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
8184
8185 if (__kmp_init_serial) {
8188 }
8189
8191
8192 for (f = 0; f < __kmp_threads_capacity; f++) {
8193 if (__kmp_root[f] != NULL) {
8195 __kmp_root[f] = NULL;
8196 }
8197 }
8199 // __kmp_threads and __kmp_root were allocated at once, as single block, so
8200 // there is no need in freeing __kmp_root.
8201 __kmp_threads = NULL;
8202 __kmp_root = NULL;
8204
8205 // Free old __kmp_threads arrays if they exist.
8207 while (ptr) {
8208 kmp_old_threads_list_t *next = ptr->next;
8209 __kmp_free(ptr->threads);
8210 __kmp_free(ptr);
8211 ptr = next;
8212 }
8213
8214#if KMP_USE_DYNAMIC_LOCK
8215 __kmp_cleanup_indirect_user_locks();
8216#else
8218#endif
8219#if OMPD_SUPPORT
8220 if (ompd_state) {
8221 __kmp_free(ompd_env_block);
8222 ompd_env_block = NULL;
8223 ompd_env_block_size = 0;
8224 }
8225#endif
8226
8227#if KMP_AFFINITY_SUPPORTED
8228 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
8229 __kmp_cpuinfo_file = NULL;
8230#endif /* KMP_AFFINITY_SUPPORTED */
8231
8232#if KMP_USE_ADAPTIVE_LOCKS
8233#if KMP_DEBUG_ADAPTIVE_LOCKS
8234 __kmp_print_speculative_stats();
8235#endif
8236#endif
8238 __kmp_nested_nth.nth = NULL;
8247 __kmp_affinity_format = NULL;
8248 }
8249
8251
8252#if KMP_USE_HIER_SCHED
8254#endif
8255
8256#if KMP_STATS_ENABLED
8258#endif
8259
8260 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
8261}
8262
8263/* ------------------------------------------------------------------------ */
8264
8265int __kmp_ignore_mppbeg(void) {
8266 char *env;
8267
8268 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
8269 if (__kmp_str_match_false(env))
8270 return FALSE;
8271 }
8272 // By default __kmpc_begin() is no-op.
8273 return TRUE;
8274}
8275
8276int __kmp_ignore_mppend(void) {
8277 char *env;
8278
8279 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
8280 if (__kmp_str_match_false(env))
8281 return FALSE;
8282 }
8283 // By default __kmpc_end() is no-op.
8284 return TRUE;
8285}
8286
8287void __kmp_internal_begin(void) {
8288 int gtid;
8289 kmp_root_t *root;
8290
8291 /* this is a very important step as it will register new sibling threads
8292 and assign these new uber threads a new gtid */
8293 gtid = __kmp_entry_gtid();
8294 root = __kmp_threads[gtid]->th.th_root;
8296
8297 if (root->r.r_begin)
8298 return;
8299 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8300 if (root->r.r_begin) {
8301 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8302 return;
8303 }
8304
8305 root->r.r_begin = TRUE;
8306
8307 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8308}
8309
8310/* ------------------------------------------------------------------------ */
8311
8313 int gtid;
8314 kmp_root_t *root;
8315 kmp_info_t *thread;
8316
8317 /* first, make sure we are initialized so we can get our gtid */
8318
8319 gtid = __kmp_entry_gtid();
8320 thread = __kmp_threads[gtid];
8321
8322 root = thread->th.th_root;
8323
8324 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8326 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
8327 thread */
8328 KMP_WARNING(SetLibraryIncorrectCall);
8329 return;
8330 }
8331
8332 switch (arg) {
8333 case library_serial:
8334 thread->th.th_set_nproc = 0;
8335 set__nproc(thread, 1);
8336 break;
8337 case library_turnaround:
8338 thread->th.th_set_nproc = 0;
8341 break;
8342 case library_throughput:
8343 thread->th.th_set_nproc = 0;
8346 break;
8347 default:
8348 KMP_FATAL(UnknownLibraryType, arg);
8349 }
8350
8352}
8353
8354void __kmp_aux_set_stacksize(size_t arg) {
8355 if (!__kmp_init_serial)
8356 __kmp_serial_initialize();
8357
8358#if KMP_OS_DARWIN
8359 if (arg & (0x1000 - 1)) {
8360 arg &= ~(0x1000 - 1);
8361 if (arg + 0x1000) /* check for overflow if we round up */
8362 arg += 0x1000;
8363 }
8364#endif
8366
8367 /* only change the default stacksize before the first parallel region */
8368 if (!TCR_4(__kmp_init_parallel)) {
8369 size_t value = arg; /* argument is in bytes */
8370
8373 else if (value > KMP_MAX_STKSIZE)
8375
8377
8378 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
8379 }
8380
8382}
8383
8384/* set the behaviour of the runtime library */
8385/* TODO this can cause some odd behaviour with sibling parallelism... */
8386void __kmp_aux_set_library(enum library_type arg) {
8387 __kmp_library = arg;
8388
8389 switch (__kmp_library) {
8390 case library_serial: {
8391 KMP_INFORM(LibraryIsSerial);
8392 } break;
8393 case library_turnaround:
8395 __kmp_use_yield = 2; // only yield when oversubscribed
8396 break;
8397 case library_throughput:
8400 break;
8401 default:
8402 KMP_FATAL(UnknownLibraryType, arg);
8403 }
8404}
8405
8406/* Getting team information common for all team API */
8407// Returns NULL if not in teams construct
8408static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8410 teams_serialized = 0;
8411 if (thr->th.th_teams_microtask) {
8412 kmp_team_t *team = thr->th.th_team;
8413 int tlevel = thr->th.th_teams_level; // the level of the teams construct
8414 int ii = team->t.t_level;
8415 teams_serialized = team->t.t_serialized;
8416 int level = tlevel + 1;
8417 KMP_DEBUG_ASSERT(ii >= tlevel);
8418 while (ii > level) {
8419 for (teams_serialized = team->t.t_serialized;
8420 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8421 }
8422 if (team->t.t_serialized && (!teams_serialized)) {
8423 team = team->t.t_parent;
8424 continue;
8425 }
8426 if (ii > level) {
8427 team = team->t.t_parent;
8428 ii--;
8429 }
8430 }
8431 return team;
8432 }
8433 return NULL;
8434}
8435
8436int __kmp_aux_get_team_num() {
8437 int serialized;
8438 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8439 if (team) {
8440 if (serialized > 1) {
8441 return 0; // teams region is serialized ( 1 team of 1 thread ).
8442 } else {
8443 return team->t.t_master_tid;
8444 }
8445 }
8446 return 0;
8447}
8448
8449int __kmp_aux_get_num_teams() {
8450 int serialized;
8451 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8452 if (team) {
8453 if (serialized > 1) {
8454 return 1;
8455 } else {
8456 return team->t.t_parent->t.t_nproc;
8457 }
8458 }
8459 return 1;
8460}
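// Illustrative example: inside a non-serialized "#pragma omp teams
// num_teams(4)" region, the two helpers above return a team number in 0..3
// (the t_master_tid of the league's outer team) and 4, respectively; for a
// serialized teams region they return 0 and 1.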
8461
8462/* ------------------------------------------------------------------------ */
8463
8464/*
8465 * Affinity Format Parser
8466 *
8467 * Field is in form of: %[[[0].]size]type
8468 * % and type are required (%% means print a literal '%')
8469 * type is either single char or long name surrounded by {},
8470 * e.g., N or {num_threads}
8471 * 0 => leading zeros
8472 * . => right justified when size is specified
8473 * by default output is left justified
8474 * size is the *minimum* field length
8475 * All other characters are printed as is
8476 *
8477 * Available field types:
8478 * L {thread_level} - omp_get_level()
8479 * n {thread_num} - omp_get_thread_num()
8480 * h {host} - name of host machine
8481 * P {process_id} - process id (integer)
8482 * T {thread_identifier} - native thread identifier (integer)
8483 * N {num_threads} - omp_get_num_threads()
8484 * A {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
8485 * a {thread_affinity} - comma separated list of integers or integer ranges
8486 * (values of affinity mask)
8487 *
8488 * Implementation-specific field types can be added
8489 * If a type is unknown, print "undefined"
8490 */
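/* Example (illustrative values): the format
       "host=%H tid=%0.4n aff=%{thread_affinity}"
   could expand, for thread 3 of a team bound to cores 4-7 on host "node17", to
       "host=node17 tid=0003 aff=4-7"
   Unknown field types expand to the literal string "undefined". */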
8491
8492// Structure holding the short name, long name, and corresponding data type
8493// for snprintf. A table of these will represent the entire valid keyword
8494// field types.
8495typedef struct kmp_affinity_format_field_t {
8496 char short_name; // from spec e.g., L -> thread level
8497 const char *long_name; // from spec thread_level -> thread level
8498 char field_format; // data type for snprintf (typically 'd' or 's'
8499 // for integer or string)
8500} kmp_affinity_format_field_t;
8501
8502static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8503#if KMP_AFFINITY_SUPPORTED
8504 {'A', "thread_affinity", 's'},
8505#endif
8506 {'t', "team_num", 'd'},
8507 {'T', "num_teams", 'd'},
8508 {'L', "nesting_level", 'd'},
8509 {'n', "thread_num", 'd'},
8510 {'N', "num_threads", 'd'},
8511 {'a', "ancestor_tnum", 'd'},
8512 {'H', "host", 's'},
8513 {'P', "process_id", 'd'},
8514 {'i', "native_thread_id", 'd'}};
8515
8516// Return the number of characters it takes to hold the field
8517static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
8518 const char **ptr,
8519 kmp_str_buf_t *field_buffer) {
8520 int rc, format_index, field_value;
8521 const char *width_left, *width_right;
8522 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8523 static const int FORMAT_SIZE = 20;
8524 char format[FORMAT_SIZE] = {0};
8525 char absolute_short_name = 0;
8526
8527 KMP_DEBUG_ASSERT(gtid >= 0);
8528 KMP_DEBUG_ASSERT(th);
8529 KMP_DEBUG_ASSERT(**ptr == '%');
8530 KMP_DEBUG_ASSERT(field_buffer);
8531
8532 __kmp_str_buf_clear(field_buffer);
8533
8534 // Skip the initial %
8535 (*ptr)++;
8536
8537 // Check for %% first
8538 if (**ptr == '%') {
8539 __kmp_str_buf_cat(field_buffer, "%", 1);
8540 (*ptr)++; // skip over the second %
8541 return 1;
8542 }
8543
8544 // Parse field modifiers if they are present
8545 pad_zeros = false;
8546 if (**ptr == '0') {
8547 pad_zeros = true;
8548 (*ptr)++; // skip over 0
8549 }
8550 right_justify = false;
8551 if (**ptr == '.') {
8552 right_justify = true;
8553 (*ptr)++; // skip over .
8554 }
8555 // Parse width of field: [width_left, width_right)
8556 width_left = width_right = NULL;
8557 if (**ptr >= '0' && **ptr <= '9') {
8558 width_left = *ptr;
8559 SKIP_DIGITS(*ptr);
8560 width_right = *ptr;
8561 }
8562
8563 // Create the format for KMP_SNPRINTF based on flags parsed above
8564 format_index = 0;
8565 format[format_index++] = '%';
8566 if (!right_justify)
8567 format[format_index++] = '-';
8568 if (pad_zeros)
8569 format[format_index++] = '0';
8570 if (width_left && width_right) {
8571 int i = 0;
8572 // Only allow 8 digit number widths.
8573 // This also prevents overflowing format variable
8574 while (i < 8 && width_left < width_right) {
8575 format[format_index++] = *width_left;
8576 width_left++;
8577 i++;
8578 }
8579 }
8580
8581 // Parse a name (long or short)
8582 // Canonicalize the name into absolute_short_name
8583 found_valid_name = false;
8584 parse_long_name = (**ptr == '{');
8585 if (parse_long_name)
8586 (*ptr)++; // skip initial left brace
8587 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
8588 sizeof(__kmp_affinity_format_table[0]);
8589 ++i) {
8590 char short_name = __kmp_affinity_format_table[i].short_name;
8591 const char *long_name = __kmp_affinity_format_table[i].long_name;
8592 char field_format = __kmp_affinity_format_table[i].field_format;
8593 if (parse_long_name) {
8594 size_t length = KMP_STRLEN(long_name);
8595 if (strncmp(*ptr, long_name, length) == 0) {
8596 found_valid_name = true;
8597 (*ptr) += length; // skip the long name
8598 }
8599 } else if (**ptr == short_name) {
8600 found_valid_name = true;
8601 (*ptr)++; // skip the short name
8602 }
8603 if (found_valid_name) {
8604 format[format_index++] = field_format;
8605 format[format_index++] = '\0';
8606 absolute_short_name = short_name;
8607 break;
8608 }
8609 }
8610 if (parse_long_name) {
8611 if (**ptr != '}') {
8612 absolute_short_name = 0;
8613 } else {
8614 (*ptr)++; // skip over the right brace
8615 }
8616 }
8617
8618 // Attempt to fill the buffer with the requested
8619 // value using snprintf within __kmp_str_buf_print()
8620 switch (absolute_short_name) {
8621 case 't':
8622 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8623 break;
8624 case 'T':
8625 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8626 break;
8627 case 'L':
8628 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8629 break;
8630 case 'n':
8631 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8632 break;
8633 case 'H': {
8634 static const int BUFFER_SIZE = 256;
8635 char buf[BUFFER_SIZE];
8636 __kmp_expand_host_name(buf, BUFFER_SIZE);
8637 rc = __kmp_str_buf_print(field_buffer, format, buf);
8638 } break;
8639 case 'P':
8640 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8641 break;
8642 case 'i':
8643 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8644 break;
8645 case 'N':
8646 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8647 break;
8648 case 'a':
8649 field_value =
8650 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8651 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8652 break;
8653#if KMP_AFFINITY_SUPPORTED
8654 case 'A': {
8655 kmp_str_buf_t buf;
8656 __kmp_str_buf_init(&buf);
8657 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8658 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8659 __kmp_str_buf_free(&buf);
8660 } break;
8661#endif
8662 default:
8663 // According to the spec, if an implementation does not have info for the
8664 // field type, then "undefined" is printed
8665 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8666 // Skip the field
8667 if (parse_long_name) {
8668 SKIP_TOKEN(*ptr);
8669 if (**ptr == '}')
8670 (*ptr)++;
8671 } else {
8672 (*ptr)++;
8673 }
8674 }
8675
8676 KMP_ASSERT(format_index <= FORMAT_SIZE);
8677 return rc;
8678}
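For orientation, the sketch below (an editor's illustration, not part of kmp_runtime.cpp) shows how the modifiers parsed above combine into the snprintf-style format handed to __kmp_str_buf_print(), assuming the 's' field format that the table assigns to string-valued fields such as the hostname.

#include <cstddef>

// Mirrors the modifier handling above for a string field (out_size is assumed
// large enough for the flags, width digits, conversion char, and NUL):
//   "%8H"   -> "%-8s"  (default is left-justified)
//   "%.8H"  -> "%8s"   ('.' requests right justification)
//   "%0.8H" -> "%08s"  ('0' requests zero padding)
static void build_field_format(bool pad_zeros, bool right_justify,
                               const char *width, char field_format,
                               char *out, std::size_t out_size) {
  std::size_t i = 0;
  out[i++] = '%';
  if (!right_justify)
    out[i++] = '-';
  if (pad_zeros)
    out[i++] = '0';
  while (*width != '\0' && i + 2 < out_size) // leave room for type + NUL
    out[i++] = *width++;
  out[i++] = field_format;
  out[i] = '\0';
}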
8679
8680/*
8681 * Return the number of characters needed to hold the affinity string
8682 * (not including the terminating null byte).
8683 * The resulting string is printed to buffer, which the caller can then
8684 * handle afterwards.
8685 */
8686size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8687 kmp_str_buf_t *buffer) {
8688 const char *parse_ptr;
8689 size_t retval;
8690 const kmp_info_t *th;
8691 kmp_str_buf_t field;
8692
8693 KMP_DEBUG_ASSERT(buffer);
8694 KMP_DEBUG_ASSERT(gtid >= 0);
8695
8696 __kmp_str_buf_init(&field);
8697 __kmp_str_buf_clear(buffer);
8698
8699 th = __kmp_threads[gtid];
8700 retval = 0;
8701
8702 // If format is NULL or zero-length string, then we use
8703 // affinity-format-var ICV
8704 parse_ptr = format;
8705 if (parse_ptr == NULL || *parse_ptr == '\0') {
8706 parse_ptr = __kmp_affinity_format;
8707 }
8708 KMP_DEBUG_ASSERT(parse_ptr);
8709
8710 while (*parse_ptr != '\0') {
8711 // Parse a field
8712 if (*parse_ptr == '%') {
8713 // Put field in the buffer
8714 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8715 __kmp_str_buf_catbuf(buffer, &field);
8716 retval += rc;
8717 } else {
8718 // Put literal character in buffer
8719 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8720 retval++;
8721 parse_ptr++;
8722 }
8723 }
8724 __kmp_str_buf_free(&field);
8725 return retval;
8726}
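__kmp_aux_capture_affinity() is what ultimately services the OpenMP 5.0 omp_capture_affinity() entry point. A minimal user-level sketch, assuming a compiler and runtime with OpenMP 5.0 affinity-format support:

#include <omp.h>
#include <stdio.h>

int main(void) {
  // Short names match the field table used above: %H host, %P pid,
  // %n thread_num, %N num_threads, %A affinity mask, %L nesting level.
  omp_set_affinity_format("host=%H pid=%P tid=%0.3n/%N mask={%A}");
#pragma omp parallel
  {
    char buf[512];
    size_t n = omp_capture_affinity(buf, sizeof(buf), NULL); // NULL: use the ICV
    printf("needed %zu chars: %s\n", n, buf);
  }
  return 0;
}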
8727
8728// Displays the affinity string to stdout
8729void __kmp_aux_display_affinity(int gtid, const char *format) {
8730 kmp_str_buf_t buf;
8731 __kmp_str_buf_init(&buf);
8732 __kmp_aux_capture_affinity(gtid, format, &buf);
8733 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
8734 __kmp_str_buf_free(&buf);
8735}
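The same machinery backs omp_display_affinity() and the OMP_DISPLAY_AFFINITY environment variable; a small usage sketch (not part of this file):

#include <omp.h>

int main(void) {
  // Each thread prints one line to stdout using affinity-format-var,
  // the path taken through __kmp_aux_display_affinity() above.
#pragma omp parallel
  omp_display_affinity(NULL); // NULL: fall back to the affinity-format ICV
  return 0;
}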
8736
8737/* ------------------------------------------------------------------------ */
8738void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8739 int blocktime = arg; /* argument is in microseconds */
8740#if KMP_USE_MONITOR
8741 int bt_intervals;
8742#endif
8743 kmp_int8 bt_set;
8744
8745 __kmp_save_internal_controls(thread);
8746
8747 /* Normalize and set blocktime for the teams */
8748 if (blocktime < KMP_MIN_BLOCKTIME)
8749 blocktime = KMP_MIN_BLOCKTIME;
8750 else if (blocktime > KMP_MAX_BLOCKTIME)
8751 blocktime = KMP_MAX_BLOCKTIME;
8752
8753 set__blocktime_team(thread->th.th_team, tid, blocktime);
8754 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8755
8756#if KMP_USE_MONITOR
8757 /* Calculate and set blocktime intervals for the teams */
8758 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8759
8760 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8761 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8762#endif
8763
8764 /* Set whether blocktime has been set to "TRUE" */
8765 bt_set = TRUE;
8766
8767 set__bt_set_team(thread->th.th_team, tid, bt_set);
8768 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8769#if KMP_USE_MONITOR
8770 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8771 "bt_intervals=%d, monitor_updates=%d\n",
8772 __kmp_gtid_from_tid(tid, thread->th.th_team),
8773 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8774 __kmp_monitor_wakeups));
8775#else
8776 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8777 __kmp_gtid_from_tid(tid, thread->th.th_team),
8778 thread->th.th_team->t.t_id, tid, blocktime));
8779#endif
8780}
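The clamping above is reached from the kmp_set_blocktime() extension (and the KMP_BLOCKTIME environment variable); a sketch, assuming the LLVM/Intel extension API declared in omp.h (the public routine has historically been documented in milliseconds, while this internal helper works in microseconds):

#include <omp.h>
#include <stdio.h>

int main(void) {
  // kmp_set_blocktime() applies to the calling thread and funnels into
  // __kmp_aux_set_blocktime(); 0 lets idle workers sleep immediately,
  // larger values keep them spinning before they go to sleep.
  kmp_set_blocktime(0);
#pragma omp parallel
  { /* ... parallel work ... */ }
  printf("blocktime is now %d\n", kmp_get_blocktime());
  return 0;
}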
8781
8782void __kmp_aux_set_defaults(char const *str, size_t len) {
8783 if (!__kmp_init_serial) {
8784 __kmp_serial_initialize();
8785 }
8786 __kmp_env_initialize(str);
8787
8788 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
8789 __kmp_env_print();
8790 }
8791} // __kmp_aux_set_defaults
8792
8793/* ------------------------------------------------------------------------ */
8794/* internal fast reduction routines */
8795
8796PACKED_REDUCTION_METHOD_T
8797__kmp_determine_reduction_method(
8798 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8799 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8800 kmp_critical_name *lck) {
8801
8802 // Default reduction method: critical construct ( lck != NULL, like in current
8803 // PAROPT )
8804 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
8805 // can be selected by RTL
8806 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
8807 // can be selected by RTL
8808 // Finally, it's up to the OpenMP RTL to decide which method to select
8809 // among those generated by PAROPT.
8810
8811 PACKED_REDUCTION_METHOD_T retval;
8812
8813 int team_size;
8814
8815 KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
8816
8817#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8818 (loc && \
8819 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8820#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8821
8822 retval = critical_reduce_block;
8823
8824 // another way of getting the team size (with 1 dynamic dereference) is slower
8825 team_size = __kmp_get_team_num_threads(global_tid);
8826 if (team_size == 1) {
8827
8828 retval = empty_reduce_block;
8829
8830 } else {
8831
8832 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8833
8834#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8835 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
8836 KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM
8837
8838#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8839 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || \
8840 KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8841
8842 int teamsize_cutoff = 4;
8843
8844#if KMP_MIC_SUPPORTED
8845 if (__kmp_mic_type != non_mic) {
8846 teamsize_cutoff = 8;
8847 }
8848#endif
8849 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8850 if (tree_available) {
8851 if (team_size <= teamsize_cutoff) {
8852 if (atomic_available) {
8853 retval = atomic_reduce_block;
8854 }
8855 } else {
8856 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8857 }
8858 } else if (atomic_available) {
8859 retval = atomic_reduce_block;
8860 }
8861#else
8862#error "Unknown or unsupported OS"
8863#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
8864 // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD ||
8865 // KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8866
8867#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
8868 KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
8869
8870#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8871 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS || \
8872 KMP_OS_WASI || KMP_OS_AIX
8873
8874 // basic tuning
8875
8876 if (atomic_available) {
8877 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
8878 retval = atomic_reduce_block;
8879 }
8880 } // otherwise: use critical section
8881
8882#elif KMP_OS_DARWIN
8883
8884 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8885 if (atomic_available && (num_vars <= 3)) {
8886 retval = atomic_reduce_block;
8887 } else if (tree_available) {
8888 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8889 (reduce_size < (2000 * sizeof(kmp_real64)))) {
8890 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8891 }
8892 } // otherwise: use critical section
8893
8894#else
8895#error "Unknown or unsupported OS"
8896#endif
8897
8898#else
8899#error "Unknown or unsupported architecture"
8900#endif
8901 }
8902
8903 // KMP_FORCE_REDUCTION
8904
8905 // If the team is serialized (team_size == 1), ignore the forced reduction
8906 // method and stay with the unsynchronized method (empty_reduce_block)
8907 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8908 team_size != 1) {
8909
8910 PACKED_REDUCTION_METHOD_T forced_retval;
8911
8912 int atomic_available, tree_available;
8913
8914 switch ((forced_retval = __kmp_force_reduction_method)) {
8915 case critical_reduce_block:
8916 KMP_ASSERT(lck); // lck should be != 0
8917 break;
8918
8919 case atomic_reduce_block:
8920 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8921 if (!atomic_available) {
8922 KMP_WARNING(RedMethodNotSupported, "atomic");
8923 forced_retval = critical_reduce_block;
8924 }
8925 break;
8926
8927 case tree_reduce_block:
8928 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8929 if (!tree_available) {
8930 KMP_WARNING(RedMethodNotSupported, "tree");
8931 forced_retval = critical_reduce_block;
8932 } else {
8933#if KMP_FAST_REDUCTION_BARRIER
8934 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8935#endif
8936 }
8937 break;
8938
8939 default:
8940 KMP_ASSERT(0); // "unsupported method specified"
8941 }
8942
8943 retval = forced_retval;
8944 }
8945
8946 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
8947
8948#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8949#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
8950
8951 return (retval);
8952}
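The architecture/OS ladder above reduces to a fairly small decision tree; a condensed sketch (an editor's simplification that ignores the per-platform cutoff values and the KMP_FORCE_REDUCTION override):

// Values are stand-ins for the PACKED_REDUCTION_METHOD_T constants.
enum reduce_method { critical_m, atomic_m, tree_m, empty_m };

static enum reduce_method pick_method(int team_size, int atomic_available,
                                      int tree_available, int cutoff) {
  if (team_size == 1)
    return empty_m; // serialized team: no synchronization needed
  if (tree_available && team_size > cutoff)
    return tree_m; // large teams: tree reduction + reduction barrier
  if (atomic_available)
    return atomic_m; // compiler emitted atomic reduction code
  return critical_m; // always-available critical-section fallback
}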
8953// this function is for testing set/get/determine reduce method
8954kmp_int32 __kmp_get_reduce_method(void) {
8955 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
8956}
8957
8958// Soft pause sets up threads to ignore blocktime and just go to sleep.
8959// Spin-wait code checks __kmp_pause_status and reacts accordingly.
8960void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
8961
8962// Hard pause shuts down the runtime completely. Resume happens naturally when
8963// OpenMP is used subsequently.
8964void __kmp_hard_pause() {
8965 __kmp_pause_status = kmp_hard_paused;
8966 __kmp_internal_end_thread(-1);
8967}
8968
8969// Soft resume sets __kmp_pause_status, and wakes up all threads.
8970void __kmp_resume_if_soft_paused() {
8971 if (__kmp_pause_status == kmp_soft_paused) {
8972 __kmp_pause_status = kmp_not_paused;
8973
8974 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8975 kmp_info_t *thread = __kmp_threads[gtid];
8976 if (thread) { // Wake it if sleeping
8977 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
8978 thread);
8979 if (fl.is_sleeping())
8980 fl.resume(gtid);
8981 else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
8982 __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
8983 } else { // thread holds the lock and may sleep soon
8984 do { // until either the thread sleeps, or we can get the lock
8985 if (fl.is_sleeping()) {
8986 fl.resume(gtid);
8987 break;
8988 } else if (__kmp_try_suspend_mx(thread)) {
8989 __kmp_unlock_suspend_mx(thread);
8990 break;
8991 }
8992 } while (1);
8993 }
8994 }
8995 }
8996 }
8997}
8998
8999// This function is called via __kmpc_pause_resource. Returns 0 if successful.
9000// TODO: add warning messages
9001int __kmp_pause_resource(kmp_pause_status_t level) {
9002 if (level == kmp_not_paused) { // requesting resume
9003 if (__kmp_pause_status == kmp_not_paused) {
9004 // error message about runtime not being paused, so can't resume
9005 return 1;
9006 } else {
9007 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
9008 __kmp_pause_status == kmp_hard_paused);
9009 __kmp_pause_status = kmp_not_paused;
9010 return 0;
9011 }
9012 } else if (level == kmp_soft_paused) { // requesting soft pause
9013 if (__kmp_pause_status != kmp_not_paused) {
9014 // error message about already being paused
9015 return 1;
9016 } else {
9017 __kmp_soft_pause();
9018 return 0;
9019 }
9020 } else if (level == kmp_hard_paused) { // requesting hard pause
9021 if (__kmp_pause_status != kmp_not_paused) {
9022 // error message about already being paused
9023 return 1;
9024 } else {
9025 __kmp_hard_pause();
9026 return 0;
9027 }
9028 } else {
9029 // error message about invalid level
9030 return 1;
9031 }
9032}
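__kmp_pause_resource() implements the OpenMP 5.0 omp_pause_resource()/omp_pause_resource_all() entry points; a user-level sketch:

#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel
  { /* warm up the runtime */ }

  // Soft pause: worker threads go to sleep but the runtime stays initialized.
  if (omp_pause_resource_all(omp_pause_soft) != 0)
    printf("soft pause rejected\n");

  // Using OpenMP again resumes automatically (see __kmp_resume_if_soft_paused).
#pragma omp parallel
  { /* ... */ }

  // Hard pause releases runtime resources; they are rebuilt on next use.
  if (omp_pause_resource_all(omp_pause_hard) != 0)
    printf("hard pause rejected\n");
  return 0;
}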
9033
9034void __kmp_omp_display_env(int verbose) {
9035 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
9036 if (__kmp_init_serial == 0)
9037 __kmp_do_serial_initialize();
9038 __kmp_display_env_impl(!verbose, verbose);
9039 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
9040}
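This is the backing for the OpenMP 5.1 omp_display_env() routine (and the OMP_DISPLAY_ENV environment variable); for example:

#include <omp.h>

int main(void) {
  // Nonzero requests the verbose listing, equivalent to OMP_DISPLAY_ENV=VERBOSE.
  omp_display_env(0);
  return 0;
}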
9041
9042// The team size is changing, so distributed barrier must be modified
9043void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
9044 int new_nthreads) {
9045 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9046 bp_dist_bar);
9047 kmp_info_t **other_threads = team->t.t_threads;
9048
9049 // We want all the workers to stop waiting on the barrier while we adjust the
9050 // size of the team.
9051 for (int f = 1; f < old_nthreads; ++f) {
9052 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9053 // Ignore threads that are already inactive or not present in the team
9054 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9055 // teams construct causes thread_limit to get passed in, and some of
9056 // those could be inactive; just ignore them
9057 continue;
9058 }
9059 // If thread is transitioning still to in_use state, wait for it
9060 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9061 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9062 KMP_CPU_PAUSE();
9063 }
9064 // The thread should be in_use now
9065 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9066 // Transition to unused state
9067 team->t.t_threads[f]->th.th_used_in_team.store(2);
9068 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9069 }
9070 // Release all the workers
9071 team->t.b->go_release();
9072
9073 KMP_MFENCE();
9074
9075 // Workers should see transition status 2 and move to 0; but may need to be
9076 // woken up first
9077 int count = old_nthreads - 1;
9078 while (count > 0) {
9079 count = old_nthreads - 1;
9080 for (int f = 1; f < old_nthreads; ++f) {
9081 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9082 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
9083 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9084 void *, other_threads[f]->th.th_sleep_loc);
9085 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9086 }
9087 } else {
9088 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9089 count--;
9090 }
9091 }
9092 }
9093 // Now update the barrier size
9094 team->t.b->update_num_threads(new_nthreads);
9095 team->t.b->go_reset();
9096}
9097
9098void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9099 // Add the threads back to the team
9100 KMP_DEBUG_ASSERT(team);
9101 // Threads were paused and pointed at th_used_in_team temporarily during a
9102 // resize of the team. We're going to set th_used_in_team to 3 to indicate to
9103 // the thread that it should transition itself back into the team. Then, if
9104 // blocktime isn't infinite, the thread could be sleeping, so we send a resume
9105 // to wake it up.
9106 for (int f = 1; f < new_nthreads; ++f) {
9107 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9108 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9109 3);
9110 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
9111 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9112 (kmp_flag_32<false, false> *)NULL);
9113 }
9114 }
9115 // The threads should be transitioning to the team; when they are done, they
9116 // should have set th_used_in_team to 1. This loop forces the master to wait
9117 // until all threads have moved into the team and are waiting in the barrier.
9118 int count = new_nthreads - 1;
9119 while (count > 0) {
9120 count = new_nthreads - 1;
9121 for (int f = 1; f < new_nthreads; ++f) {
9122 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9123 count--;
9124 }
9125 }
9126 }
9127}
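Taken together, the two routines above implement a small hand-shake on th_used_in_team. The sketch below (an editor's reconstruction using std::atomic, not the runtime's own types) summarizes the states as they are used in this file:

#include <atomic>

// 0 = not part of the team, 1 = active member,
// 2 = asked to leave (worker moves 2 -> 0),
// 3 = asked to (re)join (worker moves 3 -> 1).
struct worker { std::atomic<int> used_in_team{0}; };

static void ask_to_leave(worker &w) { // master side, team shrink
  w.used_in_team.store(2, std::memory_order_release);
}

static void ask_to_join(worker &w) { // master side, team grow
  int expected = 0;
  w.used_in_team.compare_exchange_strong(expected, 3);
}

static void worker_observes(worker &w) { // worker side of the hand-shake
  int s = w.used_in_team.load(std::memory_order_acquire);
  if (s == 2)
    w.used_in_team.store(0, std::memory_order_release);
  else if (s == 3)
    w.used_in_team.store(1, std::memory_order_release);
}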
9128
9129// Globals and functions for hidden helper task
9130kmp_info_t **__kmp_hidden_helper_threads;
9131kmp_info_t *__kmp_hidden_helper_main_thread;
9132std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9133#if KMP_OS_LINUX
9134kmp_int32 __kmp_hidden_helper_threads_num = 8;
9135kmp_int32 __kmp_enable_hidden_helper = TRUE;
9136#else
9137kmp_int32 __kmp_hidden_helper_threads_num = 0;
9138kmp_int32 __kmp_enable_hidden_helper = FALSE;
9139#endif
9140
9141namespace {
9142std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9143
9144void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9145 // This is an explicit synchronization of all hidden helper threads, in case
9146 // a regular thread pushes a hidden helper task to a hidden helper thread
9147 // that has not been awakened since the helpers were released by the main
9148 // thread after creating the team.
9149 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9150 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9151 __kmp_hidden_helper_threads_num)
9152 ;
9153
9154 // If main thread, then wait for signal
9155 if (__kmpc_master(nullptr, *gtid)) {
9156 // First, unset the initial state and release the initial thread
9157 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9158 __kmp_hidden_helper_initz_release();
9159 __kmp_hidden_helper_main_thread_wait();
9160 // Now wake up all worker threads
9161 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9162 __kmp_hidden_helper_worker_thread_signal();
9163 }
9164 }
9165}
9166} // namespace
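The counter-based arrival in __kmp_hidden_helper_wrapper_fn() boils down to the following pattern (a sketch with std::atomic standing in for the KMP_ATOMIC_* wrappers; not the runtime's actual synchronization objects):

#include <atomic>

static std::atomic<int> hit_count{0};

// Every hidden helper thread checks in, then spins until the whole team has
// arrived, so no helper misses the first task pushed to it.
static void wait_until_all_arrived(int team_size) {
  hit_count.fetch_add(1, std::memory_order_acq_rel);
  while (hit_count.load(std::memory_order_acquire) != team_size)
    ; // busy-wait, as the runtime does with KMP_ATOMIC_LD_ACQ
}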
9167
9168void __kmp_hidden_helper_threads_initz_routine() {
9169 // Create a new root for hidden helper team/threads
9170 const int gtid = __kmp_register_root(TRUE);
9171 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9172 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9173 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9174 __kmp_hidden_helper_threads_num;
9175
9176 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9177
9178 __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);
9179
9180 // Set the initialization flag to FALSE
9181 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9182
9183 __kmp_hidden_helper_threads_deinitz_release();
9184}
9185
9186/* Nesting Mode:
9187 Set via KMP_NESTING_MODE, which takes an integer.
9188 Note: we skip duplicate topology levels, and skip levels with only
9189 one entity.
9190 KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
9191 KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
9192 in the topology, and initializes the number of threads at each of those
9193 levels to the number of entities at each level, respectively, below the
9194 entity at the parent level.
9195 KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
9196 but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
9197 the user to turn nesting on explicitly. This is an even more experimental
9198 option to this experimental feature, and may change or go away in the
9199 future.
9200*/
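As a usage illustration (assuming the environment sets KMP_NESTING_MODE=1 as described above, so nesting is enabled and the per-level thread counts are taken from the topology):

#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel // outer level: sized from the first topology level
  {
#pragma omp parallel // inner level: threads below the parent entity
    {
#pragma omp single
      printf("nesting level %d runs with %d threads\n", omp_get_level(),
             omp_get_num_threads());
    }
  }
  return 0;
}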
9201
9202// Allocate space to store nesting levels
9203void __kmp_init_nesting_mode() {
9204 int levels = KMP_HW_LAST;
9205 __kmp_nesting_mode_nlevels = levels;
9206 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
9207 for (int i = 0; i < levels; ++i)
9208 __kmp_nesting_nth_level[i] = 0;
9209 if (__kmp_nested_nth.size < levels) {
9210 __kmp_nested_nth.nth =
9211 (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
9212 __kmp_nested_nth.size = levels;
9213 }
9214}
9215
9216// Set # threads for top levels of nesting; must be called after topology set
9217void __kmp_set_nesting_mode_threads() {
9218 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9219
9220 if (__kmp_nesting_mode == 1)
9221 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9222 else if (__kmp_nesting_mode > 1)
9223 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9224
9225 if (__kmp_topology) { // use topology info
9226 int loc, hw_level;
9227 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9228 loc < __kmp_nesting_mode_nlevels;
9229 loc++, hw_level++) {
9230 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9231 if (__kmp_nesting_nth_level[loc] == 1)
9232 loc--;
9233 }
9234 // Make sure all cores are used
9235 if (__kmp_nesting_mode > 1 && loc > 1) {
9236 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9237 int num_cores = __kmp_topology->get_count(core_level);
9238 int upper_levels = 1;
9239 for (int level = 0; level < loc - 1; ++level)
9240 upper_levels *= __kmp_nesting_nth_level[level];
9241 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9242 __kmp_nesting_nth_level[loc - 1] =
9243 num_cores / __kmp_nesting_nth_level[loc - 2];
9244 }
9245 __kmp_nesting_mode_nlevels = loc;
9246 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9247 } else { // no topology info available; provide a reasonable guesstimation
9248 if (__kmp_avail_proc >= 4) {
9249 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9250 __kmp_nesting_nth_level[1] = 2;
9251 __kmp_nesting_mode_nlevels = 2;
9252 } else {
9253 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9254 __kmp_nesting_mode_nlevels = 1;
9255 }
9256 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9257 }
9258 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9259 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9260 }
9261 set__nproc(thread, __kmp_nesting_nth_level[0]);
9262 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > 1)
9263 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
9264 if (get__max_active_levels(thread) > 1) {
9265 // if max levels was set, set nesting mode levels to same
9266 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9267 }
9268 if (__kmp_nesting_mode == 1) // turn on nesting for this case only
9269 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
9270}
9271
9272// Empty symbols to export (see exports_so.txt) when feature is disabled
9273extern "C" {
9274#if !KMP_STATS_ENABLED
9275void __kmp_reset_stats() {}
9276#endif
9277#if !USE_DEBUGGER
9278int __kmp_omp_debug_struct_info = FALSE;
9279int __kmp_debugging = FALSE;
9280#endif
9281#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
9282void __kmp_itt_fini_ittlib() {}
9283void __kmp_itt_init_ittlib() {}
9284#endif
9285}
9286
9287// end of file
char buf[BUFFER_SIZE]
#define BUFFER_SIZE
uint8_t kmp_uint8
A simple pure header implementation of VLA that aims to replace uses of actual VLA,...
Definition: kmp_utils.h:26
static void deallocate(distributedBarrier *db)
Definition: kmp_barrier.h:134
static distributedBarrier * allocate(int nThreads)
Definition: kmp_barrier.h:113
void resume(int th_gtid)
bool is_sleeping()
Test whether there are threads sleeping on the flag.
int get_level(kmp_hw_t type) const
int get_count(int level) const
int get_ratio(int level) const
int get_depth() const
int64_t kmp_int64
Definition: common.h:10
@ KMP_IDENT_AUTOPAR
Entry point generated by auto-parallelization.
Definition: kmp.h:212
KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid)
KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs, kmpc_micro microtask,...)
KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid)
sched_type
Describes the loop schedule to be used for a parallel for loop.
Definition: kmp.h:370
KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid)
@ kmp_sch_auto
auto
Definition: kmp.h:377
@ kmp_sch_static
static unspecialized
Definition: kmp.h:373
@ kmp_sch_guided_chunked
guided unspecialized
Definition: kmp.h:375
@ kmp_sch_dynamic_chunked
Definition: kmp.h:374
@ kmp_sch_guided_analytical_chunked
Definition: kmp.h:385
@ kmp_sch_static_balanced
Definition: kmp.h:382
@ kmp_sch_static_greedy
Definition: kmp.h:381
@ kmp_sch_static_chunked
Definition: kmp.h:372
@ kmp_sch_trapezoidal
Definition: kmp.h:378
@ kmp_sch_guided_iterative_chunked
Definition: kmp.h:384
@ kmp_sch_static_steal
Definition: kmp.h:387
__itt_string_handle * name
Definition: ittnotify.h:3305
void const char const char int ITT_FORMAT __itt_group_sync s
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t count
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t length
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark S
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id tail
void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team)
void __kmp_finish_implicit_task(kmp_info_t *this_thr)
volatile kmp_team_t * __kmp_team_pool
Definition: kmp_global.cpp:459
#define get__dynamic_2(xteam, xtid)
Definition: kmp.h:2369
#define __kmp_free(ptr)
Definition: kmp.h:3756
kmp_bar_pat_e __kmp_barrier_release_pat_dflt
Definition: kmp_global.cpp:87
int __kmp_generate_warnings
Definition: kmp_global.cpp:120
int __kmp_cg_max_nth
Definition: kmp_global.cpp:127
int __kmp_abort_delay
Definition: kmp_global.cpp:174
kmp_proc_bind_t __kmp_teams_proc_bind
Definition: kmp_global.cpp:292
#define KMP_INTERNAL_MALLOC(sz)
Definition: kmp.h:122
#define KMP_CPU_PAUSE()
Definition: kmp.h:1564
#define KMP_DEFAULT_CHUNK
Definition: kmp.h:1288
bool __kmp_detect_shm()
int __kmp_version
Definition: kmp_global.cpp:64
kmp_bootstrap_lock_t __kmp_initz_lock
#define KMP_MAX_STKSIZE
Definition: kmp.h:1190
#define KMP_MAX_STKPADDING
Definition: kmp.h:1223
int __kmp_display_env_verbose
Definition: kmp_global.cpp:214
kmp_global_t __kmp_global
Definition: kmp_global.cpp:467
void __kmp_init_target_mem()
Definition: kmp_alloc.cpp:1359
@ ct_psingle
Definition: kmp.h:1667
@ ct_ordered_in_parallel
Definition: kmp.h:1669
void __kmp_hidden_helper_worker_thread_signal()
void __kmp_common_initialize(void)
void __kmp_release_64(kmp_flag_64<> *flag)
kmp_pause_status_t __kmp_pause_status
Definition: kmp_global.cpp:558
#define KMP_MAX_BLOCKTIME
Definition: kmp.h:1228
kmp_lock_t __kmp_debug_lock
int __kmp_teams_max_nth
Definition: kmp_global.cpp:129
void __kmp_read_system_time(double *delta)
kmp_bootstrap_lock_t __kmp_tp_cached_lock
void __kmp_reap_task_teams(void)
kmp_int32 __kmp_use_yield
Definition: kmp_global.cpp:433
kmp_pause_status_t
Definition: kmp.h:4524
@ kmp_hard_paused
Definition: kmp.h:4527
@ kmp_soft_paused
Definition: kmp.h:4526
@ kmp_not_paused
Definition: kmp.h:4525
int __kmp_dflt_team_nth_ub
Definition: kmp_global.cpp:132
void __kmp_hidden_helper_threads_initz_wait()
#define KMP_INTERNAL_REALLOC(p, sz)
Definition: kmp.h:124
#define get__nproc_2(xteam, xtid)
Definition: kmp.h:2371
void __kmp_wait_to_unref_task_teams(void)
struct KMP_ALIGN_CACHE dispatch_private_info dispatch_private_info_t
#define __kmp_assign_root_init_mask()
Definition: kmp.h:3948
int __kmp_dflt_max_active_levels
Definition: kmp_global.cpp:136
#define KMP_NOT_SAFE_TO_REAP
Definition: kmp.h:2118
int __kmp_xproc
Definition: kmp_global.cpp:122
int __kmp_debug_buf
Definition: kmp_global.cpp:383
void __kmp_unlock_suspend_mx(kmp_info_t *th)
kmp_bar_pat_e __kmp_barrier_gather_pat_dflt
Definition: kmp_global.cpp:85
#define KMP_HIDDEN_HELPER_TEAM(team)
Definition: kmp.h:4584
static kmp_team_t * __kmp_team_from_gtid(int gtid)
Definition: kmp.h:3639
void __kmp_do_initialize_hidden_helper_threads()
kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier]
Definition: kmp_global.cpp:92
kmp_tasking_mode_t __kmp_tasking_mode
Definition: kmp_global.cpp:299
char * __kmp_affinity_format
Definition: kmp_global.cpp:295
int __kmp_dflt_blocktime
Definition: kmp_global.cpp:158
volatile kmp_info_t * __kmp_thread_pool
Definition: kmp_global.cpp:458
volatile int __kmp_init_gtid
Definition: kmp_global.cpp:45
omp_allocator_handle_t __kmp_def_allocator
Definition: kmp_global.cpp:329
static void __kmp_resume_if_hard_paused()
Definition: kmp.h:4539
size_t __kmp_stksize
Definition: kmp_global.cpp:69
int __kmp_env_checks
Definition: kmp_global.cpp:422
#define get__max_active_levels(xthread)
Definition: kmp.h:2403
kmp_nested_proc_bind_t __kmp_nested_proc_bind
Definition: kmp_global.cpp:291
void __kmp_free_implicit_task(kmp_info_t *this_thr)
void __kmp_hidden_helper_main_thread_release()
fork_context_e
Tell the fork call which compiler generated the fork call, and therefore how to deal with the call.
Definition: kmp.h:4054
@ fork_context_gnu
Called from GNU generated code, so must not invoke the microtask internally.
Definition: kmp.h:4055
@ fork_context_intel
Called from Intel generated code.
Definition: kmp.h:4057
@ fork_context_last
Definition: kmp.h:4058
void __kmp_suspend_initialize(void)
kmp_nested_nthreads_t __kmp_nested_nth
Definition: kmp_global.cpp:200
int __kmp_max_nth
Definition: kmp_global.cpp:126
int __kmp_chunk
Definition: kmp_global.cpp:172
#define KMP_GTID_SHUTDOWN
Definition: kmp.h:1028
@ flag_unset
Definition: kmp.h:2128
void __kmp_internal_end_dtor(void)
volatile int __kmp_all_nth
Definition: kmp_global.cpp:457
#define set__nproc(xthread, xval)
Definition: kmp.h:2394
#define KMP_MIN_NTH
Definition: kmp.h:1164
int __kmp_is_address_mapped(void *addr)
kmp_lock_t __kmp_global_lock
union KMP_ALIGN_CACHE kmp_root kmp_root_t
int __kmp_adjust_gtid_mode
Definition: kmp_global.cpp:183
int __kmp_env_blocktime
Definition: kmp_global.cpp:421
#define __kmp_entry_gtid()
Definition: kmp.h:3601
kmp_old_threads_list_t * __kmp_old_threads_list
Definition: kmp_global.cpp:452
#define KMP_GTID_MONITOR
Definition: kmp.h:1029
volatile int __kmp_init_common
Definition: kmp_global.cpp:46
kmp_info_t __kmp_monitor
static int __kmp_tid_from_gtid(int gtid)
Definition: kmp.h:3619
static bool KMP_UBER_GTID(int gtid)
Definition: kmp.h:3612
int __kmp_display_env
Definition: kmp_global.cpp:213
kmp_int32 __kmp_use_yield_exp_set
Definition: kmp_global.cpp:437
int __kmp_tp_cached
Definition: kmp_global.cpp:134
volatile int __kmp_init_hidden_helper
Definition: kmp_global.cpp:50
#define KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, thr)
Definition: kmp.h:4145
int __kmp_gtid_get_specific(void)
volatile int __kmp_init_middle
Definition: kmp_global.cpp:48
void __kmp_hidden_helper_threads_deinitz_wait()
static kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind)
Definition: kmp.h:501
@ cancel_noreq
Definition: kmp.h:998
#define KMP_NESTED_HOT_TEAMS
Definition: kmp.h:173
int __kmp_reserve_warn
Definition: kmp_global.cpp:121
#define KMP_CHECK_UPDATE(a, b)
Definition: kmp.h:2353
int __kmp_storage_map_verbose
Definition: kmp_global.cpp:411
int __kmp_allThreadsSpecified
Definition: kmp_global.cpp:117
enum sched_type __kmp_static
Definition: kmp_global.cpp:146
#define KMP_INITIAL_GTID(gtid)
Definition: kmp.h:1320
volatile int __kmp_nth
Definition: kmp_global.cpp:456
int PACKED_REDUCTION_METHOD_T
Definition: kmp.h:596
std::atomic< int > __kmp_thread_pool_active_nth
Definition: kmp_global.cpp:462
#define KMP_MASTER_TID(tid)
Definition: kmp.h:1315
int __kmp_duplicate_library_ok
Definition: kmp_global.cpp:363
volatile int __kmp_need_register_serial
Definition: kmp_global.cpp:47
kmp_bootstrap_lock_t __kmp_forkjoin_lock
kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier]
Definition: kmp_global.cpp:91
static kmp_info_t * __kmp_entry_thread()
Definition: kmp.h:3731
void __kmp_init_memkind()
Definition: kmp_alloc.cpp:1272
void __kmp_hidden_helper_main_thread_wait()
#define KMP_GEN_TEAM_ID()
Definition: kmp.h:3681
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task)
kmp_int32 __kmp_default_device
Definition: kmp_global.cpp:297
#define get__sched_2(xteam, xtid)
Definition: kmp.h:2373
void __kmp_cleanup_threadprivate_caches()
static void copy_icvs(kmp_internal_control_t *dst, kmp_internal_control_t *src)
Definition: kmp.h:2184
kmp_bootstrap_lock_t __kmp_exit_lock
kmp_info_t ** __kmp_threads
Definition: kmp_global.cpp:450
void __kmp_hidden_helper_initz_release()
enum sched_type __kmp_sched
Definition: kmp_global.cpp:144
#define KMP_BARRIER_PARENT_FLAG
Definition: kmp.h:2111
void __kmp_suspend_uninitialize_thread(kmp_info_t *th)
void __kmp_finalize_bget(kmp_info_t *th)
#define KMP_BARRIER_SWITCH_TO_OWN_FLAG
Definition: kmp.h:2113
static void __kmp_reset_root_init_mask(int gtid)
Definition: kmp.h:3949
kmp_uint32 __kmp_barrier_gather_bb_dflt
Definition: kmp_global.cpp:80
kmp_uint32 __kmp_barrier_release_bb_dflt
Definition: kmp_global.cpp:82
int __kmp_dispatch_num_buffers
Definition: kmp_global.cpp:135
#define SCHEDULE_WITHOUT_MODIFIERS(s)
Definition: kmp.h:463
int __kmp_nesting_mode
Definition: kmp_global.cpp:561
#define set__max_active_levels(xthread, xval)
Definition: kmp.h:2400
#define __kmp_get_team_num_threads(gtid)
Definition: kmp.h:3609
#define KMP_MIN_MALLOC_ARGV_ENTRIES
Definition: kmp.h:3095
#define KMP_MASTER_GTID(gtid)
Definition: kmp.h:1318
void __kmp_lock_suspend_mx(kmp_info_t *th)
bool __kmp_detect_tmp()
int __kmp_nesting_mode_nlevels
Definition: kmp_global.cpp:562
int __kmp_nteams
Definition: kmp_global.cpp:216
int __kmp_storage_map
Definition: kmp_global.cpp:409
#define KMP_YIELD(cond)
Definition: kmp.h:1582
int(* launch_t)(int gtid)
Definition: kmp.h:3092
void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size)
int * __kmp_nesting_nth_level
Definition: kmp_global.cpp:563
volatile int __kmp_init_parallel
Definition: kmp_global.cpp:49
int __kmp_init_counter
Definition: kmp_global.cpp:62
int __kmp_sys_max_nth
Definition: kmp_global.cpp:125
kmp_queuing_lock_t __kmp_dispatch_lock
Definition: kmp_global.cpp:516
kmp_root_t ** __kmp_root
Definition: kmp_global.cpp:451
#define KMP_DEFAULT_BLOCKTIME
Definition: kmp.h:1232
#define set__blocktime_team(xteam, xtid, xval)
Definition: kmp.h:2376
#define __kmp_allocate(size)
Definition: kmp.h:3754
enum kmp_sched kmp_sched_t
#define TRUE
Definition: kmp.h:1324
enum library_type __kmp_library
Definition: kmp_global.cpp:143
#define FALSE
Definition: kmp.h:1323
int __kmp_tp_capacity
Definition: kmp_global.cpp:133
int __kmp_settings
Definition: kmp_global.cpp:362
@ tskm_immediate_exec
Definition: kmp.h:2417
#define UNLIKELY(x)
Definition: kmp.h:159
#define USE_NESTED_HOT_ARG(x)
Definition: kmp.h:174
int __kmp_env_consistency_check
Definition: kmp_global.cpp:423
#define bs_reduction_barrier
Definition: kmp.h:2143
void __kmp_runtime_destroy(void)
union KMP_ALIGN_CACHE kmp_desc kmp_desc_t
static void __kmp_sched_apply_mods_intkind(kmp_sched_t kind, enum sched_type *internal_kind)
Definition: kmp.h:492
volatile int __kmp_hidden_helper_team_done
Definition: kmp_global.cpp:52
static void __kmp_sched_apply_mods_stdkind(kmp_sched_t *kind, enum sched_type internal_kind)
Definition: kmp.h:483
#define KMP_INIT_BARRIER_STATE
Definition: kmp.h:2091
size_t __kmp_sys_min_stksize
Definition: kmp_global.cpp:124
@ kmp_sched_upper
Definition: kmp.h:360
@ kmp_sched_lower
Definition: kmp.h:348
@ kmp_sched_trapezoidal
Definition: kmp.h:356
@ kmp_sched_upper_std
Definition: kmp.h:354
@ kmp_sched_dynamic
Definition: kmp.h:351
@ kmp_sched_auto
Definition: kmp.h:353
@ kmp_sched_guided
Definition: kmp.h:352
@ kmp_sched_lower_ext
Definition: kmp.h:355
@ kmp_sched_default
Definition: kmp.h:361
@ kmp_sched_static
Definition: kmp.h:350
#define set__bt_set_team(xteam, xtid, xval)
Definition: kmp.h:2386
kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier]
Definition: kmp_global.cpp:90
#define KMP_BARRIER_NOT_WAITING
Definition: kmp.h:2108
int __kmp_task_max_nth
Definition: kmp_global.cpp:128
#define KMP_INTERNAL_FREE(p)
Definition: kmp.h:123
int __kmp_threads_capacity
Definition: kmp_global.cpp:130
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team, int tid)
int __kmp_foreign_tp
Definition: kmp_global.cpp:189
static int __kmp_gtid_from_tid(int tid, const kmp_team_t *team)
Definition: kmp.h:3624
#define KMP_SAFE_TO_REAP
Definition: kmp.h:2120
void __kmp_push_task_team_node(kmp_info_t *thread, kmp_team_t *team)
void __kmp_threadprivate_resize_cache(int newCapacity)
void __kmp_runtime_initialize(void)
@ bs_plain_barrier
Definition: kmp.h:2132
@ bs_last_barrier
Definition: kmp.h:2138
@ bs_forkjoin_barrier
Definition: kmp.h:2134
volatile int __kmp_init_hidden_helper_threads
Definition: kmp_global.cpp:51
void __kmp_common_destroy_gtid(int gtid)
int __kmp_try_suspend_mx(kmp_info_t *th)
int __kmp_display_affinity
Definition: kmp_global.cpp:294
enum sched_type __kmp_guided
Definition: kmp_global.cpp:148
void __kmp_resume_32(int target_gtid, kmp_flag_32< C, S > *flag)
#define KMP_INLINE_ARGV_ENTRIES
Definition: kmp.h:3111
#define __kmp_get_gtid()
Definition: kmp.h:3600
#define SCHEDULE_GET_MODIFIERS(s)
Definition: kmp.h:470
PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method
Definition: kmp_global.cpp:368
int __kmp_avail_proc
Definition: kmp_global.cpp:123
#define __kmp_page_allocate(size)
Definition: kmp.h:3755
#define SKIP_DIGITS(_x)
Definition: kmp.h:300
void __kmp_initialize_bget(kmp_info_t *th)
int __kmp_teams_thread_limit
Definition: kmp_global.cpp:217
int __kmp_stkpadding
Definition: kmp_global.cpp:74
void __kmp_cleanup_hierarchy()
int __kmp_dflt_team_nth
Definition: kmp_global.cpp:131
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr)
void __kmp_gtid_set_specific(int gtid)
kmp_proc_bind_t
Definition: kmp.h:958
@ proc_bind_false
Definition: kmp.h:959
@ proc_bind_close
Definition: kmp.h:962
@ proc_bind_primary
Definition: kmp.h:961
@ proc_bind_spread
Definition: kmp.h:963
@ proc_bind_default
Definition: kmp.h:965
@ KMP_HW_CORE
Definition: kmp.h:631
@ KMP_HW_LAST
Definition: kmp.h:633
void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64< C, S > *flag)
int __kmp_root_counter
Definition: kmp_global.cpp:63
static int __kmp_gtid_from_thread(const kmp_info_t *thr)
Definition: kmp.h:3629
int __kmp_gtid_mode
Definition: kmp_global.cpp:182
#define KMP_MIN_BLOCKTIME
Definition: kmp.h:1227
#define SCHEDULE_SET_MODIFIERS(s, m)
Definition: kmp.h:473
void __kmp_suspend_initialize_thread(kmp_info_t *th)
library_type
Definition: kmp.h:517
@ library_turnaround
Definition: kmp.h:520
@ library_throughput
Definition: kmp.h:521
@ library_serial
Definition: kmp.h:519
volatile int __kmp_init_serial
Definition: kmp_global.cpp:44
@ empty_reduce_block
Definition: kmp.h:550
@ critical_reduce_block
Definition: kmp.h:547
@ tree_reduce_block
Definition: kmp.h:549
@ reduction_method_not_defined
Definition: kmp.h:546
@ atomic_reduce_block
Definition: kmp.h:548
#define KMP_CHECK_UPDATE_SYNC(a, b)
Definition: kmp.h:2356
int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc, void *argv[])
#define KMP_MAX_ACTIVE_LEVELS_LIMIT
Definition: kmp.h:1300
static void __kmp_type_convert(T1 src, T2 *dest)
Definition: kmp.h:4855
#define SKIP_TOKEN(_x)
Definition: kmp.h:305
void __kmp_fini_memkind()
Definition: kmp_alloc.cpp:1334
kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier]
Definition: kmp_global.cpp:93
void __kmp_reap_worker(kmp_info_t *th)
int __kmp_env_stksize
Definition: kmp_global.cpp:420
#define KMP_GTID_DNE
Definition: kmp.h:1027
@ bp_dist_bar
Definition: kmp.h:2154
@ bp_hierarchical_bar
Definition: kmp.h:2153
@ dynamic_thread_limit
Definition: kmp.h:339
@ dynamic_default
Definition: kmp.h:334
@ dynamic_random
Definition: kmp.h:338
void __kmp_hidden_helper_threads_deinitz_release()
void __kmp_expand_host_name(char *buffer, size_t size)
union KMP_ALIGN_CACHE kmp_info kmp_info_t
enum sched_type __kmp_sch_map[]
Definition: kmp_global.cpp:235
int __kmp_tls_gtid_min
Definition: kmp_global.cpp:188
void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team, int wait=1)
#define __kmp_thread_free(th, ptr)
Definition: kmp.h:3782
kmp_topology_t * __kmp_topology
static int __kmp_ncores
kmp_atomic_lock_t __kmp_atomic_lock_8c
Definition: kmp_atomic.cpp:588
kmp_atomic_lock_t __kmp_atomic_lock_8r
Definition: kmp_atomic.cpp:586
kmp_atomic_lock_t __kmp_atomic_lock_4i
Definition: kmp_atomic.cpp:580
KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 kmp_int16
kmp_atomic_lock_t __kmp_atomic_lock_20c
Definition: kmp_atomic.cpp:596
kmp_atomic_lock_t __kmp_atomic_lock_16c
Definition: kmp_atomic.cpp:594
KMP_ARCH_X86 short
kmp_atomic_lock_t __kmp_atomic_lock_2i
Definition: kmp_atomic.cpp:578
kmp_atomic_lock_t __kmp_atomic_lock_32c
Definition: kmp_atomic.cpp:598
kmp_atomic_lock_t __kmp_atomic_lock_8i
Definition: kmp_atomic.cpp:584
kmp_atomic_lock_t __kmp_atomic_lock
Definition: kmp_atomic.cpp:574
kmp_atomic_lock_t __kmp_atomic_lock_10r
Definition: kmp_atomic.cpp:590
KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 kmp_int8
Definition: kmp_atomic.cpp:985
kmp_atomic_lock_t __kmp_atomic_lock_1i
Definition: kmp_atomic.cpp:576
kmp_atomic_lock_t __kmp_atomic_lock_16r
Definition: kmp_atomic.cpp:592
kmp_atomic_lock_t __kmp_atomic_lock_4r
Definition: kmp_atomic.cpp:582
static void __kmp_init_atomic_lock(kmp_atomic_lock_t *lck)
Definition: kmp_atomic.h:405
void __kmp_print_structure(void)
void __kmp_dump_debug_buffer(void)
Definition: kmp_debug.cpp:84
#define KA_TRACE(d, x)
Definition: kmp_debug.h:157
#define KMP_ASSERT(cond)
Definition: kmp_debug.h:59
#define KMP_BUILD_ASSERT(expr)
Definition: kmp_debug.h:26
#define KF_TRACE(d, x)
Definition: kmp_debug.h:162
#define KD_TRACE(d, x)
Definition: kmp_debug.h:160
#define KC_TRACE(d, x)
Definition: kmp_debug.h:159
#define KMP_DEBUG_ASSERT(cond)
Definition: kmp_debug.h:61
#define KB_TRACE(d, x)
Definition: kmp_debug.h:158
#define KMP_ASSERT2(cond, msg)
Definition: kmp_debug.h:60
unsigned long long kmp_uint64
kmp_hier_sched_env_t __kmp_hier_scheds
void __kmp_dispatch_free_hierarchies(kmp_team_t *team)
void __kmp_env_free(char const **value)
char * __kmp_env_get(char const *name)
void __kmp_env_set(char const *name, char const *value, int overwrite)
void __kmp_env_unset(char const *name)
void __kmp_push_sync(int gtid, enum cons_type ct, ident_t const *ident, kmp_user_lock_p lck)
Definition: kmp_error.cpp:338
void __kmp_push_parallel(int gtid, ident_t const *ident)
Definition: kmp_error.cpp:193
void __kmp_check_workshare(int gtid, enum cons_type ct, ident_t const *ident)
Definition: kmp_error.cpp:212
void __kmp_push_workshare(int gtid, enum cons_type ct, ident_t const *ident)
Definition: kmp_error.cpp:233
enum cons_type __kmp_pop_workshare(int gtid, enum cons_type ct, ident_t const *ident)
Definition: kmp_error.cpp:383
void __kmp_pop_sync(int gtid, enum cons_type ct, ident_t const *ident)
Definition: kmp_error.cpp:411
struct cons_header * __kmp_allocate_cons_stack(int gtid)
Definition: kmp_error.cpp:133
void __kmp_pop_parallel(int gtid, ident_t const *ident)
Definition: kmp_error.cpp:363
void __kmp_free_cons_stack(void *ptr)
Definition: kmp_error.cpp:153
static volatile kmp_i18n_cat_status_t status
Definition: kmp_i18n.cpp:48
kmp_msg_t __kmp_msg_null
Definition: kmp_i18n.cpp:36
static void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, va_list ap)
Definition: kmp_i18n.cpp:789
void __kmp_i18n_dump_catalog(kmp_str_buf_t *buffer)
Definition: kmp_i18n.cpp:593
void __kmp_fatal(kmp_msg_t message,...)
Definition: kmp_i18n.cpp:864
#define KMP_INFORM(...)
Definition: kmp_i18n.h:142
#define KMP_WARNING(...)
Definition: kmp_i18n.h:144
#define KMP_MSG(...)
Definition: kmp_i18n.h:121
@ kmp_ms_warning
Definition: kmp_i18n.h:130
#define KMP_I18N_STR(id)
Definition: kmp_i18n.h:46
#define KMP_FATAL(...)
Definition: kmp_i18n.h:146
#define KMP_HNT(...)
Definition: kmp_i18n.h:122
void __kmp_i18n_catclose()
#define KMP_ERR
Definition: kmp_i18n.h:125
kmp_bootstrap_lock_t __kmp_stdio_lock
Definition: kmp_io.cpp:41
void __kmp_fprintf(enum kmp_io stream, char const *format,...)
Definition: kmp_io.cpp:206
void __kmp_vprintf(enum kmp_io out_stream, char const *format, va_list ap)
Definition: kmp_io.cpp:115
void __kmp_printf(char const *format,...)
Definition: kmp_io.cpp:186
void __kmp_printf_no_lock(char const *format,...)
Definition: kmp_io.cpp:197
@ kmp_out
Definition: kmp_io.h:22
@ kmp_err
Definition: kmp_io.h:22
void __kmp_close_console(void)
#define USE_ITT_BUILD_ARG(x)
Definition: kmp_itt.h:346
void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:1516
void __kmp_cleanup_user_locks(void)
Definition: kmp_lock.cpp:3948
void __kmp_validate_locks(void)
Definition: kmp_lock.cpp:43
static void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck)
Definition: kmp_lock.h:535
static int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.h:559
static void __kmp_init_lock(kmp_lock_t *lck)
Definition: kmp_lock.h:571
static int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck)
Definition: kmp_lock.h:527
static void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.h:567
static void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck)
Definition: kmp_lock.h:539
#define TCW_PTR(a, b)
Definition: kmp_os.h:1165
void(* microtask_t)(int *gtid, int *npr,...)
Definition: kmp_os.h:1183
#define kmp_va_deref(ap)
Definition: kmp_os.h:228
#define KMP_WAIT
Definition: kmp_os.h:1191
#define TCW_SYNC_PTR(a, b)
Definition: kmp_os.h:1167
#define KMP_ATOMIC_ST_REL(p, v)
Definition: kmp_os.h:1259
double kmp_real64
Definition: kmp_os.h:200
long kmp_intptr_t
Definition: kmp_os.h:204
#define TCR_SYNC_PTR(a)
Definition: kmp_os.h:1166
#define TCR_PTR(a)
Definition: kmp_os.h:1164
#define KMP_UINTPTR_SPEC
Definition: kmp_os.h:207
@ kmp_warnings_off
Definition: kmp_os.h:1239
#define RCAST(type, var)
Definition: kmp_os.h:291
#define CACHE_LINE
Definition: kmp_os.h:339
#define KMP_CACHE_PREFETCH(ADDR)
Definition: kmp_os.h:347
#define KMP_ATOMIC_LD_ACQ(p)
Definition: kmp_os.h:1257
#define TCW_SYNC_4(a, b)
Definition: kmp_os.h:1144
#define VOLATILE_CAST(x)
Definition: kmp_os.h:1188
#define CCAST(type, var)
Definition: kmp_os.h:290
#define KMP_MB()
Definition: kmp_os.h:1064
#define KMP_EQ
Definition: kmp_os.h:1193
bool __kmp_atomic_compare_store_acq(std::atomic< T > *p, T expected, T desired)
Definition: kmp_os.h:1280
#define TCR_4(a)
Definition: kmp_os.h:1135
#define KMP_FALLTHROUGH()
Definition: kmp_os.h:363
#define KMP_ATOMIC_DEC(p)
Definition: kmp_os.h:1268
#define KMP_GET_PAGE_SIZE()
Definition: kmp_os.h:321
#define KMP_ATOMIC_LD_RLX(p)
Definition: kmp_os.h:1258
#define KMP_MFENCE()
Definition: kmp_os.h:1097
#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv)
Definition: kmp_os.h:813
#define TCW_4(a, b)
Definition: kmp_os.h:1136
#define KMP_WEAK_ATTRIBUTE_EXTERNAL
Definition: kmp_os.h:400
va_list kmp_va_list
Definition: kmp_os.h:227
#define KMP_ATOMIC_INC(p)
Definition: kmp_os.h:1267
int __kmp_pause_resource(kmp_pause_status_t level)
void __kmp_warn(char const *format,...)
kmp_info_t * __kmp_hidden_helper_main_thread
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk)
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc)
static void __kmp_fini_allocator()
void __kmp_soft_pause()
static void __kmp_init_allocator()
void __kmp_aux_set_defaults(char const *str, size_t len)
static kmp_team_t * __kmp_aux_get_team_info(int &teams_serialized)
static int __kmp_expand_threads(int nNeed)
void __kmp_teams_master(int gtid)
static void __kmp_itthash_clean(kmp_info_t *th)
#define propagateFPControl(x)
void __kmp_itt_init_ittlib()
void __kmp_infinite_loop(void)
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb, int num_teams_ub, int num_threads)
int __kmp_aux_get_num_teams()
kmp_info_t * __kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, int new_tid)
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team)
static long __kmp_registration_flag
int __kmp_get_max_active_levels(int gtid)
void __kmp_aux_set_library(enum library_type arg)
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size, char const *format,...)
unsigned short __kmp_get_random(kmp_info_t *thread)
int __kmp_register_root(int initial_thread)
static void __kmp_internal_end(void)
void __kmp_free_team(kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master))
void __kmp_set_max_active_levels(int gtid, int max_active_levels)
void __kmp_abort_thread(void)
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc)
void __kmp_internal_end_atexit(void)
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, kmp_info_t *master_th, int master_gtid, int fork_teams_workers)
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind)
kmp_team_t * __kmp_reap_team(kmp_team_t *team)
void __kmp_exit_single(int gtid)
void __kmp_check_stack_overlap(kmp_info_t *th)
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, int num_threads)
int __kmp_get_team_size(int gtid, int level)
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
static void __kmp_do_middle_initialize(void)
int __kmp_get_max_teams(void)
static void __kmp_free_team_arrays(kmp_team_t *team)
static void __kmp_initialize_root(kmp_root_t *root)
static void __kmp_reinitialize_team(kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc)
int __kmp_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context, kmp_int32 argc, microtask_t microtask, launch_t invoker, kmp_va_list ap)
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref)
void * __kmp_launch_thread(kmp_info_t *this_thr)
void __kmp_set_teams_thread_limit(int limit)
static int __kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context, kmp_int32 argc, microtask_t microtask, launch_t invoker, kmp_info_t *master_th, kmp_team_t *parent_team, kmp_va_list ap)
void __kmp_join_barrier(int gtid)
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team)
void __kmp_init_random(kmp_info_t *thread)
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams, int num_threads)
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads)
void __kmp_user_set_library(enum library_type arg)
#define updateHWFPControl(x)
#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED
void __kmp_internal_end_dest(void *specific_gtid)
int __kmp_aux_get_team_num()
void __kmp_set_num_threads(int new_nth, int gtid)
void __kmp_internal_end_thread(int gtid_req)
static bool __kmp_is_fork_in_teams(kmp_info_t *master_th, microtask_t microtask, int level, int teams_level, kmp_va_list ap)
PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void(*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck)
void __kmp_hidden_helper_threads_initz_routine()
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws)
static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid, int gtid)
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team)
void __kmp_join_call(ident_t *loc, int gtid, int exit_teams)
static int __kmp_reset_root(int gtid, kmp_root_t *root)
int __kmp_get_ancestor_thread_num(int gtid, int level)
void __kmp_itt_fini_ittlib()
void __kmp_omp_display_env(int verbose)
void __kmp_reset_stats()
void __kmp_middle_initialize(void)
void __kmp_unregister_root_current_thread(int gtid)
int __kmp_debugging
#define MAX_MESSAGE
static void __kmp_reap_thread(kmp_info_t *thread, int is_root)
static const unsigned __kmp_primes[]
int __kmp_get_teams_thread_limit(void)
#define FAST_REDUCTION_TREE_METHOD_GENERATED
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref)
kmp_r_sched_t __kmp_get_schedule_global()
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team)
static kmp_internal_control_t __kmp_get_global_icvs(void)
void __kmp_parallel_initialize(void)
void __kmp_set_nesting_mode_threads()
void __kmp_unregister_library(void)
char const __kmp_version_omp_api[]
Definition: kmp_runtime.cpp:66
static char * __kmp_registration_str
int __kmp_ignore_mppbeg(void)
kmp_int32 __kmp_enable_hidden_helper
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team)
void __kmp_aux_set_stacksize(size_t arg)
void __kmp_internal_end_library(int gtid_req)
size_t __kmp_aux_capture_affinity(int gtid, const char *format, kmp_str_buf_t *buffer)
void __kmp_hard_pause()
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, int new_nthreads)
kmp_info_t * __kmp_thread_pool_insert_pt
int __kmp_omp_debug_struct_info
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid)
void __kmp_aux_display_affinity(int gtid, const char *format)
void __kmp_init_nesting_mode()
std::atomic< kmp_int32 > __kmp_unexecuted_hidden_helper_tasks
void __kmp_register_library_startup(void)
void __kmp_free_thread(kmp_info_t *this_th)
int __kmp_invoke_task_func(int gtid)
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk)
void __kmp_abort_process()
static const kmp_affinity_format_field_t __kmp_affinity_format_table[]
void __kmp_set_num_teams(int num_teams)
kmp_info_t ** __kmp_hidden_helper_threads
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc)
void __kmp_save_internal_controls(kmp_info_t *thread)
int __kmp_invoke_teams_master(int gtid)
void __kmp_hidden_helper_initialize()
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads)
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth)
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, int master_tid, int set_nthreads, int enter_teams)
void __kmp_serial_initialize(void)
static bool __kmp_is_entering_teams(int active_level, int level, int teams_level, kmp_va_list ap)
void __kmp_resume_if_soft_paused()
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
int __kmp_get_global_thread_id()
void __kmp_internal_begin(void)
static char * __kmp_reg_status_name()
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team, int team_id, int num_thr)
static void __kmp_do_serial_initialize(void)
void __kmp_fork_barrier(int gtid, int tid)
int __kmp_get_global_thread_id_reg()
kmp_int32 __kmp_hidden_helper_threads_num
kmp_team_t * __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, kmp_proc_bind_t new_proc_bind, kmp_internal_control_t *new_icvs, int argc USE_NESTED_HOT_ARG(kmp_info_t *master))
int __kmp_ignore_mppend(void)
kmp_int32 __kmp_get_reduce_method(void)
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th, const char **ptr, kmp_str_buf_t *field_buffer)
void __kmp_cleanup(void)
static int __kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team, kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root, enum fork_context_e call_context, microtask_t microtask, launch_t invoker, int master_set_numthreads, int level, kmp_va_list ap)
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid)
#define KMP_ALLOCA
#define KMP_STRCPY_S(dst, bsz, src)
#define KMP_SNPRINTF
#define KMP_SSCANF
#define KMP_MEMCPY
#define KMP_STRLEN
void __kmp_env_print_2()
int __kmp_default_tp_capacity(int req_nproc, int max_nth, int all_threads_specified)
int __kmp_initial_threads_capacity(int req_nproc)
void __kmp_env_initialize(char const *string)
void __kmp_display_env_impl(int display_env, int display_env_verbose)
void __kmp_env_print()
void __kmp_stats_init(void) (Definition: kmp_stats.cpp:911)
void __kmp_stats_fini(void) (Definition: kmp_stats.cpp:918)
Functions for collecting statistics.
#define KMP_COUNT_VALUE(n, v) (Definition: kmp_stats.h:1000)
#define KMP_PUSH_PARTITIONED_TIMER(name) (Definition: kmp_stats.h:1014)
#define KMP_GET_THREAD_STATE() (Definition: kmp_stats.h:1017)
#define KMP_POP_PARTITIONED_TIMER() (Definition: kmp_stats.h:1015)
#define KMP_INIT_PARTITIONED_TIMERS(name) (Definition: kmp_stats.h:1012)
#define KMP_SET_THREAD_STATE_BLOCK(state_name) (Definition: kmp_stats.h:1018)
#define KMP_TIME_PARTITIONED_BLOCK(name) (Definition: kmp_stats.h:1013)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) (Definition: kmp_stats.h:1008)
#define KMP_SET_THREAD_STATE(state_name) (Definition: kmp_stats.h:1016)
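The statistics macros above expand to nothing when stats collection is disabled at build time, so they can stay in hot paths. A minimal sketch of how they are typically paired; the timer and thread-state names OMP_parallel and FORK_JOIN_BARRIER are assumed to be among the predefined lists in kmp_stats.h, and example_stats_usage is an illustrative name, not a runtime entry point.

static void example_stats_usage(void) {
  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);      // open a nested region timer
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); // scoped thread-state change
  /* ... code being measured ... */
  KMP_POP_PARTITIONED_TIMER();                   // close the region opened above
}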
void __kmp_str_split(char *str, char delim, char **head, char **tail) (Definition: kmp_str.cpp:571)
void __kmp_str_buf_clear(kmp_str_buf_t *buffer) (Definition: kmp_str.cpp:71)
void __kmp_str_buf_free(kmp_str_buf_t *buffer) (Definition: kmp_str.cpp:123)
char * __kmp_str_format(char const *format,...) (Definition: kmp_str.cpp:448)
int __kmp_str_match_true(char const *data) (Definition: kmp_str.cpp:552)
void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, size_t len) (Definition: kmp_str.cpp:134)
void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src) (Definition: kmp_str.cpp:146)
#define args
int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format,...) (Definition: kmp_str.cpp:221)
int __kmp_str_match_false(char const *data) (Definition: kmp_str.cpp:543)
#define __kmp_str_buf_init(b) (Definition: kmp_str.h:40)
#define i (Definition: kmp_stub.cpp:87)
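The kmp_str_buf_t helpers listed above follow an init/append/free pattern. A minimal sketch that mirrors only the declared signatures, not any specific call site; example_build_message is an illustrative name, not runtime API.

static void example_build_message(void) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);                    // starts on the embedded stack storage
  __kmp_str_buf_print(&buf, "nthreads=%d", 4); // printf-style append, grows on demand
  __kmp_str_buf_cat(&buf, ", bound", 7);       // append a string of known length
  // buf.str now points at "nthreads=4, bound"
  __kmp_str_buf_free(&buf);                    // releases heap storage if the buffer grew
}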
void __kmp_print_version_1(void)
void __kmp_print_version_2(void)
#define KMP_VERSION_PREFIX (Definition: kmp_version.h:34)
char const __kmp_version_alt_comp[]
char const __kmp_version_lock[]
static void __kmp_null_resume_wrapper(kmp_info_t *thr)
__attribute__((noinline)) void microtask(int *global_tid, int *bound_tid)
int32_t kmp_int32
omp_lock_t lck (Definition: omp_lock.c:7)
static int ii
#define res
void ompt_fini()
ompt_callbacks_active_t ompt_enabled
void ompt_pre_init()
void ompt_post_init()
ompt_callbacks_internal_t ompt_callbacks
#define OMPT_INVOKER(x) (Definition: ompt-internal.h:23)
#define OMPT_GET_FRAME_ADDRESS(level)
void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, ompt_data_t *ompt_pid, void *codeptr)
int __ompt_get_task_info_internal(int ancestor_level, int *type, ompt_data_t **task_data, ompt_frame_t **task_frame, ompt_data_t **parallel_data, int *thread_num)
void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int on_heap, bool always)
ompt_task_info_t * __ompt_get_task_info_object(int depth)
void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid)
void __ompt_lw_taskteam_unlink(kmp_info_t *thr)
ompt_data_t * __ompt_get_thread_data_internal()
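The OMPT entry points above are only reached through a guard on ompt_enabled, so a tool pays nothing for callbacks it did not register. A minimal sketch of that gating pattern; the callback chosen and its arguments are illustrative, not copied from any exact call site in this file.

#if OMPT_SUPPORT
static void example_notify_thread_begin(void) {
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_thread_begin) {
    // Dispatch through the registered callback table; the thread kind and
    // data argument here are illustrative choices.
    ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
        ompt_thread_worker, __ompt_get_thread_data_internal());
  }
}
#endif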
static id loc
volatile int flag
kmp_int32 tt_found_proxy_tasks (Definition: kmp.h:2853)
kmp_int32 tt_hidden_helper_task_encountered (Definition: kmp.h:2858)
kmp_info_p * cg_root (Definition: kmp.h:2915)
kmp_int32 cg_nthreads (Definition: kmp.h:2919)
kmp_int32 cg_thread_limit (Definition: kmp.h:2918)
struct kmp_cg_root * up (Definition: kmp.h:2920)
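The kmp_cg_root fields above describe one contention-group record: records nest through up, and cg_thread_limit caps how many threads the group may use. A minimal sketch of reading an effective limit along that chain; effective_cg_thread_limit is an illustrative helper, and taking the minimum over the whole chain is an assumption, not the exact runtime heuristic.

static int effective_cg_thread_limit(const kmp_cg_root_t *cg) {
  int limit = INT_MAX;
  for (; cg != nullptr; cg = cg->up)   // walk toward the outermost group
    if (cg->cg_thread_limit < limit)
      limit = cg->cg_thread_limit;
  return limit;                        // smallest cap seen on the chain
}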
void(* th_dxo_fcn)(int *gtid, int *cid, ident_t *) (Definition: kmp.h:2072)
kmp_int32 th_doacross_buf_idx (Definition: kmp.h:2079)
dispatch_private_info_t * th_dispatch_pr_current (Definition: kmp.h:2075)
kmp_uint32 th_disp_index (Definition: kmp.h:2078)
dispatch_private_info_t * th_disp_buffer (Definition: kmp.h:2077)
void(* th_deo_fcn)(int *gtid, int *cid, ident_t *) (Definition: kmp.h:2070)
dispatch_shared_info_t * th_dispatch_sh_current (Definition: kmp.h:2074)
kmp_proc_bind_t proc_bind (Definition: kmp.h:2179)
kmp_r_sched_t sched (Definition: kmp.h:2178)
struct kmp_internal_control * next (Definition: kmp.h:2181)
int serial_nesting_level (Definition: kmp.h:2162)
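kmp_internal_control is the node type for the ICV save/restore stack: next chains saved copies and serial_nesting_level records the serialized depth a copy belongs to. A minimal sketch of pushing a snapshot onto such a stack; push_icv_snapshot and its plain malloc allocation are illustrative and are not the runtime's __kmp_save_internal_controls.

static void push_icv_snapshot(kmp_internal_control_t **stack_top,
                              const kmp_internal_control_t *live,
                              int serial_level) {
  kmp_internal_control_t *saved =
      (kmp_internal_control_t *)malloc(sizeof(*saved)); // illustrative allocation
  *saved = *live;                          // copy sched, proc_bind and the other ICVs
  saved->serial_nesting_level = serial_level;
  saved->next = *stack_top;                // chain onto the restore stack
  *stack_top = saved;
}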
kmp_proc_bind_t * bind_types (Definition: kmp.h:969)
struct kmp_old_threads_list_t * next (Definition: kmp.h:3281)
kmp_info_t ** threads (Definition: kmp.h:3280)
char * str (Definition: kmp_str.h:33)
ompt_task_info_t ompt_task_info (Definition: ompt-internal.h:70)
ompt_data_t task_data (Definition: ompt-internal.h:57)
ompt_frame_t frame (Definition: ompt-internal.h:56)
kmp_bstate_t bb (Definition: kmp.h:2226)
kmp_base_info_t th (Definition: kmp.h:3071)
int chunk (Definition: kmp.h:509)
enum sched_type r_sched_type (Definition: kmp.h:508)
kmp_int64 sched (Definition: kmp.h:511)
kmp_base_task_team_t tt (Definition: kmp.h:2869)
kmp_base_team_t t (Definition: kmp.h:3216)
void __kmp_reap_monitor(kmp_info_t *th)
void __kmp_register_atfork(void)
void __kmp_free_handle(kmp_thread_t tHandle)
int __kmp_get_load_balance(int max)
int __kmp_still_running(kmp_info_t *th)
void __kmp_initialize_system_tick(void)
int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val)
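__kmp_get_load_balance(max) reports how many threads are currently running system-wide, counting no further than max; on some platforms a failure is reported as a negative value (treated below as "no information"). A minimal sketch of the dynamic-adjustment idea it supports; the clamping policy shown, and the helper name example_adjust_team_size, are illustrative and are not the USE_LOAD_BALANCE heuristic implemented in this file.

static int example_adjust_team_size(int requested, int ncores) {
  int running = __kmp_get_load_balance(ncores); // running threads, capped at ncores
  if (running < 0)                              // platform could not report load
    return requested;
  int spare = ncores - running;                 // idle capacity on the machine
  // Keep at least one thread; otherwise reuse the caller's own slot plus the
  // spare capacity (illustrative policy).
  return (spare < 1) ? 1 : KMP_MIN(requested, spare + 1);
}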