1/*
2 * kmp_runtime.cpp -- KPTS runtime support library
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp.h"
14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
17#include "kmp_error.h"
18#include "kmp_i18n.h"
19#include "kmp_io.h"
20#include "kmp_itt.h"
21#include "kmp_settings.h"
22#include "kmp_stats.h"
23#include "kmp_str.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
27#include "kmp_utils.h"
28#if KMP_USE_HIER_SCHED
29#include "kmp_dispatch_hier.h"
30#endif
31
32#if OMPT_SUPPORT
33#include "ompt-specific.h"
34#endif
35#if OMPD_SUPPORT
36#include "ompd-specific.h"
37#endif
38
39#if OMP_PROFILING_SUPPORT
40#include "llvm/Support/TimeProfiler.h"
41static char *ProfileTraceFile = nullptr;
42#endif
43
44/* these are temporary issues to be dealt with */
45#define KMP_USE_PRCTL 0
46
47#if KMP_OS_WINDOWS
48#include <process.h>
49#endif
50
51#ifndef KMP_USE_SHM
52// Windows and WASI do not need these include files as they don't use shared
53// memory.
54#else
55#include <sys/mman.h>
56#include <sys/stat.h>
57#include <fcntl.h>
58#define SHM_SIZE 1024
59#endif
60
61#if defined(KMP_GOMP_COMPAT)
62char const __kmp_version_alt_comp[] =
63 KMP_VERSION_PREFIX "alternative compiler support: yes";
64#endif /* defined(KMP_GOMP_COMPAT) */
65
66char const __kmp_version_omp_api[] =
67 KMP_VERSION_PREFIX "API version: 5.0 (201611)";
68
69#ifdef KMP_DEBUG
70char const __kmp_version_lock[] =
71 KMP_VERSION_PREFIX "lock type: run time selectable";
72#endif /* KMP_DEBUG */
73
74#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
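// Note (illustrative, not from the original source): as a plain macro,
// KMP_MIN evaluates each argument twice, so it should only be used with
// side-effect-free expressions, e.g.
//   new_nthreads = KMP_MIN(new_nthreads, __kmp_max_nth);
// rather than KMP_MIN(f(), g()).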
75
76/* ------------------------------------------------------------------------ */
77
78#if KMP_USE_MONITOR
79kmp_info_t __kmp_monitor;
80#endif
81
82/* Forward declarations */
83
84void __kmp_cleanup(void);
85
86static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
87 int gtid);
88static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
89 kmp_internal_control_t *new_icvs,
90 ident_t *loc);
91#if KMP_AFFINITY_SUPPORTED
92static void __kmp_partition_places(kmp_team_t *team,
93 int update_master_only = 0);
94#endif
95static void __kmp_do_serial_initialize(void);
96#if ENABLE_LIBOMPTARGET
97static void __kmp_target_init(void);
98#endif // ENABLE_LIBOMPTARGET
99void __kmp_fork_barrier(int gtid, int tid);
100void __kmp_join_barrier(int gtid);
101void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
102 kmp_internal_control_t *new_icvs, ident_t *loc);
103
104#ifdef USE_LOAD_BALANCE
105static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
106#endif
107
108static int __kmp_expand_threads(int nNeed);
109#if KMP_OS_WINDOWS
110static int __kmp_unregister_root_other_thread(int gtid);
111#endif
112static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
113static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
114
115void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
116 int new_nthreads);
117void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
118
119static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr,
120 int level) {
121 kmp_nested_nthreads_t *new_nested_nth =
122 (kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC(
123 sizeof(kmp_nested_nthreads_t));
124 int new_size = level + thr->th.th_set_nested_nth_sz;
125 new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int));
126 for (int i = 0; i < level + 1; ++i)
127 new_nested_nth->nth[i] = 0;
128 for (int i = level + 1, j = 1; i < new_size; ++i, ++j)
129 new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
130 new_nested_nth->size = new_nested_nth->used = new_size;
131 return new_nested_nth;
132}
133
134/* Calculate the identifier of the current thread */
135/* fast (and somewhat portable) way to get unique identifier of executing
136 thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
137int __kmp_get_global_thread_id() {
138 int i;
139 kmp_info_t **other_threads;
140 size_t stack_data;
141 char *stack_addr;
142 size_t stack_size;
143 char *stack_base;
144
145 KA_TRACE(
146 1000,
147 ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
148 __kmp_nth, __kmp_all_nth));
149
150 /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
151 a parallel region, made it return KMP_GTID_DNE to force serial_initialize
152 by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
153 __kmp_init_gtid for this to work. */
154
155 if (!TCR_4(__kmp_init_gtid))
156 return KMP_GTID_DNE;
157
158#ifdef KMP_TDATA_GTID
159 if (TCR_4(__kmp_gtid_mode) >= 3) {
160 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
161 return __kmp_gtid;
162 }
163#endif
164 if (TCR_4(__kmp_gtid_mode) >= 2) {
165 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
166 return __kmp_gtid_get_specific();
167 }
168 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));
169
170 stack_addr = (char *)&stack_data;
171 other_threads = __kmp_threads;
172
173 /* ATT: The code below is a source of potential bugs due to unsynchronized
174 access to __kmp_threads array. For example:
175 1. Current thread loads other_threads[i] to thr and checks it, it is
176 non-NULL.
177 2. Current thread is suspended by OS.
178 3. Another thread unregisters and finishes (debug versions of free()
179 may fill memory with something like 0xEF).
180 4. Current thread is resumed.
181 5. Current thread reads junk from *thr.
182 TODO: Fix it. --ln */
183
184 for (i = 0; i < __kmp_threads_capacity; i++) {
185
186 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
187 if (!thr)
188 continue;
189
190 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
191 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
192
193 /* stack grows down -- search through all of the active threads */
194
195 if (stack_addr <= stack_base) {
196 size_t stack_diff = stack_base - stack_addr;
197
198 if (stack_diff <= stack_size) {
199 /* The only way we can be closer than the allocated */
200 /* stack size is if we are running on this thread. */
201 // __kmp_gtid_get_specific can return negative value because this
202 // function can be called by thread destructor. However, before the
203 // thread destructor is called, the value of the corresponding
204 // thread-specific data will be reset to NULL.
205 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
206 __kmp_gtid_get_specific() == i);
207 return i;
208 }
209 }
210 }
211
212 /* get specific to try and determine our gtid */
213 KA_TRACE(1000,
214 ("*** __kmp_get_global_thread_id: internal alg. failed to find "
215 "thread, using TLS\n"));
216 i = __kmp_gtid_get_specific();
217
218 /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
219
220 /* if we haven't been assigned a gtid, then return code */
221 if (i < 0)
222 return i;
223
224 // other_threads[i] can be nullptr at this point because the corresponding
225 // thread could have already been destructed. It can happen when this function
226 // is called in end library routine.
227 if (!TCR_SYNC_PTR(other_threads[i]))
228 return i;
229
230 /* dynamically updated stack window for uber threads to avoid get_specific
231 call */
232 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
233 KMP_FATAL(StackOverflow, i);
234 }
235
236 stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
237 if (stack_addr > stack_base) {
238 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
239 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
240 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
241 stack_base);
242 } else {
243 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
244 stack_base - stack_addr);
245 }
246
247 /* Reprint stack bounds for ubermaster since they have been refined */
248 if (__kmp_storage_map) {
249 char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
250 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
251 __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
252 other_threads[i]->th.th_info.ds.ds_stacksize,
253 "th_%d stack (refinement)", i);
254 }
255 return i;
256}
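// Illustrative sketch (not part of the runtime): the loop above decides
// "does the probe address live inside thread i's stack?" with a simple
// interval test. A minimal stand-alone model of that predicate, assuming a
// hypothetical descriptor with base/size fields, would be:
//
//   struct stack_desc { char *base; size_t size; }; // hypothetical
//   static bool addr_on_stack(const stack_desc &d, const void *addr) {
//     const char *p = static_cast<const char *>(addr);
//     // stacks grow down: a live address sits in (base - size, base]
//     return p <= d.base && (size_t)(d.base - p) <= d.size;
//   }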
257
258int __kmp_get_global_thread_id_reg() {
259 int gtid;
260
261 if (!__kmp_init_serial) {
262 gtid = KMP_GTID_DNE;
263 } else
264#ifdef KMP_TDATA_GTID
265 if (TCR_4(__kmp_gtid_mode) >= 3) {
266 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
267 gtid = __kmp_gtid;
268 } else
269#endif
270 if (TCR_4(__kmp_gtid_mode) >= 2) {
271 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
272 gtid = __kmp_gtid_get_specific();
273 } else {
274 KA_TRACE(1000,
275 ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
276 gtid = __kmp_get_global_thread_id();
277 }
278
279 /* we must be a new uber master sibling thread */
280 if (gtid == KMP_GTID_DNE) {
281 KA_TRACE(10,
282 ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
283 "Registering a new gtid.\n"));
284 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
285 if (!__kmp_init_serial) {
286 __kmp_do_serial_initialize();
287 gtid = __kmp_gtid_get_specific();
288 } else {
289 gtid = __kmp_register_root(FALSE);
290 }
291 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
292 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
293 }
294
295 KMP_DEBUG_ASSERT(gtid >= 0);
296
297 return gtid;
298}
299
300/* caller must hold forkjoin_lock */
301void __kmp_check_stack_overlap(kmp_info_t *th) {
302 int f;
303 char *stack_beg = NULL;
304 char *stack_end = NULL;
305 int gtid;
306
307 KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
308 if (__kmp_storage_map) {
309 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
310 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
311
312 gtid = __kmp_gtid_from_thread(th);
313
314 if (gtid == KMP_GTID_MONITOR) {
315 __kmp_print_storage_map_gtid(
316 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
317 "th_%s stack (%s)", "mon",
318 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
319 } else {
320 __kmp_print_storage_map_gtid(
321 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
322 "th_%d stack (%s)", gtid,
323 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
324 }
325 }
326
327 /* No point in checking ubermaster threads since they use refinement and
328 * cannot overlap */
329 gtid = __kmp_gtid_from_thread(th);
330 if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
331 KA_TRACE(10,
332 ("__kmp_check_stack_overlap: performing extensive checking\n"));
333 if (stack_beg == NULL) {
334 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
335 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
336 }
337
338 for (f = 0; f < __kmp_threads_capacity; f++) {
339 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
340
341 if (f_th && f_th != th) {
342 char *other_stack_end =
343 (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
344 char *other_stack_beg =
345 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
346 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
347 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
348
349 /* Print the other stack values before the abort */
350 if (__kmp_storage_map)
351 __kmp_print_storage_map_gtid(
352 -1, other_stack_beg, other_stack_end,
353 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
354 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));
355
356 __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
357 __kmp_msg_null);
358 }
359 }
360 }
361 }
362 KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
363}
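// Illustrative sketch (not part of the runtime): the overlap test above is a
// plain interval-intersection check. Written as a stand-alone predicate it
// would look like:
//
//   static bool ranges_overlap(const char *beg1, const char *end1,
//                              const char *beg2, const char *end2) {
//     // two stacks overlap if either endpoint of one lies strictly inside
//     // the other range
//     return (beg1 > beg2 && beg1 < end2) || (end1 > beg2 && end1 < end2);
//   }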
364
365/* ------------------------------------------------------------------------ */
366
367void __kmp_infinite_loop(void) {
368 static int done = FALSE;
369
370 while (!done) {
371 KMP_YIELD(TRUE);
372 }
373}
374
375#define MAX_MESSAGE 512
376
377void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
378 char const *format, ...) {
379 char buffer[MAX_MESSAGE];
380 va_list ap;
381
382 va_start(ap, format);
383 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
384 p2, (unsigned long)size, format);
385 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
386 __kmp_vprintf(kmp_err, buffer, ap);
387#if KMP_PRINT_DATA_PLACEMENT
388 int node;
389 if (gtid >= 0) {
390 if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
391 if (__kmp_storage_map_verbose) {
392 node = __kmp_get_host_node(p1);
393 if (node < 0) /* doesn't work, so don't try this next time */
394 __kmp_storage_map_verbose = FALSE;
395 else {
396 char *last;
397 int lastNode;
398 int localProc = __kmp_get_cpu_from_gtid(gtid);
399
400 const int page_size = KMP_GET_PAGE_SIZE();
401
402 p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
403 p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
404 if (localProc >= 0)
405 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
406 localProc >> 1);
407 else
408 __kmp_printf_no_lock(" GTID %d\n", gtid);
409#if KMP_USE_PRCTL
410 /* The more elaborate format is disabled for now because of the prctl
411 * hanging bug. */
412 do {
413 last = p1;
414 lastNode = node;
415 /* This loop collates adjacent pages with the same host node. */
416 do {
417 (char *)p1 += page_size;
418 } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
419 __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
420 lastNode);
421 } while (p1 <= p2);
422#else
423 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
424 (char *)p1 + (page_size - 1),
425 __kmp_get_host_node(p1));
426 if (p1 < p2) {
427 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
428 (char *)p2 + (page_size - 1),
429 __kmp_get_host_node(p2));
430 }
431#endif
432 }
433 }
434 } else
435 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));
436 }
437#endif /* KMP_PRINT_DATA_PLACEMENT */
438 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
439
440 va_end(ap);
441}
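// Illustrative note (addresses are made up): with __kmp_storage_map enabled,
// each call above emits one line on stderr of the form
//
//   OMP storage map: 0x7f0000000000 0x7f0000001000    4096 th_0 stack (refinement)
//
// i.e. the two bounding pointers, the size in bytes, and the formatted name.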
442
443void __kmp_warn(char const *format, ...) {
444 char buffer[MAX_MESSAGE];
445 va_list ap;
446
447 if (__kmp_generate_warnings == kmp_warnings_off) {
448 return;
449 }
450
451 va_start(ap, format);
452
453 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
454 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
455 __kmp_vprintf(kmp_err, buffer, ap);
456 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
457
458 va_end(ap);
459}
460
461void __kmp_abort_process() {
462 // Later threads may stall here, but that's ok because abort() will kill them.
463 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
464
465 if (__kmp_debug_buf) {
466 __kmp_dump_debug_buffer();
467 }
468
469#if KMP_OS_WINDOWS
470 // Let other threads know of abnormal termination and prevent deadlock
471 // if abort happened during library initialization or shutdown
472 __kmp_global.g.g_abort = SIGABRT;
473
474 /* On Windows* OS by default abort() causes pop-up error box, which stalls
475 nightly testing. Unfortunately, we cannot reliably suppress pop-up error
476 boxes. _set_abort_behavior() works well, but this function is not
477 available in VS7 (this is not problem for DLL, but it is a problem for
478 static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
479 help, at least in some versions of MS C RTL.
480
481 It seems following sequence is the only way to simulate abort() and
482 avoid pop-up error box. */
483 raise(SIGABRT);
484 _exit(3); // Just in case, if signal ignored, exit anyway.
485#else
486 __kmp_unregister_library();
487 abort();
488#endif
489
490 __kmp_infinite_loop();
491 __kmp_release_bootstrap_lock(&__kmp_exit_lock);
492
493} // __kmp_abort_process
494
495void __kmp_abort_thread(void) {
496 // TODO: Eliminate g_abort global variable and this function.
497 // In case of abort just call abort(), it will kill all the threads.
498 __kmp_infinite_loop();
499} // __kmp_abort_thread
500
501/* Print out the storage map for the major kmp_info_t thread data structures
502 that are allocated together. */
503
504static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
505 __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
506 gtid);
507
508 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
509 sizeof(kmp_desc_t), "th_%d.th_info", gtid);
510
511 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
512 sizeof(kmp_local_t), "th_%d.th_local", gtid);
513
514 __kmp_print_storage_map_gtid(
515 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
516 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);
517
518 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
519 &thr->th.th_bar[bs_plain_barrier + 1],
520 sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
521 gtid);
522
523 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
524 &thr->th.th_bar[bs_forkjoin_barrier + 1],
525 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
526 gtid);
527
528#if KMP_FAST_REDUCTION_BARRIER
529 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
530 &thr->th.th_bar[bs_reduction_barrier + 1],
531 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
532 gtid);
533#endif // KMP_FAST_REDUCTION_BARRIER
534}
535
536/* Print out the storage map for the major kmp_team_t team data structures
537 that are allocated together. */
538
539static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
540 int team_id, int num_thr) {
541 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
542 __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
543 header, team_id);
544
545 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
546 &team->t.t_bar[bs_last_barrier],
547 sizeof(kmp_balign_team_t) * bs_last_barrier,
548 "%s_%d.t_bar", header, team_id);
549
550 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
551 &team->t.t_bar[bs_plain_barrier + 1],
552 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
553 header, team_id);
554
555 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
556 &team->t.t_bar[bs_forkjoin_barrier + 1],
557 sizeof(kmp_balign_team_t),
558 "%s_%d.t_bar[forkjoin]", header, team_id);
559
560#if KMP_FAST_REDUCTION_BARRIER
561 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
562 &team->t.t_bar[bs_reduction_barrier + 1],
563 sizeof(kmp_balign_team_t),
564 "%s_%d.t_bar[reduction]", header, team_id);
565#endif // KMP_FAST_REDUCTION_BARRIER
566
567 __kmp_print_storage_map_gtid(
568 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
569 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);
570
571 __kmp_print_storage_map_gtid(
572 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
573 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);
574
575 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
576 &team->t.t_disp_buffer[num_disp_buff],
577 sizeof(dispatch_shared_info_t) * num_disp_buff,
578 "%s_%d.t_disp_buffer", header, team_id);
579}
580
589
590/* ------------------------------------------------------------------------ */
591
592#if ENABLE_LIBOMPTARGET
593static void __kmp_init_omptarget() {
594 __kmp_init_target_task();
595}
596#endif
597
598/* ------------------------------------------------------------------------ */
599
600#if KMP_DYNAMIC_LIB
601#if KMP_OS_WINDOWS
602
603BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
604 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
605
606 switch (fdwReason) {
607
608 case DLL_PROCESS_ATTACH:
609 KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
610
611 return TRUE;
612
613 case DLL_PROCESS_DETACH:
614 KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
615
616 // According to Windows* documentation for DllMain entry point:
617 // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
618 // lpReserved == NULL when FreeLibrary() is called,
619 // lpReserved != NULL when the process is terminated.
620 // When FreeLibrary() is called, worker threads remain alive. So the
621 // runtime's state is consistent and executing proper shutdown is OK.
622 // When the process is terminated, worker threads have exited or been
623 // forcefully terminated by the OS and only the shutdown thread remains.
624 // This can leave the runtime in an inconsistent state.
625 // Hence, only attempt proper cleanup when FreeLibrary() is called.
626 // Otherwise, rely on OS to reclaim resources.
627 if (lpReserved == NULL)
628 __kmp_internal_end_library(__kmp_gtid_get_specific());
629
630 return TRUE;
631
632 case DLL_THREAD_ATTACH:
633 KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
634
635 /* if we want to register new siblings all the time here call
636 * __kmp_get_gtid(); */
637 return TRUE;
638
639 case DLL_THREAD_DETACH:
640 KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
641
642 __kmp_internal_end_thread(__kmp_gtid_get_specific());
643 return TRUE;
644 }
645
646 return TRUE;
647}
648
649#endif /* KMP_OS_WINDOWS */
650#endif /* KMP_DYNAMIC_LIB */
651
652/* __kmp_parallel_deo -- Wait until it's our turn. */
653void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
654 int gtid = *gtid_ref;
655#ifdef BUILD_PARALLEL_ORDERED
656 kmp_team_t *team = __kmp_team_from_gtid(gtid);
657#endif /* BUILD_PARALLEL_ORDERED */
658
659 if (__kmp_env_consistency_check) {
660 if (__kmp_threads[gtid]->th.th_root->r.r_active)
661#if KMP_USE_DYNAMIC_LOCK
662 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
663#else
664 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
665#endif
666 }
667#ifdef BUILD_PARALLEL_ORDERED
668 if (!team->t.t_serialized) {
669 KMP_MB();
670 KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
671 NULL);
672 KMP_MB();
673 }
674#endif /* BUILD_PARALLEL_ORDERED */
675}
676
677/* __kmp_parallel_dxo -- Signal the next task. */
678void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
679 int gtid = *gtid_ref;
680#ifdef BUILD_PARALLEL_ORDERED
681 int tid = __kmp_tid_from_gtid(gtid);
682 kmp_team_t *team = __kmp_team_from_gtid(gtid);
683#endif /* BUILD_PARALLEL_ORDERED */
684
685 if (__kmp_env_consistency_check) {
686 if (__kmp_threads[gtid]->th.th_root->r.r_active)
687 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
688 }
689#ifdef BUILD_PARALLEL_ORDERED
690 if (!team->t.t_serialized) {
691 KMP_MB(); /* Flush all pending memory write invalidates. */
692
693 /* use the tid of the next thread in this team */
694 /* TODO replace with general release procedure */
695 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
696
697 KMP_MB(); /* Flush all pending memory write invalidates. */
698 }
699#endif /* BUILD_PARALLEL_ORDERED */
700}
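// Illustrative sketch (not part of the runtime): together, __kmp_parallel_deo
// and __kmp_parallel_dxo act as a ticket-style turnstile on
// t_ordered.dt.t_value. A minimal stand-alone model of the same idea, using
// a hypothetical shared counter, would be:
//
//   #include <atomic>
//   std::atomic<int> turn{0};                  // hypothetical shared state
//   void enter_ordered(int tid) {              // ~ __kmp_parallel_deo
//     while (turn.load(std::memory_order_acquire) != tid) { /* spin */ }
//   }
//   void exit_ordered(int tid, int nthreads) { // ~ __kmp_parallel_dxo
//     turn.store((tid + 1) % nthreads, std::memory_order_release);
//   }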
701
702/* ------------------------------------------------------------------------ */
703/* The BARRIER for a SINGLE process section is always explicit */
704
705int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
706 int status;
707 kmp_info_t *th;
708 kmp_team_t *team;
709
710 if (!TCR_4(__kmp_init_parallel))
711 __kmp_parallel_initialize();
712 __kmp_resume_if_soft_paused();
713
714 th = __kmp_threads[gtid];
715 team = th->th.th_team;
716 status = 0;
717
718 th->th.th_ident = id_ref;
719
720 if (team->t.t_serialized) {
721 status = 1;
722 } else {
723 kmp_int32 old_this = th->th.th_local.this_construct;
724
725 ++th->th.th_local.this_construct;
726 /* try to set team count to thread count--success means thread got the
727 single block */
728 /* TODO: Should this be acquire or release? */
729 if (team->t.t_construct == old_this) {
730 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
731 th->th.th_local.this_construct);
732 }
733#if USE_ITT_BUILD
734 if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
735 KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
736 team->t.t_active_level == 1) {
737 // Only report metadata by primary thread of active team at level 1
738 __kmp_itt_metadata_single(id_ref);
739 }
740#endif /* USE_ITT_BUILD */
741 }
742
743 if (__kmp_env_consistency_check) {
744 if (status && push_ws) {
745 __kmp_push_workshare(gtid, ct_psingle, id_ref);
746 } else {
747 __kmp_check_workshare(gtid, ct_psingle, id_ref);
748 }
749 }
750#if USE_ITT_BUILD
751 if (status) {
752 __kmp_itt_single_start(gtid);
753 }
754#endif /* USE_ITT_BUILD */
755 return status;
756}
757
758void __kmp_exit_single(int gtid) {
759#if USE_ITT_BUILD
760 __kmp_itt_single_end(gtid);
761#endif /* USE_ITT_BUILD */
762 if (__kmp_env_consistency_check)
763 __kmp_pop_workshare(gtid, ct_psingle, NULL);
764}
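// Illustrative sketch (not part of the runtime): the winner of a SINGLE
// construct is picked above with one compare-and-swap on the team's
// t_construct counter. The same "first thread to bump the counter wins"
// idea, modeled stand-alone with a hypothetical atomic counter:
//
//   #include <atomic>
//   std::atomic<int> construct_count{0};       // hypothetical team counter
//   bool try_win_single(int my_count) {
//     int expected = my_count;
//     // only the first arriving thread moves the counter forward
//     return construct_count.compare_exchange_strong(expected, my_count + 1);
//   }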
765
766/* determine if we can go parallel or must use a serialized parallel region and
767 * how many threads we can use
768 * set_nproc is the number of threads requested for the team
769 * returns 0 if we should serialize or only use one thread,
770 * otherwise the number of threads to use
771 * The forkjoin lock is held by the caller. */
772static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
773 int master_tid, int set_nthreads,
774 int enter_teams) {
775 int capacity;
776 int new_nthreads;
777 KMP_DEBUG_ASSERT(__kmp_init_serial);
778 KMP_DEBUG_ASSERT(root && parent_team);
779 kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
780
781 // If dyn-var is set, dynamically adjust the number of desired threads,
782 // according to the method specified by dynamic_mode.
783 new_nthreads = set_nthreads;
784 if (!get__dynamic_2(parent_team, master_tid)) {
785 ;
786 }
787#ifdef USE_LOAD_BALANCE
788 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
789 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
790 if (new_nthreads == 1) {
791 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
792 "reservation to 1 thread\n",
793 master_tid));
794 return 1;
795 }
796 if (new_nthreads < set_nthreads) {
797 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
798 "reservation to %d threads\n",
799 master_tid, new_nthreads));
800 }
801 }
802#endif /* USE_LOAD_BALANCE */
803 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
804 new_nthreads = __kmp_avail_proc - __kmp_nth +
805 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
806 if (new_nthreads <= 1) {
807 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
808 "reservation to 1 thread\n",
809 master_tid));
810 return 1;
811 }
812 if (new_nthreads < set_nthreads) {
813 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
814 "reservation to %d threads\n",
815 master_tid, new_nthreads));
816 } else {
817 new_nthreads = set_nthreads;
818 }
819 } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
820 if (set_nthreads > 2) {
821 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
822 new_nthreads = (new_nthreads % set_nthreads) + 1;
823 if (new_nthreads == 1) {
824 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
825 "reservation to 1 thread\n",
826 master_tid));
827 return 1;
828 }
829 if (new_nthreads < set_nthreads) {
830 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
831 "reservation to %d threads\n",
832 master_tid, new_nthreads));
833 }
834 }
835 } else {
836 KMP_ASSERT(0);
837 }
838
839 // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
840 if (__kmp_nth + new_nthreads -
841 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
842 __kmp_max_nth) {
843 int tl_nthreads = __kmp_max_nth - __kmp_nth +
844 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
845 if (tl_nthreads <= 0) {
846 tl_nthreads = 1;
847 }
848
849 // If dyn-var is false, emit a 1-time warning.
850 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
851 __kmp_reserve_warn = 1;
852 __kmp_msg(kmp_ms_warning,
853 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
854 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
855 }
856 if (tl_nthreads == 1) {
857 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
858 "reduced reservation to 1 thread\n",
859 master_tid));
860 return 1;
861 }
862 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
863 "reservation to %d threads\n",
864 master_tid, tl_nthreads));
865 new_nthreads = tl_nthreads;
866 }
867
868 // Respect OMP_THREAD_LIMIT
869 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
870 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
871 if (cg_nthreads + new_nthreads -
872 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
873 max_cg_threads) {
874 int tl_nthreads = max_cg_threads - cg_nthreads +
875 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
876 if (tl_nthreads <= 0) {
877 tl_nthreads = 1;
878 }
879
880 // If dyn-var is false, emit a 1-time warning.
881 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
882 __kmp_reserve_warn = 1;
883 __kmp_msg(kmp_ms_warning,
884 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
885 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
886 }
887 if (tl_nthreads == 1) {
888 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
889 "reduced reservation to 1 thread\n",
890 master_tid));
891 return 1;
892 }
893 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
894 "reservation to %d threads\n",
895 master_tid, tl_nthreads));
896 new_nthreads = tl_nthreads;
897 }
898
899 // Check if the threads array is large enough, or needs expanding.
900 // See comment in __kmp_register_root() about the adjustment if
901 // __kmp_threads[0] == NULL.
902 capacity = __kmp_threads_capacity;
903 if (TCR_PTR(__kmp_threads[0]) == NULL) {
904 --capacity;
905 }
906 // If it is not for initializing the hidden helper team, we need to take
907 // __kmp_hidden_helper_threads_num out of the capacity because it is included
908 // in __kmp_threads_capacity.
909 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
910 capacity -= __kmp_hidden_helper_threads_num;
911 }
912 if (__kmp_nth + new_nthreads -
913 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
914 capacity) {
915 // Expand the threads array.
916 int slotsRequired = __kmp_nth + new_nthreads -
917 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
918 capacity;
919 int slotsAdded = __kmp_expand_threads(slotsRequired);
920 if (slotsAdded < slotsRequired) {
921 // The threads array was not expanded enough.
922 new_nthreads -= (slotsRequired - slotsAdded);
923 KMP_ASSERT(new_nthreads >= 1);
924
925 // If dyn-var is false, emit a 1-time warning.
926 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
927 __kmp_reserve_warn = 1;
928 if (__kmp_tp_cached) {
929 __kmp_msg(kmp_ms_warning,
930 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
931 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
932 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
933 } else {
934 __kmp_msg(kmp_ms_warning,
935 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
936 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
937 }
938 }
939 }
940 }
941
942#ifdef KMP_DEBUG
943 if (new_nthreads == 1) {
944 KC_TRACE(10,
945 ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
946 "dead roots and rechecking; requested %d threads\n",
947 __kmp_get_gtid(), set_nthreads));
948 } else {
949 KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
950 " %d threads\n",
951 __kmp_get_gtid(), new_nthreads, set_nthreads));
952 }
953#endif // KMP_DEBUG
954
955 if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) {
956 __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
957 this_thr->th.th_nt_msg);
958 }
959 return new_nthreads;
960}
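// Worked example (numbers are illustrative only): suppose a team asks for
// set_nthreads = 8, the process already runs __kmp_nth = 4 threads, and the
// device limit is __kmp_max_nth = 8. With an active root the cap above
// computes tl_nthreads = 8 - 4 + 1 = 5, so the reservation is trimmed from 8
// to 5 threads; with dyn-var false a one-time CantFormThrTeam warning is
// issued. With an inactive root and a 4-thread hot team, tl_nthreads would be
// 8 - 4 + 4 = 8 and no trimming happens.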
961
962/* Allocate threads from the thread pool and assign them to the new team. We are
963 assured that there are enough threads available, because we checked on that
964 earlier within critical section forkjoin */
965static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
966 kmp_info_t *master_th, int master_gtid,
967 int fork_teams_workers) {
968 int i;
969 int use_hot_team;
970
971 KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
972 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
973 KMP_MB();
974
975 /* first, let's setup the primary thread */
976 master_th->th.th_info.ds.ds_tid = 0;
977 master_th->th.th_team = team;
978 master_th->th.th_team_nproc = team->t.t_nproc;
979 master_th->th.th_team_master = master_th;
980 master_th->th.th_team_serialized = FALSE;
981 master_th->th.th_dispatch = &team->t.t_dispatch[0];
982
983 /* make sure we are not the optimized hot team */
984 use_hot_team = 0;
985 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
986 if (hot_teams) { // hot teams array is not allocated if
987 // KMP_HOT_TEAMS_MAX_LEVEL=0
988 int level = team->t.t_active_level - 1; // index in array of hot teams
989 if (master_th->th.th_teams_microtask) { // are we inside the teams?
990 if (master_th->th.th_teams_size.nteams > 1) {
991 ++level; // level was not increased in teams construct for
992 // team_of_masters
993 }
994 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
995 master_th->th.th_teams_level == team->t.t_level) {
996 ++level; // level was not increased in teams construct for
997 // team_of_workers before the parallel
998 } // team->t.t_level will be increased inside parallel
999 }
1000 if (level < __kmp_hot_teams_max_level) {
1001 if (hot_teams[level].hot_team) {
1002 // hot team has already been allocated for given level
1003 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
1004 use_hot_team = 1; // the team is ready to use
1005 } else {
1006 use_hot_team = 0; // AC: threads are not allocated yet
1007 hot_teams[level].hot_team = team; // remember new hot team
1008 hot_teams[level].hot_team_nth = team->t.t_nproc;
1009 }
1010 } else {
1011 use_hot_team = 0;
1012 }
1013 }
1014 if (!use_hot_team) {
1015
1016 /* install the primary thread */
1017 team->t.t_threads[0] = master_th;
1018 __kmp_initialize_info(master_th, team, 0, master_gtid);
1019
1020 /* now, install the worker threads */
1021 for (i = 1; i < team->t.t_nproc; i++) {
1022
1023 /* fork or reallocate a new thread and install it in team */
1024 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
1025 team->t.t_threads[i] = thr;
1026 KMP_DEBUG_ASSERT(thr);
1027 KMP_DEBUG_ASSERT(thr->th.th_team == team);
1028 /* align team and thread arrived states */
1029 KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
1030 "T#%d(%d:%d) join =%llu, plain=%llu\n",
1031 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
1032 __kmp_gtid_from_tid(i, team), team->t.t_id, i,
1033 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
1034 team->t.t_bar[bs_plain_barrier].b_arrived));
1035 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1036 thr->th.th_teams_level = master_th->th.th_teams_level;
1037 thr->th.th_teams_size = master_th->th.th_teams_size;
1038 { // Initialize threads' barrier data.
1039 int b;
1040 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1041 for (b = 0; b < bs_last_barrier; ++b) {
1042 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1043 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
1044#if USE_DEBUGGER
1045 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1046#endif
1047 }
1048 }
1049 }
1050
1051#if KMP_AFFINITY_SUPPORTED
1052 // Do not partition the places list for teams construct workers who
1053 // haven't actually been forked to do real work yet. This partitioning
1054 // will take place in the parallel region nested within the teams construct.
1055 if (!fork_teams_workers) {
1056 __kmp_partition_places(team);
1057 }
1058#endif
1059
1060 if (team->t.t_nproc > 1 &&
1061 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1062 team->t.b->update_num_threads(team->t.t_nproc);
1063 __kmp_add_threads_to_team(team, team->t.t_nproc);
1064 }
1065 }
1066
1067 // Take care of primary thread's task state
1068 if (__kmp_tasking_mode != tskm_immediate_exec) {
1069 if (use_hot_team) {
1070 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
1071 KA_TRACE(
1072 20,
1073 ("__kmp_fork_team_threads: Primary T#%d pushing task_team %p / team "
1074 "%p, new task_team %p / team %p\n",
1075 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
1076 team->t.t_parent, team->t.t_task_team[master_th->th.th_task_state],
1077 team));
1078
1079 // Store primary thread's current task state on new team
1080 KMP_CHECK_UPDATE(team->t.t_primary_task_state,
1081 master_th->th.th_task_state);
1082
1083 // Restore primary thread's task state to hot team's state
1084 // by using thread 1's task state
1085 if (team->t.t_nproc > 1) {
1086 KMP_DEBUG_ASSERT(team->t.t_threads[1]->th.th_task_state == 0 ||
1087 team->t.t_threads[1]->th.th_task_state == 1);
1088 KMP_CHECK_UPDATE(master_th->th.th_task_state,
1089 team->t.t_threads[1]->th.th_task_state);
1090 } else {
1091 master_th->th.th_task_state = 0;
1092 }
1093 } else {
1094 // Store primary thread's current task_state on new team
1095 KMP_CHECK_UPDATE(team->t.t_primary_task_state,
1096 master_th->th.th_task_state);
1097 // Are not using hot team, so set task state to 0.
1098 master_th->th.th_task_state = 0;
1099 }
1100 }
1101
1102 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1103 for (i = 0; i < team->t.t_nproc; i++) {
1104 kmp_info_t *thr = team->t.t_threads[i];
1105 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1106 thr->th.th_prev_level != team->t.t_level) {
1107 team->t.t_display_affinity = 1;
1108 break;
1109 }
1110 }
1111 }
1112
1113 KMP_MB();
1114}
1115
1116#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1117// Propagate any changes to the floating point control registers out to the team
1118// We try to avoid unnecessary writes to the relevant cache line in the team
1119// structure, so we don't make changes unless they are needed.
1120inline static void propagateFPControl(kmp_team_t *team) {
1121 if (__kmp_inherit_fp_control) {
1122 kmp_int16 x87_fpu_control_word;
1123 kmp_uint32 mxcsr;
1124
1125 // Get primary thread's values of FPU control flags (both X87 and vector)
1126 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1127 __kmp_store_mxcsr(&mxcsr);
1128 mxcsr &= KMP_X86_MXCSR_MASK;
1129
1130 // There is no point looking at t_fp_control_saved here.
1131 // If it is TRUE, we still have to update the values if they are different
1132 // from those we now have. If it is FALSE we didn't save anything yet, but
1133 // our objective is the same. We have to ensure that the values in the team
1134 // are the same as those we have.
1135 // So, this code achieves what we need whether or not t_fp_control_saved is
1136 // true. By checking whether the value needs updating we avoid unnecessary
1137 // writes that would put the cache-line into a written state, causing all
1138 // threads in the team to have to read it again.
1139 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1140 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1141 // Although we don't use this value, other code in the runtime wants to know
1142 // whether it should restore them. So we must ensure it is correct.
1143 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1144 } else {
1145 // Similarly here. Don't write to this cache-line in the team structure
1146 // unless we have to.
1147 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
1148 }
1149}
1150
1151// Do the opposite, setting the hardware registers to the updated values from
1152// the team.
1153inline static void updateHWFPControl(kmp_team_t *team) {
1154 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1155 // Only reset the fp control regs if they have been changed in the team.
1156 // the parallel region that we are exiting.
1157 kmp_int16 x87_fpu_control_word;
1158 kmp_uint32 mxcsr;
1159 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1160 __kmp_store_mxcsr(&mxcsr);
1161 mxcsr &= KMP_X86_MXCSR_MASK;
1162
1163 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1164 __kmp_clear_x87_fpu_status_word();
1165 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1166 }
1167
1168 if (team->t.t_mxcsr != mxcsr) {
1169 __kmp_load_mxcsr(&team->t.t_mxcsr);
1170 }
1171 }
1172}
1173#else
1174#define propagateFPControl(x) ((void)0)
1175#define updateHWFPControl(x) ((void)0)
1176#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
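// Illustrative sketch (not part of the runtime): the pair above exists so
// that an FP-environment change made by the primary thread before a parallel
// region (e.g. flush-to-zero bits in MXCSR) is seen by the workers when
// FP-control inheritance is enabled (__kmp_inherit_fp_control). A
// hypothetical user-level scenario it covers:
//
//   #include <xmmintrin.h>
//   void run(float *a, int n) {
//     _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // primary thread only
//   #pragma omp parallel for
//     for (int i = 0; i < n; ++i)
//       a[i] *= 0.5f; // workers inherit the propagated MXCSR setting
//   }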
1177
1178static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1179 int realloc); // forward declaration
1180
1181/* Run a parallel region that has been serialized, so runs only in a team of the
1182 single primary thread. */
1183void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1184 kmp_info_t *this_thr;
1185 kmp_team_t *serial_team;
1186
1187 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1188
1189 /* Skip all this code for autopar serialized loops since it results in
1190 unacceptable overhead */
1191 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1192 return;
1193
1194 if (!TCR_4(__kmp_init_parallel))
1195 __kmp_parallel_initialize();
1196 __kmp_resume_if_soft_paused();
1197
1198 this_thr = __kmp_threads[global_tid];
1199 serial_team = this_thr->th.th_serial_team;
1200
1201 /* utilize the serialized team held by this thread */
1202 KMP_DEBUG_ASSERT(serial_team);
1203 KMP_MB();
1204
1205 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1206 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1207 proc_bind = proc_bind_false;
1208 } else if (proc_bind == proc_bind_default) {
1209 // No proc_bind clause was specified, so use the current value
1210 // of proc-bind-var for this parallel region.
1211 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1212 }
1213 // Reset for next parallel region
1214 this_thr->th.th_set_proc_bind = proc_bind_default;
1215
1216 // OpenMP 6.0 12.1.2 requires the num_threads 'strict' modifier to also have
1217 // effect when parallel execution is disabled by a corresponding if clause
1218 // attached to the parallel directive.
1219 if (this_thr->th.th_nt_strict && this_thr->th.th_set_nproc > 1)
1220 __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
1221 this_thr->th.th_nt_msg);
1222 // Reset num_threads for next parallel region
1223 this_thr->th.th_set_nproc = 0;
1224
1225#if OMPT_SUPPORT
1226 ompt_data_t ompt_parallel_data = ompt_data_none;
1227 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1228 if (ompt_enabled.enabled &&
1229 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1230
1231 ompt_task_info_t *parent_task_info;
1232 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1233
1234 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1235 if (ompt_enabled.ompt_callback_parallel_begin) {
1236 int team_size = 1;
1237
1238 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1239 &(parent_task_info->task_data), &(parent_task_info->frame),
1240 &ompt_parallel_data, team_size,
1241 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1242 }
1243 }
1244#endif // OMPT_SUPPORT
1245
1246 if (this_thr->th.th_team != serial_team) {
1247 // Nested level will be an index in the nested nthreads array
1248 int level = this_thr->th.th_team->t.t_level;
1249
1250 if (serial_team->t.t_serialized) {
1251 /* this serial team was already used
1252 TODO increase performance by making this locks more specific */
1253 kmp_team_t *new_team;
1254
1255 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1256
1257 new_team = __kmp_allocate_team(
1258 this_thr->th.th_root, 1, 1,
1259#if OMPT_SUPPORT
1260 ompt_parallel_data,
1261#endif
1262 proc_bind, &this_thr->th.th_current_task->td_icvs, 0, NULL);
1263 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1264 KMP_ASSERT(new_team);
1265
1266 /* setup new serialized team and install it */
1267 new_team->t.t_threads[0] = this_thr;
1268 new_team->t.t_parent = this_thr->th.th_team;
1269 serial_team = new_team;
1270 this_thr->th.th_serial_team = serial_team;
1271
1272 KF_TRACE(
1273 10,
1274 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1275 global_tid, serial_team));
1276
1277 /* TODO the above breaks the requirement that if we run out of resources,
1278 then we can still guarantee that serialized teams are ok, since we may
1279 need to allocate a new one */
1280 } else {
1281 KF_TRACE(
1282 10,
1283 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1284 global_tid, serial_team));
1285 }
1286
1287 /* we have to initialize this serial team */
1288 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1289 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1290 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1291 serial_team->t.t_ident = loc;
1292 serial_team->t.t_serialized = 1;
1293 serial_team->t.t_nproc = 1;
1294 serial_team->t.t_parent = this_thr->th.th_team;
1295 if (this_thr->th.th_team->t.t_nested_nth)
1296 serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
1297 else
1298 serial_team->t.t_nested_nth = &__kmp_nested_nth;
1299 // Save previous team's task state on serial team structure
1300 serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
1301 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1302 this_thr->th.th_team = serial_team;
1303 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1304
1305 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1306 this_thr->th.th_current_task));
1307 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1308 this_thr->th.th_current_task->td_flags.executing = 0;
1309
1310 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1311
1312 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1313 implicit task for each serialized task represented by
1314 team->t.t_serialized? */
1315 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1316 &this_thr->th.th_current_task->td_parent->td_icvs);
1317
1318 // Thread value exists in the nested nthreads array for the next nested
1319 // level
1320 kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
1321 if (this_thr->th.th_team->t.t_nested_nth)
1322 nested_nth = this_thr->th.th_team->t.t_nested_nth;
1323 if (nested_nth->used && (level + 1 < nested_nth->used)) {
1324 this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
1325 }
1326
1327 if (__kmp_nested_proc_bind.used &&
1328 (level + 1 < __kmp_nested_proc_bind.used)) {
1329 this_thr->th.th_current_task->td_icvs.proc_bind =
1330 __kmp_nested_proc_bind.bind_types[level + 1];
1331 }
1332
1333#if USE_DEBUGGER
1334 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
1335#endif
1336 this_thr->th.th_info.ds.ds_tid = 0;
1337
1338 /* set thread cache values */
1339 this_thr->th.th_team_nproc = 1;
1340 this_thr->th.th_team_master = this_thr;
1341 this_thr->th.th_team_serialized = 1;
1342 this_thr->th.th_task_team = NULL;
1343 this_thr->th.th_task_state = 0;
1344
1345 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1346 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1347 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
1348
1349 propagateFPControl(serial_team);
1350
1351 /* check if we need to allocate dispatch buffers stack */
1352 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1353 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1354 serial_team->t.t_dispatch->th_disp_buffer =
1355 (dispatch_private_info_t *)__kmp_allocate(
1356 sizeof(dispatch_private_info_t));
1357 }
1358 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1359
1360 KMP_MB();
1361
1362 } else {
1363 /* this serialized team is already being used,
1364 * that's fine, just add another nested level */
1365 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1366 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1367 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1368 ++serial_team->t.t_serialized;
1369 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1370
1371 // Nested level will be an index in the nested nthreads array
1372 int level = this_thr->th.th_team->t.t_level;
1373 // Thread value exists in the nested nthreads array for the next nested
1374 // level
1375
1376 kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
1377 if (serial_team->t.t_nested_nth)
1378 nested_nth = serial_team->t.t_nested_nth;
1379 if (nested_nth->used && (level + 1 < nested_nth->used)) {
1380 this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
1381 }
1382
1383 serial_team->t.t_level++;
1384 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1385 "of serial team %p to %d\n",
1386 global_tid, serial_team, serial_team->t.t_level));
1387
1388 /* allocate/push dispatch buffers stack */
1389 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1390 {
1391 dispatch_private_info_t *disp_buffer =
1392 (dispatch_private_info_t *)__kmp_allocate(
1393 sizeof(dispatch_private_info_t));
1394 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1395 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1396 }
1397 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1398
1399 /* allocate/push task team stack */
1400 __kmp_push_task_team_node(this_thr, serial_team);
1401
1402 KMP_MB();
1403 }
1404 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1405
1406 // Perform the display affinity functionality for
1407 // serialized parallel regions
1408 if (__kmp_display_affinity) {
1409 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1410 this_thr->th.th_prev_num_threads != 1) {
1411 // NULL means use the affinity-format-var ICV
1412 __kmp_aux_display_affinity(global_tid, NULL);
1413 this_thr->th.th_prev_level = serial_team->t.t_level;
1414 this_thr->th.th_prev_num_threads = 1;
1415 }
1416 }
1417
1418 if (__kmp_env_consistency_check)
1419 __kmp_push_parallel(global_tid, NULL);
1420#if OMPT_SUPPORT
1421 serial_team->t.ompt_team_info.master_return_address = codeptr;
1422 if (ompt_enabled.enabled &&
1423 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1424 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1425 OMPT_GET_FRAME_ADDRESS(0);
1426
1427 ompt_lw_taskteam_t lw_taskteam;
1428 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1429 &ompt_parallel_data, codeptr);
1430
1431 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1433 // don't use lw_taskteam after linking. content was swapped
1433
1434 /* OMPT implicit task begin */
1435 if (ompt_enabled.ompt_callback_implicit_task) {
1436 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1437 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1438 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
1439 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
1440 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1441 __kmp_tid_from_gtid(global_tid);
1442 }
1443
1444 /* OMPT state */
1445 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1446 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1447 OMPT_GET_FRAME_ADDRESS(0);
1448 }
1449#endif
1450}
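// Illustrative note (not part of the runtime): this path is taken whenever a
// parallel region must run on a team of one, most commonly because of an if
// clause that evaluates to false, e.g.
//
//   #pragma omp parallel if (n > threshold)
//   { work(); }
//
// With n <= threshold the compiler-generated call ends up here and the body
// executes on the encountering thread with the nesting bookkeeping above.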
1451
1452// Test if this fork is for a team closely nested in a teams construct
1453static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
1454 microtask_t microtask, int level,
1455 int teams_level, kmp_va_list ap) {
1456 return (master_th->th.th_teams_microtask && ap &&
1457 microtask != (microtask_t)__kmp_teams_master && level == teams_level);
1458}
1459
1460// Test if this fork is for the teams construct, i.e. to form the outer league
1461// of teams
1462static inline bool __kmp_is_entering_teams(int active_level, int level,
1463 int teams_level, kmp_va_list ap) {
1464 return ((ap == NULL && active_level == 0) ||
1465 (ap && teams_level > 0 && teams_level == level));
1466}
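// Worked example (levels are illustrative): for
//   #pragma omp teams      ->  teams_level becomes 1
//   #pragma omp parallel   ->  forked at level == teams_level == 1
// __kmp_is_fork_in_teams() is true for the inner parallel (a fork nested
// directly inside the teams construct), while __kmp_is_entering_teams() is
// true for the outer fork that forms the league of teams itself.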
1467
1468// AC: This is start of parallel that is nested inside teams construct.
1469// The team is actual (hot), all workers are ready at the fork barrier.
1470// No lock needed to initialize the team a bit, then free workers.
1471static inline int
1472__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
1473 kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
1474 enum fork_context_e call_context, microtask_t microtask,
1475 launch_t invoker, int master_set_numthreads, int level,
1476#if OMPT_SUPPORT
1477 ompt_data_t ompt_parallel_data, void *return_address,
1478#endif
1479 kmp_va_list ap) {
1480 void **argv;
1481 int i;
1482
1483 parent_team->t.t_ident = loc;
1484 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1485 parent_team->t.t_argc = argc;
1486 argv = (void **)parent_team->t.t_argv;
1487 for (i = argc - 1; i >= 0; --i) {
1488 *argv++ = va_arg(kmp_va_deref(ap), void *);
1489 }
1490 // Increment our nested depth levels, but not increase the serialization
1491 if (parent_team == master_th->th.th_serial_team) {
1492 // AC: we are in serialized parallel
1493 __kmpc_serialized_parallel(loc, gtid);
1494 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1495
1496 if (call_context == fork_context_gnu) {
1497 // AC: need to decrement t_serialized for enquiry functions to work
1498 // correctly, will restore at join time
1499 parent_team->t.t_serialized--;
1500 return TRUE;
1501 }
1502
1503#if OMPD_SUPPORT
1504 parent_team->t.t_pkfn = microtask;
1505#endif
1506
1507#if OMPT_SUPPORT
1508 void *dummy;
1509 void **exit_frame_p;
1510 ompt_data_t *implicit_task_data;
1511 ompt_lw_taskteam_t lw_taskteam;
1512
1513 if (ompt_enabled.enabled) {
1514 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1515 &ompt_parallel_data, return_address);
1516 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1517
1518 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1519 // Don't use lw_taskteam after linking. Content was swapped.
1520
1521 /* OMPT implicit task begin */
1522 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1523 if (ompt_enabled.ompt_callback_implicit_task) {
1524 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1525 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1526 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
1527 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1528 }
1529
1530 /* OMPT state */
1531 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1532 } else {
1533 exit_frame_p = &dummy;
1534 }
1535#endif
1536
1537 // AC: need to decrement t_serialized for enquiry functions to work
1538 // correctly, will restore at join time
1539 parent_team->t.t_serialized--;
1540
1541 {
1542 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1543 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1544 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1545#if OMPT_SUPPORT
1546 ,
1547 exit_frame_p
1548#endif
1549 );
1550 }
1551
1552#if OMPT_SUPPORT
1553 if (ompt_enabled.enabled) {
1554 *exit_frame_p = NULL;
1555 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1556 if (ompt_enabled.ompt_callback_implicit_task) {
1557 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1558 ompt_scope_end, NULL, implicit_task_data, 1,
1559 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1560 }
1561 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1562 __ompt_lw_taskteam_unlink(master_th);
1563 if (ompt_enabled.ompt_callback_parallel_end) {
1564 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1565 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1566 OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
1567 }
1568 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1569 }
1570#endif
1571 return TRUE;
1572 }
1573
1574 parent_team->t.t_pkfn = microtask;
1575 parent_team->t.t_invoke = invoker;
1576 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1577 parent_team->t.t_active_level++;
1578 parent_team->t.t_level++;
1579 parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
1580
1581 // If the threads allocated to the team are less than the thread limit, update
1582 // the thread limit here. th_teams_size.nth is specific to this team nested
1583 // in a teams construct, the team is fully created, and we're about to do
1584 // the actual fork. Best to do this here so that the subsequent uses below
1585 // and in the join have the correct value.
1586 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
1587
1588#if OMPT_SUPPORT
1589 if (ompt_enabled.enabled) {
1590 ompt_lw_taskteam_t lw_taskteam;
1591 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
1592 return_address);
1593 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
1594 }
1595#endif
1596
1597 /* Change number of threads in the team if requested */
1598 if (master_set_numthreads) { // The parallel has num_threads clause
1599 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1600 // AC: only can reduce number of threads dynamically, can't increase
1601 kmp_info_t **other_threads = parent_team->t.t_threads;
1602 // NOTE: if using distributed barrier, we need to run this code block
1603 // even when the team size appears not to have changed from the max.
1604 int old_proc = master_th->th.th_teams_size.nth;
1605 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1606 __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
1607 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1608 }
1609 parent_team->t.t_nproc = master_set_numthreads;
1610 for (i = 0; i < master_set_numthreads; ++i) {
1611 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1612 }
1613 }
1614 // Keep extra threads hot in the team for possible next parallels
1615 master_th->th.th_set_nproc = 0;
1616 }
1617
1618#if USE_DEBUGGER
1619 if (__kmp_debugging) { // Let debugger override number of threads.
1620 int nth = __kmp_omp_num_threads(loc);
1621 if (nth > 0) { // 0 means debugger doesn't want to change num threads
1622 master_set_numthreads = nth;
1623 }
1624 }
1625#endif
1626
1627 // Figure out the proc_bind policy for the nested parallel within teams
1628 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1629 // proc_bind_default means don't update
1630 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1631 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1632 proc_bind = proc_bind_false;
1633 } else {
1634 // No proc_bind clause specified; use current proc-bind-var
1635 if (proc_bind == proc_bind_default) {
1636 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1637 }
1638 /* else: The proc_bind policy was specified explicitly on parallel clause.
1639 This overrides proc-bind-var for this parallel region, but does not
1640 change proc-bind-var. */
1641 // Figure the value of proc-bind-var for the child threads.
1642 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1643 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1644 master_th->th.th_current_task->td_icvs.proc_bind)) {
1645 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1646 }
1647 }
1648 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
1649 // Need to change the bind-var ICV to correct value for each implicit task
1650 if (proc_bind_icv != proc_bind_default &&
1651 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1652 kmp_info_t **other_threads = parent_team->t.t_threads;
1653 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1654 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1655 }
1656 }
1657 // Reset for next parallel region
1658 master_th->th.th_set_proc_bind = proc_bind_default;
1659
1660#if USE_ITT_BUILD && USE_ITT_NOTIFY
1661 if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
1662 KMP_ITT_DEBUG) &&
1663 __kmp_forkjoin_frames_mode == 3 &&
1664 parent_team->t.t_active_level == 1 // only report frames at level 1
1665 && master_th->th.th_teams_size.nteams == 1) {
1666 kmp_uint64 tmp_time = __itt_get_timestamp();
1667 master_th->th.th_frame_time = tmp_time;
1668 parent_team->t.t_region_time = tmp_time;
1669 }
1670 if (__itt_stack_caller_create_ptr) {
1671 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1672 // create new stack stitching id before entering fork barrier
1673 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1674 }
1675#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1676#if KMP_AFFINITY_SUPPORTED
1677 __kmp_partition_places(parent_team);
1678#endif
1679
1680 KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
1681 "master_th=%p, gtid=%d\n",
1682 root, parent_team, master_th, gtid));
1683 __kmp_internal_fork(loc, gtid, parent_team);
1684 KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
1685 "master_th=%p, gtid=%d\n",
1686 root, parent_team, master_th, gtid));
1687
1688 if (call_context == fork_context_gnu)
1689 return TRUE;
1690
1691 /* Invoke microtask for PRIMARY thread */
1692 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
1693 parent_team->t.t_id, parent_team->t.t_pkfn));
1694
1695 if (!parent_team->t.t_invoke(gtid)) {
1696 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
1697 }
1698 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
1699 parent_team->t.t_id, parent_team->t.t_pkfn));
1700 KMP_MB(); /* Flush all pending memory write invalidates. */
1701
1702 KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
1703
1704 return TRUE;
1705}
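
// Illustrative only (user-level sketch, not part of this translation unit):
// the __kmp_fork_in_teams() path above is taken for a parallel region closely
// nested inside a teams construct, e.g.
//
//   #pragma omp teams num_teams(2) thread_limit(8)
//   #pragma omp parallel num_threads(4)
//   { /* work shared by the threads of each team */ }
//
// The parent (teams) team is reused rather than replaced: the code above can
// only shrink it to the requested size, updates the proc-bind ICVs, and then
// releases the workers through the fork barrier before the primary thread
// invokes the microtask.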
1706
1707// Create a serialized parallel region
1708static inline int
1709__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
1710 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1711 kmp_info_t *master_th, kmp_team_t *parent_team,
1712#if OMPT_SUPPORT
1713 ompt_data_t *ompt_parallel_data, void **return_address,
1714 ompt_data_t **parent_task_data,
1715#endif
1716 kmp_va_list ap) {
1717 kmp_team_t *team;
1718 int i;
1719 void **argv;
1720
1721/* josh todo: hypothetical question: what do we do for OS X*? */
1722#if KMP_OS_LINUX && \
1723 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1724 SimpleVLA<void *> args(argc);
1725#else
1726 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1727#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1728 KMP_ARCH_AARCH64) */
1729
1730 KA_TRACE(
1731 20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
1732
1734
1735#if OMPD_SUPPORT
1736 master_th->th.th_serial_team->t.t_pkfn = microtask;
1737#endif
1738
1739 if (call_context == fork_context_intel) {
1740 /* TODO this sucks, use the compiler itself to pass args! :) */
1741 master_th->th.th_serial_team->t.t_ident = loc;
1742 if (!ap) {
1743 // revert change made in __kmpc_serialized_parallel()
1744 master_th->th.th_serial_team->t.t_level--;
1745// Get args from parent team for teams construct
1746
1747#if OMPT_SUPPORT
1748 void *dummy;
1749 void **exit_frame_p;
1750 ompt_task_info_t *task_info;
1751 ompt_lw_taskteam_t lw_taskteam;
1752
1753 if (ompt_enabled.enabled) {
1754 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1755 ompt_parallel_data, *return_address);
1756
1757 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
 1758 // don't use lw_taskteam after linking. content was swapped
1759 task_info = OMPT_CUR_TASK_INFO(master_th);
1760 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1761 if (ompt_enabled.ompt_callback_implicit_task) {
1762 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1763 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1764 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1765 &(task_info->task_data), 1,
1766 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1767 }
1768
1769 /* OMPT state */
1770 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1771 } else {
1772 exit_frame_p = &dummy;
1773 }
1774#endif
1775
1776 {
1777 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1778 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1779 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1780#if OMPT_SUPPORT
1781 ,
1782 exit_frame_p
1783#endif
1784 );
1785 }
1786
1787#if OMPT_SUPPORT
1788 if (ompt_enabled.enabled) {
1789 *exit_frame_p = NULL;
1790 if (ompt_enabled.ompt_callback_implicit_task) {
1791 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1792 ompt_scope_end, NULL, &(task_info->task_data), 1,
1793 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1794 }
1795 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1796 __ompt_lw_taskteam_unlink(master_th);
1797 if (ompt_enabled.ompt_callback_parallel_end) {
1798 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1799 ompt_parallel_data, *parent_task_data,
1800 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1801 }
1802 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1803 }
1804#endif
1805 } else if (microtask == (microtask_t)__kmp_teams_master) {
1806 KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
1807 team = master_th->th.th_team;
1808 // team->t.t_pkfn = microtask;
1809 team->t.t_invoke = invoker;
1810 __kmp_alloc_argv_entries(argc, team, TRUE);
1811 team->t.t_argc = argc;
1812 argv = (void **)team->t.t_argv;
1813 for (i = argc - 1; i >= 0; --i)
1814 *argv++ = va_arg(kmp_va_deref(ap), void *);
1815 // AC: revert change made in __kmpc_serialized_parallel()
1816 // because initial code in teams should have level=0
1817 team->t.t_level--;
1818 // AC: call special invoker for outer "parallel" of teams construct
1819 invoker(gtid);
1820#if OMPT_SUPPORT
1821 if (ompt_enabled.enabled) {
1822 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
1823 if (ompt_enabled.ompt_callback_implicit_task) {
1824 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1825 ompt_scope_end, NULL, &(task_info->task_data), 0,
1826 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1827 }
1828 if (ompt_enabled.ompt_callback_parallel_end) {
1829 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1830 ompt_parallel_data, *parent_task_data,
1831 OMPT_INVOKER(call_context) | ompt_parallel_league,
1832 *return_address);
1833 }
1834 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1835 }
1836#endif
1837 } else {
1838 argv = args;
1839 for (i = argc - 1; i >= 0; --i)
1840 *argv++ = va_arg(kmp_va_deref(ap), void *);
1841 KMP_MB();
1842
1843#if OMPT_SUPPORT
1844 void *dummy;
1845 void **exit_frame_p;
1846 ompt_task_info_t *task_info;
1847 ompt_lw_taskteam_t lw_taskteam;
1848 ompt_data_t *implicit_task_data;
1849
1850 if (ompt_enabled.enabled) {
1851 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1852 ompt_parallel_data, *return_address);
1853 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
 1854 // don't use lw_taskteam after linking. content was swapped
1855 task_info = OMPT_CUR_TASK_INFO(master_th);
1856 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1857
1858 /* OMPT implicit task begin */
1859 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1860 if (ompt_enabled.ompt_callback_implicit_task) {
1861 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1862 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1863 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1864 ompt_task_implicit);
1865 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1866 }
1867
1868 /* OMPT state */
1869 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1870 } else {
1871 exit_frame_p = &dummy;
1872 }
1873#endif
1874
1875 {
1876 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1877 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1878 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1879#if OMPT_SUPPORT
1880 ,
1881 exit_frame_p
1882#endif
1883 );
1884 }
1885
1886#if OMPT_SUPPORT
1887 if (ompt_enabled.enabled) {
1888 *exit_frame_p = NULL;
1889 if (ompt_enabled.ompt_callback_implicit_task) {
1890 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1891 ompt_scope_end, NULL, &(task_info->task_data), 1,
1892 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1893 }
1894
1895 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1896 __ompt_lw_taskteam_unlink(master_th);
1897 if (ompt_enabled.ompt_callback_parallel_end) {
1898 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1899 ompt_parallel_data, *parent_task_data,
1900 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1901 }
1902 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1903 }
1904#endif
1905 }
1906 } else if (call_context == fork_context_gnu) {
1907#if OMPT_SUPPORT
1908 if (ompt_enabled.enabled) {
1910 __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
1911 *return_address);
1912
1913 lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
1914 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1915 }
 1916// don't use lw_taskteam after linking. content was swapped
1917#endif
1918
1919 // we were called from GNU native code
1920 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1921 return FALSE;
1922 } else {
1923 KMP_ASSERT2(call_context < fork_context_last,
1924 "__kmp_serial_fork_call: unknown fork_context parameter");
1925 }
1926
1927 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1928 KMP_MB();
1929 return FALSE;
1930}
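
// Illustrative only: __kmp_serial_fork_call() executes the region on the
// calling thread (or hands it back to the GNU entry point) when the team size
// is one. A typical user-level trigger is a nested parallel region once the
// active-levels limit is reached, e.g.
//
//   omp_set_max_active_levels(1);
//   #pragma omp parallel          // forks a real team
//   {
//     #pragma omp parallel        // serialized: handled by this routine
//     { /* ... */ }
//   }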
1931
1932/* most of the work for a fork */
1933/* return true if we really went parallel, false if serialized */
1935 enum fork_context_e call_context, // Intel, GNU, ...
1936 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1937 kmp_va_list ap) {
1938 void **argv;
1939 int i;
1940 int master_tid;
1941 int master_this_cons;
1942 kmp_team_t *team;
1943 kmp_team_t *parent_team;
1944 kmp_info_t *master_th;
1945 kmp_root_t *root;
1946 int nthreads;
1947 int master_active;
1948 int master_set_numthreads;
1949 int task_thread_limit = 0;
1950 int level;
1951 int active_level;
1952 int teams_level;
1953 kmp_hot_team_ptr_t **p_hot_teams;
1954 { // KMP_TIME_BLOCK
1956 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
1957
1958 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1959 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1960 /* Some systems prefer the stack for the root thread(s) to start with */
1961 /* some gap from the parent stack to prevent false sharing. */
1962 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1963 /* These 2 lines below are so this does not get optimized out */
1965 __kmp_stkpadding += (short)((kmp_int64)dummy);
1966 }
1967
1968 /* initialize if needed */
1970 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1974
1975 /* setup current data */
1976 // AC: potentially unsafe, not in sync with library shutdown,
1977 // __kmp_threads can be freed
1978 master_th = __kmp_threads[gtid];
1979
1980 parent_team = master_th->th.th_team;
1981 master_tid = master_th->th.th_info.ds.ds_tid;
1982 master_this_cons = master_th->th.th_local.this_construct;
1983 root = master_th->th.th_root;
1984 master_active = root->r.r_active;
1985 master_set_numthreads = master_th->th.th_set_nproc;
1986 task_thread_limit =
1987 master_th->th.th_current_task->td_icvs.task_thread_limit;
1988
1989#if OMPT_SUPPORT
1990 ompt_data_t ompt_parallel_data = ompt_data_none;
1991 ompt_data_t *parent_task_data = NULL;
1992 ompt_frame_t *ompt_frame = NULL;
1993 void *return_address = NULL;
1994
1995 if (ompt_enabled.enabled) {
1996 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1997 NULL, NULL);
1998 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1999 }
2000#endif
2001
2002 // Assign affinity to root thread if it hasn't happened yet
2004
2005 // Nested level will be an index in the nested nthreads array
2006 level = parent_team->t.t_level;
2007 // used to launch non-serial teams even if nested is not allowed
2008 active_level = parent_team->t.t_active_level;
2009 // needed to check nesting inside the teams
2010 teams_level = master_th->th.th_teams_level;
2011 p_hot_teams = &master_th->th.th_hot_teams;
2012 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
 2013 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
 2014 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
 2015 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
2016 // it is either actual or not needed (when active_level > 0)
2017 (*p_hot_teams)[0].hot_team_nth = 1;
2018 }
2019
2020#if OMPT_SUPPORT
2021 if (ompt_enabled.enabled) {
2022 if (ompt_enabled.ompt_callback_parallel_begin) {
2023 int team_size = master_set_numthreads
2024 ? master_set_numthreads
2025 : get__nproc_2(parent_team, master_tid);
2026 int flags = OMPT_INVOKER(call_context) |
2028 ? ompt_parallel_league
2029 : ompt_parallel_team);
2030 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
2031 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
2032 return_address);
2033 }
2034 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2035 }
2036#endif
2037
2038 master_th->th.th_ident = loc;
2039
2040 // Parallel closely nested in teams construct:
2041 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
2042 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
2043 call_context, microtask, invoker,
2044 master_set_numthreads, level,
2045#if OMPT_SUPPORT
2046 ompt_parallel_data, return_address,
2047#endif
2048 ap);
2049 } // End parallel closely nested in teams construct
2050
2051 // Need this to happen before we determine the number of threads, not while
2052 // we are allocating the team
2053 //__kmp_push_current_task_to_thread(master_th, parent_team, 0);
2054
2055 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(parent_team, master_th);
2056
2057 // Determine the number of threads
2058 int enter_teams =
2059 __kmp_is_entering_teams(active_level, level, teams_level, ap);
2060 if ((!enter_teams &&
2061 (parent_team->t.t_active_level >=
2062 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
2064 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
2065 nthreads = 1;
2066 } else {
2067 nthreads = master_set_numthreads
2068 ? master_set_numthreads
2069 // TODO: get nproc directly from current task
2070 : get__nproc_2(parent_team, master_tid);
2071 // Use the thread_limit set for the current target task if exists, else go
2072 // with the deduced nthreads
2073 nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
2074 ? task_thread_limit
2075 : nthreads;
2076 // Check if we need to take forkjoin lock? (no need for serialized
2077 // parallel out of teams construct).
2078 if (nthreads > 1) {
2079 /* determine how many new threads we can use */
2081 /* AC: If we execute teams from parallel region (on host), then teams
2082 should be created but each can only have 1 thread if nesting is
2083 disabled. If teams called from serial region, then teams and their
2084 threads should be created regardless of the nesting setting. */
2085 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
2086 nthreads, enter_teams);
2087 if (nthreads == 1) {
2088 // Free lock for single thread execution here; for multi-thread
2089 // execution it will be freed later after team of threads created
2090 // and initialized
2092 }
2093 }
2094 }
2095 KMP_DEBUG_ASSERT(nthreads > 0);
2096
2097 // If we temporarily changed the set number of threads then restore it now
2098 master_th->th.th_set_nproc = 0;
2099
2100 if (nthreads == 1) {
2101 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2102 invoker, master_th, parent_team,
2103#if OMPT_SUPPORT
2104 &ompt_parallel_data, &return_address,
2105 &parent_task_data,
2106#endif
2107 ap);
2108 } // if (nthreads == 1)
2109
2110 // GEH: only modify the executing flag in the case when not serialized
2111 // serialized case is handled in kmpc_serialized_parallel
2112 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
2113 "curtask=%p, curtask_max_aclevel=%d\n",
2114 parent_team->t.t_active_level, master_th,
2115 master_th->th.th_current_task,
2116 master_th->th.th_current_task->td_icvs.max_active_levels));
2117 // TODO: GEH - cannot do this assertion because root thread not set up as
2118 // executing
2119 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
2120 master_th->th.th_current_task->td_flags.executing = 0;
2121
2122 if (!master_th->th.th_teams_microtask || level > teams_level) {
2123 /* Increment our nested depth level */
2124 KMP_ATOMIC_INC(&root->r.r_in_parallel);
2125 }
2126
2127 // See if we need to make a copy of the ICVs.
2128 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2129 kmp_nested_nthreads_t *nested_nth = NULL;
2130 if (!master_th->th.th_set_nested_nth &&
2131 (level + 1 < parent_team->t.t_nested_nth->used) &&
2132 (parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) {
2133 nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1];
2134 } else if (master_th->th.th_set_nested_nth) {
2135 nested_nth = __kmp_override_nested_nth(master_th, level);
2136 if ((level + 1 < nested_nth->used) &&
2137 (nested_nth->nth[level + 1] != nthreads_icv))
2138 nthreads_icv = nested_nth->nth[level + 1];
2139 else
2140 nthreads_icv = 0; // don't update
2141 } else {
2142 nthreads_icv = 0; // don't update
2143 }
2144
 2145 // Figure out the proc_bind policy for the new team.
2146 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2147 // proc_bind_default means don't update
2148 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2149 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2150 proc_bind = proc_bind_false;
2151 } else {
2152 // No proc_bind clause specified; use current proc-bind-var for this
2153 // parallel region
2154 if (proc_bind == proc_bind_default) {
2155 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2156 }
2157 // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
2158 if (master_th->th.th_teams_microtask &&
2160 proc_bind = __kmp_teams_proc_bind;
2161 }
2162 /* else: The proc_bind policy was specified explicitly on parallel clause.
2163 This overrides proc-bind-var for this parallel region, but does not
2164 change proc-bind-var. */
2165 // Figure the value of proc-bind-var for the child threads.
2166 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2167 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2168 master_th->th.th_current_task->td_icvs.proc_bind)) {
2169 // Do not modify the proc bind icv for the two teams construct forks
2170 // They just let the proc bind icv pass through
2171 if (!master_th->th.th_teams_microtask ||
2172 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2173 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2174 }
2175 }
2176
2177 // Reset for next parallel region
2178 master_th->th.th_set_proc_bind = proc_bind_default;
2179
2180 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2181 kmp_internal_control_t new_icvs;
2182 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2183 new_icvs.next = NULL;
2184 if (nthreads_icv > 0) {
2185 new_icvs.nproc = nthreads_icv;
2186 }
2187 if (proc_bind_icv != proc_bind_default) {
2188 new_icvs.proc_bind = proc_bind_icv;
2189 }
2190
2191 /* allocate a new parallel team */
2192 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2193 team = __kmp_allocate_team(root, nthreads, nthreads,
2194#if OMPT_SUPPORT
2195 ompt_parallel_data,
2196#endif
2197 proc_bind, &new_icvs, argc, master_th);
2199 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2200 } else {
2201 /* allocate a new parallel team */
2202 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2203 team = __kmp_allocate_team(
2204 root, nthreads, nthreads,
2205#if OMPT_SUPPORT
2206 ompt_parallel_data,
2207#endif
2208 proc_bind, &master_th->th.th_current_task->td_icvs, argc, master_th);
2210 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2211 &master_th->th.th_current_task->td_icvs);
2212 }
2213 KF_TRACE(
2214 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2215
2216 /* setup the new team */
2217 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2218 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2219 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2220 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2221 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2222#if OMPT_SUPPORT
2223 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2224 return_address);
2225#endif
2226 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2227 // TODO: parent_team->t.t_level == INT_MAX ???
2228 if (!master_th->th.th_teams_microtask || level > teams_level) {
2229 int new_level = parent_team->t.t_level + 1;
2230 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2231 new_level = parent_team->t.t_active_level + 1;
2232 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2233 } else {
2234 // AC: Do not increase parallel level at start of the teams construct
2235 int new_level = parent_team->t.t_level;
2236 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2237 new_level = parent_team->t.t_active_level;
2238 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2239 }
2240 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2241 // set primary thread's schedule as new run-time schedule
2242 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2243
2244 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2245 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2246
2247 // Check if hot team has potentially outdated list, and if so, free it
2248 if (team->t.t_nested_nth &&
2249 team->t.t_nested_nth != parent_team->t.t_nested_nth) {
2250 KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
2251 KMP_INTERNAL_FREE(team->t.t_nested_nth);
2252 team->t.t_nested_nth = NULL;
2253 }
2254 team->t.t_nested_nth = parent_team->t.t_nested_nth;
2255 if (master_th->th.th_set_nested_nth) {
2256 if (!nested_nth)
2257 nested_nth = __kmp_override_nested_nth(master_th, level);
2258 team->t.t_nested_nth = nested_nth;
2259 KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);
2260 master_th->th.th_set_nested_nth = NULL;
2261 master_th->th.th_set_nested_nth_sz = 0;
2262 master_th->th.th_nt_strict = false;
2263 }
2264
2265 // Update the floating point rounding in the team if required.
2266 propagateFPControl(team);
2267#if OMPD_SUPPORT
2268 if (ompd_state & OMPD_ENABLE_BP)
2269 ompd_bp_parallel_begin();
2270#endif
2271
2272 KA_TRACE(
2273 20,
2274 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2275 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2276 team->t.t_nproc));
2277 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2278 (team->t.t_master_tid == 0 &&
2279 (team->t.t_parent == root->r.r_root_team ||
2280 team->t.t_parent->t.t_serialized)));
2281 KMP_MB();
2282
2283 /* now, setup the arguments */
2284 argv = (void **)team->t.t_argv;
2285 if (ap) {
2286 for (i = argc - 1; i >= 0; --i) {
2287 void *new_argv = va_arg(kmp_va_deref(ap), void *);
2288 KMP_CHECK_UPDATE(*argv, new_argv);
2289 argv++;
2290 }
2291 } else {
2292 for (i = 0; i < argc; ++i) {
2293 // Get args from parent team for teams construct
2294 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2295 }
2296 }
2297
2298 /* now actually fork the threads */
2299 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2300 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2301 root->r.r_active = TRUE;
2302
2303 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2304 __kmp_setup_icv_copy(team, nthreads,
2305 &master_th->th.th_current_task->td_icvs, loc);
2306
2307#if OMPT_SUPPORT
2308 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2309#endif
2310
2312
2313#if USE_ITT_BUILD
2314 if (team->t.t_active_level == 1 // only report frames at level 1
2315 && !master_th->th.th_teams_microtask) { // not in teams construct
2316#if USE_ITT_NOTIFY
2317 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2318 (__kmp_forkjoin_frames_mode == 3 ||
2319 __kmp_forkjoin_frames_mode == 1)) {
2320 kmp_uint64 tmp_time = 0;
2321 if (__itt_get_timestamp_ptr)
2322 tmp_time = __itt_get_timestamp();
2323 // Internal fork - report frame begin
2324 master_th->th.th_frame_time = tmp_time;
2325 if (__kmp_forkjoin_frames_mode == 3)
2326 team->t.t_region_time = tmp_time;
2327 } else
2328// only one notification scheme (either "submit" or "forking/joined", not both)
2329#endif /* USE_ITT_NOTIFY */
2330 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2331 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2332 // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
2333 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2334 }
2335 }
2336#endif /* USE_ITT_BUILD */
2337
2338 /* now go on and do the work */
2339 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2340 KMP_MB();
2341 KF_TRACE(10,
2342 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2343 root, team, master_th, gtid));
2344
2345#if USE_ITT_BUILD
2346 if (__itt_stack_caller_create_ptr) {
2347 // create new stack stitching id before entering fork barrier
2348 if (!enter_teams) {
2349 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2350 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2351 } else if (parent_team->t.t_serialized) {
2352 // keep stack stitching id in the serialized parent_team;
2353 // current team will be used for parallel inside the teams;
2354 // if parent_team is active, then it already keeps stack stitching id
2355 // for the league of teams
2356 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2357 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2358 }
2359 }
2360#endif /* USE_ITT_BUILD */
2361
2362 // AC: skip __kmp_internal_fork at teams construct, let only primary
2363 // threads execute
2364 if (ap) {
2365 __kmp_internal_fork(loc, gtid, team);
2366 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2367 "master_th=%p, gtid=%d\n",
2368 root, team, master_th, gtid));
2369 }
2370
2371 if (call_context == fork_context_gnu) {
2372 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2373 return TRUE;
2374 }
2375
2376 /* Invoke microtask for PRIMARY thread */
2377 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2378 team->t.t_id, team->t.t_pkfn));
2379 } // END of timer KMP_fork_call block
2380
2381#if KMP_STATS_ENABLED
2382 // If beginning a teams construct, then change thread state
2383 stats_state_e previous_state = KMP_GET_THREAD_STATE();
2384 if (!ap) {
2385 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2386 }
2387#endif
2388
2389 if (!team->t.t_invoke(gtid)) {
2390 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
2391 }
2392
2393#if KMP_STATS_ENABLED
2394 // If was beginning of a teams construct, then reset thread state
2395 if (!ap) {
2396 KMP_SET_THREAD_STATE(previous_state);
2397 }
2398#endif
2399
2400 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2401 team->t.t_id, team->t.t_pkfn));
2402 KMP_MB(); /* Flush all pending memory write invalidates. */
2403
2404 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2405#if OMPT_SUPPORT
2406 if (ompt_enabled.enabled) {
2407 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2408 }
2409#endif
2410
2411 return TRUE;
2412}
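
// Summary of the fork path above (descriptive, mirrors the code):
//  1. decide the team size from num_threads/ICVs/thread-limit and
//     __kmp_reserve_threads, diverting to __kmp_fork_in_teams or
//     __kmp_serial_fork_call for the special cases;
//  2. allocate or reuse a team via __kmp_allocate_team, propagate ICVs and
//     proc-bind, and copy the microtask arguments;
//  3. release the workers (__kmp_fork_team_threads / __kmp_internal_fork) and
//     invoke the microtask on the primary thread, returning TRUE; serialized
//     regions return FALSE instead.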
2413
2414#if OMPT_SUPPORT
2415static inline void __kmp_join_restore_state(kmp_info_t *thread,
2416 kmp_team_t *team) {
2417 // restore state outside the region
2418 thread->th.ompt_thread_info.state =
2419 ((team->t.t_serialized) ? ompt_state_work_serial
2420 : ompt_state_work_parallel);
2421}
2422
2423static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2424 kmp_team_t *team, ompt_data_t *parallel_data,
2425 int flags, void *codeptr) {
2427 if (ompt_enabled.ompt_callback_parallel_end) {
2428 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2429 parallel_data, &(task_info->task_data), flags, codeptr);
2430 }
2431
2432 task_info->frame.enter_frame = ompt_data_none;
2433 __kmp_join_restore_state(thread, team);
2434}
2435#endif
2436
2438#if OMPT_SUPPORT
2439 ,
2440 enum fork_context_e fork_context
2441#endif
2442 ,
2443 int exit_teams) {
2445 kmp_team_t *team;
2446 kmp_team_t *parent_team;
2447 kmp_info_t *master_th;
2448 kmp_root_t *root;
2449 int master_active;
2450
2451 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
2452
2453 /* setup current data */
2454 master_th = __kmp_threads[gtid];
2455 root = master_th->th.th_root;
2456 team = master_th->th.th_team;
2457 parent_team = team->t.t_parent;
2458
2459 master_th->th.th_ident = loc;
2460
2461#if OMPT_SUPPORT
2462 void *team_microtask = (void *)team->t.t_pkfn;
2463 // For GOMP interface with serialized parallel, need the
2464 // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
2465 // and end-parallel events.
2466 if (ompt_enabled.enabled &&
2467 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2468 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2469 }
2470#endif
2471
2472#if KMP_DEBUG
2473 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2474 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2475 "th_task_team = %p\n",
2476 __kmp_gtid_from_thread(master_th), team,
2477 team->t.t_task_team[master_th->th.th_task_state],
2478 master_th->th.th_task_team));
2479 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, master_th);
2480 }
2481#endif
2482
2483 if (team->t.t_serialized) {
2484 if (master_th->th.th_teams_microtask) {
2485 // We are in teams construct
2486 int level = team->t.t_level;
2487 int tlevel = master_th->th.th_teams_level;
2488 if (level == tlevel) {
2489 // AC: we haven't incremented it earlier at start of teams construct,
2490 // so do it here - at the end of teams construct
2491 team->t.t_level++;
2492 } else if (level == tlevel + 1) {
2493 // AC: we are exiting parallel inside teams, need to increment
2494 // serialization in order to restore it in the next call to
2495 // __kmpc_end_serialized_parallel
2496 team->t.t_serialized++;
2497 }
2498 }
2500
2501#if OMPT_SUPPORT
2502 if (ompt_enabled.enabled) {
2503 if (fork_context == fork_context_gnu) {
2504 __ompt_lw_taskteam_unlink(master_th);
2505 }
2506 __kmp_join_restore_state(master_th, parent_team);
2507 }
2508#endif
2509
2510 return;
2511 }
2512
2513 master_active = team->t.t_master_active;
2514
2515 if (!exit_teams) {
2516 // AC: No barrier for internal teams at exit from teams construct.
2517 // But there is barrier for external team (league).
2518 __kmp_internal_join(loc, gtid, team);
2519#if USE_ITT_BUILD
2520 if (__itt_stack_caller_create_ptr) {
2521 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2522 // destroy the stack stitching id after join barrier
2523 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2524 team->t.t_stack_id = NULL;
2525 }
2526#endif
2527 } else {
2528 master_th->th.th_task_state =
2529 0; // AC: no tasking in teams (out of any parallel)
2530#if USE_ITT_BUILD
2531 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2532 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2533 // destroy the stack stitching id on exit from the teams construct
2534 // if parent_team is active, then the id will be destroyed later on
2535 // by master of the league of teams
2536 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2537 parent_team->t.t_stack_id = NULL;
2538 }
2539#endif
2540 }
2541
2542 KMP_MB();
2543
2544#if OMPT_SUPPORT
2545 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2546 void *codeptr = team->t.ompt_team_info.master_return_address;
2547#endif
2548
2549#if USE_ITT_BUILD
2550 // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
2551 if (team->t.t_active_level == 1 &&
2552 (!master_th->th.th_teams_microtask || /* not in teams construct */
2553 master_th->th.th_teams_size.nteams == 1)) {
2554 master_th->th.th_ident = loc;
2555 // only one notification scheme (either "submit" or "forking/joined", not
2556 // both)
2557 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2558 __kmp_forkjoin_frames_mode == 3)
2559 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2560 master_th->th.th_frame_time, 0, loc,
2561 master_th->th.th_team_nproc, 1);
2562 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2563 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2564 __kmp_itt_region_joined(gtid);
2565 } // active_level == 1
2566#endif /* USE_ITT_BUILD */
2567
2568#if KMP_AFFINITY_SUPPORTED
2569 if (!exit_teams) {
2570 // Restore master thread's partition.
2571 master_th->th.th_first_place = team->t.t_first_place;
2572 master_th->th.th_last_place = team->t.t_last_place;
2573 }
2574#endif // KMP_AFFINITY_SUPPORTED
2575
2576 if (master_th->th.th_teams_microtask && !exit_teams &&
2577 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2578 team->t.t_level == master_th->th.th_teams_level + 1) {
2579// AC: We need to leave the team structure intact at the end of parallel
2580// inside the teams construct, so that at the next parallel same (hot) team
2581// works, only adjust nesting levels
2582#if OMPT_SUPPORT
2583 ompt_data_t ompt_parallel_data = ompt_data_none;
2584 if (ompt_enabled.enabled) {
2586 if (ompt_enabled.ompt_callback_implicit_task) {
2587 int ompt_team_size = team->t.t_nproc;
2588 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2589 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2590 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2591 }
2592 task_info->frame.exit_frame = ompt_data_none;
2593 task_info->task_data = ompt_data_none;
2594 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2595 __ompt_lw_taskteam_unlink(master_th);
2596 }
2597#endif
2598 /* Decrement our nested depth level */
2599 team->t.t_level--;
2600 team->t.t_active_level--;
2601 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2602
2603 // Restore number of threads in the team if needed. This code relies on
2604 // the proper adjustment of th_teams_size.nth after the fork in
2605 // __kmp_teams_master on each teams primary thread in the case that
2606 // __kmp_reserve_threads reduced it.
2607 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2608 int old_num = master_th->th.th_team_nproc;
2609 int new_num = master_th->th.th_teams_size.nth;
2610 kmp_info_t **other_threads = team->t.t_threads;
2611 team->t.t_nproc = new_num;
2612 for (int i = 0; i < old_num; ++i) {
2613 other_threads[i]->th.th_team_nproc = new_num;
2614 }
2615 // Adjust states of non-used threads of the team
2616 for (int i = old_num; i < new_num; ++i) {
2617 // Re-initialize thread's barrier data.
2618 KMP_DEBUG_ASSERT(other_threads[i]);
2619 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2620 for (int b = 0; b < bs_last_barrier; ++b) {
2621 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2622 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2623#if USE_DEBUGGER
2624 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2625#endif
2626 }
2628 // Synchronize thread's task state
2629 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2630 }
2631 }
2632 }
2633
2634#if OMPT_SUPPORT
2635 if (ompt_enabled.enabled) {
2636 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2637 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2638 }
2639#endif
2640
2641 return;
2642 }
2643
2644 /* do cleanup and restore the parent team */
2645 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2646 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2647
2648 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2649
2650 /* jc: The following lock has instructions with REL and ACQ semantics,
2651 separating the parallel user code called in this parallel region
2652 from the serial user code called after this function returns. */
2654
2655 if (!master_th->th.th_teams_microtask ||
2656 team->t.t_level > master_th->th.th_teams_level) {
2657 /* Decrement our nested depth level */
2658 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2659 }
2660 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2661
2662#if OMPT_SUPPORT
2663 if (ompt_enabled.enabled) {
2665 if (ompt_enabled.ompt_callback_implicit_task) {
2666 int flags = (team_microtask == (void *)__kmp_teams_master)
2667 ? ompt_task_initial
2668 : ompt_task_implicit;
2669 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2670 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2671 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2672 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2673 }
2674 task_info->frame.exit_frame = ompt_data_none;
2675 task_info->task_data = ompt_data_none;
2676 }
2677#endif
2678
2679 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2680 master_th, team));
2682
2683 master_th->th.th_def_allocator = team->t.t_def_allocator;
2684
2685#if OMPD_SUPPORT
2686 if (ompd_state & OMPD_ENABLE_BP)
2687 ompd_bp_parallel_end();
2688#endif
2689 updateHWFPControl(team);
2690
2691 if (root->r.r_active != master_active)
2692 root->r.r_active = master_active;
2693
2694 __kmp_free_team(root, team, master_th); // this will free worker threads
2695
2696 /* this race was fun to find. make sure the following is in the critical
2697 region otherwise assertions may fail occasionally since the old team may be
2698 reallocated and the hierarchy appears inconsistent. it is actually safe to
2699 run and won't cause any bugs, but will cause those assertion failures. it's
2700 only one deref&assign so might as well put this in the critical region */
2701 master_th->th.th_team = parent_team;
2702 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2703 master_th->th.th_team_master = parent_team->t.t_threads[0];
2704 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2705
2706 /* restore serialized team, if need be */
2707 if (parent_team->t.t_serialized &&
2708 parent_team != master_th->th.th_serial_team &&
2709 parent_team != root->r.r_root_team) {
2710 __kmp_free_team(root, master_th->th.th_serial_team, NULL);
2711 master_th->th.th_serial_team = parent_team;
2712 }
2713
2715 // Restore primary thread's task state from team structure
2716 KMP_DEBUG_ASSERT(team->t.t_primary_task_state == 0 ||
2717 team->t.t_primary_task_state == 1);
2718 master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;
2719
2720 // Copy the task team from the parent team to the primary thread
2721 master_th->th.th_task_team =
2722 parent_team->t.t_task_team[master_th->th.th_task_state];
2723 KA_TRACE(20,
2724 ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2725 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2726 parent_team));
2727 }
2728
2729 // TODO: GEH - cannot do this assertion because root thread not set up as
2730 // executing
2731 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2732 master_th->th.th_current_task->td_flags.executing = 1;
2733
2735
2736#if KMP_AFFINITY_SUPPORTED
2737 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2739 }
2740#endif
2741#if OMPT_SUPPORT
2742 int flags =
2743 OMPT_INVOKER(fork_context) |
2744 ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
2745 : ompt_parallel_team);
2746 if (ompt_enabled.enabled) {
2747 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2748 codeptr);
2749 }
2750#endif
2751
2752 KMP_MB();
2753 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2754}
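
// Descriptive note: __kmp_join_call() is the counterpart of __kmp_fork_call().
// For a real team it waits at the join barrier (__kmp_internal_join), emits
// the OMPT/ITT end events, restores the primary thread's team, dispatch and
// task-state fields, and returns the team to the pool via __kmp_free_team.
// Serialized regions and the parallel-inside-teams case take the early-return
// paths near the top of the routine.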
2755
2756/* Check whether we should push an internal control record onto the
2757 serial team stack. If so, do it. */
2759
2760 if (thread->th.th_team != thread->th.th_serial_team) {
2761 return;
2762 }
2763 if (thread->th.th_team->t.t_serialized > 1) {
2764 int push = 0;
2765
2766 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2767 push = 1;
2768 } else {
2769 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2770 thread->th.th_team->t.t_serialized) {
2771 push = 1;
2772 }
2773 }
2774 if (push) { /* push a record on the serial team's stack */
2775 kmp_internal_control_t *control =
2777 sizeof(kmp_internal_control_t));
2778
2779 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2780
2781 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2782
2783 control->next = thread->th.th_team->t.t_control_stack_top;
2784 thread->th.th_team->t.t_control_stack_top = control;
2785 }
2786 }
2787}
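
// Descriptive note: a record is pushed only while the current team is the
// thread's serial team and more than one serialized region is active
// (t_serialized > 1). The saved ICVs let __kmpc_end_serialized_parallel
// restore the enclosing level's settings, e.g. after an omp_set_num_threads()
// call made inside a doubly-serialized nested parallel region.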
2788
2789/* Changes set_nproc */
2790void __kmp_set_num_threads(int new_nth, int gtid) {
2791 kmp_info_t *thread;
2792 kmp_root_t *root;
2793
2794 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2796
2797 if (new_nth < 1)
2798 new_nth = 1;
2799 else if (new_nth > __kmp_max_nth)
2800 new_nth = __kmp_max_nth;
2801
2802 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2803 thread = __kmp_threads[gtid];
2804 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2805 return; // nothing to do
2806
2808
2809 set__nproc(thread, new_nth);
2810
2811 // If this omp_set_num_threads() call will cause the hot team size to be
2812 // reduced (in the absence of a num_threads clause), then reduce it now,
2813 // rather than waiting for the next parallel region.
2814 root = thread->th.th_root;
2815 if (__kmp_init_parallel && (!root->r.r_active) &&
2816 (root->r.r_hot_team->t.t_nproc > new_nth) && __kmp_hot_teams_max_level &&
2818 kmp_team_t *hot_team = root->r.r_hot_team;
2819 int f;
2820
2822
2824 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2825 }
2826 // Release the extra threads we don't need any more.
2827 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2828 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2830 // When decreasing team size, threads no longer in the team should unref
2831 // task team.
2832 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2833 }
2834 __kmp_free_thread(hot_team->t.t_threads[f]);
2835 hot_team->t.t_threads[f] = NULL;
2836 }
2837 hot_team->t.t_nproc = new_nth;
2838 if (thread->th.th_hot_teams) {
2839 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2840 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2841 }
2842
2844 hot_team->t.b->update_num_threads(new_nth);
2845 __kmp_add_threads_to_team(hot_team, new_nth);
2846 }
2847
2849
2850 // Update the t_nproc field in the threads that are still active.
2851 for (f = 0; f < new_nth; f++) {
2852 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2853 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2854 }
 2855 // Special flag marking that omp_set_num_threads() changed the hot team size
2856 hot_team->t.t_size_changed = -1;
2857 }
2858}
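
// Illustrative only: this is the runtime entry typically reached from the
// user-level API, e.g.
//
//   omp_set_num_threads(64);  // value is clamped to [1, __kmp_max_nth]
//   #pragma omp parallel      // next region without num_threads uses it
//
// If the new value is smaller than the current hot-team size and no parallel
// region is active, the surplus hot-team threads are released here rather
// than at the next fork.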
2859
2860/* Changes max_active_levels */
2861void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2862 kmp_info_t *thread;
2863
2864 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2865 "%d = (%d)\n",
2866 gtid, max_active_levels));
2868
2869 // validate max_active_levels
2870 if (max_active_levels < 0) {
2871 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2872 // We ignore this call if the user has specified a negative value.
2873 // The current setting won't be changed. The last valid setting will be
2874 // used. A warning will be issued (if warnings are allowed as controlled by
2875 // the KMP_WARNINGS env var).
2876 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2877 "max_active_levels for thread %d = (%d)\n",
2878 gtid, max_active_levels));
2879 return;
2880 }
2881 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2882 // it's OK, the max_active_levels is within the valid range: [ 0;
2883 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2884 // We allow a zero value. (implementation defined behavior)
2885 } else {
2886 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2888 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2889 // Current upper limit is MAX_INT. (implementation defined behavior)
2890 // If the input exceeds the upper limit, we correct the input to be the
2891 // upper limit. (implementation defined behavior)
 2892 // In practice this branch is unreachable while the limit remains MAX_INT.
2893 }
2894 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2895 "max_active_levels for thread %d = (%d)\n",
2896 gtid, max_active_levels));
2897
2898 thread = __kmp_threads[gtid];
2899
2901
2902 set__max_active_levels(thread, max_active_levels);
2903}
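
// Illustrative only: typical user-level mapping, e.g.
//   omp_set_max_active_levels(-3);  // ignored with a warning, ICV unchanged
//   omp_set_max_active_levels(2);   // accepted: at most two active levels
// Values above KMP_MAX_ACTIVE_LEVELS_LIMIT are clamped to that limit.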
2904
2905/* Gets max_active_levels */
2907 kmp_info_t *thread;
2908
2909 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2911
2912 thread = __kmp_threads[gtid];
2913 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2914 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2915 "curtask_maxaclevel=%d\n",
2916 gtid, thread->th.th_current_task,
2917 thread->th.th_current_task->td_icvs.max_active_levels));
2918 return thread->th.th_current_task->td_icvs.max_active_levels;
2919}
2920
2921// nteams-var per-device ICV
2922void __kmp_set_num_teams(int num_teams) {
2923 if (num_teams > 0)
2924 __kmp_nteams = num_teams;
2925}
2927// teams-thread-limit-var per-device ICV
2929 if (limit > 0)
2931}
2933
2934KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
2935KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
2936
2937/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
2938void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2939 kmp_info_t *thread;
2940 kmp_sched_t orig_kind;
2941 // kmp_team_t *team;
2942
2943 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2944 gtid, (int)kind, chunk));
2946
2947 // Check if the kind parameter is valid, correct if needed.
2948 // Valid parameters should fit in one of two intervals - standard or extended:
2949 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2950 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2951 orig_kind = kind;
2952 kind = __kmp_sched_without_mods(kind);
2953
2954 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2955 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2956 // TODO: Hint needs attention in case we change the default schedule.
2957 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2958 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2960 kind = kmp_sched_default;
2961 chunk = 0; // ignore chunk value in case of bad kind
2962 }
2963
2964 thread = __kmp_threads[gtid];
2965
2967
2968 if (kind < kmp_sched_upper_std) {
2969 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
 2970 // differentiate static chunked vs. unchunked: chunk should be invalid to
 2971 // indicate an unchunked schedule (which is the default)
2972 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2973 } else {
2974 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2975 __kmp_sch_map[kind - kmp_sched_lower - 1];
2976 }
2977 } else {
2978 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2979 // kmp_sched_lower - 2 ];
2980 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2982 kmp_sched_lower - 2];
2983 }
2985 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2986 if (kind == kmp_sched_auto || chunk < 1) {
2987 // ignore parameter chunk for schedule auto
2988 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2989 } else {
2990 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2991 }
2992}
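
// Illustrative only: typical user-level mapping, e.g.
//   omp_set_schedule(omp_sched_dynamic, 4); // -> dynamic chunked, chunk 4
//   omp_set_schedule(omp_sched_auto, 7);    // chunk ignored, default chunk used
// An out-of-range kind falls back to the default schedule ("static, no chunk")
// with a warning, as handled at the top of the routine.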
2993
2994/* Gets def_sched_var ICV values */
2995void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2996 kmp_info_t *thread;
2997 enum sched_type th_type;
2998
2999 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
3001
3002 thread = __kmp_threads[gtid];
3003
3004 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
3005 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
3006 case kmp_sch_static:
3009 *kind = kmp_sched_static;
3010 __kmp_sched_apply_mods_stdkind(kind, th_type);
3011 *chunk = 0; // chunk was not set, try to show this fact via zero value
3012 return;
3014 *kind = kmp_sched_static;
3015 break;
3017 *kind = kmp_sched_dynamic;
3018 break;
3022 *kind = kmp_sched_guided;
3023 break;
3024 case kmp_sch_auto:
3025 *kind = kmp_sched_auto;
3026 break;
3028 *kind = kmp_sched_trapezoidal;
3029 break;
3030#if KMP_STATIC_STEAL_ENABLED
3032 *kind = kmp_sched_static_steal;
3033 break;
3034#endif
3035 default:
3036 KMP_FATAL(UnknownSchedulingType, th_type);
3037 }
3038
3039 __kmp_sched_apply_mods_stdkind(kind, th_type);
3040 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
3041}
3042
3044
3045 int ii, dd;
3046 kmp_team_t *team;
3047 kmp_info_t *thr;
3048
3049 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3051
3052 // validate level
3053 if (level == 0)
3054 return 0;
3055 if (level < 0)
3056 return -1;
3057 thr = __kmp_threads[gtid];
3058 team = thr->th.th_team;
3059 ii = team->t.t_level;
3060 if (level > ii)
3061 return -1;
3062
3063 if (thr->th.th_teams_microtask) {
3064 // AC: we are in teams region where multiple nested teams have same level
3065 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3066 if (level <=
3067 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3068 KMP_DEBUG_ASSERT(ii >= tlevel);
3069 // AC: As we need to pass by the teams league, we need to artificially
3070 // increase ii
3071 if (ii == tlevel) {
3072 ii += 2; // three teams have same level
3073 } else {
3074 ii++; // two teams have same level
3075 }
3076 }
3077 }
3078
3079 if (ii == level)
3080 return __kmp_tid_from_gtid(gtid);
3081
3082 dd = team->t.t_serialized;
3083 level++;
3084 while (ii > level) {
3085 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3086 }
3087 if ((team->t.t_serialized) && (!dd)) {
3088 team = team->t.t_parent;
3089 continue;
3090 }
3091 if (ii > level) {
3092 team = team->t.t_parent;
3093 dd = team->t.t_serialized;
3094 ii--;
3095 }
3096 }
3097
3098 return (dd > 1) ? (0) : (team->t.t_master_tid);
3099}
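
// Illustrative only: with two active nested parallel regions, a thread at
// nesting level 2 would observe, e.g.
//   omp_get_ancestor_thread_num(2) -> its own thread id in the inner team
//   omp_get_ancestor_thread_num(1) -> the id of its ancestor in the outer team
//   omp_get_ancestor_thread_num(0) -> 0 (the initial thread)
// The loop above walks t_parent and skips serialized teams, which is why a
// serialized ancestor level reports thread id 0.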
3100
3101int __kmp_get_team_size(int gtid, int level) {
3102
3103 int ii, dd;
3104 kmp_team_t *team;
3105 kmp_info_t *thr;
3106
3107 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3109
3110 // validate level
3111 if (level == 0)
3112 return 1;
3113 if (level < 0)
3114 return -1;
3115 thr = __kmp_threads[gtid];
3116 team = thr->th.th_team;
3117 ii = team->t.t_level;
3118 if (level > ii)
3119 return -1;
3120
3121 if (thr->th.th_teams_microtask) {
3122 // AC: we are in teams region where multiple nested teams have same level
3123 int tlevel = thr->th.th_teams_level; // the level of the teams construct
3124 if (level <=
3125 tlevel) { // otherwise usual algorithm works (will not touch the teams)
3126 KMP_DEBUG_ASSERT(ii >= tlevel);
3127 // AC: As we need to pass by the teams league, we need to artificially
3128 // increase ii
3129 if (ii == tlevel) {
3130 ii += 2; // three teams have same level
3131 } else {
3132 ii++; // two teams have same level
3133 }
3134 }
3135 }
3136
3137 while (ii > level) {
3138 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3139 }
3140 if (team->t.t_serialized && (!dd)) {
3141 team = team->t.t_parent;
3142 continue;
3143 }
3144 if (ii > level) {
3145 team = team->t.t_parent;
3146 ii--;
3147 }
3148 }
3149
3150 return team->t.t_nproc;
3151}
3152
 3154 // This routine was created because the pairs (__kmp_sched, __kmp_chunk) and
3155 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
3156 // independently. So one can get the updated schedule here.
3157
3158 kmp_r_sched_t r_sched;
3159
3160 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
3161 // __kmp_guided. __kmp_sched should keep original value, so that user can set
3162 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
3163 // different roots (even in OMP 2.5)
3165 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3166 if (s == kmp_sch_static) {
3167 // replace STATIC with more detailed schedule (balanced or greedy)
3168 r_sched.r_sched_type = __kmp_static;
3169 } else if (s == kmp_sch_guided_chunked) {
3170 // replace GUIDED with more detailed schedule (iterative or analytical)
3171 r_sched.r_sched_type = __kmp_guided;
3172 } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
3173 r_sched.r_sched_type = __kmp_sched;
3174 }
3175 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3176
3178 // __kmp_chunk may be wrong here (if it was not ever set)
3179 r_sched.chunk = KMP_DEFAULT_CHUNK;
3180 } else {
3181 r_sched.chunk = __kmp_chunk;
3182 }
3183
3184 return r_sched;
3185}
3186
3187/* Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
3188 at least argc number of *t_argv entries for the requested team. */
3189static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3190
3191 KMP_DEBUG_ASSERT(team);
3192 if (!realloc || argc > team->t.t_max_argc) {
3193
3194 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3195 "current entries=%d\n",
3196 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3197 /* if previously allocated heap space for args, free them */
3198 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3199 __kmp_free((void *)team->t.t_argv);
3200
3201 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3202 /* use unused space in the cache line for arguments */
3203 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3204 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3205 "argv entries\n",
3206 team->t.t_id, team->t.t_max_argc));
3207 team->t.t_argv = &team->t.t_inline_argv[0];
3208 if (__kmp_storage_map) {
3210 -1, &team->t.t_inline_argv[0],
3211 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3212 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3213 team->t.t_id);
3214 }
3215 } else {
3216 /* allocate space for arguments in the heap */
3217 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3219 : 2 * argc;
3220 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3221 "argv entries\n",
3222 team->t.t_id, team->t.t_max_argc));
3223 team->t.t_argv =
3224 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3225 if (__kmp_storage_map) {
3226 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3227 &team->t.t_argv[team->t.t_max_argc],
3228 sizeof(void *) * team->t.t_max_argc,
3229 "team_%d.t_argv", team->t.t_id);
3230 }
3231 }
3232 }
3233}
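
// Descriptive note: small argument lists (argc <= KMP_INLINE_ARGV_ENTRIES) are
// kept inline in the team structure to stay on its cache lines; larger ones
// get a page-allocated heap array of at least KMP_MIN_MALLOC_ARGV_ENTRIES (or
// 2 * argc) entries, which is reused until a later call needs more space.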
3234
3235static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3236 int i;
3237 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3238 team->t.t_threads =
3239 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3240 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3241 sizeof(dispatch_shared_info_t) * num_disp_buff);
3242 team->t.t_dispatch =
3243 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3244 team->t.t_implicit_task_taskdata =
3245 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3246 team->t.t_max_nproc = max_nth;
3247
3248 /* setup dispatch buffers */
3249 for (i = 0; i < num_disp_buff; ++i) {
3250 team->t.t_disp_buffer[i].buffer_index = i;
3251 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3252 }
3253}
3254
3256 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3257 int i;
3258 for (i = 0; i < team->t.t_max_nproc; ++i) {
3259 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3260 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3261 team->t.t_dispatch[i].th_disp_buffer = NULL;
3262 }
3263 }
3264#if KMP_USE_HIER_SCHED
3266#endif
3267 __kmp_free(team->t.t_threads);
3268 __kmp_free(team->t.t_disp_buffer);
3269 __kmp_free(team->t.t_dispatch);
3270 __kmp_free(team->t.t_implicit_task_taskdata);
3271 team->t.t_threads = NULL;
3272 team->t.t_disp_buffer = NULL;
3273 team->t.t_dispatch = NULL;
3274 team->t.t_implicit_task_taskdata = 0;
3275}
3276
3277static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3278 kmp_info_t **oldThreads = team->t.t_threads;
3279
3280 __kmp_free(team->t.t_disp_buffer);
3281 __kmp_free(team->t.t_dispatch);
3282 __kmp_free(team->t.t_implicit_task_taskdata);
3283 __kmp_allocate_team_arrays(team, max_nth);
3284
3285 KMP_MEMCPY(team->t.t_threads, oldThreads,
3286 team->t.t_nproc * sizeof(kmp_info_t *));
3287
3288 __kmp_free(oldThreads);
3289}
3290
3292
3293 kmp_r_sched_t r_sched =
3294 __kmp_get_schedule_global(); // get current state of scheduling globals
3295
3297
3298 kmp_internal_control_t g_icvs = {
3299 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3300 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3301 // adjustment of threads (per thread)
3302 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3303 // whether blocktime is explicitly set
3304 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
3305#if KMP_USE_MONITOR
3306 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3307// intervals
3308#endif
3309 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3310 // next parallel region (per thread)
3311 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3312 __kmp_cg_max_nth, // int thread_limit;
3313 __kmp_task_max_nth, // int task_thread_limit; // to set the thread_limit
3314 // on task. This is used in the case of target thread_limit
3315 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3316 // for max_active_levels
3317 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3318 // {sched,chunk} pair
3319 __kmp_nested_proc_bind.bind_types[0],
3320 __kmp_default_device,
3321 NULL // struct kmp_internal_control *next;
3322 };
3323
3324 return g_icvs;
3325}
3326
3327static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3328
3329 kmp_internal_control_t gx_icvs;
3330 gx_icvs.serial_nesting_level =
3331 0; // probably =team->t.t_serial like in save_inter_controls
3332 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3333 gx_icvs.next = NULL;
3334
3335 return gx_icvs;
3336}
3337
3338static void __kmp_initialize_root(kmp_root_t *root) {
3339 int f;
3340 kmp_team_t *root_team;
3341 kmp_team_t *hot_team;
3342 int hot_team_max_nth;
3343 kmp_r_sched_t r_sched =
3344 __kmp_get_schedule_global(); // get current state of scheduling globals
3345 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3346 KMP_DEBUG_ASSERT(root);
3347 KMP_ASSERT(!root->r.r_begin);
3348
3349 /* setup the root state structure */
3350 __kmp_init_lock(&root->r.r_begin_lock);
3351 root->r.r_begin = FALSE;
3352 root->r.r_active = FALSE;
3353 root->r.r_in_parallel = 0;
3354 root->r.r_blocktime = __kmp_dflt_blocktime;
3355#if KMP_AFFINITY_SUPPORTED
3356 root->r.r_affinity_assigned = FALSE;
3357#endif
3358
3359 /* setup the root team for this task */
3360 /* allocate the root team structure */
3361 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
3362
3363 root_team = __kmp_allocate_team(root,
3364 1, // new_nproc
3365 1, // max_nproc
3366#if OMPT_SUPPORT
3367 ompt_data_none, // root parallel id
3368#endif
3369 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3370 0, // argc
3371 NULL // primary thread is unknown
3372 );
3373#if USE_DEBUGGER
3374 // Non-NULL value should be assigned to make the debugger display the root
3375 // team.
3376 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3377#endif
3378
3379 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
3380
3381 root->r.r_root_team = root_team;
3382 root_team->t.t_control_stack_top = NULL;
3383
3384 /* initialize root team */
3385 root_team->t.t_threads[0] = NULL;
3386 root_team->t.t_nproc = 1;
3387 root_team->t.t_serialized = 1;
3388 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3389 root_team->t.t_sched.sched = r_sched.sched;
3390 root_team->t.t_nested_nth = &__kmp_nested_nth;
3391 KA_TRACE(
3392 20,
3393 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3395
3396 /* setup the hot team for this task */
3397 /* allocate the hot team structure */
3398 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
3399
3400 hot_team = __kmp_allocate_team(root,
3401 1, // new_nproc
3402 __kmp_dflt_team_nth_ub * 2, // max_nproc
3403#if OMPT_SUPPORT
3404 ompt_data_none, // root parallel id
3405#endif
3406 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3407 0, // argc
3408 NULL // primary thread is unknown
3409 );
3410 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3411
3412 root->r.r_hot_team = hot_team;
3413 root_team->t.t_control_stack_top = NULL;
3414
3415 /* first-time initialization */
3416 hot_team->t.t_parent = root_team;
3417
3418 /* initialize hot team */
3419 hot_team_max_nth = hot_team->t.t_max_nproc;
3420 for (f = 0; f < hot_team_max_nth; ++f) {
3421 hot_team->t.t_threads[f] = NULL;
3422 }
3423 hot_team->t.t_nproc = 1;
3424 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3425 hot_team->t.t_sched.sched = r_sched.sched;
3426 hot_team->t.t_size_changed = 0;
3427 hot_team->t.t_nested_nth = &__kmp_nested_nth;
3428}
3429
3430#ifdef KMP_DEBUG
3431
3432typedef struct kmp_team_list_item {
3433 kmp_team_p const *entry;
3434 struct kmp_team_list_item *next;
3435} kmp_team_list_item_t;
3436typedef kmp_team_list_item_t *kmp_team_list_t;
3437
3438static void __kmp_print_structure_team_accum( // Add team to list of teams.
3439 kmp_team_list_t list, // List of teams.
3440 kmp_team_p const *team // Team to add.
3441) {
3442
3443 // List must terminate with item where both entry and next are NULL.
3444 // Team is added to the list only once.
3445 // List is sorted in ascending order by team id.
3446 // Team id is *not* a key.
3447
3448 kmp_team_list_t l;
3449
3450 KMP_DEBUG_ASSERT(list != NULL);
3451 if (team == NULL) {
3452 return;
3453 }
3454
3455 __kmp_print_structure_team_accum(list, team->t.t_parent);
3456 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3457
3458 // Search list for the team.
3459 l = list;
3460 while (l->next != NULL && l->entry != team) {
3461 l = l->next;
3462 }
3463 if (l->next != NULL) {
3464 return; // Team has been added before, exit.
3465 }
3466
3467 // Team is not found. Search list again for insertion point.
3468 l = list;
3469 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3470 l = l->next;
3471 }
3472
3473 // Insert team.
3474 {
3475 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3476 sizeof(kmp_team_list_item_t));
3477 *item = *l;
3478 l->entry = team;
3479 l->next = item;
3480 }
3481}
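// The invariants spelled out above (sentinel-terminated list, ascending id
// order, duplicates skipped, insertion by content swap) are easiest to see in
// isolation. The following is a hedged, standalone toy with a hypothetical
// node type and positive int keys, not the runtime's kmp_team_list_item_t.
#include <stdio.h>
#include <stdlib.h>

typedef struct node {
  int key;           // 0 marks the sentinel, mirroring entry == NULL above
  struct node *next; // NULL next also marks the sentinel
} node_t;

static void insert_sorted_unique(node_t *list, int key) {
  node_t *l = list;
  while (l->next != NULL && l->key != key) // search for an existing entry
    l = l->next;
  if (l->next != NULL)
    return; // already present
  l = list;
  while (l->next != NULL && l->key <= key) // find the insertion point
    l = l->next;
  node_t *item = (node_t *)malloc(sizeof(node_t));
  *item = *l;   // the displaced node (or sentinel) becomes the new tail
  l->key = key; // the new key takes over the current slot
  l->next = item;
}

int main(void) {
  node_t *list = (node_t *)calloc(1, sizeof(node_t)); // sentinel only
  int keys[] = {3, 1, 3, 2};
  for (int i = 0; i < 4; ++i)
    insert_sorted_unique(list, keys[i]);
  for (node_t *l = list; l->next != NULL; l = l->next)
    printf("%d ", l->key); // prints: 1 2 3
  printf("\n");
  return 0;
}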
3482
3483static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
3484
3485) {
3486 __kmp_printf("%s", title);
3487 if (team != NULL) {
3488 __kmp_printf("%2x %p\n", team->t.t_id, team);
3489 } else {
3490 __kmp_printf(" - (nil)\n");
3491 }
3492}
3493
3494static void __kmp_print_structure_thread(char const *title,
3495 kmp_info_p const *thread) {
3496 __kmp_printf("%s", title);
3497 if (thread != NULL) {
3498 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3499 } else {
3500 __kmp_printf(" - (nil)\n");
3501 }
3502}
3503
3504void __kmp_print_structure(void) {
3505
3506 kmp_team_list_t list;
3507
3508 // Initialize list of teams.
3509 list =
3510 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3511 list->entry = NULL;
3512 list->next = NULL;
3513
3514 __kmp_printf("\n------------------------------\nGlobal Thread "
3515 "Table\n------------------------------\n");
3516 {
3517 int gtid;
3518 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3519 __kmp_printf("%2d", gtid);
3520 if (__kmp_threads != NULL) {
3521 __kmp_printf(" %p", __kmp_threads[gtid]);
3522 }
3523 if (__kmp_root != NULL) {
3524 __kmp_printf(" %p", __kmp_root[gtid]);
3525 }
3526 __kmp_printf("\n");
3527 }
3528 }
3529
3530 // Print out __kmp_threads array.
3531 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3532 "----------\n");
3533 if (__kmp_threads != NULL) {
3534 int gtid;
3535 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3536 kmp_info_t const *thread = __kmp_threads[gtid];
3537 if (thread != NULL) {
3538 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3539 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3540 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3541 __kmp_print_structure_team(" Serial Team: ",
3542 thread->th.th_serial_team);
3543 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3544 __kmp_print_structure_thread(" Primary: ",
3545 thread->th.th_team_master);
3546 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3547 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
3548 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3549 __kmp_print_structure_thread(" Next in pool: ",
3550 thread->th.th_next_pool);
3551 __kmp_printf("\n");
3552 __kmp_print_structure_team_accum(list, thread->th.th_team);
3553 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3554 }
3555 }
3556 } else {
3557 __kmp_printf("Threads array is not allocated.\n");
3558 }
3559
3560 // Print out __kmp_root array.
3561 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3562 "--------\n");
3563 if (__kmp_root != NULL) {
3564 int gtid;
3565 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3566 kmp_root_t const *root = __kmp_root[gtid];
3567 if (root != NULL) {
3568 __kmp_printf("GTID %2d %p:\n", gtid, root);
3569 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3570 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3571 __kmp_print_structure_thread(" Uber Thread: ",
3572 root->r.r_uber_thread);
3573 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3574 __kmp_printf(" In Parallel: %2d\n",
3575 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3576 __kmp_printf("\n");
3577 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3578 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3579 }
3580 }
3581 } else {
3582 __kmp_printf("Ubers array is not allocated.\n");
3583 }
3584
3585 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3586 "--------\n");
3587 while (list->next != NULL) {
3588 kmp_team_p const *team = list->entry;
3589 int i;
3590 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3591 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3592 __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);
3593 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3594 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3595 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3596 for (i = 0; i < team->t.t_nproc; ++i) {
3597 __kmp_printf(" Thread %2d: ", i);
3598 __kmp_print_structure_thread("", team->t.t_threads[i]);
3599 }
3600 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3601 __kmp_printf("\n");
3602 list = list->next;
3603 }
3604
3605 // Print out __kmp_thread_pool and __kmp_team_pool.
3606 __kmp_printf("\n------------------------------\nPools\n----------------------"
3607 "--------\n");
3608 __kmp_print_structure_thread("Thread pool: ",
3610 __kmp_print_structure_team("Team pool: ",
3612 __kmp_printf("\n");
3613
3614 // Free team list.
3615 while (list != NULL) {
3616 kmp_team_list_item_t *item = list;
3617 list = list->next;
3618 KMP_INTERNAL_FREE(item);
3619 }
3620}
3621
3622#endif
3623
3624//---------------------------------------------------------------------------
3625// Stuff for per-thread fast random number generator
3626// Table of primes
3627static const unsigned __kmp_primes[] = {
3628 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3629 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3630 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3631 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3632 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3633 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3634 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3635 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3636 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3637 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3638 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
3639
3640//---------------------------------------------------------------------------
3641// __kmp_get_random: Get a random number using a linear congruential method.
3642unsigned short __kmp_get_random(kmp_info_t *thread) {
3643 unsigned x = thread->th.th_x;
3644 unsigned short r = (unsigned short)(x >> 16);
3645
3646 thread->th.th_x = x * thread->th.th_a + 1;
3647
3648 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3649 thread->th.th_info.ds.ds_tid, r));
3650
3651 return r;
3652}
3653//--------------------------------------------------------
3654// __kmp_init_random: Initialize a random number generator
3655void __kmp_init_random(kmp_info_t *thread) {
3656 unsigned seed = thread->th.th_info.ds.ds_tid;
3657
3658 thread->th.th_a =
3659 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3660 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3661 KA_TRACE(30,
3662 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3663}
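// Illustrative, standalone sketch of the per-thread generator above (not part
// of kmp_runtime.cpp; the type and function names here are hypothetical).
// Each thread gets its own multiplier `a` from a table of primes and iterates
// x_{n+1} = a * x_n + 1 (mod 2^32), returning the high 16 bits as the sample.
#include <stdio.h>

typedef struct toy_rng {
  unsigned a; // per-thread multiplier, picked from a __kmp_primes-style table
  unsigned x; // current LCG state
} toy_rng_t;

static void toy_rng_init(toy_rng_t *rng, unsigned seed, unsigned prime) {
  rng->a = prime;
  rng->x = (seed + 1) * rng->a + 1;
}

static unsigned short toy_rng_next(toy_rng_t *rng) {
  unsigned short r = (unsigned short)(rng->x >> 16); // high 16 bits
  rng->x = rng->x * rng->a + 1;                      // LCG step, wraps mod 2^32
  return r;
}

int main(void) {
  toy_rng_t rng;
  toy_rng_init(&rng, /*seed=*/3, /*prime=*/0x9e3779b1u);
  for (int i = 0; i < 4; ++i)
    printf("%u\n", (unsigned)toy_rng_next(&rng));
  return 0;
}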
3664
3665#if KMP_OS_WINDOWS
3666/* reclaim array entries for root threads that are already dead, returns number
3667 * reclaimed */
3668static int __kmp_reclaim_dead_roots(void) {
3669 int i, r = 0;
3670
3671 for (i = 0; i < __kmp_threads_capacity; ++i) {
3672 if (KMP_UBER_GTID(i) &&
3673 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3674 !__kmp_root[i]
3675 ->r.r_active) { // AC: reclaim only roots died in non-active state
3676 r += __kmp_unregister_root_other_thread(i);
3677 }
3678 }
3679 return r;
3680}
3681#endif
3682
3683/* This function attempts to create free entries in __kmp_threads and
3684 __kmp_root, and returns the number of free entries generated.
3685
3686 For Windows* OS static library, the first mechanism used is to reclaim array
3687 entries for root threads that are already dead.
3688
3689 On all platforms, expansion is attempted on the arrays __kmp_threads_ and
3690 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3691 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3692 threadprivate cache array has been created. Synchronization with
3693 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3694
3695 After any dead root reclamation, if the clipping value allows array expansion
3696 to result in the generation of a total of nNeed free slots, the function does
3697 that expansion. If not, nothing is done beyond the possible initial root
3698 thread reclamation.
3699
3700 If any argument is negative, the behavior is undefined. */
3701static int __kmp_expand_threads(int nNeed) {
3702 int added = 0;
3703 int minimumRequiredCapacity;
3704 int newCapacity;
3705 kmp_info_t **newThreads;
3706 kmp_root_t **newRoot;
3707
3708 // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3709 // resizing __kmp_threads does not need additional protection if foreign
3710 // threads are present
3711
3712#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3713 /* only for Windows static library */
3714 /* reclaim array entries for root threads that are already dead */
3715 added = __kmp_reclaim_dead_roots();
3716
3717 if (nNeed) {
3718 nNeed -= added;
3719 if (nNeed < 0)
3720 nNeed = 0;
3721 }
3722#endif
3723 if (nNeed <= 0)
3724 return added;
3725
3726 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3727 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3728 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3729 // > __kmp_max_nth in one of two ways:
3730 //
3731 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3732 // may not be reused by another thread, so we may need to increase
3733 // __kmp_threads_capacity to __kmp_max_nth + 1.
3734 //
3735 // 2) New foreign root(s) are encountered. We always register new foreign
3736 // roots. This may cause a smaller # of threads to be allocated at
3737 // subsequent parallel regions, but the worker threads hang around (and
3738 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3739 //
3740 // Anyway, that is the reason for moving the check to see if
3741 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3742 // instead of having it performed here. -BB
3743
3745
3746 /* compute expansion headroom to check if we can expand */
3747 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3748 /* possible expansion too small -- give up */
3749 return added;
3750 }
3751 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3752
3753 newCapacity = __kmp_threads_capacity;
3754 do {
3755 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3756 : __kmp_sys_max_nth;
3757 } while (newCapacity < minimumRequiredCapacity);
3758 newThreads = (kmp_info_t **)__kmp_allocate(
3759 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3760 newRoot =
3761 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3762 KMP_MEMCPY(newThreads, __kmp_threads,
3763 __kmp_threads_capacity * sizeof(kmp_info_t *));
3764 KMP_MEMCPY(newRoot, __kmp_root,
3765 __kmp_threads_capacity * sizeof(kmp_root_t *));
3766 // Put old __kmp_threads array on a list. Any ongoing references to the old
3767 // list will be valid. This list is cleaned up at library shutdown.
3768 kmp_old_threads_list_t *node =
3769 (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
3770 node->threads = __kmp_threads;
3771 node->next = __kmp_old_threads_list;
3772 __kmp_old_threads_list = node;
3773
3774 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3775 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3776 added += newCapacity - __kmp_threads_capacity;
3777 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3778
3779 if (newCapacity > __kmp_tp_capacity) {
3780 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3781 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3782 __kmp_threadprivate_resize_cache(newCapacity);
3783 } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3784 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3785 }
3786 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3787 }
3788
3789 return added;
3790}
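// Illustrative sketch (not part of kmp_runtime.cpp) of the capacity growth
// rule used above: double until the request is covered, clipping at the
// system maximum, after the same headroom check. All names are hypothetical.
#include <stdio.h>

static int grow_capacity(int current, int needed, int sys_max) {
  if (sys_max - current < needed)
    return current; // not enough headroom; the caller gives up, as above
  int required = current + needed;
  int cap = current > 0 ? current : 1; // guard the degenerate empty case
  do {
    cap = (cap <= (sys_max >> 1)) ? (cap << 1) : sys_max;
  } while (cap < required);
  return cap;
}

int main(void) {
  // e.g. 64 slots, 10 more needed, system max 32768 -> grows to 128
  printf("%d\n", grow_capacity(64, 10, 32768));
  return 0;
}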
3791
3792/* Register the current thread as a root thread and obtain our gtid. We must
3793 have the __kmp_initz_lock held at this point. Argument TRUE only if are the
3794 thread that calls from __kmp_do_serial_initialize() */
3795int __kmp_register_root(int initial_thread) {
3796 kmp_info_t *root_thread;
3797 kmp_root_t *root;
3798 int gtid;
3799 int capacity;
3801 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3802 KMP_MB();
3803
3804 /* 2007-03-02:
3805 If initial thread did not invoke OpenMP RTL yet, and this thread is not an
3806 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3807 work as expected -- it may return false (that means there is at least one
3808 empty slot in __kmp_threads array), but it is possible the only free slot
3809 is #0, which is reserved for initial thread and so cannot be used for this
3810 one. The following code works around this bug.
3811
3812 However, the right solution seems to be not reserving slot #0 for the initial
3813 thread because:
3814 (1) there is no magic in slot #0,
3815 (2) we cannot detect initial thread reliably (the first thread which does
3816 serial initialization may not be a real initial thread).
3817 */
3818 capacity = __kmp_threads_capacity;
3819 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3820 --capacity;
3821 }
3822
3823 // If it is not for initializing the hidden helper team, we need to take
3824 // __kmp_hidden_helper_threads_num out of the capacity because it is included
3825 // in __kmp_threads_capacity.
3826 if (!TCR_4(__kmp_init_hidden_helper_threads)) {
3827 capacity -= __kmp_hidden_helper_threads_num;
3828 }
3829
3830 /* see if there are too many threads */
3831 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3832 if (__kmp_tp_cached) {
3833 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3834 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3835 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3836 } else {
3837 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3838 __kmp_msg_null);
3839 }
3840 }
3841
3842 // When hidden helper task is enabled, __kmp_threads is organized as follows:
3843 // 0: initial thread, also a regular OpenMP thread.
3844 // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads.
3845 // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for
3846 // regular OpenMP threads.
3847 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3848 // Find an available thread slot for hidden helper thread. Slots for hidden
3849 // helper threads start from 1 to __kmp_hidden_helper_threads_num.
3850 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3851 gtid <= __kmp_hidden_helper_threads_num;
3852 gtid++)
3853 ;
3855 KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
3856 "hidden helper thread: T#%d\n",
3857 gtid));
3858 } else {
3859 /* find an available thread slot */
3860 // Don't reassign the zero slot since we need that to only be used by
3861 // initial thread. Slots for hidden helper threads should also be skipped.
3862 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3863 gtid = 0;
3864 } else {
3865 for (gtid = __kmp_hidden_helper_threads_num + 1;
3866 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3867 ;
3868 }
3869 KA_TRACE(
3870 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3871 KMP_ASSERT(gtid < __kmp_threads_capacity);
3872 }
3873
3874 /* update global accounting */
3875 __kmp_all_nth++;
3876 TCW_4(__kmp_nth, __kmp_nth + 1);
3877
3878 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3879 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3880 if (__kmp_adjust_gtid_mode) {
3881 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3882 if (TCR_4(__kmp_gtid_mode) != 2) {
3883 TCW_4(__kmp_gtid_mode, 2);
3884 }
3885 } else {
3886 if (TCR_4(__kmp_gtid_mode) != 1) {
3887 TCW_4(__kmp_gtid_mode, 1);
3888 }
3889 }
3890 }
3891
3892#ifdef KMP_ADJUST_BLOCKTIME
3893 /* Adjust blocktime to zero if necessary */
3894 /* Middle initialization might not have occurred yet */
3895 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3896 if (__kmp_nth > __kmp_avail_proc) {
3897 __kmp_zero_bt = TRUE;
3898 }
3899 }
3900#endif /* KMP_ADJUST_BLOCKTIME */
3901
3902 /* setup this new hierarchy */
3903 if (!(root = __kmp_root[gtid])) {
3904 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3905 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3906 }
3907
3908#if KMP_STATS_ENABLED
3909 // Initialize stats as soon as possible (right after gtid assignment).
3910 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3911 __kmp_stats_thread_ptr->startLife();
3912 KMP_SET_THREAD_STATE(SERIAL_REGION);
3913 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3914#endif
3916
3917 /* setup new root thread structure */
3918 if (root->r.r_uber_thread) {
3919 root_thread = root->r.r_uber_thread;
3920 } else {
3921 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3922 if (__kmp_storage_map) {
3923 __kmp_print_thread_storage_map(root_thread, gtid);
3924 }
3925 root_thread->th.th_info.ds.ds_gtid = gtid;
3926#if OMPT_SUPPORT
3927 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3928#endif
3929 root_thread->th.th_root = root;
3930 if (__kmp_env_consistency_check) {
3931 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3932 }
3933#if USE_FAST_MEMORY
3934 __kmp_initialize_fast_memory(root_thread);
3935#endif /* USE_FAST_MEMORY */
3936
3937#if KMP_USE_BGET
3938 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3939 __kmp_initialize_bget(root_thread);
3940#endif
3941 __kmp_init_random(root_thread); // Initialize random number generator
3942 }
3943
3944 /* setup the serial team held in reserve by the root thread */
3945 if (!root_thread->th.th_serial_team) {
3947 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3948 root_thread->th.th_serial_team =
3949 __kmp_allocate_team(root, 1, 1,
3950#if OMPT_SUPPORT
3951 ompt_data_none, // root parallel id
3952#endif
3953 proc_bind_default, &r_icvs, 0, NULL);
3954 }
3955 KMP_ASSERT(root_thread->th.th_serial_team);
3956 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3957 root_thread->th.th_serial_team));
3958
3959 /* drop root_thread into place */
3960 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3961
3962 root->r.r_root_team->t.t_threads[0] = root_thread;
3963 root->r.r_hot_team->t.t_threads[0] = root_thread;
3964 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3965 // AC: the team created in reserve, not for execution (it is unused for now).
3966 root_thread->th.th_serial_team->t.t_serialized = 0;
3967 root->r.r_uber_thread = root_thread;
3968
3969 /* initialize the thread, get it ready to go */
3970 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3971 TCW_4(__kmp_init_gtid, TRUE);
3972
3973 /* prepare the primary thread for get_gtid() */
3974 __kmp_gtid_set_specific(gtid);
3975
3976#if USE_ITT_BUILD
3977 __kmp_itt_thread_name(gtid);
3978#endif /* USE_ITT_BUILD */
3979
3980#ifdef KMP_TDATA_GTID
3981 __kmp_gtid = gtid;
3982#endif
3983 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3984 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3985
3986 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3987 "plain=%u\n",
3988 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3989 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3990 KMP_INIT_BARRIER_STATE));
3991 { // Initialize barrier data.
3992 int b;
3993 for (b = 0; b < bs_last_barrier; ++b) {
3994 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3995#if USE_DEBUGGER
3996 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3997#endif
3998 }
3999 }
4000 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
4001 KMP_INIT_BARRIER_STATE);
4002
4003#if KMP_AFFINITY_SUPPORTED
4004 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
4005 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
4006 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
4007 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
4008#endif /* KMP_AFFINITY_SUPPORTED */
4009 root_thread->th.th_def_allocator = __kmp_def_allocator;
4010 root_thread->th.th_prev_level = 0;
4011 root_thread->th.th_prev_num_threads = 1;
4012
4013 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
4014 tmp->cg_root = root_thread;
4015 tmp->cg_thread_limit = __kmp_cg_max_nth;
4016 tmp->cg_nthreads = 1;
4017 KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
4018 " cg_nthreads init to 1\n",
4019 root_thread, tmp));
4020 tmp->up = NULL;
4021 root_thread->th.th_cg_roots = tmp;
4022
4024
4025#if OMPT_SUPPORT
4026 if (ompt_enabled.enabled) {
4027
4028 kmp_info_t *root_thread = ompt_get_thread();
4029
4030 ompt_set_thread_state(root_thread, ompt_state_overhead);
4031
4032 if (ompt_enabled.ompt_callback_thread_begin) {
4033 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4034 ompt_thread_initial, __ompt_get_thread_data_internal());
4035 }
4036 ompt_data_t *task_data;
4037 ompt_data_t *parallel_data;
4038 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
4039 NULL);
4040 if (ompt_enabled.ompt_callback_implicit_task) {
4041 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4042 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4043 }
4044
4045 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4046 }
4047#endif
4048#if OMPD_SUPPORT
4049 if (ompd_state & OMPD_ENABLE_BP)
4050 ompd_bp_thread_begin();
4051#endif
4052
4053 KMP_MB();
4054 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4055
4056 return gtid;
4057}
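// Hedged sketch of the __kmp_threads slot layout described in the comments
// inside __kmp_register_root above (hypothetical helper, not part of the
// runtime): slot 0 is the initial thread, slots [1, hidden_helper_threads_num]
// hold hidden helper threads, and the remaining slots serve regular threads.
enum slot_kind { SLOT_INITIAL, SLOT_HIDDEN_HELPER, SLOT_REGULAR };

static enum slot_kind classify_gtid(int gtid, int hidden_helper_threads_num) {
  if (gtid == 0)
    return SLOT_INITIAL; // reserved for the initial thread
  if (gtid <= hidden_helper_threads_num)
    return SLOT_HIDDEN_HELPER; // slots [1, hidden_helper_threads_num]
  return SLOT_REGULAR;         // regular OpenMP roots and workers come after
}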
4058
4059static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
4060 const int max_level) {
4061 int i, n, nth;
4062 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4063 if (!hot_teams || !hot_teams[level].hot_team) {
4064 return 0;
4065 }
4066 KMP_DEBUG_ASSERT(level < max_level);
4067 kmp_team_t *team = hot_teams[level].hot_team;
4068 nth = hot_teams[level].hot_team_nth;
4069 n = nth - 1; // primary thread is not freed
4070 if (level < max_level - 1) {
4071 for (i = 0; i < nth; ++i) {
4072 kmp_info_t *th = team->t.t_threads[i];
4073 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4074 if (i > 0 && th->th.th_hot_teams) {
4075 __kmp_free(th->th.th_hot_teams);
4076 th->th.th_hot_teams = NULL;
4077 }
4078 }
4079 }
4080 __kmp_free_team(root, team, NULL);
4081 return n;
4082}
4083
4084// Resets a root thread and clears its root and hot teams.
4085// Returns the number of __kmp_threads entries directly and indirectly freed.
4086static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4087 kmp_team_t *root_team = root->r.r_root_team;
4088 kmp_team_t *hot_team = root->r.r_hot_team;
4089 int n = hot_team->t.t_nproc;
4090 int i;
4091
4092 KMP_DEBUG_ASSERT(!root->r.r_active);
4093
4094 root->r.r_root_team = NULL;
4095 root->r.r_hot_team = NULL;
4096 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
4097 // before call to __kmp_free_team().
4098 __kmp_free_team(root, root_team, NULL);
4099 if (__kmp_hot_teams_max_level >
4100 0) { // need to free nested hot teams and their threads if any
4101 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4102 kmp_info_t *th = hot_team->t.t_threads[i];
4103 if (__kmp_hot_teams_max_level > 1) {
4104 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4105 }
4106 if (th->th.th_hot_teams) {
4107 __kmp_free(th->th.th_hot_teams);
4108 th->th.th_hot_teams = NULL;
4109 }
4110 }
4111 }
4112 __kmp_free_team(root, hot_team, NULL);
4113
4114 // Before we can reap the thread, we need to make certain that all other
4115 // threads in the teams that had this root as ancestor have stopped trying to
4116 // steal tasks.
4117 if (__kmp_tasking_mode != tskm_immediate_exec) {
4118 __kmp_wait_to_unref_task_teams();
4119 }
4120
4121#if KMP_OS_WINDOWS
4122 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
4123 KA_TRACE(
4124 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4125 "\n",
4126 (LPVOID) & (root->r.r_uber_thread->th),
4127 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4128 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4129#endif /* KMP_OS_WINDOWS */
4130
4131#if OMPD_SUPPORT
4132 if (ompd_state & OMPD_ENABLE_BP)
4133 ompd_bp_thread_end();
4134#endif
4135
4136#if OMPT_SUPPORT
4137 ompt_data_t *task_data;
4138 ompt_data_t *parallel_data;
4139 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
4140 NULL);
4141 if (ompt_enabled.ompt_callback_implicit_task) {
4142 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4143 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4144 }
4145 if (ompt_enabled.ompt_callback_thread_end) {
4146 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4147 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4148 }
4149#endif
4150
4151 TCW_4(__kmp_nth,
4152 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
4153 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4154 KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4155 " to %d\n",
4156 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4157 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4158 if (i == 1) {
4159 // need to free contention group structure
4160 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4161 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4162 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4163 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4164 root->r.r_uber_thread->th.th_cg_roots = NULL;
4165 }
4166 __kmp_reap_thread(root->r.r_uber_thread, 1);
4167
4168 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it
4169 // instead of freeing.
4170 root->r.r_uber_thread = NULL;
4171 /* mark root as no longer in use */
4172 root->r.r_begin = FALSE;
4173
4174 return n;
4175}
4176
4178 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4179 /* this lock should be ok, since unregister_root_current_thread is never
4180 called during an abort, only during a normal close. furthermore, if you
4181 have the forkjoin lock, you should never try to get the initz lock */
4182 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4183 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4184 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
4185 "exiting T#%d\n",
4186 gtid));
4187 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4188 return;
4189 }
4190 kmp_root_t *root = __kmp_root[gtid];
4191
4194 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4195 KMP_ASSERT(root->r.r_active == FALSE);
4196
4197 KMP_MB();
4198
4199 kmp_info_t *thread = __kmp_threads[gtid];
4200 kmp_team_t *team = thread->th.th_team;
4201 kmp_task_team_t *task_team = thread->th.th_task_team;
4202
4203 // we need to wait for the proxy tasks before finishing the thread
4204 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4205 task_team->tt.tt_hidden_helper_task_encountered)) {
4206#if OMPT_SUPPORT
4207 // the runtime is shutting down so we won't report any events
4208 thread->th.ompt_thread_info.state = ompt_state_undefined;
4209#endif
4210 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4211 }
4212
4213 __kmp_reset_root(gtid, root);
4214
4215 KMP_MB();
4216 KC_TRACE(10,
4217 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4218
4219 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4220}
4221
4222#if KMP_OS_WINDOWS
4223/* __kmp_forkjoin_lock must be already held
4224 Unregisters a root thread that is not the current thread. Returns the number
4225 of __kmp_threads entries freed as a result. */
4226static int __kmp_unregister_root_other_thread(int gtid) {
4227 kmp_root_t *root = __kmp_root[gtid];
4228 int r;
4229
4230 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4231 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4232 KMP_ASSERT(KMP_UBER_GTID(gtid));
4233 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4234 KMP_ASSERT(root->r.r_active == FALSE);
4235
4236 r = __kmp_reset_root(gtid, root);
4237 KC_TRACE(10,
4238 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4239 return r;
4240}
4241#endif
4242
4243#if KMP_DEBUG
4244void __kmp_task_info() {
4245
4246 kmp_int32 gtid = __kmp_entry_gtid();
4247 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4248 kmp_info_t *this_thr = __kmp_threads[gtid];
4249 kmp_team_t *steam = this_thr->th.th_serial_team;
4250 kmp_team_t *team = this_thr->th.th_team;
4251
4253 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4254 "ptask=%p\n",
4255 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4256 team->t.t_implicit_task_taskdata[tid].td_parent);
4257}
4258#endif // KMP_DEBUG
4259
4260/* TODO optimize with one big memclr, take out what isn't needed, split
4261 responsibility to workers as much as possible, and delay initialization of
4262 features as much as possible */
4263static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4264 int tid, int gtid) {
4265 /* this_thr->th.th_info.ds.ds_gtid is setup in
4266 kmp_allocate_thread/create_worker.
4267 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4268 KMP_DEBUG_ASSERT(this_thr != NULL);
4269 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4270 KMP_DEBUG_ASSERT(team);
4271 KMP_DEBUG_ASSERT(team->t.t_threads);
4272 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4273 kmp_info_t *master = team->t.t_threads[0];
4274 KMP_DEBUG_ASSERT(master);
4275 KMP_DEBUG_ASSERT(master->th.th_root);
4276
4277 KMP_MB();
4278
4279 TCW_SYNC_PTR(this_thr->th.th_team, team);
4280
4281 this_thr->th.th_info.ds.ds_tid = tid;
4282 this_thr->th.th_set_nproc = 0;
4283 if (__kmp_tasking_mode != tskm_immediate_exec)
4284 // When tasking is possible, threads are not safe to reap until they are
4285 // done tasking; this will be set when tasking code is exited in wait
4286 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4287 else // no tasking --> always safe to reap
4288 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4289 this_thr->th.th_set_proc_bind = proc_bind_default;
4290
4291#if KMP_AFFINITY_SUPPORTED
4292 this_thr->th.th_new_place = this_thr->th.th_current_place;
4293#endif
4294 this_thr->th.th_root = master->th.th_root;
4295
4296 /* setup the thread's cache of the team structure */
4297 this_thr->th.th_team_nproc = team->t.t_nproc;
4298 this_thr->th.th_team_master = master;
4299 this_thr->th.th_team_serialized = team->t.t_serialized;
4300
4301 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4302
4303 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4304 tid, gtid, this_thr, this_thr->th.th_current_task));
4305
4306 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4307 team, tid, TRUE);
4308
4309 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4310 tid, gtid, this_thr, this_thr->th.th_current_task));
4311 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4312 // __kmp_initialize_team()?
4313
4314 /* TODO no worksharing in speculative threads */
4315 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4316
4317 this_thr->th.th_local.this_construct = 0;
4318
4319 if (!this_thr->th.th_pri_common) {
4320 this_thr->th.th_pri_common =
4321 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4322 if (__kmp_storage_map) {
4323 __kmp_print_storage_map_gtid(
4324 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4325 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4326 }
4327 this_thr->th.th_pri_head = NULL;
4328 }
4329
4330 if (this_thr != master && // Primary thread's CG root is initialized elsewhere
4331 this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
4332 // Make new thread's CG root same as primary thread's
4333 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4334 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4335 if (tmp) {
4336 // worker changes CG, need to check if old CG should be freed
4337 int i = tmp->cg_nthreads--;
4338 KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
4339 " on node %p of thread %p to %d\n",
4340 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4341 if (i == 1) {
4342 __kmp_free(tmp); // last thread left CG --> free it
4343 }
4344 }
4345 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4346 // Increment new thread's CG root's counter to add the new thread
4347 this_thr->th.th_cg_roots->cg_nthreads++;
4348 KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
4349 " node %p of thread %p to %d\n",
4350 this_thr, this_thr->th.th_cg_roots,
4351 this_thr->th.th_cg_roots->cg_root,
4352 this_thr->th.th_cg_roots->cg_nthreads));
4353 this_thr->th.th_current_task->td_icvs.thread_limit =
4354 this_thr->th.th_cg_roots->cg_thread_limit;
4355 }
4356
4357 /* Initialize dynamic dispatch */
4358 {
4359 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4360 // Use team max_nproc since this will never change for the team.
4361 size_t disp_size =
4362 sizeof(dispatch_private_info_t) *
4363 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4364 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4365 team->t.t_max_nproc));
4366 KMP_ASSERT(dispatch);
4367 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4368 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4369
4370 dispatch->th_disp_index = 0;
4371 dispatch->th_doacross_buf_idx = 0;
4372 if (!dispatch->th_disp_buffer) {
4373 dispatch->th_disp_buffer =
4374 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4375
4376 if (__kmp_storage_map) {
4377 __kmp_print_storage_map_gtid(
4378 gtid, &dispatch->th_disp_buffer[0],
4379 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4380 ? 1
4381 : __kmp_dispatch_num_buffers],
4382 disp_size,
4383 "th_%d.th_dispatch.th_disp_buffer "
4384 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4385 gtid, team->t.t_id, gtid);
4386 }
4387 } else {
4388 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4389 }
4390
4391 dispatch->th_dispatch_pr_current = 0;
4392 dispatch->th_dispatch_sh_current = 0;
4393
4394 dispatch->th_deo_fcn = 0; /* ORDERED */
4395 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4396 }
4397
4398 this_thr->th.th_next_pool = NULL;
4399
4400 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4401 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4402
4403 KMP_MB();
4404}
4405
4406/* allocate a new thread for the requesting team. this is only called from
4407 within a forkjoin critical section. we will first try to get an available
4408 thread from the thread pool. if none is available, we will fork a new one
4409 assuming we are able to create a new one. this should be assured, as the
4410 caller should check on this first. */
4411kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4412 int new_tid) {
4413 kmp_team_t *serial_team;
4414 kmp_info_t *new_thr;
4415 int new_gtid;
4416
4417 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4418 KMP_DEBUG_ASSERT(root && team);
4419 KMP_MB();
4420
4421 /* first, try to get one from the thread pool unless allocating thread is
4422 * the main hidden helper thread. The hidden helper team should always
4423 * allocate new OS threads. */
4424 if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {
4425 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4426 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4427 if (new_thr == __kmp_thread_pool_insert_pt) {
4428 __kmp_thread_pool_insert_pt = NULL;
4429 }
4430 TCW_4(new_thr->th.th_in_pool, FALSE);
4431 __kmp_suspend_initialize_thread(new_thr);
4432 __kmp_lock_suspend_mx(new_thr);
4433 if (new_thr->th.th_active_in_pool == TRUE) {
4434 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4435 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4436 new_thr->th.th_active_in_pool = FALSE;
4437 }
4438 __kmp_unlock_suspend_mx(new_thr);
4439
4440 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4441 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4442 KMP_ASSERT(!new_thr->th.th_team);
4444
4445 /* setup the thread structure */
4446 __kmp_initialize_info(new_thr, team, new_tid,
4447 new_thr->th.th_info.ds.ds_gtid);
4448 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4449
4451
4452 new_thr->th.th_task_state = 0;
4453
4454 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4455 // Make sure pool thread has transitioned to waiting on own thread struct
4456 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4457 // Thread activated in __kmp_allocate_team when increasing team size
4458 }
4459
4460#ifdef KMP_ADJUST_BLOCKTIME
4461 /* Adjust blocktime back to zero if necessary */
4462 /* Middle initialization might not have occurred yet */
4463 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4464 if (__kmp_nth > __kmp_avail_proc) {
4465 __kmp_zero_bt = TRUE;
4466 }
4467 }
4468#endif /* KMP_ADJUST_BLOCKTIME */
4469
4470#if KMP_DEBUG
4471 // If thread entered pool via __kmp_free_thread, wait_flag should !=
4472 // KMP_BARRIER_PARENT_FLAG.
4473 int b;
4474 kmp_balign_t *balign = new_thr->th.th_bar;
4475 for (b = 0; b < bs_last_barrier; ++b)
4476 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4477#endif
4478
4479 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4480 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4481
4482 KMP_MB();
4483 return new_thr;
4484 }
4485
4486 /* no, we'll fork a new one */
4487 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4488 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4489
4490#if KMP_USE_MONITOR
4491 // If this is the first worker thread the RTL is creating, then also
4492 // launch the monitor thread. We try to do this as early as possible.
4493 if (!TCR_4(__kmp_init_monitor)) {
4494 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4495 if (!TCR_4(__kmp_init_monitor)) {
4496 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4497 TCW_4(__kmp_init_monitor, 1);
4498 __kmp_create_monitor(&__kmp_monitor);
4499 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4500#if KMP_OS_WINDOWS
4501 // AC: wait until monitor has started. This is a fix for CQ232808.
4502 // The reason is that if the library is loaded/unloaded in a loop with
4503 // small (parallel) work in between, then there is high probability that
4504 // monitor thread started after the library shutdown. At shutdown it is
4505 // too late to cope with the problem, because when the primary thread is
4506 // in DllMain (process detach) the monitor has no chances to start (it is
4507 // blocked), and primary thread has no means to inform the monitor that
4508 // the library has gone, because all the memory which the monitor can
4509 // access is going to be released/reset.
4510 while (TCR_4(__kmp_init_monitor) < 2) {
4511 KMP_YIELD(TRUE);
4512 }
4513 KF_TRACE(10, ("after monitor thread has started\n"));
4514#endif
4515 }
4516 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4517 }
4518#endif
4519
4520 KMP_MB();
4521
4522 {
4523 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4524 ? 1
4525 : __kmp_hidden_helper_threads_num + 1;
4526
4527 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4528 ++new_gtid) {
4529 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4530 }
4531
4532 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4533 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4534 }
4535 }
4536
4537 /* allocate space for it. */
4538 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4539
4540 new_thr->th.th_nt_strict = false;
4541 new_thr->th.th_nt_loc = NULL;
4542 new_thr->th.th_nt_sev = severity_fatal;
4543 new_thr->th.th_nt_msg = NULL;
4544
4545 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4546
4547#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4548 // suppress race conditions detection on synchronization flags in debug mode
4549 // this helps to analyze library internals eliminating false positives
4550 __itt_suppress_mark_range(
4551 __itt_suppress_range, __itt_suppress_threading_errors,
4552 &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4553 __itt_suppress_mark_range(
4554 __itt_suppress_range, __itt_suppress_threading_errors,
4555 &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4556#if KMP_OS_WINDOWS
4557 __itt_suppress_mark_range(
4558 __itt_suppress_range, __itt_suppress_threading_errors,
4559 &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4560#else
4561 __itt_suppress_mark_range(__itt_suppress_range,
4562 __itt_suppress_threading_errors,
4563 &new_thr->th.th_suspend_init_count,
4564 sizeof(new_thr->th.th_suspend_init_count));
4565#endif
4566 // TODO: check if we need to also suppress b_arrived flags
4567 __itt_suppress_mark_range(__itt_suppress_range,
4568 __itt_suppress_threading_errors,
4569 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4570 sizeof(new_thr->th.th_bar[0].bb.b_go));
4571 __itt_suppress_mark_range(__itt_suppress_range,
4572 __itt_suppress_threading_errors,
4573 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4574 sizeof(new_thr->th.th_bar[1].bb.b_go));
4575 __itt_suppress_mark_range(__itt_suppress_range,
4576 __itt_suppress_threading_errors,
4577 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4578 sizeof(new_thr->th.th_bar[2].bb.b_go));
4579#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
4580 if (__kmp_storage_map) {
4581 __kmp_print_thread_storage_map(new_thr, new_gtid);
4582 }
4583
4584 // add the reserve serialized team, initialized from the team's primary thread
4585 {
4587 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4588 new_thr->th.th_serial_team = serial_team =
4589 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4590#if OMPT_SUPPORT
4591 ompt_data_none, // root parallel id
4592#endif
4593 proc_bind_default, &r_icvs, 0, NULL);
4594 }
4595 KMP_ASSERT(serial_team);
4596 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4597 // execution (it is unused for now).
4598 serial_team->t.t_threads[0] = new_thr;
4599 KF_TRACE(10,
4600 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4601 new_thr));
4602
4603 /* setup the thread structures */
4604 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4605
4606#if USE_FAST_MEMORY
4607 __kmp_initialize_fast_memory(new_thr);
4608#endif /* USE_FAST_MEMORY */
4609
4610#if KMP_USE_BGET
4611 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4612 __kmp_initialize_bget(new_thr);
4613#endif
4614
4615 __kmp_init_random(new_thr); // Initialize random number generator
4616
4617 /* Initialize these only once when thread is grabbed for a team allocation */
4618 KA_TRACE(20,
4619 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4621
4622 int b;
4623 kmp_balign_t *balign = new_thr->th.th_bar;
4624 for (b = 0; b < bs_last_barrier; ++b) {
4625 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4626 balign[b].bb.team = NULL;
4627 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4628 balign[b].bb.use_oncore_barrier = 0;
4629 }
4630
4631 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4632 new_thr->th.th_sleep_loc_type = flag_unset;
4633
4634 new_thr->th.th_spin_here = FALSE;
4635 new_thr->th.th_next_waiting = 0;
4636#if KMP_OS_UNIX
4637 new_thr->th.th_blocking = false;
4638#endif
4639
4640#if KMP_AFFINITY_SUPPORTED
4641 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4642 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4643 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4644 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4645#endif
4646 new_thr->th.th_def_allocator = __kmp_def_allocator;
4647 new_thr->th.th_prev_level = 0;
4648 new_thr->th.th_prev_num_threads = 1;
4649
4650 TCW_4(new_thr->th.th_in_pool, FALSE);
4651 new_thr->th.th_active_in_pool = FALSE;
4652 TCW_4(new_thr->th.th_active, TRUE);
4653
4654 new_thr->th.th_set_nested_nth = NULL;
4655 new_thr->th.th_set_nested_nth_sz = 0;
4656
4657 /* adjust the global counters */
4658 __kmp_all_nth++;
4659 __kmp_nth++;
4660
4661 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4662 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4663 if (__kmp_adjust_gtid_mode) {
4664 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4665 if (TCR_4(__kmp_gtid_mode) != 2) {
4666 TCW_4(__kmp_gtid_mode, 2);
4667 }
4668 } else {
4669 if (TCR_4(__kmp_gtid_mode) != 1) {
4670 TCW_4(__kmp_gtid_mode, 1);
4671 }
4672 }
4673 }
4674
4675#ifdef KMP_ADJUST_BLOCKTIME
4676 /* Adjust blocktime back to zero if necessary */
4677 /* Middle initialization might not have occurred yet */
4678 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4679 if (__kmp_nth > __kmp_avail_proc) {
4680 __kmp_zero_bt = TRUE;
4681 }
4682 }
4683#endif /* KMP_ADJUST_BLOCKTIME */
4684
4685#if KMP_AFFINITY_SUPPORTED
4686 // Set the affinity and topology information for new thread
4687 __kmp_affinity_set_init_mask(new_gtid, /*isa_root=*/FALSE);
4688#endif
4689
4690 /* actually fork it and create the new worker thread */
4691 KF_TRACE(
4692 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4693 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4694 KF_TRACE(10,
4695 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4696
4697 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4698 new_gtid));
4699 KMP_MB();
4700 return new_thr;
4701}
4702
4703/* Reinitialize team for reuse.
4704 The hot team code calls this routine at every fork barrier, so the EPCC barrier
4705 tests are extremely sensitive to changes in it, esp. writes to the team
4706 struct, which cause a cache invalidation in all threads.
4707 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4708static void __kmp_reinitialize_team(kmp_team_t *team,
4709 kmp_internal_control_t *new_icvs,
4710 ident_t *loc) {
4711 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4712 team->t.t_threads[0], team));
4713 KMP_DEBUG_ASSERT(team && new_icvs);
4714 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4715 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4716
4717 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4718 // Copy ICVs to the primary thread's implicit taskdata
4719 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4720 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4721
4722 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4723 team->t.t_threads[0], team));
4724}
4725
4726/* Initialize the team data structure.
4727 This assumes the t_threads and t_max_nproc are already set.
4728 Also, we don't touch the arguments */
4729static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4730 kmp_internal_control_t *new_icvs,
4731 ident_t *loc) {
4732 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4733
4734 /* verify */
4735 KMP_DEBUG_ASSERT(team);
4736 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4737 KMP_DEBUG_ASSERT(team->t.t_threads);
4738 KMP_MB();
4739
4740 team->t.t_master_tid = 0; /* not needed */
4741 /* team->t.t_master_bar; not needed */
4742 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4743 team->t.t_nproc = new_nproc;
4744
4745 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4746 team->t.t_next_pool = NULL;
4747 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4748 * up hot team */
4749
4750 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4751 team->t.t_invoke = NULL; /* not needed */
4752
4753 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4754 team->t.t_sched.sched = new_icvs->sched.sched;
4755
4756#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4757 team->t.t_fp_control_saved = FALSE; /* not needed */
4758 team->t.t_x87_fpu_control_word = 0; /* not needed */
4759 team->t.t_mxcsr = 0; /* not needed */
4760#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4761
4762 team->t.t_construct = 0;
4763
4764 team->t.t_ordered.dt.t_value = 0;
4765 team->t.t_master_active = FALSE;
4766
4767#ifdef KMP_DEBUG
4768 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
4769#endif
4770#if KMP_OS_WINDOWS
4771 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
4772#endif
4773
4774 team->t.t_control_stack_top = NULL;
4775
4776 __kmp_reinitialize_team(team, new_icvs, loc);
4777
4778 KMP_MB();
4779 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
4780}
4781
4782#if KMP_AFFINITY_SUPPORTED
4783static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
4784 int first, int last, int newp) {
4785 th->th.th_first_place = first;
4786 th->th.th_last_place = last;
4787 th->th.th_new_place = newp;
4788 if (newp != th->th.th_current_place) {
4789 if (__kmp_display_affinity && team->t.t_display_affinity != 1)
4790 team->t.t_display_affinity = 1;
4791 // Copy topology information associated with the new place
4792 th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
4793 th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
4794 }
4795}
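// Before the partitioning logic below: when there are more threads than
// places, the close/spread cases distribute S = n_th / n_places threads per
// place, with rem = n_th - S * n_places extra threads dropped every
// gap = n_places / rem places. The following is a hedged, standalone sketch
// of just that arithmetic (hypothetical helper; it starts at place 0 and
// ignores the primary thread's place and wraparound handled by the real code).
#include <stdio.h>

static void distribute(int n_th, int n_places, int *place_of) {
  int S = n_th / n_places;       // base threads per place
  int rem = n_th - S * n_places; // places that receive one extra thread
  int gap = rem > 0 ? n_places / rem : n_places;
  int place = 0, s_count = 0, gap_ct = gap;
  for (int f = 0; f < n_th; f++) {
    place_of[f] = place;
    s_count++;
    if (s_count == S && rem && gap_ct == gap) {
      // hold this place open for one extra thread on the next iteration
    } else if (s_count == S + 1 && rem && gap_ct == gap) {
      place = (place + 1) % n_places; // extra thread placed, move on
      s_count = 0;
      gap_ct = 1;
      rem--;
    } else if (s_count == S) {
      place = (place + 1) % n_places; // place full, no extra due here
      gap_ct++;
      s_count = 0;
    }
  }
}

int main(void) {
  int place_of[10];
  distribute(/*n_th=*/10, /*n_places=*/4, place_of); // S=2, rem=2, gap=2
  for (int f = 0; f < 10; f++)
    printf("thread %d -> place %d\n", f, place_of[f]);
  return 0;
}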
4796
4797// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4798// It calculates the worker + primary thread's partition based upon the parent
4799// thread's partition, and binds each worker to a thread in their partition.
4800// The primary thread's partition should already include its current binding.
4801static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4802 // Do not partition places for the hidden helper team
4803 if (KMP_HIDDEN_HELPER_TEAM(team))
4804 return;
4805 // Copy the primary thread's place partition to the team struct
4806 kmp_info_t *master_th = team->t.t_threads[0];
4807 KMP_DEBUG_ASSERT(master_th != NULL);
4808 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4809 int first_place = master_th->th.th_first_place;
4810 int last_place = master_th->th.th_last_place;
4811 int masters_place = master_th->th.th_current_place;
4812 int num_masks = __kmp_affinity.num_masks;
4813 team->t.t_first_place = first_place;
4814 team->t.t_last_place = last_place;
4815
4816 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4817 "bound to place %d partition = [%d,%d]\n",
4818 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4819 team->t.t_id, masters_place, first_place, last_place));
4820
4821 switch (proc_bind) {
4822
4823 case proc_bind_default:
4824 // Serial teams might have the proc_bind policy set to proc_bind_default.
4825 // Not an issue -- we don't rebind primary thread for any proc_bind policy.
4826 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4827 break;
4828
4829 case proc_bind_primary: {
4830 int f;
4831 int n_th = team->t.t_nproc;
4832 for (f = 1; f < n_th; f++) {
4833 kmp_info_t *th = team->t.t_threads[f];
4834 KMP_DEBUG_ASSERT(th != NULL);
4835 __kmp_set_thread_place(team, th, first_place, last_place, masters_place);
4836
4837 KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4838 "partition = [%d,%d]\n",
4839 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4840 f, masters_place, first_place, last_place));
4841 }
4842 } break;
4843
4844 case proc_bind_close: {
4845 int f;
4846 int n_th = team->t.t_nproc;
4847 int n_places;
4848 if (first_place <= last_place) {
4849 n_places = last_place - first_place + 1;
4850 } else {
4851 n_places = num_masks - first_place + last_place + 1;
4852 }
4853 if (n_th <= n_places) {
4854 int place = masters_place;
4855 for (f = 1; f < n_th; f++) {
4856 kmp_info_t *th = team->t.t_threads[f];
4857 KMP_DEBUG_ASSERT(th != NULL);
4858
4859 if (place == last_place) {
4860 place = first_place;
4861 } else if (place == (num_masks - 1)) {
4862 place = 0;
4863 } else {
4864 place++;
4865 }
4866 __kmp_set_thread_place(team, th, first_place, last_place, place);
4867
4868 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4869 "partition = [%d,%d]\n",
4870 __kmp_gtid_from_thread(team->t.t_threads[f]),
4871 team->t.t_id, f, place, first_place, last_place));
4872 }
4873 } else {
4874 int S, rem, gap, s_count;
4875 S = n_th / n_places;
4876 s_count = 0;
4877 rem = n_th - (S * n_places);
4878 gap = rem > 0 ? n_places / rem : n_places;
4879 int place = masters_place;
4880 int gap_ct = gap;
4881 for (f = 0; f < n_th; f++) {
4882 kmp_info_t *th = team->t.t_threads[f];
4883 KMP_DEBUG_ASSERT(th != NULL);
4884
4885 __kmp_set_thread_place(team, th, first_place, last_place, place);
4886 s_count++;
4887
4888 if ((s_count == S) && rem && (gap_ct == gap)) {
4889 // do nothing, add an extra thread to place on next iteration
4890 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4891 // we added an extra thread to this place; move to next place
4892 if (place == last_place) {
4893 place = first_place;
4894 } else if (place == (num_masks - 1)) {
4895 place = 0;
4896 } else {
4897 place++;
4898 }
4899 s_count = 0;
4900 gap_ct = 1;
4901 rem--;
4902 } else if (s_count == S) { // place full; don't add extra
4903 if (place == last_place) {
4904 place = first_place;
4905 } else if (place == (num_masks - 1)) {
4906 place = 0;
4907 } else {
4908 place++;
4909 }
4910 gap_ct++;
4911 s_count = 0;
4912 }
4913
4914 KA_TRACE(100,
4915 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4916 "partition = [%d,%d]\n",
4917 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4918 th->th.th_new_place, first_place, last_place));
4919 }
4920 KMP_DEBUG_ASSERT(place == masters_place);
4921 }
4922 } break;
4923
4924 case proc_bind_spread: {
4925 int f;
4926 int n_th = team->t.t_nproc;
4927 int n_places;
4928 int thidx;
4929 if (first_place <= last_place) {
4930 n_places = last_place - first_place + 1;
4931 } else {
4932 n_places = num_masks - first_place + last_place + 1;
4933 }
4934 if (n_th <= n_places) {
4935 int place = -1;
4936
4937 if (n_places != num_masks) {
4938 int S = n_places / n_th;
4939 int s_count, rem, gap, gap_ct;
4940
4941 place = masters_place;
4942 rem = n_places - n_th * S;
4943 gap = rem ? n_th / rem : 1;
4944 gap_ct = gap;
4945 thidx = n_th;
4946 if (update_master_only == 1)
4947 thidx = 1;
4948 for (f = 0; f < thidx; f++) {
4949 kmp_info_t *th = team->t.t_threads[f];
4950 KMP_DEBUG_ASSERT(th != NULL);
4951
4952 int fplace = place, nplace = place;
4953 s_count = 1;
4954 while (s_count < S) {
4955 if (place == last_place) {
4956 place = first_place;
4957 } else if (place == (num_masks - 1)) {
4958 place = 0;
4959 } else {
4960 place++;
4961 }
4962 s_count++;
4963 }
4964 if (rem && (gap_ct == gap)) {
4965 if (place == last_place) {
4966 place = first_place;
4967 } else if (place == (num_masks - 1)) {
4968 place = 0;
4969 } else {
4970 place++;
4971 }
4972 rem--;
4973 gap_ct = 0;
4974 }
4975 __kmp_set_thread_place(team, th, fplace, place, nplace);
4976 gap_ct++;
4977
4978 if (place == last_place) {
4979 place = first_place;
4980 } else if (place == (num_masks - 1)) {
4981 place = 0;
4982 } else {
4983 place++;
4984 }
4985
4986 KA_TRACE(100,
4987 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4988 "partition = [%d,%d], num_masks: %u\n",
4989 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4990 f, th->th.th_new_place, th->th.th_first_place,
4991 th->th.th_last_place, num_masks));
4992 }
4993 } else {
4994 /* Given a uniform space of available computation places, we can create
4995 T partitions of roughly P/T places each and put each thread into the
4996 first place of its partition. */
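 /* Worked example (illustrative numbers, not from the source): with
    num_masks == n_places == 8, masters_place == 0 and n_th == 3, the
    spacing below is (8 + 1) / 3 = 3.0, so the loop assigns the partitions
    [0,2], [3,5] and [6,7] (the last one clamped to n_places - 1), and each
    thread is placed on the first place of its partition: places 0, 3 and 6. */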
4997 double current = static_cast<double>(masters_place);
4998 double spacing =
4999 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
5000 int first, last;
5001 kmp_info_t *th;
5002
5003 thidx = n_th + 1;
5004 if (update_master_only == 1)
5005 thidx = 1;
5006 for (f = 0; f < thidx; f++) {
5007 first = static_cast<int>(current);
5008 last = static_cast<int>(current + spacing) - 1;
5009 KMP_DEBUG_ASSERT(last >= first);
5010 if (first >= n_places) {
5011 if (masters_place) {
5012 first -= n_places;
5013 last -= n_places;
5014 if (first == (masters_place + 1)) {
5015 KMP_DEBUG_ASSERT(f == n_th);
5016 first--;
5017 }
5018 if (last == masters_place) {
5019 KMP_DEBUG_ASSERT(f == (n_th - 1));
5020 last--;
5021 }
5022 } else {
5023 KMP_DEBUG_ASSERT(f == n_th);
5024 first = 0;
5025 last = 0;
5026 }
5027 }
5028 if (last >= n_places) {
5029 last = (n_places - 1);
5030 }
5031 place = first;
5032 current += spacing;
5033 if (f < n_th) {
5034 KMP_DEBUG_ASSERT(0 <= first);
5035 KMP_DEBUG_ASSERT(n_places > first);
5036 KMP_DEBUG_ASSERT(0 <= last);
5037 KMP_DEBUG_ASSERT(n_places > last);
5038 KMP_DEBUG_ASSERT(last_place >= first_place);
5039 th = team->t.t_threads[f];
5040 KMP_DEBUG_ASSERT(th);
5041 __kmp_set_thread_place(team, th, first, last, place);
5042 KA_TRACE(100,
5043 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5044 "partition = [%d,%d], spacing = %.4f\n",
5045 __kmp_gtid_from_thread(team->t.t_threads[f]),
5046 team->t.t_id, f, th->th.th_new_place,
5047 th->th.th_first_place, th->th.th_last_place, spacing));
5048 }
5049 }
5050 }
5051 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5052 } else {
5053 int S, rem, gap, s_count;
5054 S = n_th / n_places;
5055 s_count = 0;
5056 rem = n_th - (S * n_places);
5057 gap = rem > 0 ? n_places / rem : n_places;
5058 int place = masters_place;
5059 int gap_ct = gap;
5060 thidx = n_th;
5061 if (update_master_only == 1)
5062 thidx = 1;
5063 for (f = 0; f < thidx; f++) {
5064 kmp_info_t *th = team->t.t_threads[f];
5065 KMP_DEBUG_ASSERT(th != NULL);
5066
5067 __kmp_set_thread_place(team, th, place, place, place);
5068 s_count++;
5069
5070 if ((s_count == S) && rem && (gap_ct == gap)) {
5071 // do nothing, add an extra thread to place on next iteration
5072 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5073 // we added an extra thread to this place; move on to next place
5074 if (place == last_place) {
5075 place = first_place;
5076 } else if (place == (num_masks - 1)) {
5077 place = 0;
5078 } else {
5079 place++;
5080 }
5081 s_count = 0;
5082 gap_ct = 1;
5083 rem--;
5084 } else if (s_count == S) { // place is full; don't add extra thread
5085 if (place == last_place) {
5086 place = first_place;
5087 } else if (place == (num_masks - 1)) {
5088 place = 0;
5089 } else {
5090 place++;
5091 }
5092 gap_ct++;
5093 s_count = 0;
5094 }
5095
5096 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5097 "partition = [%d,%d]\n",
5098 __kmp_gtid_from_thread(team->t.t_threads[f]),
5099 team->t.t_id, f, th->th.th_new_place,
5100 th->th.th_first_place, th->th.th_last_place));
5101 }
5102 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5103 }
5104 } break;
5105
5106 default:
5107 break;
5108 }
5109
5110 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
5111}
5112
5113#endif // KMP_AFFINITY_SUPPORTED
5114
5115/* allocate a new team data structure to use. take one off of the free pool if
5116 available */
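/* Illustrative sketch (not part of the runtime; the names below are
   hypothetical): the overall shape of __kmp_allocate_team is "reuse the hot
   team if possible, else take a large-enough team from the free pool, else
   allocate a fresh one". Reduced to a generic capacity-keyed free list in
   plain C (needs <stdlib.h>):

     typedef struct item {
       int capacity;
       struct item *next;
     } item_t;

     static item_t *pool_head = NULL;

     static item_t *acquire(int need) {
       // Walk the pool and unlink the first item that is large enough.
       for (item_t **link = &pool_head; *link; link = &(*link)->next) {
         if ((*link)->capacity >= need) {
           item_t *found = *link;
           *link = found->next;
           found->next = NULL;
           return found; // caller re-initializes it for fresh use
         }
       }
       // Nothing suitable pooled: allocate a new item.
       item_t *fresh = (item_t *)malloc(sizeof(item_t));
       fresh->capacity = need;
       fresh->next = NULL;
       return fresh;
     }

   The real code below differs in that the hot team is checked first and
   undersized pooled teams are reaped rather than skipped. */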
5117kmp_team_t *__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
5118#if OMPT_SUPPORT
5119 ompt_data_t ompt_parallel_data,
5120#endif
5121 kmp_proc_bind_t new_proc_bind,
5122 kmp_internal_control_t *new_icvs, int argc,
5123 kmp_info_t *master) {
5124 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5125 int f;
5126 kmp_team_t *team;
5127 int use_hot_team = !root->r.r_active;
5128 int level = 0;
5129 int do_place_partition = 1;
5130
5131 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
5132 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5133 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5134 KMP_MB();
5135
5136 kmp_hot_team_ptr_t *hot_teams;
5137 if (master) {
5138 team = master->th.th_team;
5139 level = team->t.t_active_level;
5140 if (master->th.th_teams_microtask) { // in teams construct?
5141 if (master->th.th_teams_size.nteams > 1 &&
5142 ( // #teams > 1
5143 team->t.t_pkfn ==
5144 (microtask_t)__kmp_teams_master || // inner fork of the teams
5145 master->th.th_teams_level <
5146 team->t.t_level)) { // or nested parallel inside the teams
5147 ++level; // do not increment if #teams==1 or for the outer fork of the
5148 // teams; increment otherwise
5149 }
5150 // Do not perform the place partition for the inner fork of the teams
5151 // construct; wait until a nested parallel region is encountered inside it
5152 if ((master->th.th_teams_size.nteams == 1 &&
5153 master->th.th_teams_level >= team->t.t_level) ||
5154 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5155 do_place_partition = 0;
5156 }
5157 hot_teams = master->th.th_hot_teams;
5158 if (level < __kmp_hot_teams_max_level && hot_teams &&
5159 hot_teams[level].hot_team) {
5160 // hot team has already been allocated for given level
5161 use_hot_team = 1;
5162 } else {
5163 use_hot_team = 0;
5164 }
5165 } else {
5166 // check we won't access uninitialized hot_teams, just in case
5167 KMP_DEBUG_ASSERT(new_nproc == 1);
5168 }
5169 // Optimization to use a "hot" team
5170 if (use_hot_team && new_nproc > 1) {
5171 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5172 team = hot_teams[level].hot_team;
5173#if KMP_DEBUG
5175 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5176 "task_team[1] = %p before reinit\n",
5177 team->t.t_task_team[0], team->t.t_task_team[1]));
5178 }
5179#endif
5180
5181 if (team->t.t_nproc != new_nproc &&
5183 // Distributed barrier may need a resize
5184 int old_nthr = team->t.t_nproc;
5185 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5186 }
5187
5188 // If not doing the place partition, then reset the team's proc bind
5189 // to indicate that partitioning of all threads still needs to take place
5190 if (do_place_partition == 0)
5191 team->t.t_proc_bind = proc_bind_default;
5192 // Has the number of threads changed?
5193 /* Let's assume the most common case is that the number of threads is
5194 unchanged, and put that case first. */
5195 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
5196 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
5197 // This case can mean that omp_set_num_threads() was called and the hot
5198 // team size was already reduced, so we check the special flag
5199 if (team->t.t_size_changed == -1) {
5200 team->t.t_size_changed = 1;
5201 } else {
5202 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5203 }
5204
5205 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5206 kmp_r_sched_t new_sched = new_icvs->sched;
5207 // set primary thread's schedule as new run-time schedule
5208 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5209
5210 __kmp_reinitialize_team(team, new_icvs,
5211 root->r.r_uber_thread->th.th_ident);
5212
5213 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5214 team->t.t_threads[0], team));
5215 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5216
5217#if KMP_AFFINITY_SUPPORTED
5218 if ((team->t.t_size_changed == 0) &&
5219 (team->t.t_proc_bind == new_proc_bind)) {
5220 if (new_proc_bind == proc_bind_spread) {
5221 if (do_place_partition) {
5222 // add flag to update only master for spread
5223 __kmp_partition_places(team, 1);
5224 }
5225 }
5226 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
5227 "proc_bind = %d, partition = [%d,%d]\n",
5228 team->t.t_id, new_proc_bind, team->t.t_first_place,
5229 team->t.t_last_place));
5230 } else {
5231 if (do_place_partition) {
5232 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5233 __kmp_partition_places(team);
5234 }
5235 }
5236#else
5237 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5238#endif /* KMP_AFFINITY_SUPPORTED */
5239 } else if (team->t.t_nproc > new_nproc) {
5240 KA_TRACE(20,
5241 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
5242 new_nproc));
5243
5244 team->t.t_size_changed = 1;
5246 // Barrier size already reduced earlier in this function
5247 // Activate team threads via th_used_in_team
5248 __kmp_add_threads_to_team(team, new_nproc);
5249 }
5250 // When decreasing team size, threads no longer in the team should
5251 // unref task team.
5253 for (f = new_nproc; f < team->t.t_nproc; f++) {
5254 kmp_info_t *th = team->t.t_threads[f];
5255 KMP_DEBUG_ASSERT(th);
5256 th->th.th_task_team = NULL;
5257 }
5258 }
5259 if (__kmp_hot_teams_mode == 0) {
5260 // AC: the saved number of threads should correspond to the team's value in
5261 // this mode; it can be bigger in mode 1, when the hot team has threads in reserve
5262 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5263 hot_teams[level].hot_team_nth = new_nproc;
5264 /* release the extra threads we don't need any more */
5265 for (f = new_nproc; f < team->t.t_nproc; f++) {
5266 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5267 __kmp_free_thread(team->t.t_threads[f]);
5268 team->t.t_threads[f] = NULL;
5269 }
5270 } // (__kmp_hot_teams_mode == 0)
5271 else {
5272 // When keeping extra threads in team, switch threads to wait on own
5273 // b_go flag
5274 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5275 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5276 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5277 for (int b = 0; b < bs_last_barrier; ++b) {
5278 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5279 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5280 }
5281 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5282 }
5283 }
5284 }
5285 team->t.t_nproc = new_nproc;
5286 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5287 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5288 __kmp_reinitialize_team(team, new_icvs,
5289 root->r.r_uber_thread->th.th_ident);
5290
5291 // Update remaining threads
5292 for (f = 0; f < new_nproc; ++f) {
5293 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5294 }
5295
5296 // restore the current task state of the primary thread: should be the
5297 // implicit task
5298 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5299 team->t.t_threads[0], team));
5300
5301 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5302
5303#ifdef KMP_DEBUG
5304 for (f = 0; f < team->t.t_nproc; f++) {
5305 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5306 team->t.t_threads[f]->th.th_team_nproc ==
5307 team->t.t_nproc);
5308 }
5309#endif
5310
5311 if (do_place_partition) {
5312 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5313#if KMP_AFFINITY_SUPPORTED
5314 __kmp_partition_places(team);
5315#endif
5316 }
5317 } else { // team->t.t_nproc < new_nproc
5318
5319 KA_TRACE(20,
5320 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5321 new_nproc));
5322 int old_nproc = team->t.t_nproc; // save old value; used below to update only the new threads
5323 team->t.t_size_changed = 1;
5324
5325 int avail_threads = hot_teams[level].hot_team_nth;
5326 if (new_nproc < avail_threads)
5327 avail_threads = new_nproc;
5328 kmp_info_t **other_threads = team->t.t_threads;
5329 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5330 // Adjust barrier data of reserved threads (if any) of the team
5331 // Other data will be set in __kmp_initialize_info() below.
5332 int b;
5333 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5334 for (b = 0; b < bs_last_barrier; ++b) {
5335 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5336 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5337#if USE_DEBUGGER
5338 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5339#endif
5340 }
5341 }
5342 if (hot_teams[level].hot_team_nth >= new_nproc) {
5343 // we have all needed threads in reserve, no need to allocate any
5344 // this is only possible in mode 1; mode 0 cannot have reserved threads
5346 team->t.t_nproc = new_nproc; // just get reserved threads involved
5347 } else {
5348 // We may have some threads in reserve, but not enough;
5349 // get reserved threads involved if any.
5350 team->t.t_nproc = hot_teams[level].hot_team_nth;
5351 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5352 if (team->t.t_max_nproc < new_nproc) {
5353 /* reallocate larger arrays */
5354 __kmp_reallocate_team_arrays(team, new_nproc);
5355 __kmp_reinitialize_team(team, new_icvs, NULL);
5356 }
5357
5358#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5359 KMP_AFFINITY_SUPPORTED
5360 /* Temporarily set full mask for primary thread before creation of
5361 workers. The reason is that workers inherit the affinity from the
5362 primary thread, so if a lot of workers are created on a single
5363 core quickly, they don't get a chance to set their own affinity for
5364 a long time. */
5365 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5366#endif
5367
5368 /* allocate new threads for the hot team */
5369 for (f = team->t.t_nproc; f < new_nproc; f++) {
5370 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5371 KMP_DEBUG_ASSERT(new_worker);
5372 team->t.t_threads[f] = new_worker;
5373
5374 KA_TRACE(20,
5375 ("__kmp_allocate_team: team %d init T#%d arrived: "
5376 "join=%llu, plain=%llu\n",
5377 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5378 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5379 team->t.t_bar[bs_plain_barrier].b_arrived));
5380
5381 { // Initialize barrier data for new threads.
5382 int b;
5383 kmp_balign_t *balign = new_worker->th.th_bar;
5384 for (b = 0; b < bs_last_barrier; ++b) {
5385 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5386 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5388#if USE_DEBUGGER
5389 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5390#endif
5391 }
5392 }
5393 }
5394
5395#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5396 KMP_AFFINITY_SUPPORTED
5397 /* Restore initial primary thread's affinity mask */
5398 new_temp_affinity.restore();
5399#endif
5400 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5402 // Barrier size already increased earlier in this function
5403 // Activate team threads via th_used_in_team
5404 __kmp_add_threads_to_team(team, new_nproc);
5405 }
5406 /* make sure everyone is synchronized */
5407 // new threads below
5408 __kmp_initialize_team(team, new_nproc, new_icvs,
5409 root->r.r_uber_thread->th.th_ident);
5410
5411 /* reinitialize the threads */
5412 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5413 for (f = 0; f < team->t.t_nproc; ++f)
5414 __kmp_initialize_info(team->t.t_threads[f], team, f,
5415 __kmp_gtid_from_tid(f, team));
5416
5417 // set th_task_state for new threads in hot team with older thread's state
5418 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5419 for (f = old_nproc; f < team->t.t_nproc; ++f)
5420 team->t.t_threads[f]->th.th_task_state = old_state;
5421
5422#ifdef KMP_DEBUG
5423 for (f = 0; f < team->t.t_nproc; ++f) {
5424 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5425 team->t.t_threads[f]->th.th_team_nproc ==
5426 team->t.t_nproc);
5427 }
5428#endif
5429
5430 if (do_place_partition) {
5431 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5432#if KMP_AFFINITY_SUPPORTED
5433 __kmp_partition_places(team);
5434#endif
5435 }
5436 } // Check changes in number of threads
5437
5438 if (master->th.th_teams_microtask) {
5439 for (f = 1; f < new_nproc; ++f) {
5440 // propagate teams construct specific info to workers
5441 kmp_info_t *thr = team->t.t_threads[f];
5442 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5443 thr->th.th_teams_level = master->th.th_teams_level;
5444 thr->th.th_teams_size = master->th.th_teams_size;
5445 }
5446 }
5447 if (level) {
5448 // Sync barrier state for nested hot teams, not needed for outermost hot
5449 // team.
5450 for (f = 1; f < new_nproc; ++f) {
5451 kmp_info_t *thr = team->t.t_threads[f];
5452 int b;
5453 kmp_balign_t *balign = thr->th.th_bar;
5454 for (b = 0; b < bs_last_barrier; ++b) {
5455 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5456 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5457#if USE_DEBUGGER
5458 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5459#endif
5460 }
5461 }
5462 }
5463
5464 /* reallocate space for arguments if necessary */
5465 __kmp_alloc_argv_entries(argc, team, TRUE);
5466 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5467 // The hot team re-uses the previous task team,
5468 // if untouched during the previous release->gather phase.
5469
5470 KF_TRACE(10, (" hot_team = %p\n", team));
5471
5472#if KMP_DEBUG
5474 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5475 "task_team[1] = %p after reinit\n",
5476 team->t.t_task_team[0], team->t.t_task_team[1]));
5477 }
5478#endif
5479
5480#if OMPT_SUPPORT
5481 __ompt_team_assign_id(team, ompt_parallel_data);
5482#endif
5483
5484 KMP_MB();
5485
5486 return team;
5487 }
5488
5489 /* next, let's try to take one from the team pool */
5490 KMP_MB();
5491 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5492 /* TODO: consider resizing undersized teams instead of reaping them, now
5493 that we have a resizing mechanism */
5494 if (team->t.t_max_nproc >= max_nproc) {
5495 /* take this team from the team pool */
5496 __kmp_team_pool = team->t.t_next_pool;
5497
5498 if (max_nproc > 1 &&
5500 if (!team->t.b) { // Allocate barrier structure
5502 }
5503 }
5504
5505 /* setup the team for fresh use */
5506 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5507
5508 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5509 "task_team[1] %p to NULL\n",
5510 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5511 team->t.t_task_team[0] = NULL;
5512 team->t.t_task_team[1] = NULL;
5513
5514 /* reallocate space for arguments if necessary */
5515 __kmp_alloc_argv_entries(argc, team, TRUE);
5516 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5517
5518 KA_TRACE(
5519 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5521 { // Initialize barrier data.
5522 int b;
5523 for (b = 0; b < bs_last_barrier; ++b) {
5524 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5525#if USE_DEBUGGER
5526 team->t.t_bar[b].b_master_arrived = 0;
5527 team->t.t_bar[b].b_team_arrived = 0;
5528#endif
5529 }
5530 }
5531
5532 team->t.t_proc_bind = new_proc_bind;
5533
5534 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5535 team->t.t_id));
5536
5537#if OMPT_SUPPORT
5538 __ompt_team_assign_id(team, ompt_parallel_data);
5539#endif
5540
5541 team->t.t_nested_nth = NULL;
5542
5543 KMP_MB();
5544
5545 return team;
5546 }
5547
5548 /* reap team if it is too small, then loop back and check the next one */
5549 // not sure if this is wise, but it will be redone during the hot-teams
5550 // rewrite.
5551 /* TODO: Use technique to find the right size hot-team, don't reap them */
5552 team = __kmp_reap_team(team);
5553 __kmp_team_pool = team;
5554 }
5555
5556 /* nothing available in the pool, no matter, make a new team! */
5557 KMP_MB();
5558 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5559
5560 /* and set it up */
5561 team->t.t_max_nproc = max_nproc;
5562 if (max_nproc > 1 &&
5564 // Allocate barrier structure
5566 }
5567
5568 /* NOTE well, for some reason allocating one big buffer and dividing it up
5569 seems to really hurt performance a lot on the P4, so let's not use this */
5570 __kmp_allocate_team_arrays(team, max_nproc);
5571
5572 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5573 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5574
5575 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5576 "%p to NULL\n",
5577 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5578 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5579 // memory, no need to duplicate
5580 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5581 // memory, no need to duplicate
5582
5583 if (__kmp_storage_map) {
5584 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5585 }
5586
5587 /* allocate space for arguments */
5588 __kmp_alloc_argv_entries(argc, team, FALSE);
5589 team->t.t_argc = argc;
5590
5591 KA_TRACE(20,
5592 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5594 { // Initialize barrier data.
5595 int b;
5596 for (b = 0; b < bs_last_barrier; ++b) {
5597 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5598#if USE_DEBUGGER
5599 team->t.t_bar[b].b_master_arrived = 0;
5600 team->t.t_bar[b].b_team_arrived = 0;
5601#endif
5602 }
5603 }
5604
5605 team->t.t_proc_bind = new_proc_bind;
5606
5607#if OMPT_SUPPORT
5608 __ompt_team_assign_id(team, ompt_parallel_data);
5609 team->t.ompt_serialized_team_info = NULL;
5610#endif
5611
5612 KMP_MB();
5613
5614 team->t.t_nested_nth = NULL;
5615
5616 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5617 team->t.t_id));
5618
5619 return team;
5620}
5621
5622/* TODO implement hot-teams at all levels */
5623/* TODO implement lazy thread release on demand (disband request) */
5624
5625/* free the team. return it to the team pool. release all the threads
5626 * associated with it */
5628 int f;
5629 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5630 team->t.t_id));
5631
5632 /* verify state */
5633 KMP_DEBUG_ASSERT(root);
5634 KMP_DEBUG_ASSERT(team);
5635 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5636 KMP_DEBUG_ASSERT(team->t.t_threads);
5637
5638 int use_hot_team = team == root->r.r_hot_team;
5639 int level;
5640 if (master) {
5641 level = team->t.t_active_level - 1;
5642 if (master->th.th_teams_microtask) { // in teams construct?
5643 if (master->th.th_teams_size.nteams > 1) {
5644 ++level; // level was not increased in teams construct for
5645 // team_of_masters
5646 }
5647 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5648 master->th.th_teams_level == team->t.t_level) {
5649 ++level; // level was not increased in teams construct for
5650 // team_of_workers before the parallel
5651 } // team->t.t_level will be increased inside parallel
5652 }
5653#if KMP_DEBUG
5654 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5655#endif
5657 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5658 use_hot_team = 1;
5659 }
5660 }
5661
5662 /* team is done working */
5663 TCW_SYNC_PTR(team->t.t_pkfn,
5664 NULL); // Important for Debugging Support Library.
5665#if KMP_OS_WINDOWS
5666 team->t.t_copyin_counter = 0; // init counter for possible reuse
5667#endif
5668 // Do not reset pointer to parent team to NULL for hot teams.
5669
5670 /* if we are a non-hot team, release our threads */
5671 if (!use_hot_team) {
5673 // Wait for threads to reach reapable state
5674 for (f = 1; f < team->t.t_nproc; ++f) {
5675 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5676 kmp_info_t *th = team->t.t_threads[f];
5677 volatile kmp_uint32 *state = &th->th.th_reap_state;
5678 while (*state != KMP_SAFE_TO_REAP) {
5679#if KMP_OS_WINDOWS
5680 // On Windows a thread can be killed at any time, check this
5681 DWORD ecode;
5682 if (!__kmp_is_thread_alive(th, &ecode)) {
5683 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5684 break;
5685 }
5686#endif
5687 // first check if thread is sleeping
5688 if (th->th.th_sleep_loc)
5690 KMP_CPU_PAUSE();
5691 }
5692 }
5693
5694 // Delete task teams
5695 int tt_idx;
5696 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5697 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5698 if (task_team != NULL) {
5699 for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
5700 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5701 team->t.t_threads[f]->th.th_task_team = NULL;
5702 }
5703 KA_TRACE(
5704 20,
5705 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5706 __kmp_get_gtid(), task_team, team->t.t_id));
5707 __kmp_free_task_team(master, task_team);
5708 team->t.t_task_team[tt_idx] = NULL;
5709 }
5710 }
5711 }
5712
5713 // Before clearing parent pointer, check if nested_nth list should be freed
5714 if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&
5715 team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) {
5716 KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
5717 KMP_INTERNAL_FREE(team->t.t_nested_nth);
5718 }
5719 team->t.t_nested_nth = NULL;
5720
5721 // Reset pointer to parent team only for non-hot teams.
5722 team->t.t_parent = NULL;
5723 team->t.t_level = 0;
5724 team->t.t_active_level = 0;
5725
5726 /* free the worker threads */
5727 for (f = 1; f < team->t.t_nproc; ++f) {
5728 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5731 &(team->t.t_threads[f]->th.th_used_in_team), 1, 2);
5732 }
5733 __kmp_free_thread(team->t.t_threads[f]);
5734 }
5735
5737 if (team->t.b) {
5738 // wake up thread at old location
5739 team->t.b->go_release();
5741 for (f = 1; f < team->t.t_nproc; ++f) {
5742 if (team->t.b->sleep[f].sleep) {
5744 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5745 (kmp_atomic_flag_64<> *)NULL);
5746 }
5747 }
5748 }
5749 // Wait for threads to be removed from team
5750 for (int f = 1; f < team->t.t_nproc; ++f) {
5751 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5752 KMP_CPU_PAUSE();
5753 }
5754 }
5755 }
5756
5757 for (f = 1; f < team->t.t_nproc; ++f) {
5758 team->t.t_threads[f] = NULL;
5759 }
5760
5761 if (team->t.t_max_nproc > 1 &&
5764 team->t.b = NULL;
5765 }
5766 /* put the team back in the team pool */
5767 /* TODO limit size of team pool, call reap_team if pool too large */
5768 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5769 __kmp_team_pool = (volatile kmp_team_t *)team;
5770 } else { // Check if team was created for primary threads in teams construct
5771 // See if first worker is a CG root
5772 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5773 team->t.t_threads[1]->th.th_cg_roots);
5774 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5775 // Clean up the CG root nodes on workers so that this team can be re-used
5776 for (f = 1; f < team->t.t_nproc; ++f) {
5777 kmp_info_t *thr = team->t.t_threads[f];
5778 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5779 thr->th.th_cg_roots->cg_root == thr);
5780 // Pop current CG root off list
5781 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5782 thr->th.th_cg_roots = tmp->up;
5783 KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
5784 " up to node %p. cg_nthreads was %d\n",
5785 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5786 int i = tmp->cg_nthreads--;
5787 if (i == 1) {
5788 __kmp_free(tmp); // free CG if we are the last thread in it
5789 }
5790 // Restore current task's thread_limit from CG root
5791 if (thr->th.th_cg_roots)
5792 thr->th.th_current_task->td_icvs.thread_limit =
5793 thr->th.th_cg_roots->cg_thread_limit;
5794 }
5795 }
5796 }
5797
5798 KMP_MB();
5799}
5800
5801/* reap the team. destroy it, reclaim all its resources and free its memory */
5803 kmp_team_t *next_pool = team->t.t_next_pool;
5804
5805 KMP_DEBUG_ASSERT(team);
5806 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5807 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5808 KMP_DEBUG_ASSERT(team->t.t_threads);
5809 KMP_DEBUG_ASSERT(team->t.t_argv);
5810
5811 /* TODO clean the threads that are a part of this? */
5812
5813 /* free stuff */
5815 if (team->t.t_argv != &team->t.t_inline_argv[0])
5816 __kmp_free((void *)team->t.t_argv);
5817 __kmp_free(team);
5818
5819 KMP_MB();
5820 return next_pool;
5821}
5822
5823// Free the thread. Don't reap it, just place it on the pool of available
5824// threads.
5825//
5826// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5827// binding for the affinity mechanism to be useful.
5828//
5829// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5830// However, we want to avoid a potential performance problem by always
5831// scanning through the list to find the correct point at which to insert
5832// the thread (potential N**2 behavior). To do this we keep track of the
5833// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5834// With single-level parallelism, threads will always be added to the tail
5835// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5836// parallelism, all bets are off and we may need to scan through the entire
5837// free list.
5838//
5839// This change also has a potentially large performance benefit, for some
5840// applications. Previously, as threads were freed from the hot team, they
5841// would be placed back on the free list in inverse order. If the hot team
5842 // grew back to its original size, then the freed threads would be placed
5843// back on the hot team in reverse order. This could cause bad cache
5844// locality problems on programs where the size of the hot team regularly
5845 // grew and shrank.
5846//
5847// Now, for single-level parallelism, the OMP tid is always == gtid.
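/* Illustrative sketch (not part of the runtime; "node", "pool" and
   "insert_pt" are hypothetical names): sorted insertion into a singly linked
   list with a cached insertion point, which is the technique the comment
   above describes.

     typedef struct node {
       int gtid;
       struct node *next;
     } node_t;

     static node_t *pool = NULL;      // kept sorted by ascending gtid
     static node_t *insert_pt = NULL; // last insertion point (a cache only)

     static void pool_insert(node_t *n) {
       // If the cached point is already past the new gtid, re-scan from the
       // head of the list.
       if (insert_pt != NULL && insert_pt->gtid > n->gtid)
         insert_pt = NULL;
       node_t **scan = insert_pt ? &insert_pt->next : &pool;
       // With single-level parallelism this loop does zero iterations:
       // threads are always appended right after the cached point.
       while (*scan != NULL && (*scan)->gtid < n->gtid)
         scan = &(*scan)->next;
       n->next = *scan;
       *scan = n;
       insert_pt = n; // remember where we inserted
     }
*/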
5849 int gtid;
5850 kmp_info_t **scan;
5851
5852 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5853 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5854
5855 KMP_DEBUG_ASSERT(this_th);
5856
5857 // When moving a thread to the pool, switch it to wait on its own b_go flag
5858 // and leave its barrier team pointer uninitialized (NULL).
5859 int b;
5860 kmp_balign_t *balign = this_th->th.th_bar;
5861 for (b = 0; b < bs_last_barrier; ++b) {
5862 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5863 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5864 balign[b].bb.team = NULL;
5865 balign[b].bb.leaf_kids = 0;
5866 }
5867 this_th->th.th_task_state = 0;
5868 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5869
5870 /* put thread back on the free pool */
5871 TCW_PTR(this_th->th.th_team, NULL);
5872 TCW_PTR(this_th->th.th_root, NULL);
5873 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5874
5875 while (this_th->th.th_cg_roots) {
5876 this_th->th.th_cg_roots->cg_nthreads--;
5877 KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5878 " %p of thread %p to %d\n",
5879 this_th, this_th->th.th_cg_roots,
5880 this_th->th.th_cg_roots->cg_root,
5881 this_th->th.th_cg_roots->cg_nthreads));
5882 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5883 if (tmp->cg_root == this_th) { // Thread is a cg_root
5884 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5885 KA_TRACE(
5886 5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5887 this_th->th.th_cg_roots = tmp->up;
5888 __kmp_free(tmp);
5889 } else { // Worker thread
5890 if (tmp->cg_nthreads == 0) { // last thread leaves contention group
5891 __kmp_free(tmp);
5892 }
5893 this_th->th.th_cg_roots = NULL;
5894 break;
5895 }
5896 }
5897
5898 /* If the implicit task assigned to this thread can be used by other threads,
5899 * multiple threads can share the data and try to free the task in
5900 * __kmp_reap_thread at exit. This duplicate use of the task data is more
5901 * likely when the hot team is disabled, but it can occur even when
5902 * the hot team is enabled */
5903 __kmp_free_implicit_task(this_th);
5904 this_th->th.th_current_task = NULL;
5905
5906 // If the __kmp_thread_pool_insert_pt is already past the new insert
5907 // point, then we need to re-scan the entire list.
5908 gtid = this_th->th.th_info.ds.ds_gtid;
5909 if (__kmp_thread_pool_insert_pt != NULL) {
5911 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5913 }
5914 }
5915
5916 // Scan down the list to find the place to insert the thread.
5917 // scan is the address of a link in the list, possibly the address of
5918 // __kmp_thread_pool itself.
5919 //
5920 // In the absence of nested parallelism, the for loop will have 0 iterations.
5921 if (__kmp_thread_pool_insert_pt != NULL) {
5922 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5923 } else {
5924 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5925 }
5926 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5927 scan = &((*scan)->th.th_next_pool))
5928 ;
5929
5930 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5931 // to its address.
5932 TCW_PTR(this_th->th.th_next_pool, *scan);
5933 __kmp_thread_pool_insert_pt = *scan = this_th;
5934 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5935 (this_th->th.th_info.ds.ds_gtid <
5936 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5937 TCW_4(this_th->th.th_in_pool, TRUE);
5939 __kmp_lock_suspend_mx(this_th);
5940 if (this_th->th.th_active == TRUE) {
5942 this_th->th.th_active_in_pool = TRUE;
5943 }
5944#if KMP_DEBUG
5945 else {
5946 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5947 }
5948#endif
5949 __kmp_unlock_suspend_mx(this_th);
5950
5952
5953#ifdef KMP_ADJUST_BLOCKTIME
5954 /* Adjust blocktime back to user setting or default if necessary */
5955 /* Middle initialization might never have occurred */
5956 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5958 if (__kmp_nth <= __kmp_avail_proc) {
5959 __kmp_zero_bt = FALSE;
5960 }
5961 }
5962#endif /* KMP_ADJUST_BLOCKTIME */
5963
5964 KMP_MB();
5965}
5966
5967/* ------------------------------------------------------------------------ */
5968
5970#if OMP_PROFILING_SUPPORT
5971 ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
5972 // TODO: add a configuration option for time granularity
5973 if (ProfileTraceFile)
5974 llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
5975#endif
5976
5977 int gtid = this_thr->th.th_info.ds.ds_gtid;
5978 /* void *stack_data;*/
5979 kmp_team_t **volatile pteam;
5980
5981 KMP_MB();
5982 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5983
5985 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
5986 }
5987
5988#if OMPD_SUPPORT
5989 if (ompd_state & OMPD_ENABLE_BP)
5990 ompd_bp_thread_begin();
5991#endif
5992
5993#if OMPT_SUPPORT
5994 ompt_data_t *thread_data = nullptr;
5995 if (ompt_enabled.enabled) {
5996 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5997 *thread_data = ompt_data_none;
5998
5999 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6000 this_thr->th.ompt_thread_info.wait_id = 0;
6001 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6002 this_thr->th.ompt_thread_info.parallel_flags = 0;
6003 if (ompt_enabled.ompt_callback_thread_begin) {
6004 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6005 ompt_thread_worker, thread_data);
6006 }
6007 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6008 }
6009#endif
6010
6011 /* This is the place where threads wait for work */
6012 while (!TCR_4(__kmp_global.g.g_done)) {
6013 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6014 KMP_MB();
6015
6016 /* wait for work to do */
6017 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
6018
6019 /* No tid yet since not part of a team */
6021
6022#if OMPT_SUPPORT
6023 if (ompt_enabled.enabled) {
6024 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6025 }
6026#endif
6027
6028 pteam = &this_thr->th.th_team;
6029
6030 /* have we been allocated? */
6031 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6032 /* we were just woken up, so run our new task */
6033 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6034 int rc;
6035 KA_TRACE(20,
6036 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6037 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6038 (*pteam)->t.t_pkfn));
6039
6040 updateHWFPControl(*pteam);
6041
6042#if OMPT_SUPPORT
6043 if (ompt_enabled.enabled) {
6044 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6045 }
6046#endif
6047
6048 rc = (*pteam)->t.t_invoke(gtid);
6049 KMP_ASSERT(rc);
6050
6051 KMP_MB();
6052 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6053 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6054 (*pteam)->t.t_pkfn));
6055 }
6056#if OMPT_SUPPORT
6057 if (ompt_enabled.enabled) {
6058 /* no frame set while outside task */
6059 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6060
6061 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6062 }
6063#endif
6064 /* join barrier after parallel region */
6065 __kmp_join_barrier(gtid);
6066 }
6067 }
6068
6069#if OMPD_SUPPORT
6070 if (ompd_state & OMPD_ENABLE_BP)
6071 ompd_bp_thread_end();
6072#endif
6073
6074#if OMPT_SUPPORT
6075 if (ompt_enabled.ompt_callback_thread_end) {
6076 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6077 }
6078#endif
6079
6080 this_thr->th.th_task_team = NULL;
6081 /* run the destructors for the threadprivate data for this thread */
6083
6084 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
6085 KMP_MB();
6086
6087#if OMP_PROFILING_SUPPORT
6088 llvm::timeTraceProfilerFinishThread();
6089#endif
6090 return this_thr;
6091}
6092
6093/* ------------------------------------------------------------------------ */
6094
6095void __kmp_internal_end_dest(void *specific_gtid) {
6096 // Make sure no significant bits are lost
6097 int gtid;
6098 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
6099
6100 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6101 /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage;
6102 * this is because 0 is reserved for the nothing-stored case */
6103
6105}
6106
6107#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6108
6109__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6111}
6112
6113#endif
6114
6115/* [Windows] josh: when the atexit handler is called, there may still be more
6116 than one thread alive */
6118 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
6119 /* [Windows]
6120 josh: ideally, we want to completely shutdown the library in this atexit
6121 handler, but stat code that depends on thread specific data for gtid fails
6122 because that data becomes unavailable at some point during the shutdown, so
6123 we call __kmp_internal_end_thread instead. We should eventually remove the
6124 dependency on __kmp_get_specific_gtid in the stat code and use
6125 __kmp_internal_end_library to cleanly shutdown the library.
6126
6127 // TODO: Can some of this comment about GVS be removed?
6128 I suspect that the offending stat code is executed when the calling thread
6129 tries to clean up a dead root thread's data structures, resulting in GVS
6130 code trying to close the GVS structures for that thread, but since the stat
6131 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
6132 the calling thread is cleaning up itself instead of another thread, it gets
6133 confused. This happens because allowing a thread to unregister and clean up
6134 another thread is a recent modification for addressing an issue.
6135 Based on the current design (20050722), a thread may end up
6136 trying to unregister another thread only if thread death does not trigger
6137 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
6138 thread specific data destructor function to detect thread death. For
6139 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
6140 is nothing. Thus, the workaround is applicable only for Windows static
6141 stat library. */
6143#if KMP_OS_WINDOWS
6145#endif
6146}
6147
6148static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6149 // It is assumed __kmp_forkjoin_lock is acquired.
6150
6151 int gtid;
6152
6153 KMP_DEBUG_ASSERT(thread != NULL);
6154
6155 gtid = thread->th.th_info.ds.ds_gtid;
6156
6157 if (!is_root) {
6159 /* Assume the threads are at the fork barrier here */
6160 KA_TRACE(
6161 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
6162 gtid));
6164 while (
6165 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6166 KMP_CPU_PAUSE();
6168 } else {
6169 /* Need release fence here to prevent seg faults for tree forkjoin
6170 barrier (GEH) */
6171 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6172 thread);
6174 }
6175 }
6176
6177 // Terminate OS thread.
6178 __kmp_reap_worker(thread);
6179
6180 // The thread was killed asynchronously. If it was actively
6181 // spinning in the thread pool, decrement the global count.
6182 //
6183 // There is a small timing hole here - if the worker thread was just waking
6184 // up after sleeping in the pool, had reset its th_active_in_pool flag but
6185 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
6186 // the global counter might not get updated.
6187 //
6188 // Currently, this can only happen as the library is unloaded,
6189 // so there are no harmful side effects.
6190 if (thread->th.th_active_in_pool) {
6191 thread->th.th_active_in_pool = FALSE;
6194 }
6195 }
6196
6198
6199// Free the fast memory for tasking
6200#if USE_FAST_MEMORY
6201 __kmp_free_fast_memory(thread);
6202#endif /* USE_FAST_MEMORY */
6203
6205
6206 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6207 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6208
6209 --__kmp_all_nth;
6210 // __kmp_nth was decremented when the thread was added to the pool.
6211
6212#ifdef KMP_ADJUST_BLOCKTIME
6213 /* Adjust blocktime back to user setting or default if necessary */
6214 /* Middle initialization might never have occurred */
6215 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6217 if (__kmp_nth <= __kmp_avail_proc) {
6218 __kmp_zero_bt = FALSE;
6219 }
6220 }
6221#endif /* KMP_ADJUST_BLOCKTIME */
6222
6223 /* free the memory being used */
6225 if (thread->th.th_cons) {
6226 __kmp_free_cons_stack(thread->th.th_cons);
6227 thread->th.th_cons = NULL;
6228 }
6229 }
6230
6231 if (thread->th.th_pri_common != NULL) {
6232 __kmp_free(thread->th.th_pri_common);
6233 thread->th.th_pri_common = NULL;
6234 }
6235
6236#if KMP_USE_BGET
6237 if (thread->th.th_local.bget_data != NULL) {
6238 __kmp_finalize_bget(thread);
6239 }
6240#endif
6241
6242#if KMP_AFFINITY_SUPPORTED
6243 if (thread->th.th_affin_mask != NULL) {
6244 KMP_CPU_FREE(thread->th.th_affin_mask);
6245 thread->th.th_affin_mask = NULL;
6246 }
6247#endif /* KMP_AFFINITY_SUPPORTED */
6248
6249#if KMP_USE_HIER_SCHED
6250 if (thread->th.th_hier_bar_data != NULL) {
6251 __kmp_free(thread->th.th_hier_bar_data);
6252 thread->th.th_hier_bar_data = NULL;
6253 }
6254#endif
6255
6256 __kmp_reap_team(thread->th.th_serial_team);
6257 thread->th.th_serial_team = NULL;
6258 __kmp_free(thread);
6259
6260 KMP_MB();
6261
6262} // __kmp_reap_thread
6263
6265#if USE_ITT_NOTIFY
6266 if (__kmp_itt_region_domains.count > 0) {
6267 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6268 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6269 while (bucket) {
6270 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6271 __kmp_thread_free(th, bucket);
6272 bucket = next;
6273 }
6274 }
6275 }
6276 if (__kmp_itt_barrier_domains.count > 0) {
6277 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6278 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6279 while (bucket) {
6280 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6281 __kmp_thread_free(th, bucket);
6282 bucket = next;
6283 }
6284 }
6285 }
6286#endif
6287}
6288
6289static void __kmp_internal_end(void) {
6290 int i;
6291
6292 /* First, unregister the library */
6294
6295#if KMP_OS_WINDOWS
6296 /* In Win static library, we can't tell when a root actually dies, so we
6297 reclaim the data structures for any root threads that have died but not
6298 unregistered themselves, in order to shut down cleanly.
6299 In Win dynamic library we also can't tell when a thread dies. */
6300 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
6301// dead roots
6302#endif
6303
6304 for (i = 0; i < __kmp_threads_capacity; i++)
6305 if (__kmp_root[i])
6306 if (__kmp_root[i]->r.r_active)
6307 break;
6308 KMP_MB(); /* Flush all pending memory write invalidates. */
6309 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6310
6311 if (i < __kmp_threads_capacity) {
6312#if KMP_USE_MONITOR
6313 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
6314 KMP_MB(); /* Flush all pending memory write invalidates. */
6315
6316 // Need to check that monitor was initialized before reaping it. If we are
6317 // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
6318 // __kmp_monitor will appear to contain valid data, but it is only valid in
6319 // the parent process, not the child.
6320 // New behavior (201008): instead of keying off of the flag
6321 // __kmp_init_parallel, the monitor thread creation is keyed off
6322 // of the new flag __kmp_init_monitor.
6323 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6324 if (TCR_4(__kmp_init_monitor)) {
6326 TCW_4(__kmp_init_monitor, 0);
6327 }
6328 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6329 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6330#endif // KMP_USE_MONITOR
6331 } else {
6332/* TODO move this to cleanup code */
6333#ifdef KMP_DEBUG
6334 /* make sure that everything has properly ended */
6335 for (i = 0; i < __kmp_threads_capacity; i++) {
6336 if (__kmp_root[i]) {
6337 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
6338 // there can be uber threads alive here
6339 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
6340 }
6341 }
6342#endif
6343
6344 KMP_MB();
6345
6346 // Reap the worker threads.
6347 // This is valid for now, but be careful if threads are reaped sooner.
6348 while (__kmp_thread_pool != NULL) { // Loop thru all the threads in the pool.
6349 // Get the next thread from the pool.
6351 __kmp_thread_pool = thread->th.th_next_pool;
6352 // Reap it.
6353 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6354 thread->th.th_next_pool = NULL;
6355 thread->th.th_in_pool = FALSE;
6356 __kmp_reap_thread(thread, 0);
6357 }
6359
6360 // Reap teams.
6361 while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
6362 // Get the next team from the pool.
6364 __kmp_team_pool = team->t.t_next_pool;
6365 // Reap it.
6366 team->t.t_next_pool = NULL;
6367 __kmp_reap_team(team);
6368 }
6369
6371
6372#if KMP_OS_UNIX
6373 // Threads that are not reaped should not access any resources since they
6374 // are going to be deallocated soon, so the shutdown sequence should wait
6375 // until all threads either exit the final spin-waiting loop or begin
6376 // sleeping after the given blocktime.
6377 for (i = 0; i < __kmp_threads_capacity; i++) {
6378 kmp_info_t *thr = __kmp_threads[i];
6379 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6380 KMP_CPU_PAUSE();
6381 }
6382#endif
6383
6384 for (i = 0; i < __kmp_threads_capacity; ++i) {
6385 // TBD: Add some checking...
6386 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
6387 }
6388
6389 /* Make sure all threadprivate destructors get run by joining with all
6390 worker threads before resetting this flag */
6392
6393 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
6394 KMP_MB();
6395
6396#if KMP_USE_MONITOR
6397 // See note above: One of the possible fixes for CQ138434 / CQ140126
6398 //
6399 // FIXME: push both code fragments down and CSE them?
6400 // push them into __kmp_cleanup() ?
6401 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6402 if (TCR_4(__kmp_init_monitor)) {
6404 TCW_4(__kmp_init_monitor, 0);
6405 }
6406 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6407 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6408#endif
6409 } /* else !__kmp_global.t_active */
6411 KMP_MB(); /* Flush all pending memory write invalidates. */
6412
6413 __kmp_cleanup();
6414#if OMPT_SUPPORT
6415 ompt_fini();
6416#endif
6417}
6418
6419void __kmp_internal_end_library(int gtid_req) {
6420 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6421 /* this shouldn't be a race condition because __kmp_internal_end() is the
6422 only place to clear __kmp_serial_init */
6423 /* we'll check this later too, after we get the lock */
6424 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6425 // redundant, because the next check will work in any case.
6426 if (__kmp_global.g.g_abort) {
6427 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
6428 /* TODO abort? */
6429 return;
6430 }
6431 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6432 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
6433 return;
6434 }
6435
6436 // If hidden helper team has been initialized, we need to deinit it
6440 // First release the main thread to let it continue its work
6442 // Wait until the hidden helper team has been destroyed
6444 }
6445
6446 KMP_MB(); /* Flush all pending memory write invalidates. */
6447 /* find out who we are and what we should do */
6448 {
6449 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6450 KA_TRACE(
6451 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6452 if (gtid == KMP_GTID_SHUTDOWN) {
6453 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6454 "already shutdown\n"));
6455 return;
6456 } else if (gtid == KMP_GTID_MONITOR) {
6457 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6458 "registered, or system shutdown\n"));
6459 return;
6460 } else if (gtid == KMP_GTID_DNE) {
6461 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
6462 "shutdown\n"));
6463 /* we don't know who we are, but we may still shutdown the library */
6464 } else if (KMP_UBER_GTID(gtid)) {
6465 /* unregister ourselves as an uber thread. gtid is no longer valid */
6466 if (__kmp_root[gtid]->r.r_active) {
6467 __kmp_global.g.g_abort = -1;
6468 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6470 KA_TRACE(10,
6471 ("__kmp_internal_end_library: root still active, abort T#%d\n",
6472 gtid));
6473 return;
6474 } else {
6476 KA_TRACE(
6477 10,
6478 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6480 }
6481 } else {
6482/* worker threads may call this function through the atexit handler, if they
6483 * call exit() */
6484/* For now, skip the usual subsequent processing and just dump the debug buffer.
6485 TODO: do a thorough shutdown instead */
6486#ifdef DUMP_DEBUG_ON_EXIT
6487 if (__kmp_debug_buf)
6489#endif
6490 // added unregister library call here when we switched to shm on Linux;
6491 // if we don't unregister, lots of files are left behind in /dev/shm.
6492 // Clean up the shared memory file before exiting.
6494 return;
6495 }
6496 }
6497 /* synchronize the termination process */
6499
6500 /* have we already finished */
6501 if (__kmp_global.g.g_abort) {
6502 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6503 /* TODO abort? */
6505 return;
6506 }
6507 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6509 return;
6510 }
6511
6512 /* We need this lock to enforce mutual exclusion between this reading of
6513 __kmp_threads_capacity and the writing by __kmp_register_root.
6514 Alternatively, we can use a counter of roots that is atomically updated by
6515 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6516 __kmp_internal_end_*. */
6518
6519 /* now we can safely conduct the actual termination */
6521
6524
6525 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
6526
6527#ifdef DUMP_DEBUG_ON_EXIT
6528 if (__kmp_debug_buf)
6530#endif
6531
6532#if KMP_OS_WINDOWS
6534#endif
6535
6537
6538} // __kmp_internal_end_library
6539
6540void __kmp_internal_end_thread(int gtid_req) {
6541 int i;
6542
6543 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6544 /* this shouldn't be a race condition because __kmp_internal_end() is the
6545 * only place to clear __kmp_serial_init */
6546 /* we'll check this later too, after we get the lock */
6547 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6548 // redundant, because the next check will work in any case.
6549 if (__kmp_global.g.g_abort) {
6550 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6551 /* TODO abort? */
6552 return;
6553 }
6554 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6555 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6556 return;
6557 }
6558
6559 // If hidden helper team has been initialized, we need to deinit it
6563 // First release the main thread to let it continue its work
6565 // Wait until the hidden helper team has been destroyed
6567 }
6568
6569 KMP_MB(); /* Flush all pending memory write invalidates. */
6570
6571 /* find out who we are and what we should do */
6572 {
6573 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6574 KA_TRACE(10,
6575 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6576 if (gtid == KMP_GTID_SHUTDOWN) {
6577 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6578 "already shutdown\n"));
6579 return;
6580 } else if (gtid == KMP_GTID_MONITOR) {
6581 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6582 "registered, or system shutdown\n"));
6583 return;
6584 } else if (gtid == KMP_GTID_DNE) {
6585 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6586 "shutdown\n"));
6587 return;
6588 /* we don't know who we are */
6589 } else if (KMP_UBER_GTID(gtid)) {
6590 /* unregister ourselves as an uber thread. gtid is no longer valid */
6591 if (__kmp_root[gtid]->r.r_active) {
6592 __kmp_global.g.g_abort = -1;
6593 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6594 KA_TRACE(10,
6595 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6596 gtid));
6597 return;
6598 } else {
6599 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6600 gtid));
6602 }
6603 } else {
6604 /* just a worker thread, let's leave */
6605 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6606
6607 if (gtid >= 0) {
6608 __kmp_threads[gtid]->th.th_task_team = NULL;
6609 }
6610
6611 KA_TRACE(10,
6612 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6613 gtid));
6614 return;
6615 }
6616 }
6617#if KMP_DYNAMIC_LIB
6619 // AC: let's not shut down the dynamic library at the exit of an uber thread,
6620 // because it is better to shut down later in the library destructor.
6621 {
6622 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6623 return;
6624 }
6625#endif
6626 /* synchronize the termination process */
6628
6629 /* have we already finished */
6630 if (__kmp_global.g.g_abort) {
6631 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6632 /* TODO abort? */
6634 return;
6635 }
6636 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6638 return;
6639 }
6640
6641 /* We need this lock to enforce mutual exclusion between this reading of
6642 __kmp_threads_capacity and the writing by __kmp_register_root.
6643 Alternatively, we can use a counter of roots that is atomically updated by
6644 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6645 __kmp_internal_end_*. */
6646
6647 /* should we finish the run-time? are all siblings done? */
6649
6650 for (i = 0; i < __kmp_threads_capacity; ++i) {
6651 if (KMP_UBER_GTID(i)) {
6652 KA_TRACE(
6653 10,
6654 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6657 return;
6658 }
6659 }
6660
6661 /* now we can safely conduct the actual termination */
6662
6664
6667
6668 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6669
6670#ifdef DUMP_DEBUG_ON_EXIT
6671 if (__kmp_debug_buf)
6673#endif
6674} // __kmp_internal_end_thread
6675
6676// -----------------------------------------------------------------------------
6677// Library registration stuff.
6678
6680// Random value used to indicate library initialization.
6681static char *__kmp_registration_str = NULL;
6682// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6683
6684static inline char *__kmp_reg_status_name() {
6685/* On RHEL 3u5, if linked statically, getpid() returns different values in
6686 each thread. If registration and unregistration happen in different threads
6687 (omp_misc_other_root_exit.cpp test case), the registered_lib_env env var
6688 cannot be found, because its name will contain a different pid. */
6689// macOS* complains about name being too long with additional getuid()
6690#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6691 return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
6692 (int)getuid());
6693#else
6694 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6695#endif
6696} // __kmp_reg_status_name
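// For example, with a dynamically linked library on Linux the name produced
// above looks like "__KMP_REGISTERED_LIB_12345_1000" (pid 12345, uid 1000 --
// values here are illustrative only); on other platforms only the pid is
// appended.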
6697
6698#if defined(KMP_USE_SHM)
6699bool __kmp_shm_available = false;
6700bool __kmp_tmp_available = false;
6701// If /dev/shm is not accessible, we will create a temporary file under /tmp.
6702char *temp_reg_status_file_name = nullptr;
6703#endif
6704
6706
6707 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6708 int done = 0;
6709 union {
6710 double dtime;
6711 long ltime;
6712 } time;
6713#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6715#endif
6716 __kmp_read_system_time(&time.dtime);
6717 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6720 __kmp_registration_flag, KMP_LIBRARY_FILE);
6721
6722 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6724
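  // Registration protocol (as implemented below): write our registration
  // string into /dev/shm if available, otherwise into a file under /tmp,
  // otherwise into the environment variable itself; then read the value back.
  // If the value matches what we wrote, registration succeeded. If it holds
  // another copy's string, parse it as "<flag address>-<flag value>-<library
  // file>" and decide whether that copy is still alive (its flag address is
  // mapped and still holds the flag value) or dead (stale record: clean it up
  // and retry the loop).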
6725 while (!done) {
6726
6727 char *value = NULL; // Actual value of the environment variable.
6728
6729#if defined(KMP_USE_SHM)
6730 char *shm_name = nullptr;
6731 char *data1 = nullptr;
6732 __kmp_shm_available = __kmp_detect_shm();
6733 if (__kmp_shm_available) {
6734 int fd1 = -1;
6735 shm_name = __kmp_str_format("/%s", name);
6736 int shm_preexist = 0;
6737 fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6738 if ((fd1 == -1) && (errno == EEXIST)) {
6739 // file didn't open because it already exists.
6740 // try opening existing file
6741 fd1 = shm_open(shm_name, O_RDWR, 0600);
6742 if (fd1 == -1) { // file didn't open
6743 KMP_WARNING(FunctionError, "Can't open SHM");
6744 __kmp_shm_available = false;
6745 } else { // able to open existing file
6746 shm_preexist = 1;
6747 }
6748 }
6749 if (__kmp_shm_available && shm_preexist == 0) { // SHM created, set size
6750 if (ftruncate(fd1, SHM_SIZE) == -1) { // error occurred setting size
6751 KMP_WARNING(FunctionError, "Can't set size of SHM");
6752 __kmp_shm_available = false;
6753 }
6754 }
6755 if (__kmp_shm_available) { // SHM exists, now map it
6756 data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6757 fd1, 0);
6758 if (data1 == MAP_FAILED) { // failed to map shared memory
6759 KMP_WARNING(FunctionError, "Can't map SHM");
6760 __kmp_shm_available = false;
6761 }
6762 }
6763 if (__kmp_shm_available) { // SHM mapped
6764 if (shm_preexist == 0) { // set data to SHM, set value
6765 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6766 }
6767 // Read value from either what we just wrote or existing file.
6768 value = __kmp_str_format("%s", data1); // read value from SHM
6769 munmap(data1, SHM_SIZE);
6770 }
6771 if (fd1 != -1)
6772 close(fd1);
6773 }
6774 if (!__kmp_shm_available)
6775 __kmp_tmp_available = __kmp_detect_tmp();
6776 if (!__kmp_shm_available && __kmp_tmp_available) {
6777 // SHM failed to work due to an error other than that the file already
6778 // exists. Try to create a temp file under /tmp.
6779 // If /tmp isn't accessible, fall back to using environment variable.
6780 // TODO: /tmp might not always be the temporary directory. For now we will
6781 // not consider TMPDIR.
6782 int fd1 = -1;
6783 temp_reg_status_file_name = __kmp_str_format("/tmp/%s", name);
6784 int tmp_preexist = 0;
6785 fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6786 if ((fd1 == -1) && (errno == EEXIST)) {
6787 // file didn't open because it already exists.
6788 // try opening existing file
6789 fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
6790 if (fd1 == -1) { // file didn't open
6791 KMP_WARNING(FunctionError, "Can't open TEMP");
6792 __kmp_tmp_available = false;
6793 } else {
6794 tmp_preexist = 1;
6795 }
6796 }
6797 if (__kmp_tmp_available && tmp_preexist == 0) {
6798 // we created the /tmp file; now set its size
6799 if (ftruncate(fd1, SHM_SIZE) == -1) { // error occurred setting size
6800 KMP_WARNING(FunctionError, "Can't set size of /tmp file");
6801 __kmp_tmp_available = false;
6802 }
6803 }
6804 if (__kmp_tmp_available) {
6805 data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6806 fd1, 0);
6807 if (data1 == MAP_FAILED) { // failed to map /tmp
6808 KMP_WARNING(FunctionError, "Can't map /tmp");
6809 __kmp_tmp_available = false;
6810 }
6811 }
6812 if (__kmp_tmp_available) {
6813 if (tmp_preexist == 0) { // set data to TMP, set value
6814 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6815 }
6816 // Read value from either what we just wrote or existing file.
6817 value = __kmp_str_format("%s", data1); // read value from temp file
6818 munmap(data1, SHM_SIZE);
6819 }
6820 if (fd1 != -1)
6821 close(fd1);
6822 }
6823 if (!__kmp_shm_available && !__kmp_tmp_available) {
6824 // no /dev/shm and no /tmp -- fall back to environment variable
6825 // Set environment variable, but do not overwrite if it exists.
6827 // read value to see if it got set
6829 }
6830#else // Windows and unix with static library
6831 // Set environment variable, but do not overwrite if it exists.
6833 // read value to see if it got set
6835#endif
6836
6837 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6838 done = 1; // Ok, environment variable set successfully, exit the loop.
6839 } else {
6840 // Oops. Write failed. Another copy of the OpenMP RTL is in memory.
6841 // Check whether it is alive or dead.
6842 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6843 char *tail = value;
6844 char *flag_addr_str = NULL;
6845 char *flag_val_str = NULL;
6846 char const *file_name = NULL;
6847 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6848 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6849 file_name = tail;
6850 if (tail != NULL) {
6851 unsigned long *flag_addr = 0;
6852 unsigned long flag_val = 0;
6853 KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
6854 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6855 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6856 // First, check whether environment-encoded address is mapped into
6857 // addr space.
6858 // If so, dereference it to see if it still has the right value.
6859 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6860 neighbor = 1;
6861 } else {
6862 // If not, then we know the other copy of the library is no longer
6863 // running.
6864 neighbor = 2;
6865 }
6866 }
6867 }
6868 switch (neighbor) {
6869 case 0: // Cannot parse environment variable -- neighbor status unknown.
6870 // Assume it is the incompatible format of a future version of the
6871 // library, and assume the other library is alive.
6872 // WARN( ... ); // TODO: Issue a warning.
6873 file_name = "unknown library";
6875 // Attention! Falling through to the next case. That's intentional.
6876 case 1: { // Neighbor is alive.
6877 // Check it is allowed.
6878 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6879 if (!__kmp_str_match_true(duplicate_ok)) {
6880 // That's not allowed. Issue fatal error.
6881 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6882 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6883 }
6884 KMP_INTERNAL_FREE(duplicate_ok);
6886 done = 1; // Exit the loop.
6887 } break;
6888 case 2: { // Neighbor is dead.
6889
6890#if defined(KMP_USE_SHM)
6891 if (__kmp_shm_available) { // close shared memory.
6892 shm_unlink(shm_name); // this removes file in /dev/shm
6893 } else if (__kmp_tmp_available) {
6894 unlink(temp_reg_status_file_name); // this removes the temp file
6895 } else {
6896 // Clear the variable and try to register library again.
6898 }
6899#else
6900 // Clear the variable and try to register library again.
6902#endif
6903 } break;
6904 default: {
6906 } break;
6907 }
6908 }
6909 KMP_INTERNAL_FREE((void *)value);
6910#if defined(KMP_USE_SHM)
6911 if (shm_name)
6912 KMP_INTERNAL_FREE((void *)shm_name);
6913#endif
6914 } // while
6915 KMP_INTERNAL_FREE((void *)name);
6916
6917} // func __kmp_register_library_startup
6918
6920
6921 char *name = __kmp_reg_status_name();
6922 char *value = NULL;
6923
6924#if defined(KMP_USE_SHM)
6925 char *shm_name = nullptr;
6926 int fd1;
6927 if (__kmp_shm_available) {
6928 shm_name = __kmp_str_format("/%s", name);
6929 fd1 = shm_open(shm_name, O_RDONLY, 0600);
6930 if (fd1 != -1) { // File opened successfully
6931 char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6932 if (data1 != MAP_FAILED) {
6933 value = __kmp_str_format("%s", data1); // read value from SHM
6934 munmap(data1, SHM_SIZE);
6935 }
6936 close(fd1);
6937 }
6938 } else if (__kmp_tmp_available) { // try /tmp
6939 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6940 if (fd1 != -1) { // File opened successfully
6941 char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6942 if (data1 != MAP_FAILED) {
6943 value = __kmp_str_format("%s", data1); // read value from /tmp
6944 munmap(data1, SHM_SIZE);
6945 }
6946 close(fd1);
6947 }
6948 } else { // fall back to environment variable
6950 }
6951#else
6953#endif
6954
6957 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6958// Ok, this is our variable. Delete it.
6959#if defined(KMP_USE_SHM)
6960 if (__kmp_shm_available) {
6961 shm_unlink(shm_name); // this removes file in /dev/shm
6962 } else if (__kmp_tmp_available) {
6963 unlink(temp_reg_status_file_name); // this removes the temp file
6964 } else {
6966 }
6967#else
6969#endif
6970 }
6971
6972#if defined(KMP_USE_SHM)
6973 if (shm_name)
6974 KMP_INTERNAL_FREE(shm_name);
6975 if (temp_reg_status_file_name)
6976 KMP_INTERNAL_FREE(temp_reg_status_file_name);
6977#endif
6978
6982
6985
6986} // __kmp_unregister_library
6987
6988// End of Library registration stuff.
6989// -----------------------------------------------------------------------------
6990
6991#if KMP_MIC_SUPPORTED
6992
6993static void __kmp_check_mic_type() {
6994 kmp_cpuid_t cpuid_state = {0};
6995 kmp_cpuid_t *cs_p = &cpuid_state;
6996 __kmp_x86_cpuid(1, 0, cs_p);
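  // CPUID leaf 1 returns the family/model/stepping signature in EAX
  // (stepping[3:0], model[7:4], family[11:8], extended model[19:16]).
  // The masks below match the KNC signature (family 0x0B, model 1) and the
  // KNL signature (family 6, model 0x57 including the extended model bits).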
6997 // We don't support mic1 at the moment
6998 if ((cs_p->eax & 0xff0) == 0xB10) {
6999 __kmp_mic_type = mic2;
7000 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
7001 __kmp_mic_type = mic3;
7002 } else {
7003 __kmp_mic_type = non_mic;
7004 }
7005}
7006
7007#endif /* KMP_MIC_SUPPORTED */
7008
7009#if KMP_HAVE_UMWAIT
7010static void __kmp_user_level_mwait_init() {
7011 struct kmp_cpuid buf;
7012 __kmp_x86_cpuid(7, 0, &buf);
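  // CPUID leaf 7 (sub-leaf 0) reports the WAITPKG feature -- the
  // UMONITOR/UMWAIT/TPAUSE instructions -- in ECX bit 5.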
7013 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
7014 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
7015 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
7016 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
7017 __kmp_umwait_enabled));
7018}
7019#elif KMP_HAVE_MWAIT
7020#ifndef AT_INTELPHIUSERMWAIT
7021// Placeholder, non-existent value that should always fail to return anything.
7022// It will be replaced with the correct value once that value is known.
7023#define AT_INTELPHIUSERMWAIT 10000
7024#endif
7025// The getauxval() function is available in RHEL7 and SLES12. If the RTL is
7026// built on a system with an earlier OS, we'll use the following internal
7027// function when the entry is not found.
7028unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
7029unsigned long getauxval(unsigned long) { return 0; }
7030
7031static void __kmp_user_level_mwait_init() {
7032 // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
7033 // use them to find if the user-level mwait is enabled. Otherwise, forcibly
7034 // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
7035 // KMP_USER_LEVEL_MWAIT was set to TRUE.
7036 if (__kmp_mic_type == mic3) {
7037 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7038 if ((res & 0x1) || __kmp_user_level_mwait) {
7039 __kmp_mwait_enabled = TRUE;
7040 if (__kmp_user_level_mwait) {
7041 KMP_INFORM(EnvMwaitWarn);
7042 }
7043 } else {
7044 __kmp_mwait_enabled = FALSE;
7045 }
7046 }
7047 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7048 "__kmp_mwait_enabled = %d\n",
7049 __kmp_mic_type, __kmp_mwait_enabled));
7050}
7051#endif /* KMP_HAVE_UMWAIT */
7052
7054 int i, gtid;
7055 size_t size;
7056
7057 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
7058
7059 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
7060 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
7061 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
7062 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
7063 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7064
7065#if OMPT_SUPPORT
7066 ompt_pre_init();
7067#endif
7068#if OMPD_SUPPORT
7069 __kmp_env_dump();
7070 ompd_init();
7071#endif
7072
7074
7075#if ENABLE_LIBOMPTARGET
7076 /* Initialize functions from libomptarget */
7077 __kmp_init_omptarget();
7078#endif
7079
7080 /* Initialize internal memory allocator */
7082
7083 /* Register the library startup via an environment variable or via a mapped
7084 shared memory file and check whether another copy of the library is
7085 already registered. Since a forked child process is often terminated, we
7086 postpone the registration until middle initialization in the child. */
7089
7090 /* TODO reinitialization of library */
7091 if (TCR_4(__kmp_global.g.g_done)) {
7092 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
7093 }
7094
7095 __kmp_global.g.g_abort = 0;
7096 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7097
7098/* initialize the locks */
7099#if KMP_USE_ADAPTIVE_LOCKS
7100#if KMP_DEBUG_ADAPTIVE_LOCKS
7101 __kmp_init_speculative_stats();
7102#endif
7103#endif
7104#if KMP_STATS_ENABLED
7106#endif
7123#if KMP_USE_MONITOR
7124 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7125#endif
7127
7128 /* conduct initialization and initial setup of configuration */
7129
7131
7132#if KMP_MIC_SUPPORTED
7133 __kmp_check_mic_type();
7134#endif
7135#if ENABLE_LIBOMPTARGET
7136 __kmp_target_init();
7137#endif /* ENABLE_LIBOMPTARGET */
7138
7139// Some global variable initialization moved here from kmp_env_initialize()
7140#ifdef KMP_DEBUG
7141 kmp_diag = 0;
7142#endif
7144
7145 // From __kmp_init_dflt_team_nth()
7146 /* assume the entire machine will be used */
7150 }
7153 }
7156 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
7159 }
7160
7161 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
7162 // part
7164#if KMP_USE_MONITOR
7165 __kmp_monitor_wakeups =
7166 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7167 __kmp_bt_intervals =
7168 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7169#endif
7170 // From "KMP_LIBRARY" part of __kmp_env_initialize()
7172 // From KMP_SCHEDULE initialization
7174// AC: do not use analytical here, because it is non-monotonous
7175//__kmp_guided = kmp_sch_guided_iterative_chunked;
7176//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
7177// need to repeat assignment
7178// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
7179// bit control and barrier method control parts
7180#if KMP_FAST_REDUCTION_BARRIER
7181#define kmp_reduction_barrier_gather_bb ((int)1)
7182#define kmp_reduction_barrier_release_bb ((int)1)
7183#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7184#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7185#endif // KMP_FAST_REDUCTION_BARRIER
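// The gather/release "branch bits" select the fan-in/fan-out of the tree and
// hyper barrier algorithms: a branch-bits value of b corresponds to a
// branching factor of roughly 2^b, so the value 1 chosen for reduction
// barriers above yields a binary tree.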
7186 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7191#if KMP_FAST_REDUCTION_BARRIER
7192 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
7193 // lin_64 ): hyper,1
7194 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7195 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7196 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7197 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7198 }
7199#endif // KMP_FAST_REDUCTION_BARRIER
7200 }
7201#if KMP_FAST_REDUCTION_BARRIER
7202#undef kmp_reduction_barrier_release_pat
7203#undef kmp_reduction_barrier_gather_pat
7204#undef kmp_reduction_barrier_release_bb
7205#undef kmp_reduction_barrier_gather_bb
7206#endif // KMP_FAST_REDUCTION_BARRIER
7207#if KMP_MIC_SUPPORTED
7208 if (__kmp_mic_type == mic2) { // KNC
7209 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
7212 1; // forkjoin release
7215 }
7216#if KMP_FAST_REDUCTION_BARRIER
7217 if (__kmp_mic_type == mic2) { // KNC
7220 }
7221#endif // KMP_FAST_REDUCTION_BARRIER
7222#endif // KMP_MIC_SUPPORTED
7223
7224// From KMP_CHECKS initialization
7225#ifdef KMP_DEBUG
7226 __kmp_env_checks = TRUE; /* development versions have the extra checks */
7227#else
7228 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
7229#endif
7230
7231 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
7233
7234 __kmp_global.g.g_dynamic = FALSE;
7235 __kmp_global.g.g_dynamic_mode = dynamic_default;
7236
7238
7240
7241#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7242 __kmp_user_level_mwait_init();
7243#endif
7244// Print all messages in message catalog for testing purposes.
7245#ifdef KMP_DEBUG
7246 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
7248 kmp_str_buf_t buffer;
7249 __kmp_str_buf_init(&buffer);
7250 __kmp_i18n_dump_catalog(&buffer);
7251 __kmp_printf("%s", buffer.str);
7252 __kmp_str_buf_free(&buffer);
7253 }
7255#endif
7256
7259 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
7262
7263 // If the library is shut down properly, both pools must be NULL. Just in
7264 // case, set them to NULL -- some memory may leak, but subsequent code will
7265 // work even if pools are not freed.
7269 __kmp_thread_pool = NULL;
7271 __kmp_team_pool = NULL;
7272
7273 /* Allocate all of the variable sized records */
7274 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
7275 * expandable */
7276 /* Since allocation is cache-aligned, just add extra padding at the end */
7277 size =
7278 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
7279 CACHE_LINE;
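  /* __kmp_threads and __kmp_root are carved out of this single cache-aligned
     block (the root pointers follow the thread pointers), which is why
     __kmp_cleanup later frees only __kmp_threads and not __kmp_root. */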
7281 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
7283
7284 /* init thread counts */
7286 0); // Asserts fail if the library is reinitializing and
7287 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
7288 __kmp_all_nth = 0;
7289 __kmp_nth = 0;
7290
7291 /* setup the uber master thread and hierarchy */
7292 gtid = __kmp_register_root(TRUE);
7293 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
7296
7297 KMP_MB(); /* Flush all pending memory write invalidates. */
7298
7300
7301#if KMP_OS_UNIX
7302 /* invoke the child fork handler */
7304#endif
7305
7306#if !KMP_DYNAMIC_LIB || \
7307 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7308 {
7309 /* Invoke the exit handler when the program finishes, only for static
7310 library and macOS* dynamic. For other dynamic libraries, we already
7311 have _fini and DllMain. */
7312 int rc = atexit(__kmp_internal_end_atexit);
7313 if (rc != 0) {
7314 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
7316 }
7317 }
7318#endif
7319
7320#if KMP_HANDLE_SIGNALS
7321#if KMP_OS_UNIX
7322 /* NOTE: make sure that this is called before the user installs their own
7323 signal handlers so that the user handlers are called first. This way they
7324 can return false, not call our handler, avoid terminating the library, and
7325 continue execution where they left off. */
7326 __kmp_install_signals(FALSE);
7327#endif /* KMP_OS_UNIX */
7328#if KMP_OS_WINDOWS
7329 __kmp_install_signals(TRUE);
7330#endif /* KMP_OS_WINDOWS */
7331#endif
7332
7333 /* we have finished the serial initialization */
7335
7337
7338 if (__kmp_version) {
7340 }
7341
7342 if (__kmp_settings) {
7344 }
7345
7348 }
7349
7350#if OMPT_SUPPORT
7352#endif
7353
7354 KMP_MB();
7355
7356 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
7357}
7358
7371
7373 int i, j;
7374 int prev_dflt_team_nth;
7375
7376 if (!__kmp_init_serial) {
7378 }
7379
7380 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
7381
7383 // We are in a forked child process. The registration was skipped during
7384 // serial initialization in __kmp_atfork_child handler. Do it here.
7386 }
7387
7388 // Save the previous value for the __kmp_dflt_team_nth so that
7389 // we can avoid some reinitialization if it hasn't changed.
7390 prev_dflt_team_nth = __kmp_dflt_team_nth;
7391
7392#if KMP_AFFINITY_SUPPORTED
7393 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
7394 // number of cores on the machine.
7395 __kmp_affinity_initialize(__kmp_affinity);
7396
7397#endif /* KMP_AFFINITY_SUPPORTED */
7398
7400 if (__kmp_avail_proc == 0) {
7402 }
7403
7404 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
7405 // correct them now
7406 j = 0;
7407 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7410 j++;
7411 }
7412
7413 if (__kmp_dflt_team_nth == 0) {
7414#ifdef KMP_DFLT_NTH_CORES
7415 // Default #threads = #cores
7417 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7418 "__kmp_ncores (%d)\n",
7420#else
7421 // Default #threads = #available OS procs
7423 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7424 "__kmp_avail_proc(%d)\n",
7426#endif /* KMP_DFLT_NTH_CORES */
7427 }
7428
7431 }
7434 }
7435
7436 if (__kmp_nesting_mode > 0)
7438
7439 // There's no harm in continuing if the following check fails,
7440 // but it indicates an error in the previous logic.
7442
7443 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7444 // Run through the __kmp_threads array and set the num threads icv for each
7445 // root thread that is currently registered with the RTL (which has not
7446 // already explicitly set its nthreads-var with a call to
7447 // omp_set_num_threads()).
7448 for (i = 0; i < __kmp_threads_capacity; i++) {
7449 kmp_info_t *thread = __kmp_threads[i];
7450 if (thread == NULL)
7451 continue;
7452 if (thread->th.th_current_task->td_icvs.nproc != 0)
7453 continue;
7454
7456 }
7457 }
7458 KA_TRACE(
7459 20,
7460 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7462
7463#ifdef KMP_ADJUST_BLOCKTIME
7464 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
7465 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7468 __kmp_zero_bt = TRUE;
7469 }
7470 }
7471#endif /* KMP_ADJUST_BLOCKTIME */
7472
7473 /* we have finished middle initialization */
7475
7476 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
7477}
7478
7491
7493 int gtid = __kmp_entry_gtid(); // this might be a new root
7494
7495 /* synchronize parallel initialization (for sibling) */
7497 return;
7501 return;
7502 }
7503
7504 /* TODO reinitialization after we have already shut down */
7505 if (TCR_4(__kmp_global.g.g_done)) {
7506 KA_TRACE(
7507 10,
7508 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7510 }
7511
7512 /* jc: The lock __kmp_initz_lock is already held, so calling
7513 __kmp_serial_initialize would cause a deadlock. So we call
7514 __kmp_do_serial_initialize directly. */
7515 if (!__kmp_init_middle) {
7517 }
7520
7521 /* begin initialization */
7522 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
7524
7525#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7526 // Save the FP control regs.
7527 // Worker threads will set theirs to these values at thread startup.
7528 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7529 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7530 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7531#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
7532
7533#if KMP_OS_UNIX
7534#if KMP_HANDLE_SIGNALS
7535 /* must be after __kmp_serial_initialize */
7536 __kmp_install_signals(TRUE);
7537#endif
7538#endif
7539
7541
7542#if defined(USE_LOAD_BALANCE)
7543 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7544 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7545 }
7546#else
7547 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7548 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7549 }
7550#endif
7551
7552 if (__kmp_version) {
7554 }
7555
7556 /* we have finished parallel initialization */
7558
7559 KMP_MB();
7560 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
7561
7563}
7564
7567 return;
7568
7569 // __kmp_parallel_initialize is required before we initialize hidden helper threads.
7572
7573 // Double check. Note that this double check should not be placed before
7574 // __kmp_parallel_initialize as it will cause a deadlock.
7578 return;
7579 }
7580
7581#if KMP_AFFINITY_SUPPORTED
7582 // Initialize hidden helper affinity settings.
7583 // The above __kmp_parallel_initialize() will initialize
7584 // regular affinity (and topology) if not already done.
7585 if (!__kmp_hh_affinity.flags.initialized)
7586 __kmp_affinity_initialize(__kmp_hh_affinity);
7587#endif
7588
7589 // Set the count of hidden helper tasks to be executed to zero
7591
7592 // Set the global variable indicating that we're initializing hidden helper
7593 // team/threads
7595
7596 // Platform independent initialization
7598
7599 // Wait here for the finish of initialization of hidden helper teams
7601
7602 // We have finished hidden helper initialization
7604
7606}
7607
7608/* ------------------------------------------------------------------------ */
7609
7610void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7611 kmp_team_t *team) {
7612 kmp_disp_t *dispatch;
7613
7614 KMP_MB();
7615
7616 /* None of the threads have encountered any constructs yet. */
7617 this_thr->th.th_local.this_construct = 0;
7618#if KMP_CACHE_MANAGE
7619 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7620#endif /* KMP_CACHE_MANAGE */
7621 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7622 KMP_DEBUG_ASSERT(dispatch);
7623 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7624 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
7625 // this_thr->th.th_info.ds.ds_tid ] );
7626
7627 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
7628 dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
7630 __kmp_push_parallel(gtid, team->t.t_ident);
7631
7632 KMP_MB(); /* Flush all pending memory write invalidates. */
7633}
7634
7635void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7636 kmp_team_t *team) {
7638 __kmp_pop_parallel(gtid, team->t.t_ident);
7639
7641}
7642
7644 int rc;
7645 int tid = __kmp_tid_from_gtid(gtid);
7646 kmp_info_t *this_thr = __kmp_threads[gtid];
7647 kmp_team_t *team = this_thr->th.th_team;
7648
7649 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7650#if USE_ITT_BUILD
7651 if (__itt_stack_caller_create_ptr) {
7652 // inform ittnotify about entering user's code
7653 if (team->t.t_stack_id != NULL) {
7654 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7655 } else {
7656 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7657 __kmp_itt_stack_callee_enter(
7658 (__itt_caller)team->t.t_parent->t.t_stack_id);
7659 }
7660 }
7661#endif /* USE_ITT_BUILD */
7662#if INCLUDE_SSC_MARKS
7663 SSC_MARK_INVOKING();
7664#endif
7665
7666#if OMPT_SUPPORT
7667 void *dummy;
7668 void **exit_frame_p;
7669 ompt_data_t *my_task_data;
7670 ompt_data_t *my_parallel_data;
7671 int ompt_team_size;
7672
7673 if (ompt_enabled.enabled) {
7674 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7675 .ompt_task_info.frame.exit_frame.ptr);
7676 } else {
7677 exit_frame_p = &dummy;
7678 }
7679
7680 my_task_data =
7681 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7682 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7683 if (ompt_enabled.ompt_callback_implicit_task) {
7684 ompt_team_size = team->t.t_nproc;
7685 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7686 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7687 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7688 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7689 }
7690#endif
7691
7692#if KMP_STATS_ENABLED
7693 stats_state_e previous_state = KMP_GET_THREAD_STATE();
7694 if (previous_state == stats_state_e::TEAMS_REGION) {
7695 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7696 } else {
7697 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7698 }
7699 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7700#endif
7701
7702 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7703 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7704#if OMPT_SUPPORT
7705 ,
7706 exit_frame_p
7707#endif
7708 );
7709#if OMPT_SUPPORT
7710 *exit_frame_p = NULL;
7711 this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;
7712#endif
7713
7714#if KMP_STATS_ENABLED
7715 if (previous_state == stats_state_e::TEAMS_REGION) {
7716 KMP_SET_THREAD_STATE(previous_state);
7717 }
7719#endif
7720
7721#if USE_ITT_BUILD
7722 if (__itt_stack_caller_create_ptr) {
7723 // inform ittnotify about leaving user's code
7724 if (team->t.t_stack_id != NULL) {
7725 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7726 } else {
7727 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7728 __kmp_itt_stack_callee_leave(
7729 (__itt_caller)team->t.t_parent->t.t_stack_id);
7730 }
7731 }
7732#endif /* USE_ITT_BUILD */
7733 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7734
7735 return rc;
7736}
7737
7738void __kmp_teams_master(int gtid) {
7739 // This routine is called by all primary threads in a teams construct
7740 kmp_info_t *thr = __kmp_threads[gtid];
7741 kmp_team_t *team = thr->th.th_team;
7742 ident_t *loc = team->t.t_ident;
7743 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7744 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7745 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7746 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7747 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7748
7749 // This thread is a new CG root. Set up the proper variables.
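 // (A CG, or contention group, is roughly the set of threads that shares a
 // thread limit; making each teams primary a new CG root means thread_limit
 // applies per team rather than to the whole league.)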
7751 tmp->cg_root = thr; // Make thr the CG root
7752 // Init to thread limit stored when league primary threads were forked
7753 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7754 tmp->cg_nthreads = 1; // Init counter to one active thread, this one
7755 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
7756 " cg_nthreads to 1\n",
7757 thr, tmp));
7758 tmp->up = thr->th.th_cg_roots;
7759 thr->th.th_cg_roots = tmp;
7760
7761// Launch the league of teams now, but do not let the workers execute
7762// (they hang on the fork barrier until the next parallel region)
7763#if INCLUDE_SSC_MARKS
7764 SSC_MARK_FORKING();
7765#endif
7766 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7767 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
7769#if INCLUDE_SSC_MARKS
7770 SSC_MARK_JOINING();
7771#endif
7772 // If the team size was reduced from the limit, set it to the new size
7773 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7774 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7775 // AC: last parameter "1" eliminates join barrier which won't work because
7776 // worker threads are in a fork barrier waiting for more parallel regions
7777 __kmp_join_call(loc, gtid
7778#if OMPT_SUPPORT
7779 ,
7781#endif
7782 ,
7783 1);
7784}
7785
7787 kmp_info_t *this_thr = __kmp_threads[gtid];
7788 kmp_team_t *team = this_thr->th.th_team;
7789#if KMP_DEBUG
7790 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7791 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7792 (void *)__kmp_teams_master);
7793#endif
7794 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7795#if OMPT_SUPPORT
7796 int tid = __kmp_tid_from_gtid(gtid);
7797 ompt_data_t *task_data =
7798 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7799 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7800 if (ompt_enabled.ompt_callback_implicit_task) {
7801 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7802 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7803 ompt_task_initial);
7804 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7805 }
7806#endif
7807 __kmp_teams_master(gtid);
7808#if OMPT_SUPPORT
7809 this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;
7810#endif
7811 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7812 return 1;
7813}
7814
7815/* This sets the requested number of threads for the next parallel region
7816 encountered by this team. Since this should be enclosed in the forkjoin
7817 critical section, it should avoid race conditions with asymmetrical nested
7818 parallelism. */
7819void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7820 kmp_info_t *thr = __kmp_threads[gtid];
7821
7822 if (num_threads > 0)
7823 thr->th.th_set_nproc = num_threads;
7824}
7825
7826void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length,
7827 int *num_threads_list) {
7828 kmp_info_t *thr = __kmp_threads[gtid];
7829
7830 KMP_DEBUG_ASSERT(list_length > 1);
7831
7832 if (num_threads_list[0] > 0)
7833 thr->th.th_set_nproc = num_threads_list[0];
7834 thr->th.th_set_nested_nth =
7835 (int *)KMP_INTERNAL_MALLOC(list_length * sizeof(int));
7836 for (kmp_uint32 i = 0; i < list_length; ++i)
7837 thr->th.th_set_nested_nth[i] = num_threads_list[i];
7838 thr->th.th_set_nested_nth_sz = list_length;
7839}
7840
7842 const char *msg) {
7843 kmp_info_t *thr = __kmp_threads[gtid];
7844 thr->th.th_nt_strict = true;
7845 thr->th.th_nt_loc = loc;
7846 // if sev is unset make fatal
7847 if (sev == severity_warning)
7848 thr->th.th_nt_sev = sev;
7849 else
7850 thr->th.th_nt_sev = severity_fatal;
7851 // if msg is unset, use an appropriate message
7852 if (msg)
7853 thr->th.th_nt_msg = msg;
7854 else
7855 thr->th.th_nt_msg = "Cannot form team with number of threads specified by "
7856 "strict num_threads clause.";
7857}
7858
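// Compute the per-team thread limit for a teams construct: when num_threads
// (the thread_limit clause value) is nonzero it becomes the new thread-limit
// ICV; otherwise fall back to KMP_TEAMS_THREAD_LIMIT or to
// __kmp_avail_proc / num_teams. In either case the result is clamped by
// nthreads-var and __kmp_teams_max_nth and stored in th_teams_size.nth for
// the forthcoming teams region.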
7859static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
7860 int num_threads) {
7861 KMP_DEBUG_ASSERT(thr);
7862 // Remember the number of threads for inner parallel regions
7864 __kmp_middle_initialize(); // get internal globals calculated
7868
7869 if (num_threads == 0) {
7870 if (__kmp_teams_thread_limit > 0) {
7871 num_threads = __kmp_teams_thread_limit;
7872 } else {
7873 num_threads = __kmp_avail_proc / num_teams;
7874 }
7875 // adjust num_threads w/o warning as it is not user setting
7876 // num_threads = min(num_threads, nthreads-var, thread-limit-var)
7877 // no thread_limit clause specified - do not change thread-limit-var ICV
7878 if (num_threads > __kmp_dflt_team_nth) {
7879 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7880 }
7881 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7882 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7883 } // prevent team size from exceeding thread-limit-var
7884 if (num_teams * num_threads > __kmp_teams_max_nth) {
7885 num_threads = __kmp_teams_max_nth / num_teams;
7886 }
7887 if (num_threads == 0) {
7888 num_threads = 1;
7889 }
7890 } else {
7891 if (num_threads < 0) {
7892 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
7894 num_threads = 1;
7895 }
7896 // This thread will be the primary thread of the league primary threads
7897 // Store new thread limit; old limit is saved in th_cg_roots list
7898 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7899 // num_threads = min(num_threads, nthreads-var)
7900 if (num_threads > __kmp_dflt_team_nth) {
7901 num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7902 }
7903 if (num_teams * num_threads > __kmp_teams_max_nth) {
7904 int new_threads = __kmp_teams_max_nth / num_teams;
7905 if (new_threads == 0) {
7906 new_threads = 1;
7907 }
7908 if (new_threads != num_threads) {
7909 if (!__kmp_reserve_warn) { // user asked for too many threads
7910 __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
7912 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7913 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7914 }
7915 }
7916 num_threads = new_threads;
7917 }
7918 }
7919 thr->th.th_teams_size.nth = num_threads;
7920}
7921
7922/* this sets the requested number of teams for the teams region and/or
7923 the number of threads for the next parallel region encountered */
7924void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7925 int num_threads) {
7926 kmp_info_t *thr = __kmp_threads[gtid];
7927 if (num_teams < 0) {
7928 // OpenMP specification requires requested values to be positive,
7929 // but people can send us any value, so we'd better check
7930 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
7932 num_teams = 1;
7933 }
7934 if (num_teams == 0) {
7935 if (__kmp_nteams > 0) {
7936 num_teams = __kmp_nteams;
7937 } else {
7938 num_teams = 1; // default number of teams is 1.
7939 }
7940 }
7941 if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
7942 if (!__kmp_reserve_warn) {
7945 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7946 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7947 }
7948 num_teams = __kmp_teams_max_nth;
7949 }
7950 // Set number of teams (number of threads in the outer "parallel" of the
7951 // teams)
7952 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7953
7954 __kmp_push_thread_limit(thr, num_teams, num_threads);
7955}
7956
7957/* This sets the requested number of teams for the teams region and/or
7958 the number of threads for the next parallel region encountered */
7959void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
7960 int num_teams_ub, int num_threads) {
7961 kmp_info_t *thr = __kmp_threads[gtid];
7962 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
7963 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
7964 KMP_DEBUG_ASSERT(num_threads >= 0);
7965
7966 if (num_teams_lb > num_teams_ub) {
7967 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7969 }
7970
7971 int num_teams = 1; // default number of teams is 1.
7972
7973 if (num_teams_lb == 0 && num_teams_ub > 0)
7974 num_teams_lb = num_teams_ub;
7975
7976 if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
7977 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7978 if (num_teams > __kmp_teams_max_nth) {
7979 if (!__kmp_reserve_warn) {
7982 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7983 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7984 }
7985 num_teams = __kmp_teams_max_nth;
7986 }
7987 } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
7988 num_teams = num_teams_ub;
7989 } else { // num_teams_lb <= num_teams <= num_teams_ub
7990 if (num_threads <= 0) {
7991 if (num_teams_ub > __kmp_teams_max_nth) {
7992 num_teams = num_teams_lb;
7993 } else {
7994 num_teams = num_teams_ub;
7995 }
7996 } else {
7997 num_teams = (num_threads > __kmp_teams_max_nth)
7998 ? num_teams
7999 : __kmp_teams_max_nth / num_threads;
8000 if (num_teams < num_teams_lb) {
8001 num_teams = num_teams_lb;
8002 } else if (num_teams > num_teams_ub) {
8003 num_teams = num_teams_ub;
8004 }
8005 }
8006 }
8007 // Set number of teams (number of threads in the outer "parallel" of the
8008 // teams)
8009 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
8010
8011 __kmp_push_thread_limit(thr, num_teams, num_threads);
8012}
8013
8014// Set the proc_bind var to use in the following parallel region.
8015void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
8016 kmp_info_t *thr = __kmp_threads[gtid];
8017 thr->th.th_set_proc_bind = proc_bind;
8018}
8019
8020/* Launch the worker threads into the microtask. */
8021
8022void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
8023 kmp_info_t *this_thr = __kmp_threads[gtid];
8024
8025#ifdef KMP_DEBUG
8026 int f;
8027#endif /* KMP_DEBUG */
8028
8029 KMP_DEBUG_ASSERT(team);
8030 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8032 KMP_MB(); /* Flush all pending memory write invalidates. */
8033
8034 team->t.t_construct = 0; /* no single directives seen yet */
8035 team->t.t_ordered.dt.t_value =
8036 0; /* thread 0 enters the ordered section first */
8037
8038 /* Reset the identifiers on the dispatch buffer */
8039 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
8040 if (team->t.t_max_nproc > 1) {
8041 int i;
8042 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
8043 team->t.t_disp_buffer[i].buffer_index = i;
8044 team->t.t_disp_buffer[i].doacross_buf_idx = i;
8045 }
8046 } else {
8047 team->t.t_disp_buffer[0].buffer_index = 0;
8048 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
8049 }
8050
8051 KMP_MB(); /* Flush all pending memory write invalidates. */
8052 KMP_ASSERT(this_thr->th.th_team == team);
8053
8054#ifdef KMP_DEBUG
8055 for (f = 0; f < team->t.t_nproc; f++) {
8056 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
8057 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
8058 }
8059#endif /* KMP_DEBUG */
8060
8061 /* release the worker threads so they may begin working */
8062 __kmp_fork_barrier(gtid, 0);
8063}
8064
8065void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
8066 kmp_info_t *this_thr = __kmp_threads[gtid];
8067
8068 KMP_DEBUG_ASSERT(team);
8069 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8071 KMP_MB(); /* Flush all pending memory write invalidates. */
8072
8073 /* Join barrier after fork */
8074
8075#ifdef KMP_DEBUG
8076 if (__kmp_threads[gtid] &&
8077 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8078 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8079 __kmp_threads[gtid]);
8080 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8081 "team->t.t_nproc=%d\n",
8082 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
8083 team->t.t_nproc);
8085 }
8087 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8088#endif /* KMP_DEBUG */
8089
8090 __kmp_join_barrier(gtid); /* wait for everyone */
8091#if OMPT_SUPPORT
8092 ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
8093 if (ompt_enabled.enabled &&
8094 (ompt_state == ompt_state_wait_barrier_teams ||
8095 ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
8096 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8097 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8098 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8099#if OMPT_OPTIONAL
8100 void *codeptr = NULL;
8101 if (KMP_MASTER_TID(ds_tid) &&
8102 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
8103 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8104 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8105
8106 ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
8107 if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
8108 sync_kind = ompt_sync_region_barrier_teams;
8109 if (ompt_enabled.ompt_callback_sync_region_wait) {
8110 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8111 sync_kind, ompt_scope_end, NULL, task_data, codeptr);
8112 }
8113 if (ompt_enabled.ompt_callback_sync_region) {
8114 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8115 sync_kind, ompt_scope_end, NULL, task_data, codeptr);
8116 }
8117#endif
8118 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8119 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8120 ompt_scope_end, NULL, task_data, 0, ds_tid,
8121 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
8122 }
8123 }
8124#endif
8125
8126 KMP_MB(); /* Flush all pending memory write invalidates. */
8127 KMP_ASSERT(this_thr->th.th_team == team);
8128}
8129
8130/* ------------------------------------------------------------------------ */
8131
8132#ifdef USE_LOAD_BALANCE
8133
8134// Return the number of worker threads actively spinning in the hot team, if
8135// we are at the outermost level of parallelism. Otherwise, return 0.
8136static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8137 int i;
8138 int retval;
8139 kmp_team_t *hot_team;
8140
8141 if (root->r.r_active) {
8142 return 0;
8143 }
8144 hot_team = root->r.r_hot_team;
8146 return hot_team->t.t_nproc - 1; // Don't count primary thread
8147 }
8148
8149 // Skip the primary thread - it is accounted for elsewhere.
8150 retval = 0;
8151 for (i = 1; i < hot_team->t.t_nproc; i++) {
8152 if (hot_team->t.t_threads[i]->th.th_active) {
8153 retval++;
8154 }
8155 }
8156 return retval;
8157}
8158
8159// Perform an automatic adjustment to the number of
8160// threads used by the next parallel region.
8161static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8162 int retval;
8163 int pool_active;
8164 int hot_team_active;
8165 int team_curr_active;
8166 int system_active;
8167
8168 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
8169 set_nproc));
8170 KMP_DEBUG_ASSERT(root);
8171 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8172 ->th.th_current_task->td_icvs.dynamic == TRUE);
8173 KMP_DEBUG_ASSERT(set_nproc > 1);
8174
8175 if (set_nproc == 1) {
8176 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
8177 return 1;
8178 }
8179
8180 // Threads that are active in the thread pool, active in the hot team for this
8181 // particular root (if we are at the outer par level), and the currently
8182 // executing thread (to become the primary thread) are available to add to the
8183 // new team, but are currently contributing to the system load, and must be
8184 // accounted for.
8185 pool_active = __kmp_thread_pool_active_nth;
8186 hot_team_active = __kmp_active_hot_team_nproc(root);
8187 team_curr_active = pool_active + hot_team_active + 1;
8188
8189 // Check the system load.
8190 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8191 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
8192 "hot team active = %d\n",
8193 system_active, pool_active, hot_team_active));
8194
8195 if (system_active < 0) {
8196 // There was an error reading the necessary info from /proc, so use the
8197 // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
8198 // = dynamic_thread_limit, we shouldn't wind up getting back here.
8199 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8200 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
8201
8202 // Make this call behave like the thread limit algorithm.
8203 retval = __kmp_avail_proc - __kmp_nth +
8204 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8205 if (retval > set_nproc) {
8206 retval = set_nproc;
8207 }
8208 if (retval < KMP_MIN_NTH) {
8209 retval = KMP_MIN_NTH;
8210 }
8211
8212 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
8213 retval));
8214 return retval;
8215 }
8216
8217 // There is a slight delay in the load balance algorithm in detecting new
8218 // running procs. The real system load at this instant should be at least as
8219 // large as the number of active OMP threads available to add to the team.
8220 if (system_active < team_curr_active) {
8221 system_active = team_curr_active;
8222 }
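  // New team size = procs that appear idle (avail_proc - system_active) plus
  // the threads this root can reuse without adding load (pool + hot team +
  // this thread, i.e. team_curr_active), clamped to [KMP_MIN_NTH, set_nproc]
  // below.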
8223 retval = __kmp_avail_proc - system_active + team_curr_active;
8224 if (retval > set_nproc) {
8225 retval = set_nproc;
8226 }
8227 if (retval < KMP_MIN_NTH) {
8228 retval = KMP_MIN_NTH;
8229 }
8230
8231 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8232 return retval;
8233} // __kmp_load_balance_nproc()
8234
8235#endif /* USE_LOAD_BALANCE */
8236
8237/* ------------------------------------------------------------------------ */
8238
8239/* NOTE: this is called with the __kmp_init_lock held */
8240void __kmp_cleanup(void) {
8241 int f;
8242
8243 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
8244
8246#if KMP_HANDLE_SIGNALS
8247 __kmp_remove_signals();
8248#endif
8250 }
8251
8252 if (TCR_4(__kmp_init_middle)) {
8253#if KMP_AFFINITY_SUPPORTED
8254 __kmp_affinity_uninitialize();
8255#endif /* KMP_AFFINITY_SUPPORTED */
8258 }
8259
8260 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
8261
8262 if (__kmp_init_serial) {
8265 }
8266
8268
8269 for (f = 0; f < __kmp_threads_capacity; f++) {
8270 if (__kmp_root[f] != NULL) {
8272 __kmp_root[f] = NULL;
8273 }
8274 }
8276 // __kmp_threads and __kmp_root were allocated at once, as single block, so
8277 // there is no need in freeing __kmp_root.
8278 __kmp_threads = NULL;
8279 __kmp_root = NULL;
8281
8282 // Free old __kmp_threads arrays if they exist.
8284 while (ptr) {
8285 kmp_old_threads_list_t *next = ptr->next;
8286 __kmp_free(ptr->threads);
8287 __kmp_free(ptr);
8288 ptr = next;
8289 }
8291
8292#if KMP_USE_DYNAMIC_LOCK
8293 __kmp_cleanup_indirect_user_locks();
8294#else
8296#endif
8297#if OMPD_SUPPORT
8298 if (ompd_env_block) {
8299 __kmp_free(ompd_env_block);
8300 ompd_env_block = NULL;
8301 ompd_env_block_size = 0;
8302 }
8303#endif
8304
8305#if KMP_AFFINITY_SUPPORTED
8306 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
8307 __kmp_cpuinfo_file = NULL;
8308#endif /* KMP_AFFINITY_SUPPORTED */
8309
8310#if KMP_USE_ADAPTIVE_LOCKS
8311#if KMP_DEBUG_ADAPTIVE_LOCKS
8312 __kmp_print_speculative_stats();
8313#endif
8314#endif
8316 __kmp_nested_nth.nth = NULL;
8317 __kmp_nested_nth.size = 0;
8318 __kmp_nested_nth.used = 0;
8319
8321 __kmp_nested_proc_bind.bind_types = NULL;
8322 __kmp_nested_proc_bind.size = 0;
8323 __kmp_nested_proc_bind.used = 0;
8328 __kmp_affinity_format = NULL;
8329 }
8330
8332
8335
8336#if KMP_USE_HIER_SCHED
8337 __kmp_hier_scheds.deallocate();
8338#endif
8339
8340#if KMP_STATS_ENABLED
8342#endif
8343
8346
8347 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
8348}
8349
8350/* ------------------------------------------------------------------------ */
8351
8353 char *env;
8354
8355 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
8356 if (__kmp_str_match_false(env))
8357 return FALSE;
8358 }
8359 // By default __kmpc_begin() is no-op.
8360 return TRUE;
8361}
8362
8364 char *env;
8365
8366 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
8367 if (__kmp_str_match_false(env))
8368 return FALSE;
8369 }
8370 // By default __kmpc_end() is no-op.
8371 return TRUE;
8372}
8373
8375 int gtid;
8376 kmp_root_t *root;
8377
8378 /* this is a very important step as it will register new sibling threads
8379 and assign these new uber threads a new gtid */
8380 gtid = __kmp_entry_gtid();
8381 root = __kmp_threads[gtid]->th.th_root;
8383
8384 if (root->r.r_begin)
8385 return;
8386 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8387 if (root->r.r_begin) {
8388 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8389 return;
8390 }
8391
8392 root->r.r_begin = TRUE;
8393
8394 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8395}
8396
8397/* ------------------------------------------------------------------------ */
8398
8400 int gtid;
8401 kmp_root_t *root;
8402 kmp_info_t *thread;
8403
8404 /* first, make sure we are initialized so we can get our gtid */
8405
8406 gtid = __kmp_entry_gtid();
8407 thread = __kmp_threads[gtid];
8408
8409 root = thread->th.th_root;
8410
8411 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8413 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
8414 thread */
8415 KMP_WARNING(SetLibraryIncorrectCall);
8416 return;
8417 }
8418
8419 switch (arg) {
8420 case library_serial:
8421 thread->th.th_set_nproc = 0;
8422 set__nproc(thread, 1);
8423 break;
8424 case library_turnaround:
8425 thread->th.th_set_nproc = 0;
8428 break;
8429 case library_throughput:
8430 thread->th.th_set_nproc = 0;
8433 break;
8434 default:
8435 KMP_FATAL(UnknownLibraryType, arg);
8436 }
8437
8439}
8440
8441void __kmp_aux_set_stacksize(size_t arg) {
8442 if (!__kmp_init_serial)
8444
8445#if KMP_OS_DARWIN
8446 if (arg & (0x1000 - 1)) {
8447 arg &= ~(0x1000 - 1);
8448 if (arg + 0x1000) /* check for overflow if we round up */
8449 arg += 0x1000;
8450 }
8451#endif
8453
8454 /* only change the default stacksize before the first parallel region */
8455 if (!TCR_4(__kmp_init_parallel)) {
8456 size_t value = arg; /* argument is in bytes */
8457
8460 else if (value > KMP_MAX_STKSIZE)
8462
8464
8465 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
8466 }
8467
8469}
8470
8471/* set the behaviour of the runtime library */
8472/* TODO this can cause some odd behaviour with sibling parallelism... */
8474 __kmp_library = arg;
8475
8476 switch (__kmp_library) {
8477 case library_serial: {
8478 KMP_INFORM(LibraryIsSerial);
8479 } break;
8480 case library_turnaround:
8482 __kmp_use_yield = 2; // only yield when oversubscribed
8483 break;
8484 case library_throughput:
8487 break;
8488 default:
8489 KMP_FATAL(UnknownLibraryType, arg);
8490 }
8491}
8492
8493/* Getting team information common for all team API */
8494// Returns NULL if not in teams construct
8495static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8497 teams_serialized = 0;
8498 if (thr->th.th_teams_microtask) {
8499 kmp_team_t *team = thr->th.th_team;
8500 int tlevel = thr->th.th_teams_level; // the level of the teams construct
8501 int ii = team->t.t_level;
8502 teams_serialized = team->t.t_serialized;
8503 int level = tlevel + 1;
8504 KMP_DEBUG_ASSERT(ii >= tlevel);
8505 while (ii > level) {
8506 for (teams_serialized = team->t.t_serialized;
8507 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8508 }
8509 if (team->t.t_serialized && (!teams_serialized)) {
8510 team = team->t.t_parent;
8511 continue;
8512 }
8513 if (ii > level) {
8514 team = team->t.t_parent;
8515 ii--;
8516 }
8517 }
8518 return team;
8519 }
8520 return NULL;
8521}
8522
8524 int serialized;
8525 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8526 if (team) {
8527 if (serialized > 1) {
8528 return 0; // teams region is serialized ( 1 team of 1 thread ).
8529 } else {
8530 return team->t.t_master_tid;
8531 }
8532 }
8533 return 0;
8534}
8535
8537 int serialized;
8538 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8539 if (team) {
8540 if (serialized > 1) {
8541 return 1;
8542 } else {
8543 return team->t.t_parent->t.t_nproc;
8544 }
8545 }
8546 return 1;
8547}
8548
8549/* ------------------------------------------------------------------------ */
8550
8551/*
8552 * Affinity Format Parser
8553 *
8554 * Field is in form of: %[[[0].]size]type
8555 * % and type are required (%% means print a literal '%')
8556 * type is either single char or long name surrounded by {},
8557 * e.g., N or {num_threads}
8558 * 0 => leading zeros
8559 * . => right justified when size is specified
8560 * by default output is left justified
8561 * size is the *minimum* field length
8562 * All other characters are printed as is
8563 *
8564 * Available field types:
8565 * L {nesting_level} - omp_get_level()
8566 * n {thread_num} - omp_get_thread_num()
8567 * H {host} - name of host machine
8568 * P {process_id} - process id (integer)
8569 * i {native_thread_id} - native thread identifier (integer)
8570 * N {num_threads} - omp_get_num_threads()
8571 * a {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
8572 * t {team_num} - omp_get_team_num(); T {num_teams} - omp_get_num_teams()
8573 * A {thread_affinity} - comma separated list of integers or integer ranges (values of affinity mask)
8574 *
8575 * Implementation-specific field types can be added
8576 * If a type is unknown, print "undefined"
8577 */
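// Illustrative example (hypothetical values): the format string
//   "host=%H pid=%P thread=%0.3n affinity={%A}"
// could expand, for thread 2 bound to procs 0-3, to
//   "host=node01 pid=12345 thread=002 affinity={0-3}"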
8578
8579// Structure holding the short name, long name, and corresponding data type
8580// for snprintf. A table of these will represent the entire valid keyword
8581// field types.
8583 char short_name; // from spec e.g., L -> nesting level
8584 const char *long_name; // from spec nesting_level -> nesting level
8585 char field_format; // data type for snprintf (typically 'd' or 's'
8586 // for integer or string)
8588
8590#if KMP_AFFINITY_SUPPORTED
8591 {'A', "thread_affinity", 's'},
8592#endif
8593 {'t', "team_num", 'd'},
8594 {'T', "num_teams", 'd'},
8595 {'L', "nesting_level", 'd'},
8596 {'n', "thread_num", 'd'},
8597 {'N', "num_threads", 'd'},
8598 {'a', "ancestor_tnum", 'd'},
8599 {'H', "host", 's'},
8600 {'P', "process_id", 'd'},
8601 {'i', "native_thread_id", 'd'}};
8602
8603// Return the number of characters it takes to hold field
8604static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
8605 const char **ptr,
8606 kmp_str_buf_t *field_buffer) {
8607 int rc, format_index, field_value;
8608 const char *width_left, *width_right;
8609 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8610 static const int FORMAT_SIZE = 20;
8611 char format[FORMAT_SIZE] = {0};
8612 char absolute_short_name = 0;
8613
8614 KMP_DEBUG_ASSERT(gtid >= 0);
8615 KMP_DEBUG_ASSERT(th);
8616 KMP_DEBUG_ASSERT(**ptr == '%');
8617 KMP_DEBUG_ASSERT(field_buffer);
8618
8619 __kmp_str_buf_clear(field_buffer);
8620
8621 // Skip the initial %
8622 (*ptr)++;
8623
8624 // Check for %% first
8625 if (**ptr == '%') {
8626 __kmp_str_buf_cat(field_buffer, "%", 1);
8627 (*ptr)++; // skip over the second %
8628 return 1;
8629 }
8630
8631 // Parse field modifiers if they are present
8632 pad_zeros = false;
8633 if (**ptr == '0') {
8634 pad_zeros = true;
8635 (*ptr)++; // skip over 0
8636 }
8637 right_justify = false;
8638 if (**ptr == '.') {
8639 right_justify = true;
8640 (*ptr)++; // skip over .
8641 }
8642 // Parse width of field: [width_left, width_right)
8643 width_left = width_right = NULL;
8644 if (**ptr >= '0' && **ptr <= '9') {
8645 width_left = *ptr;
8646 SKIP_DIGITS(*ptr);
8647 width_right = *ptr;
8648 }
8649
8650 // Create the format for KMP_SNPRINTF based on flags parsed above
8651 format_index = 0;
8652 format[format_index++] = '%';
8653 if (!right_justify)
8654 format[format_index++] = '-';
8655 if (pad_zeros)
8656 format[format_index++] = '0';
8657 if (width_left && width_right) {
8658 int i = 0;
8659 // Only allow 8 digit number widths.
8660 // This also prevents overflowing format variable
8661 while (i < 8 && width_left < width_right) {
8662 format[format_index++] = *width_left;
8663 width_left++;
8664 i++;
8665 }
8666 }
8667
8668 // Parse a name (long or short)
8669 // Canonicalize the name into absolute_short_name
8670 found_valid_name = false;
8671 parse_long_name = (**ptr == '{');
8672 if (parse_long_name)
8673 (*ptr)++; // skip initial left brace
8674 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
8675 sizeof(__kmp_affinity_format_table[0]);
8676 ++i) {
8677 char short_name = __kmp_affinity_format_table[i].short_name;
8678 const char *long_name = __kmp_affinity_format_table[i].long_name;
8679 char field_format = __kmp_affinity_format_table[i].field_format;
8680 if (parse_long_name) {
8681 size_t length = KMP_STRLEN(long_name);
8682 if (strncmp(*ptr, long_name, length) == 0) {
8683 found_valid_name = true;
8684 (*ptr) += length; // skip the long name
8685 }
8686 } else if (**ptr == short_name) {
8687 found_valid_name = true;
8688 (*ptr)++; // skip the short name
8689 }
8690 if (found_valid_name) {
8691 format[format_index++] = field_format;
8692 format[format_index++] = '\0';
8693 absolute_short_name = short_name;
8694 break;
8695 }
8696 }
8697 if (parse_long_name) {
8698 if (**ptr != '}') {
8699 absolute_short_name = 0;
8700 } else {
8701 (*ptr)++; // skip over the right brace
8702 }
8703 }
8704
8705 // Attempt to fill the buffer with the requested
8706 // value using snprintf within __kmp_str_buf_print()
8707 switch (absolute_short_name) {
8708 case 't':
8709 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8710 break;
8711 case 'T':
8712 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8713 break;
8714 case 'L':
8715 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8716 break;
8717 case 'n':
8718 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8719 break;
8720 case 'H': {
8721 static const int BUFFER_SIZE = 256;
8722 char buf[BUFFER_SIZE];
8723 __kmp_expand_host_name(buf, BUFFER_SIZE);
8724 rc = __kmp_str_buf_print(field_buffer, format, buf);
8725 } break;
8726 case 'P':
8727 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8728 break;
8729 case 'i':
8730 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8731 break;
8732 case 'N':
8733 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8734 break;
8735 case 'a':
8736 field_value =
8737 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8738 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8739 break;
8740#if KMP_AFFINITY_SUPPORTED
8741 case 'A': {
8742 if (th->th.th_affin_mask) {
8743 kmp_str_buf_t buf;
8744 __kmp_str_buf_init(&buf);
8745 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8746 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8747 __kmp_str_buf_free(&buf);
8748 } else {
8749 rc = __kmp_str_buf_print(field_buffer, "%s", "disabled");
8750 }
8751 } break;
8752#endif
8753 default:
8754 // According to the spec, if an implementation does not have info for a field
8755 // type, then "undefined" is printed
8756 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8757 // Skip the field
8758 if (parse_long_name) {
8759 SKIP_TOKEN(*ptr);
8760 if (**ptr == '}')
8761 (*ptr)++;
8762 } else {
8763 (*ptr)++;
8764 }
8765 }
8766
8767 KMP_ASSERT(format_index <= FORMAT_SIZE);
8768 return rc;
8769}
8770
8771/*
8772 * Return number of characters needed to hold the affinity string
8773 * (not including null byte character)
8774 * The resultant string is printed to buffer, which the caller can then
8775 * handle afterwards
8776 */
8777size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8778 kmp_str_buf_t *buffer) {
8779 const char *parse_ptr;
8780 size_t retval;
8781 const kmp_info_t *th;
8782 kmp_str_buf_t field;
8783
8784 KMP_DEBUG_ASSERT(buffer);
8785 KMP_DEBUG_ASSERT(gtid >= 0);
8786
8787 __kmp_str_buf_init(&field);
8788 __kmp_str_buf_clear(buffer);
8789
8790 th = __kmp_threads[gtid];
8791 retval = 0;
8792
8793 // If format is NULL or zero-length string, then we use
8794 // affinity-format-var ICV
8795 parse_ptr = format;
8796 if (parse_ptr == NULL || *parse_ptr == '\0') {
8797 parse_ptr = __kmp_affinity_format;
8798 }
8799 KMP_DEBUG_ASSERT(parse_ptr);
8800
8801 while (*parse_ptr != '\0') {
8802 // Parse a field
8803 if (*parse_ptr == '%') {
8804 // Put field in the buffer
8805 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8806 __kmp_str_buf_catbuf(buffer, &field);
8807 retval += rc;
8808 } else {
8809 // Put literal character in buffer
8810 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8811 retval++;
8812 parse_ptr++;
8813 }
8814 }
8815 __kmp_str_buf_free(&field);
8816 return retval;
8817}
8818
8819// Displays the affinity string to stdout
8820void __kmp_aux_display_affinity(int gtid, const char *format) {
8821 kmp_str_buf_t buf;
8822 __kmp_str_buf_init(&buf);
8823 __kmp_aux_capture_affinity(gtid, format, &buf);
8824 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
8825 __kmp_str_buf_free(&buf);
8826}
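(Editor's illustration, not part of this file: the character count returned by __kmp_aux_capture_affinity() is what makes the user-level two-pass sizing idiom work, assuming an OpenMP 5.0 libomp where omp_capture_affinity() reports the required length excluding the terminating null.)

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
#pragma omp parallel
  {
    char probe[1];
    // First pass: learn the required length, independent of the probe size.
    size_t needed = omp_capture_affinity(probe, sizeof(probe), "%i bound to %A");
    char *buf = (char *)malloc(needed + 1);
    omp_capture_affinity(buf, needed + 1, "%i bound to %A"); // second pass
#pragma omp critical
    printf("%s\n", buf);
    free(buf);
  }
  return 0;
}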
8827
8828/* ------------------------------------------------------------------------ */
8829void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8830 int blocktime = arg; /* argument is in microseconds */
8831#if KMP_USE_MONITOR
8832 int bt_intervals;
8833#endif
8834 kmp_int8 bt_set;
8835
8836 __kmp_save_internal_controls(thread);
8837
8838 /* Normalize and set blocktime for the teams */
8839 if (blocktime < KMP_MIN_BLOCKTIME)
8840 blocktime = KMP_MIN_BLOCKTIME;
8841 else if (blocktime > KMP_MAX_BLOCKTIME)
8842 blocktime = KMP_MAX_BLOCKTIME;
8843
8844 set__blocktime_team(thread->th.th_team, tid, blocktime);
8845 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8846
8847#if KMP_USE_MONITOR
8848 /* Calculate and set blocktime intervals for the teams */
8849 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8850
8851 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8852 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8853#endif
8854
8855 /* Set whether blocktime has been set to "TRUE" */
8856 bt_set = TRUE;
8857
8858 set__bt_set_team(thread->th.th_team, tid, bt_set);
8859 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8860#if KMP_USE_MONITOR
8861 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8862 "bt_intervals=%d, monitor_updates=%d\n",
8863 __kmp_gtid_from_tid(tid, thread->th.th_team),
8864 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8865 __kmp_monitor_wakeups));
8866#else
8867 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8868 __kmp_gtid_from_tid(tid, thread->th.th_team),
8869 thread->th.th_team->t.t_id, tid, blocktime));
8870#endif
8871}
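(Editor's note, not part of this file: the clamping above means out-of-range block-time requests are normalized into [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] rather than rejected. A hedged user-level sketch, assuming the kmp_set_blocktime() extension declared in LLVM's omp.h, whose request ultimately lands in __kmp_aux_set_blocktime().)

#include <omp.h>

int main(void) {
  // Request that idle worker threads go to sleep right after a parallel
  // region instead of spin-waiting for the block-time interval.
  kmp_set_blocktime(0);
#pragma omp parallel
  {
    // ... parallel work ...
  }
  return 0;
}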
8872
8873void __kmp_aux_set_defaults(char const *str, size_t len) {
8874 if (!__kmp_init_serial) {
8875 __kmp_serial_initialize();
8876 }
8877 __kmp_env_initialize(str);
8878
8879 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
8880 __kmp_env_print();
8881 }
8882} // __kmp_aux_set_defaults
8883
8884/* ------------------------------------------------------------------------ */
8885/* internal fast reduction routines */
8886
8887PACKED_REDUCTION_METHOD_T
8888__kmp_determine_reduction_method(
8889 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8890 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8891 kmp_critical_name *lck) {
8892
8893 // Default reduction method: critical construct ( lck != NULL, like in current
8894 // PAROPT )
8895 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
8896 // can be selected by RTL
8897 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
8898 // can be selected by RTL
8899 // Finally, it's up to OpenMP RTL to make a decision on which method to select
8900 // among generated by PAROPT.
8901
8903
8904 int team_size;
8905
8906 KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
8907
8908#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8909 (loc && \
8910 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8911#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8912
8913 retval = critical_reduce_block;
8914
8915 // another choice of getting a team size (with 1 dynamic dereference) is slower
8916 team_size = __kmp_get_team_num_threads(global_tid);
8917 if (team_size == 1) {
8918
8919 retval = empty_reduce_block;
8920
8921 } else {
8922
8923 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8924
8925#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8926 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
8927 KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM
8928
8929#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8930 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HAIKU || \
8931 KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8932
8933 int teamsize_cutoff = 4;
8934
8935#if KMP_MIC_SUPPORTED
8936 if (__kmp_mic_type != non_mic) {
8937 teamsize_cutoff = 8;
8938 }
8939#endif
8940 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8941 if (tree_available) {
8942 if (team_size <= teamsize_cutoff) {
8943 if (atomic_available) {
8944 retval = atomic_reduce_block;
8945 }
8946 } else {
8947 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8948 }
8949 } else if (atomic_available) {
8950 retval = atomic_reduce_block;
8951 }
8952#else
8953#error "Unknown or unsupported OS"
8954#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
8955 // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HAIKU ||
8956 // KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8957
8958#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
8959 KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || KMP_ARCH_SPARC
8960
8961#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8962 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HAIKU || KMP_OS_HURD || \
8963 KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8964
8965 // basic tuning
8966
8967 if (atomic_available) {
8968 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
8969 retval = atomic_reduce_block;
8970 }
8971 } // otherwise: use critical section
8972
8973#elif KMP_OS_DARWIN
8974
8975 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8976 if (atomic_available && (num_vars <= 3)) {
8977 retval = atomic_reduce_block;
8978 } else if (tree_available) {
8979 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8980 (reduce_size < (2000 * sizeof(kmp_real64)))) {
8981 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8982 }
8983 } // otherwise: use critical section
8984
8985#else
8986#error "Unknown or unsupported OS"
8987#endif
8988
8989#else
8990#error "Unknown or unsupported architecture"
8991#endif
8992 }
8993
8994 // KMP_FORCE_REDUCTION
8995
8996 // If the team is serialized (team_size == 1), ignore the forced reduction
8997 // method and stay with the unsynchronized method (empty_reduce_block)
8998 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8999 team_size != 1) {
9000
9002 PACKED_REDUCTION_METHOD_T forced_retval;
9003 int atomic_available, tree_available;
9004
9005 switch ((forced_retval = __kmp_force_reduction_method)) {
9006 case critical_reduce_block:
9007 KMP_ASSERT(lck); // lck should be != 0
9008 break;
9009
9010 case atomic_reduce_block:
9011 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
9012 if (!atomic_available) {
9013 KMP_WARNING(RedMethodNotSupported, "atomic");
9014 forced_retval = critical_reduce_block;
9015 }
9016 break;
9017
9018 case tree_reduce_block:
9019 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
9020 if (!tree_available) {
9021 KMP_WARNING(RedMethodNotSupported, "tree");
9022 forced_retval = critical_reduce_block;
9023 } else {
9024#if KMP_FAST_REDUCTION_BARRIER
9025 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
9026#endif
9027 }
9028 break;
9029
9030 default:
9031 KMP_ASSERT(0); // "unsupported method specified"
9032 }
9033
9034 retval = forced_retval;
9035 }
9036
9037 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
9038
9039#undef FAST_REDUCTION_TREE_METHOD_GENERATED
9040#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
9041
9042 return (retval);
9043}
9044// this function is for testing set/get/determine reduce method
9045kmp_int32 __kmp_get_reduce_method(void) {
9046 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
9047}
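(Editor's illustration, not part of this file: the ">> 8" above reflects that packed_reduction_method keeps the reduction method in the bits above the low byte, with barrier information packed underneath; the authoritative constants live in kmp.h. The values below are stand-ins chosen only to show the arithmetic.)

#include <stdio.h>

// Illustrative stand-ins; the real reduction-method constants are in kmp.h.
enum { demo_barrier_bits = 1, demo_critical_method = (1 << 8) };

int main(void) {
  int packed = demo_critical_method | demo_barrier_bits; // method | barrier
  printf("method part = %d\n", packed >> 8); // prints 1, mirroring the getter
  return 0;
}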
9048
9049// Soft pause sets up threads to ignore blocktime and just go to sleep.
9050// Spin-wait code checks __kmp_pause_status and reacts accordingly.
9051void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
9052
9053// Hard pause shuts down the runtime completely. Resume happens naturally when
9054// OpenMP is used subsequently.
9055void __kmp_hard_pause() {
9056 __kmp_pause_status = kmp_hard_paused;
9057 __kmp_internal_end_thread(-1);
9058}
9059
9060// Soft resume sets __kmp_pause_status, and wakes up all threads.
9061void __kmp_resume_if_soft_paused() {
9062 if (__kmp_pause_status == kmp_soft_paused) {
9063 __kmp_pause_status = kmp_not_paused;
9064
9065 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
9066 kmp_info_t *thread = __kmp_threads[gtid];
9067 if (thread) { // Wake it if sleeping
9068 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
9069 thread);
9070 if (fl.is_sleeping())
9071 fl.resume(gtid);
9072 else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
9073 __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
9074 } else { // thread holds the lock and may sleep soon
9075 do { // until either the thread sleeps, or we can get the lock
9076 if (fl.is_sleeping()) {
9077 fl.resume(gtid);
9078 break;
9079 } else if (__kmp_try_suspend_mx(thread)) {
9080 __kmp_unlock_suspend_mx(thread);
9081 break;
9082 }
9083 } while (1);
9084 }
9085 }
9086 }
9087 }
9088}
9089
9090// This function is called via __kmpc_pause_resource. Returns 0 if successful.
9091// TODO: add warning messages
9092int __kmp_pause_resource(kmp_pause_status_t level) {
9093 if (level == kmp_not_paused) { // requesting resume
9094 if (__kmp_pause_status == kmp_not_paused) {
9095 // error message about runtime not being paused, so can't resume
9096 return 1;
9097 } else {
9100 __kmp_pause_status = kmp_not_paused;
9101 return 0;
9102 }
9103 } else if (level == kmp_soft_paused) { // requesting soft pause
9104 if (__kmp_pause_status != kmp_not_paused) {
9105 // error message about already being paused
9106 return 1;
9107 } else {
9108 __kmp_soft_pause();
9109 return 0;
9110 }
9111 } else if (level == kmp_hard_paused || level == kmp_stop_tool_paused) {
9112 // requesting hard pause or stop_tool pause
9113 if (__kmp_pause_status != kmp_not_paused) {
9114 // error message about already being paused
9115 return 1;
9116 } else {
9117 __kmp_hard_pause();
9118 return 0;
9119 }
9120 } else {
9121 // error message about invalid level
9122 return 1;
9123 }
9124}
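(Editor's sketch, not part of this file: the pause levels handled above back the OpenMP 5.0 pause API. Assuming a 5.0-capable libomp, omp_pause_resource_all() is routed through __kmpc_pause_resource to this function, and a nonzero return surfaces as failure to the caller.)

#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel
  { /* warm up the runtime */ }

  // Soft pause: worker threads are put to sleep; 0 means the request succeeded.
  if (omp_pause_resource_all(omp_pause_soft) != 0)
    printf("pause request was rejected\n");

#pragma omp parallel
  { /* the runtime resumes automatically on the next parallel region */ }
  return 0;
}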
9125
9133
9134// The team size is changing, so distributed barrier must be modified
9135void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
9136 int new_nthreads) {
9137 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9138 bp_dist_bar);
9139 kmp_info_t **other_threads = team->t.t_threads;
9140
9141 // We want all the workers to stop waiting on the barrier while we adjust the
9142 // size of the team.
9143 for (int f = 1; f < old_nthreads; ++f) {
9144 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9145 // Ignore threads that are already inactive or not present in the team
9146 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9147 // teams construct causes thread_limit to get passed in, and some of
9148 // those could be inactive; just ignore them
9149 continue;
9150 }
9151 // If thread is transitioning still to in_use state, wait for it
9152 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9153 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9154 KMP_CPU_PAUSE();
9155 }
9156 // The thread should be in_use now
9157 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9158 // Transition to unused state
9159 team->t.t_threads[f]->th.th_used_in_team.store(2);
9160 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9161 }
9162 // Release all the workers
9163 team->t.b->go_release();
9164
9165 KMP_MFENCE();
9166
9167 // Workers should see transition status 2 and move to 0; but may need to be
9168 // woken up first
9169 int count = old_nthreads - 1;
9170 while (count > 0) {
9171 count = old_nthreads - 1;
9172 for (int f = 1; f < old_nthreads; ++f) {
9173 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9174 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
9175 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9176 void *, other_threads[f]->th.th_sleep_loc);
9177 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9178 }
9179 } else {
9180 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9181 count--;
9182 }
9183 }
9184 }
9185 // Now update the barrier size
9186 team->t.b->update_num_threads(new_nthreads);
9187 team->t.b->go_reset();
9188}
9189
9190void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9191 // Add the threads back to the team
9192 KMP_DEBUG_ASSERT(team);
9193 // Threads were paused and pointed at th_used_in_team temporarily during a
9194 // resize of the team. We're going to set th_used_in_team to 3 to indicate to
9195 // the thread that it should transition itself back into the team. Then, if
9196 // blocktime isn't infinite, the thread could be sleeping, so we send a resume
9197 // to wake it up.
9198 for (int f = 1; f < new_nthreads; ++f) {
9199 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9200 KMP_COMPARE_AND_STORE_ACQ32(
9201 &(team->t.t_threads[f]->th.th_used_in_team), 0, 3);
9202 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
9203 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9204 (kmp_flag_32<false, false> *)NULL);
9205 }
9206 }
9207 // The threads should be transitioning to the team; when they are done, they
9208 // should have set th_used_in_team to 1. This loop forces master to wait until
9209 // all threads have moved into the team and are waiting in the barrier.
9210 int count = new_nthreads - 1;
9211 while (count > 0) {
9212 count = new_nthreads - 1;
9213 for (int f = 1; f < new_nthreads; ++f) {
9214 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9215 count--;
9216 }
9217 }
9218 }
9219}
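(Editor's summary, not part of this file: the two routines above drive th_used_in_team through a small hand-shake; the names below are editorial labels for the raw integers the runtime actually stores.)

enum kmp_used_in_team_state {
  KMP_UIT_NOT_IN_TEAM = 0, // worker is outside the team
  KMP_UIT_IN_TEAM = 1,     // worker has joined and waits at the dist. barrier
  KMP_UIT_LEAVING = 2,     // set by __kmp_resize_dist_barrier; worker goes to 0
  KMP_UIT_JOINING = 3      // set by __kmp_add_threads_to_team; worker goes to 1
};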
9220
9221// Globals and functions for hidden helper task
9222kmp_info_t **__kmp_hidden_helper_threads;
9223kmp_info_t *__kmp_hidden_helper_main_thread;
9224std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9225#if KMP_OS_LINUX
9226kmp_int32 __kmp_hidden_helper_threads_num = 8;
9227kmp_int32 __kmp_enable_hidden_helper = TRUE;
9228#else
9229kmp_int32 __kmp_hidden_helper_threads_num = 0;
9230kmp_int32 __kmp_enable_hidden_helper = FALSE;
9231#endif
9232
9233namespace {
9234std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9235
9236void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9237 // This is an explicit synchronization on all hidden helper threads in case
9238 // that when a regular thread pushes a hidden helper task to one hidden
9239 // helper thread, that thread may not have been awakened since it was released
9240 // by the main thread after creating the team.
9241 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9242 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9243 __kmp_hidden_helper_threads_num)
9244 ;
9245
9246 // If main thread, then wait for signal
9247 if (__kmpc_master(nullptr, *gtid)) {
9248 // First, unset the initial state and release the initial thread
9249 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9250 __kmp_hidden_helper_initz_release();
9251 __kmp_hidden_helper_main_thread_wait();
9252 // Now wake up all worker threads
9253 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9254 __kmp_hidden_helper_worker_thread_signal();
9255 }
9256 }
9257}
9258} // namespace
9259
9260void __kmp_hidden_helper_threads_initz_routine() {
9261 // Create a new root for hidden helper team/threads
9262 const int gtid = __kmp_register_root(TRUE);
9263 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9264 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9265 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9266 __kmp_hidden_helper_threads_num;
9267
9268 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9269
9270 __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);
9271
9272 // Set the initialization flag to FALSE
9274
9275 __kmp_hidden_helper_threads_deinitz_release();
9276}
9277
9278/* Nesting Mode:
9279 Set via KMP_NESTING_MODE, which takes an integer.
9280 Note: we skip duplicate topology levels, and skip levels with only
9281 one entity.
9282 KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
9283 KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
9284 in the topology, and initializes the number of threads at each of those
9285 levels to the number of entities at each level, respectively, below the
9286 entity at the parent level.
9287 KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
9288 but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
9289 the user to turn nesting on explicitly. This is an even more experimental
9290 option to this experimental feature, and may change or go away in the
9291 future.
9292*/
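(Editor's sketch, not part of this file: one way to observe the modes described above, assuming KMP_NESTING_MODE=1 is exported before the run; the printed team sizes depend on the machine topology.)

#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel
  {
#pragma omp single
    printf("level %d: %d threads\n", omp_get_level(), omp_get_num_threads());
#pragma omp parallel
    {
#pragma omp single
      printf("  level %d: %d threads\n", omp_get_level(),
             omp_get_num_threads());
    }
  }
  return 0;
}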
9293
9294// Allocate space to store nesting levels
9295void __kmp_init_nesting_mode() {
9296 int levels = KMP_HW_LAST;
9297 __kmp_nesting_mode_nlevels = levels;
9298 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
9299 for (int i = 0; i < levels; ++i)
9300 __kmp_nesting_nth_level[i] = 0;
9301 if (__kmp_nested_nth.size < levels) {
9302 __kmp_nested_nth.nth =
9303 (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
9304 __kmp_nested_nth.size = levels;
9305 }
9306}
9307
9308// Set # threads for top levels of nesting; must be called after topology set
9309void __kmp_set_nesting_mode_threads() {
9310 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9311
9312 if (__kmp_nesting_mode == 1)
9313 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9314 else if (__kmp_nesting_mode > 1)
9315 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9316
9317 if (__kmp_topology) { // use topology info
9318 int loc, hw_level;
9319 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9320 loc < __kmp_nesting_mode_nlevels;
9321 loc++, hw_level++) {
9322 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9323 if (__kmp_nesting_nth_level[loc] == 1)
9324 loc--;
9325 }
9326 // Make sure all cores are used
9327 if (__kmp_nesting_mode > 1 && loc > 1) {
9328 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9329 int num_cores = __kmp_topology->get_count(core_level);
9330 int upper_levels = 1;
9331 for (int level = 0; level < loc - 1; ++level)
9332 upper_levels *= __kmp_nesting_nth_level[level];
9333 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9334 __kmp_nesting_nth_level[loc - 1] =
9335 num_cores / __kmp_nesting_nth_level[loc - 2];
9336 }
9337 __kmp_nesting_mode_nlevels = loc;
9338 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9339 } else { // no topology info available; provide a reasonable guesstimation
9340 if (__kmp_avail_proc >= 4) {
9341 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9342 __kmp_nesting_nth_level[1] = 2;
9343 __kmp_nesting_mode_nlevels = 2;
9344 } else {
9345 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9346 __kmp_nesting_mode_nlevels = 1;
9347 }
9348 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9349 }
9350 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9351 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9352 }
9356 if (get__max_active_levels(thread) > 1) {
9357 // if max levels was set, set nesting mode levels to same
9358 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9359 }
9360 if (__kmp_nesting_mode == 1) // turn on nesting for this case only
9361 set__max_active_levels(thread, KMP_MAX_ACTIVE_LEVELS_LIMIT);
9362}
9363
9364#if ENABLE_LIBOMPTARGET
9365void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, void *current_task,
9366 void *event) = NULL;
9367void __kmp_target_init() {
9368 // Look for hooks in the libomptarget library
9369 *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync");
9370}
9371#endif // ENABLE_LIBOMPTARGET
9372
9373// Empty symbols to export (see exports_so.txt) when feature is disabled
9374extern "C" {
9375#if !KMP_STATS_ENABLED
9376void __kmp_reset_stats() {}
9377#endif
9378#if !USE_DEBUGGER
9379int __kmp_omp_debug_struct_info = FALSE;
9380int __kmp_debugging = FALSE;
9381#endif
9382#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
9383void __kmp_itt_fini_ittlib() {}
9384void __kmp_itt_init_ittlib() {}
9385#endif
9386}
9387
9388// end of file
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth)
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, int master_tid, int set_nthreads, int enter_teams)
void __kmp_serial_initialize(void)
static bool __kmp_is_entering_teams(int active_level, int level, int teams_level, kmp_va_list ap)
void __kmp_resume_if_soft_paused()
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
int __kmp_get_global_thread_id()
void __kmp_internal_begin(void)
static char * __kmp_reg_status_name()
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team, int team_id, int num_thr)
static void __kmp_do_serial_initialize(void)
void __kmp_fork_barrier(int gtid, int tid)
int __kmp_get_global_thread_id_reg()
int __kmp_ignore_mppend(void)
static kmp_nested_nthreads_t * __kmp_override_nested_nth(kmp_info_t *thr, int level)
kmp_int32 __kmp_get_reduce_method(void)
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th, const char **ptr, kmp_str_buf_t *field_buffer)
void __kmp_cleanup(void)
static int __kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team, kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root, enum fork_context_e call_context, microtask_t microtask, launch_t invoker, int master_set_numthreads, int level, kmp_va_list ap)
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid)
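The entry points indexed above implement the fork/join machinery behind ordinary parallel regions: the compiler outlines the region body into a microtask and reaches __kmp_fork_call / __kmp_join_call through the __kmpc_fork_call entry point. A user-level sketch of the construct those declarations ultimately serve (illustrative only, not taken from this file):

  // hello_parallel.c -- build with: clang -fopenmp hello_parallel.c
  #include <omp.h>
  #include <stdio.h>

  int main(void) {
    // The body of this region becomes a microtask; the runtime forks a team,
    // runs the microtask on it, and joins on the primary thread.
  #pragma omp parallel num_threads(4)
    printf("hello from thread %d of %d\n", omp_get_thread_num(),
           omp_get_num_threads());
    return 0;
  }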
#define KMP_ALLOCA
#define KMP_STRCPY_S(dst, bsz, src)
#define KMP_SNPRINTF
#define KMP_SSCANF
#define KMP_MEMCPY
#define KMP_STRLEN
void __kmp_env_print_2()
int __kmp_default_tp_capacity(int req_nproc, int max_nth, int all_threads_specified)
int __kmp_initial_threads_capacity(int req_nproc)
void __kmp_env_initialize(char const *string)
void __kmp_display_env_impl(int display_env, int display_env_verbose)
void __kmp_env_print()
void __kmp_stats_init(void)
void __kmp_stats_fini(void)
Functions for collecting statistics.
#define KMP_COUNT_VALUE(n, v)
Definition kmp_stats.h:1000
#define KMP_PUSH_PARTITIONED_TIMER(name)
Definition kmp_stats.h:1014
#define KMP_GET_THREAD_STATE()
Definition kmp_stats.h:1017
#define KMP_POP_PARTITIONED_TIMER()
Definition kmp_stats.h:1015
#define KMP_INIT_PARTITIONED_TIMERS(name)
Definition kmp_stats.h:1012
#define KMP_SET_THREAD_STATE_BLOCK(state_name)
Definition kmp_stats.h:1018
#define KMP_TIME_PARTITIONED_BLOCK(name)
Definition kmp_stats.h:1013
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n)
Definition kmp_stats.h:1008
#define KMP_SET_THREAD_STATE(state_name)
Definition kmp_stats.h:1016
void __kmp_str_split(char *str, char delim, char **head, char **tail)
Definition kmp_str.cpp:571
void __kmp_str_buf_clear(kmp_str_buf_t *buffer)
Definition kmp_str.cpp:71
void __kmp_str_buf_free(kmp_str_buf_t *buffer)
Definition kmp_str.cpp:123
char * __kmp_str_format(char const *format,...)
Definition kmp_str.cpp:448
int __kmp_str_match_true(char const *data)
Definition kmp_str.cpp:552
void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, size_t len)
Definition kmp_str.cpp:134
void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src)
Definition kmp_str.cpp:146
#define args
int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format,...)
Definition kmp_str.cpp:221
int __kmp_str_match_false(char const *data)
Definition kmp_str.cpp:543
struct kmp_str_buf kmp_str_buf_t
Definition kmp_str.h:38
#define __kmp_str_buf_init(b)
Definition kmp_str.h:40
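The kmp_str buffer helpers indexed above follow an init / append / free discipline around kmp_str_buf_t. A minimal usage sketch based only on the signatures listed here; the enclosing function and variable names are illustrative:

  #include "kmp_str.h"

  static void build_message(int nth) {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);                     // macro: set up the buffer
    __kmp_str_buf_print(&buf, "%d threads", nth); // printf-style append
    __kmp_str_buf_cat(&buf, " requested", 10);    // append a string of known length
    // ... buf.str now points at the assembled text ...
    __kmp_str_buf_free(&buf);                     // release any heap storage
  }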
#define i
Definition kmp_stub.cpp:87
void __kmp_print_version_1(void)
void __kmp_print_version_2(void)
#define KMP_VERSION_PREFIX
Definition kmp_version.h:34
char const __kmp_version_alt_comp[]
char const __kmp_version_lock[]
static void __kmp_null_resume_wrapper(kmp_info_t *thr)
void microtask(int *global_tid, int *bound_tid)
int32_t kmp_int32
omp_lock_t lck
Definition omp_lock.c:7
void ompt_fini()
ompt_callbacks_active_t ompt_enabled
void ompt_pre_init()
void ompt_post_init()
ompt_callbacks_internal_t ompt_callbacks
#define OMPT_INVOKER(x)
struct ompt_lw_taskteam_s ompt_lw_taskteam_t
#define OMPT_GET_FRAME_ADDRESS(level)
void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, ompt_data_t *ompt_pid, void *codeptr)
int __ompt_get_task_info_internal(int ancestor_level, int *type, ompt_data_t **task_data, ompt_frame_t **task_frame, ompt_data_t **parallel_data, int *thread_num)
void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int on_heap, bool always)
ompt_task_info_t * __ompt_get_task_info_object(int depth)
void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid)
void __ompt_lw_taskteam_unlink(kmp_info_t *thr)
ompt_data_t * __ompt_get_thread_data_internal()
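The OMPT entries indexed above are the runtime-internal side of the tool interface; call sites in this file are conventionally guarded by the ompt_enabled flags before dispatching a registered callback through ompt_callbacks. A hedged fragment of that guard pattern, as it might appear inside a runtime routine; the specific callback bit named here is an assumption for illustration:

  #if OMPT_SUPPORT
    if (ompt_enabled.enabled && ompt_enabled.ompt_callback_parallel_begin) {
      // dispatch the registered parallel-begin callback via ompt_callbacks;
      // the argument list depends on the callback being raised
    }
  #endif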
kmp_int32 tt_found_proxy_tasks
Definition kmp.h:2868
kmp_int32 tt_hidden_helper_task_encountered
Definition kmp.h:2873
kmp_info_p * cg_root
Definition kmp.h:2928
kmp_int32 cg_nthreads
Definition kmp.h:2932
kmp_int32 cg_thread_limit
Definition kmp.h:2931
struct kmp_cg_root * up
Definition kmp.h:2933
void(* th_dxo_fcn)(int *gtid, int *cid, ident_t *)
Definition kmp.h:2102
kmp_int32 th_doacross_buf_idx
Definition kmp.h:2109
dispatch_private_info_t * th_dispatch_pr_current
Definition kmp.h:2105
kmp_uint32 th_disp_index
Definition kmp.h:2108
dispatch_private_info_t * th_disp_buffer
Definition kmp.h:2107
void(* th_deo_fcn)(int *gtid, int *cid, ident_t *)
Definition kmp.h:2100
dispatch_shared_info_t * th_dispatch_sh_current
Definition kmp.h:2104
kmp_team_p * hot_team
Definition kmp.h:2908
kmp_int32 hot_team_nth
Definition kmp.h:2909
kmp_proc_bind_t proc_bind
Definition kmp.h:2209
kmp_r_sched_t sched
Definition kmp.h:2208
struct kmp_internal_control * next
Definition kmp.h:2211
int serial_nesting_level
Definition kmp.h:2192
struct kmp_old_threads_list_t * next
Definition kmp.h:3299
kmp_info_t ** threads
Definition kmp.h:3298
char * str
Definition kmp_str.h:33
ompt_task_info_t ompt_task_info
ompt_data_t task_data
ompt_frame_t frame
kmp_bstate_t bb
Definition kmp.h:2256
kmp_base_info_t th
Definition kmp.h:3088
int chunk
Definition kmp.h:489
enum sched_type r_sched_type
Definition kmp.h:488
kmp_int64 sched
Definition kmp.h:491
kmp_base_task_team_t tt
Definition kmp.h:2884
kmp_base_team_t t
Definition kmp.h:3234
void __kmp_reap_monitor(kmp_info_t *th)
void __kmp_register_atfork(void)
void __kmp_free_handle(kmp_thread_t tHandle)
int __kmp_get_load_balance(int max)
int __kmp_still_running(kmp_info_t *th)
void __kmp_initialize_system_tick(void)
int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val)