LLVM OpenMP 20.0.0git
kmp_csupport.cpp
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "kmp_utils.h"
22#include "ompt-specific.h"
23
24#define MAX_MESSAGE 512
25
26// flags will be used in future, e.g. to implement openmp_strict library
27// restrictions
28
29/*!
30 * @ingroup STARTUP_SHUTDOWN
31 * @param loc in source location information
32 * @param flags in for future use (currently ignored)
33 *
34 * Initialize the runtime library. This call is optional; if it is not made then
35 * it will be implicitly called by attempts to use other library functions.
36 */
37void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
38 // By default __kmpc_begin() is no-op.
39 char *env;
40 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
44 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
45 } else if (__kmp_ignore_mppbeg() == FALSE) {
46 // By default __kmp_ignore_mppbeg() returns TRUE.
48 KC_TRACE(10, ("__kmpc_begin: called\n"));
49 }
50}
51
52/*!
53 * @ingroup STARTUP_SHUTDOWN
54 * @param loc source location information
55 *
56 * Shutdown the runtime library. This is also optional, and even if called will
57 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
58 * zero.
59 */
60void __kmpc_end(ident_t *loc) {
61 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
62 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
63 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
64 // returns FALSE and __kmpc_end() will unregister this root (it can cause
65 // library shut down).
66 if (__kmp_ignore_mppend() == FALSE) {
67 KC_TRACE(10, ("__kmpc_end: called\n"));
68 KA_TRACE(30, ("__kmpc_end\n"));
69
71 }
72#if KMP_OS_WINDOWS && OMPT_SUPPORT
73 // Normal exit process on Windows does not allow worker threads of the final
74 // parallel region to finish reporting their events, so shutting down the
75 // library here fixes the issue at least for the cases where __kmpc_end() is
76 // placed properly.
79#endif
80}
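/* Illustrative usage sketch, not part of the original source: a program (or the
   compiler on its behalf) may bracket its OpenMP usage with the optional
   begin/end calls. The ident_t initializer and the name "loc0" are hypothetical.

   static ident_t loc0 = {0, KMP_IDENT_KMPC, 0, 0, ";demo.c;main;1;1;;"};

   __kmpc_begin(&loc0, 0); // optional: the runtime also initializes itself lazily
   // ... OpenMP constructs ...
   __kmpc_end(&loc0); // no-op unless KMP_IGNORE_MPPEND=0 is set in the environment
*/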
81
82/*!
83@ingroup THREAD_STATES
84@param loc Source location information.
85@return The global thread index of the active thread.
86
87This function can be called in any context.
88
89If the runtime has only been entered at the outermost level from a
90single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
91that which would be returned by omp_get_thread_num() in the outermost
92active parallel construct. (Or zero if there is no active parallel
93construct, since the primary thread is necessarily thread zero).
94
95If multiple non-OpenMP threads all enter an OpenMP construct then this
96will be a unique thread identifier among all the threads created by
97the OpenMP runtime (but the value cannot be defined in terms of
98OpenMP thread ids returned by omp_get_thread_num()).
99*/
100kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
101 kmp_int32 gtid = __kmp_entry_gtid();
102
103 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
104
105 return gtid;
106}
107
108/*!
109@ingroup THREAD_STATES
110@param loc Source location information.
111@return The number of threads under control of the OpenMP<sup>*</sup> runtime
112
113This function can be called in any context.
114It returns the total number of threads under the control of the OpenMP runtime.
115That is not a number that can be determined by any OpenMP standard calls, since
116the library may be called from more than one non-OpenMP thread, and this
117reflects the total over all such calls. Similarly, because the runtime maintains
118underlying threads even when they are not active (since the cost of creating
119and destroying OS threads is high), this call counts all such threads even if
120they are not waiting for work.
121*/
122kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
123 KC_TRACE(10,
124 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
125
126 return TCR_4(__kmp_all_nth);
127}
128
129/*!
130@ingroup THREAD_STATES
131@param loc Source location information.
132@return The thread number of the calling thread in the innermost active parallel
133construct.
134*/
135kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
136 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
137 return __kmp_tid_from_gtid(__kmp_entry_gtid());
138}
139
140/*!
141@ingroup THREAD_STATES
142@param loc Source location information.
143@return The number of threads in the innermost active parallel construct.
144*/
145kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
146 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
147
148 return __kmp_entry_thread()->th.th_team->t.t_nproc;
149}
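/* Illustrative sketch, not part of the original source: the four query entry
   points above differ from the user-level API in scope. "loc0" is a hypothetical
   ident_t describing the call site.

   int gtid = __kmpc_global_thread_num(&loc0); // unique across the whole runtime
   int all = __kmpc_global_num_threads(&loc0); // every thread the runtime manages
   int tid = __kmpc_bound_thread_num(&loc0); // matches omp_get_thread_num()
   int nth = __kmpc_bound_num_threads(&loc0); // matches omp_get_num_threads()
*/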
150
151/*!
152 * @ingroup DEPRECATED
153 * @param loc location description
154 *
155 * This function need not be called. It always returns TRUE.
156 */
157kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
158#ifndef KMP_DEBUG
159
160 return TRUE;
161
162#else
163
164 const char *semi2;
165 const char *semi3;
166 int line_no;
167
168 if (__kmp_par_range == 0) {
169 return TRUE;
170 }
171 semi2 = loc->psource;
172 if (semi2 == NULL) {
173 return TRUE;
174 }
175 semi2 = strchr(semi2, ';');
176 if (semi2 == NULL) {
177 return TRUE;
178 }
179 semi2 = strchr(semi2 + 1, ';');
180 if (semi2 == NULL) {
181 return TRUE;
182 }
183 if (__kmp_par_range_filename[0]) {
184 const char *name = semi2 - 1;
185 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
186 name--;
187 }
188 if ((*name == '/') || (*name == ';')) {
189 name++;
190 }
191 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
192 return __kmp_par_range < 0;
193 }
194 }
195 semi3 = strchr(semi2 + 1, ';');
196 if (__kmp_par_range_routine[0]) {
197 if ((semi3 != NULL) && (semi3 > semi2) &&
198 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
199 return __kmp_par_range < 0;
200 }
201 }
202 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
203 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
204 return __kmp_par_range > 0;
205 }
206 return __kmp_par_range < 0;
207 }
208 return TRUE;
209
210#endif /* KMP_DEBUG */
211}
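/* Illustrative sketch, not part of the original source: the filtering above
   assumes loc->psource is a semicolon-separated string of the form
   ";filename;routine;line;column;;", for example:

   static ident_t loc0 = {0, KMP_IDENT_KMPC, 0, 0, ";demo.c;compute;42;1;;"};

   With __kmp_par_range filtering enabled, the fork is allowed (or refused, when
   __kmp_par_range < 0) depending on whether the file and routine names match and
   the parsed line number falls inside [__kmp_par_range_lb, __kmp_par_range_ub].
*/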
212
213/*!
214@ingroup THREAD_STATES
215@param loc Source location information.
216@return 1 if this thread is executing inside an active parallel region, zero if
217not.
218*/
219kmp_int32 __kmpc_in_parallel(ident_t *loc) {
220 return __kmp_entry_thread()->th.th_root->r.r_active;
221}
222
223/*!
224@ingroup PARALLEL
225@param loc source location information
226@param global_tid global thread number
227@param num_threads number of threads requested for this parallel construct
228
229Set the number of threads to be used by the next fork spawned by this thread.
230This call is only required if the parallel construct has a `num_threads` clause.
231*/
232void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
233 kmp_int32 num_threads) {
234 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
235 global_tid, num_threads));
236 __kmp_assert_valid_gtid(global_tid);
237 __kmp_push_num_threads(loc, global_tid, num_threads);
238}
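/* Illustrative sketch, not part of the original source: for a construct such as
   "#pragma omp parallel num_threads(4)" a compiler typically emits the push
   immediately before the fork; "loc0" and "outlined_body" are hypothetical names.

   kmp_int32 gtid = __kmpc_global_thread_num(&loc0);
   __kmpc_push_num_threads(&loc0, gtid, 4);
   __kmpc_fork_call(&loc0, 0, (kmpc_micro)outlined_body);

   The pushed value applies only to the next fork from this thread and is popped
   automatically (see __kmpc_pop_num_threads below).
*/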
239
241 kmp_int32 num_threads, int severity,
242 const char *message) {
243 __kmp_push_num_threads(loc, global_tid, num_threads);
244 __kmp_set_strict_num_threads(loc, global_tid, severity, message);
245}
246
247/*!
248@ingroup PARALLEL
249@param loc source location information
250@param global_tid global thread number
251@param list_length number of entries in the num_threads_list array
252@param num_threads_list array of numbers of threads requested for this parallel
253construct and subsequent nested parallel constructs
254
255Set the number of threads to be used by the next fork spawned by this thread,
256and some nested forks as well.
257This call is only required if the parallel construct has a `num_threads` clause
258that has a list of integers as the argument.
259*/
261 kmp_uint32 list_length,
262 kmp_int32 *num_threads_list) {
263 KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=",
264 global_tid));
265 KA_TRACE(20, ("%d", num_threads_list[0]));
266#ifdef KMP_DEBUG
267 for (kmp_uint32 i = 1; i < list_length; ++i)
268 KA_TRACE(20, (", %d", num_threads_list[i]));
269#endif
270 KA_TRACE(20, ("\n"));
271
272 __kmp_assert_valid_gtid(global_tid);
273 __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
274}
275
277 kmp_uint32 list_length,
278 kmp_int32 *num_threads_list,
279 int severity, const char *message) {
280 __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
281 __kmp_set_strict_num_threads(loc, global_tid, severity, message);
282}
283
285 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
286 /* the num_threads are automatically popped */
287}
288
290 kmp_int32 proc_bind) {
291 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
292 proc_bind));
293 __kmp_assert_valid_gtid(global_tid);
294 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
295}
296
297/*!
298@ingroup PARALLEL
299@param loc source location information
300@param argc total number of arguments in the ellipsis
301@param microtask pointer to callback routine consisting of outlined parallel
302construct
303@param ... pointers to shared variables that aren't global
304
305Do the actual fork and call the microtask in the relevant number of threads.
306*/
307void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
308 int gtid = __kmp_entry_gtid();
309
310#if (KMP_STATS_ENABLED)
311 // If we were in a serial region, then stop the serial timer, record
312 // the event, and start parallel region timer
313 stats_state_e previous_state = KMP_GET_THREAD_STATE();
314 if (previous_state == stats_state_e::SERIAL_REGION) {
315 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
316 } else {
317 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
318 }
319 int inParallel = __kmpc_in_parallel(loc);
320 if (inParallel) {
321 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
322 } else {
323 KMP_COUNT_BLOCK(OMP_PARALLEL);
324 }
325#endif
326
327 // maybe to save thr_state is enough here
328 {
329 va_list ap;
330 va_start(ap, microtask);
331
332#if OMPT_SUPPORT
333 ompt_frame_t *ompt_frame;
334 if (ompt_enabled.enabled) {
335 kmp_info_t *master_th = __kmp_threads[gtid];
336 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
337 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
338 }
339 OMPT_STORE_RETURN_ADDRESS(gtid);
340#endif
341
342#if INCLUDE_SSC_MARKS
343 SSC_MARK_FORKING();
344#endif
346 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
348 kmp_va_addr_of(ap));
349#if INCLUDE_SSC_MARKS
350 SSC_MARK_JOINING();
351#endif
352 __kmp_join_call(loc, gtid
353#if OMPT_SUPPORT
354 ,
356#endif
357 );
358
359 va_end(ap);
360
361#if OMPT_SUPPORT
362 if (ompt_enabled.enabled) {
363 ompt_frame->enter_frame = ompt_data_none;
364 }
365#endif
366 }
367
368#if KMP_STATS_ENABLED
369 if (previous_state == stats_state_e::SERIAL_REGION) {
370 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
371 KMP_SET_THREAD_STATE(previous_state);
372 } else {
374 }
375#endif // KMP_STATS_ENABLED
376}
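/* Illustrative sketch, not part of the original source: the outlined parallel
   body ("microtask") receives the global and bound thread ids by pointer,
   followed by one pointer per shared variable passed through the ellipsis.
   All names below are hypothetical.

   void outlined_body(kmp_int32 *gtid, kmp_int32 *btid, int *shared_sum) {
     // parallel-region body, executed by every thread of the team
   }

   int sum = 0;
   __kmpc_fork_call(&loc0, 1, (kmpc_micro)outlined_body, &sum);
*/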
377
378/*!
379@ingroup PARALLEL
380@param loc source location information
381@param microtask pointer to callback routine consisting of outlined parallel
382construct
383@param cond condition for running in parallel
384@param args struct of pointers to shared variables that aren't global
385
386Perform a fork only if the condition is true.
387*/
388void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
389 kmp_int32 cond, void *args) {
390 int gtid = __kmp_entry_gtid();
391 if (cond) {
392 if (args)
394 else
396 } else {
398
399#if OMPT_SUPPORT
400 void *exit_frame_ptr;
401#endif
402
403 if (args)
405 /*npr=*/0,
406 /*argc=*/1, &args
407#if OMPT_SUPPORT
408 ,
409 &exit_frame_ptr
410#endif
411 );
412 else
414 /*npr=*/0,
415 /*argc=*/0,
416 /*args=*/nullptr
417#if OMPT_SUPPORT
418 ,
419 &exit_frame_ptr
420#endif
421 );
422
424 }
425}
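/* Illustrative sketch, not part of the original source: for
   "#pragma omp parallel if(cond)" a compiler may emit a single call and let the
   runtime decide; when cond_value is zero the body is invoked directly on the
   encountering thread inside a serialized parallel region, as the code above
   shows. "cond_value" and the other names are hypothetical.

   __kmpc_fork_call_if(&loc0, 1, (kmpc_micro)outlined_body, cond_value, &sum);
*/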
426
427/*!
428@ingroup PARALLEL
429@param loc source location information
430@param global_tid global thread number
431@param num_teams number of teams requested for the teams construct
432@param num_threads number of threads per team requested for the teams construct
433
434Set the number of teams to be used by the teams construct.
435This call is only required if the teams construct has a `num_teams` clause
436or a `thread_limit` clause (or both).
437*/
438void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
439 kmp_int32 num_teams, kmp_int32 num_threads) {
440 KA_TRACE(20,
441 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
442 global_tid, num_teams, num_threads));
443 __kmp_assert_valid_gtid(global_tid);
444 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
445}
446
447/*!
448@ingroup PARALLEL
449@param loc source location information
450@param global_tid global thread number
451@param thread_limit limit on number of threads which can be created within the
452current task
453
454Set the thread_limit for the current task
455This call is there to support `thread_limit` clause on the `target` construct
456*/
457void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
458 kmp_int32 thread_limit) {
459 __kmp_assert_valid_gtid(global_tid);
460 kmp_info_t *thread = __kmp_threads[global_tid];
461 if (thread_limit > 0)
462 thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
463}
464
465/*!
466@ingroup PARALLEL
467@param loc source location information
468@param global_tid global thread number
469@param num_teams_lb lower bound on number of teams requested for the teams
470construct
471@param num_teams_ub upper bound on number of teams requested for the teams
472construct
473@param num_threads number of threads per team requested for the teams construct
474
475Set the number of teams to be used by the teams construct. The number of initial
476teams created will be greater than or equal to the lower bound and less than or
477equal to the upper bound.
478This call is only required if the teams construct has a `num_teams` clause
479or a `thread_limit` clause (or both).
480*/
481void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
482 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
483 kmp_int32 num_threads) {
484 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
485 " num_teams_ub=%d num_threads=%d\n",
486 global_tid, num_teams_lb, num_teams_ub, num_threads));
487 __kmp_assert_valid_gtid(global_tid);
488 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
489 num_threads);
490}
491
492/*!
493@ingroup PARALLEL
494@param loc source location information
495@param argc total number of arguments in the ellipsis
496@param microtask pointer to callback routine consisting of outlined teams
497construct
498@param ... pointers to shared variables that aren't global
499
500Do the actual fork and call the microtask in the relevant number of threads.
501*/
502void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
503 ...) {
504 int gtid = __kmp_entry_gtid();
505 kmp_info_t *this_thr = __kmp_threads[gtid];
506 va_list ap;
507 va_start(ap, microtask);
508
509#if KMP_STATS_ENABLED
510 KMP_COUNT_BLOCK(OMP_TEAMS);
511 stats_state_e previous_state = KMP_GET_THREAD_STATE();
512 if (previous_state == stats_state_e::SERIAL_REGION) {
513 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
514 } else {
515 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
516 }
517#endif
518
519 // remember teams entry point and nesting level
520 this_thr->th.th_teams_microtask = microtask;
521 this_thr->th.th_teams_level =
522 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
523
524#if OMPT_SUPPORT
525 kmp_team_t *parent_team = this_thr->th.th_team;
526 int tid = __kmp_tid_from_gtid(gtid);
527 if (ompt_enabled.enabled) {
528 parent_team->t.t_implicit_task_taskdata[tid]
529 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
530 }
531 OMPT_STORE_RETURN_ADDRESS(gtid);
532#endif
533
534 // check if __kmpc_push_num_teams called, set default number of teams
535 // otherwise
536 if (this_thr->th.th_teams_size.nteams == 0) {
537 __kmp_push_num_teams(loc, gtid, 0, 0);
538 }
539 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
540 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
541 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
542
544 loc, gtid, fork_context_intel, argc,
547 __kmp_join_call(loc, gtid
548#if OMPT_SUPPORT
549 ,
551#endif
552 );
553
554 // Pop current CG root off list
555 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
556 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
557 this_thr->th.th_cg_roots = tmp->up;
558 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
559 " to node %p. cg_nthreads was %d\n",
560 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
562 int i = tmp->cg_nthreads--;
563 if (i == 1) { // check if we are the last thread in CG (not always the case)
564 __kmp_free(tmp);
565 }
566 // Restore current task's thread_limit from CG root
567 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
568 this_thr->th.th_current_task->td_icvs.thread_limit =
569 this_thr->th.th_cg_roots->cg_thread_limit;
570
571 this_thr->th.th_teams_microtask = NULL;
572 this_thr->th.th_teams_level = 0;
573 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
574 va_end(ap);
575#if KMP_STATS_ENABLED
576 if (previous_state == stats_state_e::SERIAL_REGION) {
577 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
578 KMP_SET_THREAD_STATE(previous_state);
579 } else {
581 }
582#endif // KMP_STATS_ENABLED
583}
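/* Illustrative sketch, not part of the original source: for a construct such as
   "#pragma omp teams num_teams(4) thread_limit(8)" a compiler typically emits

   __kmpc_push_num_teams(&loc0, gtid, 4, 8);
   __kmpc_fork_teams(&loc0, 0, (kmpc_micro)teams_outlined);

   where "teams_outlined" has the same (gtid, btid, shared args...) signature as a
   parallel microtask and is run by the primary thread of each team.
*/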
584
585// I don't think this function should ever have been exported.
586// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
587// openmp code ever called it, but it's been exported from the RTL for so
588// long that I'm afraid to remove the definition.
589int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
590
591/*!
592@ingroup PARALLEL
593@param loc source location information
594@param global_tid global thread number
595
596Enter a serialized parallel construct. This interface is used to handle a
597conditional parallel region, like this,
598@code
599#pragma omp parallel if (condition)
600@endcode
601when the condition is false.
602*/
603void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
604 // The implementation is now in kmp_runtime.cpp so that it can share static
605 // functions with kmp_fork_call since the tasks to be done are similar in
606 // each case.
607 __kmp_assert_valid_gtid(global_tid);
608#if OMPT_SUPPORT
609 OMPT_STORE_RETURN_ADDRESS(global_tid);
610#endif
611 __kmp_serialized_parallel(loc, global_tid);
612}
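/* Illustrative sketch, not part of the original source: when the if clause of a
   parallel construct is false, a compiler may emit the serialized pair around a
   direct call of the outlined body, so the region still behaves as an inactive
   parallel region of one thread. "zero" holds the bound thread id (0); all names
   are hypothetical.

   __kmpc_serialized_parallel(&loc0, gtid);
   kmp_int32 zero = 0;
   outlined_body(&gtid, &zero, &sum);
   __kmpc_end_serialized_parallel(&loc0, gtid);
*/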
613
614/*!
615@ingroup PARALLEL
616@param loc source location information
617@param global_tid global thread number
618
619Leave a serialized parallel construct.
620*/
621void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
622 kmp_internal_control_t *top;
623 kmp_info_t *this_thr;
624 kmp_team_t *serial_team;
625
626 KC_TRACE(10,
627 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
628
629 /* skip all this code for autopar serialized loops since it results in
630 unacceptable overhead */
631 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
632 return;
633
634 // Not autopar code
635 __kmp_assert_valid_gtid(global_tid);
638
640
641 this_thr = __kmp_threads[global_tid];
642 serial_team = this_thr->th.th_serial_team;
643
644 kmp_task_team_t *task_team = this_thr->th.th_task_team;
645 // we need to wait for the proxy tasks before finishing the thread
646 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
648 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
649
650 KMP_MB();
651 KMP_DEBUG_ASSERT(serial_team);
652 KMP_ASSERT(serial_team->t.t_serialized);
653 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
654 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
655 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
656 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
657
658#if OMPT_SUPPORT
659 if (ompt_enabled.enabled &&
660 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
661 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
662 if (ompt_enabled.ompt_callback_implicit_task) {
663 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
664 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
665 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
666 }
667
668 // reset/clear the task id only after unlinking the task
669 ompt_data_t *parent_task_data;
670 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
671
672 if (ompt_enabled.ompt_callback_parallel_end) {
673 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
674 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
675 ompt_parallel_invoker_program | ompt_parallel_team,
676 OMPT_LOAD_RETURN_ADDRESS(global_tid));
677 }
679 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
680 }
681#endif
682
683 /* If necessary, pop the internal control stack values and replace the team
684 * values */
685 top = serial_team->t.t_control_stack_top;
686 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
687 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
688 serial_team->t.t_control_stack_top = top->next;
689 __kmp_free(top);
690 }
691
692 /* pop dispatch buffers stack */
693 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
694 {
695 dispatch_private_info_t *disp_buffer =
696 serial_team->t.t_dispatch->th_disp_buffer;
697 serial_team->t.t_dispatch->th_disp_buffer =
698 serial_team->t.t_dispatch->th_disp_buffer->next;
699 __kmp_free(disp_buffer);
700 }
701
702 /* pop the task team stack */
703 if (serial_team->t.t_serialized > 1) {
704 __kmp_pop_task_team_node(this_thr, serial_team);
705 }
706
707 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
708
709 --serial_team->t.t_serialized;
710 if (serial_team->t.t_serialized == 0) {
711
712 /* return to the parallel section */
713
714#if KMP_ARCH_X86 || KMP_ARCH_X86_64
715 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
716 __kmp_clear_x87_fpu_status_word();
717 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
718 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
719 }
720#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
721
723#if OMPD_SUPPORT
724 if (ompd_state & OMPD_ENABLE_BP)
725 ompd_bp_parallel_end();
726#endif
727
728 this_thr->th.th_team = serial_team->t.t_parent;
729 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
730
731 /* restore values cached in the thread */
732 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
733 this_thr->th.th_team_master =
734 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
735 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
736
737 /* TODO the below shouldn't need to be adjusted for serialized teams */
738 this_thr->th.th_dispatch =
739 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
740
741 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
742 this_thr->th.th_current_task->td_flags.executing = 1;
743
745 // Restore task state from serial team structure
746 KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||
747 serial_team->t.t_primary_task_state == 1);
748 this_thr->th.th_task_state =
749 (kmp_uint8)serial_team->t.t_primary_task_state;
750 // Copy the task team from the new child / old parent team to the thread.
751 this_thr->th.th_task_team =
752 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
753 KA_TRACE(20,
754 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
755 "team %p\n",
756 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
757 }
758#if KMP_AFFINITY_SUPPORTED
759 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
760 __kmp_reset_root_init_mask(global_tid);
761 }
762#endif
763 } else {
765 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
766 "depth of serial team %p to %d\n",
767 global_tid, serial_team, serial_team->t.t_serialized));
768 }
769 }
770
771 serial_team->t.t_level--;
773 __kmp_pop_parallel(global_tid, NULL);
774#if OMPT_SUPPORT
776 this_thr->th.ompt_thread_info.state =
777 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
778 : ompt_state_work_parallel);
779#endif
780}
781
782/*!
783@ingroup SYNCHRONIZATION
784@param loc source location information.
785
786Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
787depending on the memory ordering convention obeyed by the compiler
788even that may not be necessary).
789*/
790void __kmpc_flush(ident_t *loc) {
791 KC_TRACE(10, ("__kmpc_flush: called\n"));
792
793 /* need explicit __mf() here since use volatile instead in library */
794 KMP_MFENCE(); /* Flush all pending memory write invalidates. */
795
796#if OMPT_SUPPORT && OMPT_OPTIONAL
797 if (ompt_enabled.ompt_callback_flush) {
798 ompt_callbacks.ompt_callback(ompt_callback_flush)(
800 }
801#endif
802}
803
804/* -------------------------------------------------------------------------- */
805/*!
806@ingroup SYNCHRONIZATION
807@param loc source location information
808@param global_tid thread id.
809
810Execute a barrier.
811*/
812void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
813 KMP_COUNT_BLOCK(OMP_BARRIER);
814 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
815 __kmp_assert_valid_gtid(global_tid);
816
819
821
823 if (loc == 0) {
824 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
825 }
826 __kmp_check_barrier(global_tid, ct_barrier, loc);
827 }
828
829#if OMPT_SUPPORT
830 ompt_frame_t *ompt_frame;
831 if (ompt_enabled.enabled) {
832 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
833 if (ompt_frame->enter_frame.ptr == NULL)
834 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
835 }
836 OMPT_STORE_RETURN_ADDRESS(global_tid);
837#endif
838 __kmp_threads[global_tid]->th.th_ident = loc;
839 // TODO: explicit barrier_wait_id:
840 // this function is called when 'barrier' directive is present or
841 // implicit barrier at the end of a worksharing construct.
842 // 1) better to add a per-thread barrier counter to a thread data structure
843 // 2) set to 0 when a new team is created
844 // 4) no sync is required
845
846 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
847#if OMPT_SUPPORT && OMPT_OPTIONAL
848 if (ompt_enabled.enabled) {
849 ompt_frame->enter_frame = ompt_data_none;
850 }
851#endif
852}
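/* Illustrative sketch, not part of the original source: an explicit
   "#pragma omp barrier" lowers to a single runtime call made by every thread of
   the current team, e.g.

   __kmpc_barrier(&loc0, __kmpc_global_thread_num(&loc0));
*/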
853
854/* The BARRIER for a MASTER section is always explicit */
855/*!
856@ingroup WORK_SHARING
857@param loc source location information.
858@param global_tid global thread number.
859@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
860*/
861kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
862 int status = 0;
863
864 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
865 __kmp_assert_valid_gtid(global_tid);
866
869
871
872 if (KMP_MASTER_GTID(global_tid)) {
873 KMP_COUNT_BLOCK(OMP_MASTER);
874 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
875 status = 1;
876 }
877
878#if OMPT_SUPPORT && OMPT_OPTIONAL
879 if (status) {
880 if (ompt_enabled.ompt_callback_masked) {
881 kmp_info_t *this_thr = __kmp_threads[global_tid];
882 kmp_team_t *team = this_thr->th.th_team;
883
884 int tid = __kmp_tid_from_gtid(global_tid);
885 ompt_callbacks.ompt_callback(ompt_callback_masked)(
886 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
887 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
889 }
890 }
891#endif
892
894#if KMP_USE_DYNAMIC_LOCK
895 if (status)
896 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
897 else
898 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
899#else
900 if (status)
901 __kmp_push_sync(global_tid, ct_master, loc, NULL);
902 else
903 __kmp_check_sync(global_tid, ct_master, loc, NULL);
904#endif
905 }
906
907 return status;
908}
909
910/*!
911@ingroup WORK_SHARING
912@param loc source location information.
913@param global_tid global thread number.
914
915Mark the end of a <tt>master</tt> region. This should only be called by the
916thread that executes the <tt>master</tt> region.
917*/
918void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
919 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
920 __kmp_assert_valid_gtid(global_tid);
923
924#if OMPT_SUPPORT && OMPT_OPTIONAL
925 kmp_info_t *this_thr = __kmp_threads[global_tid];
926 kmp_team_t *team = this_thr->th.th_team;
927 if (ompt_enabled.ompt_callback_masked) {
928 int tid = __kmp_tid_from_gtid(global_tid);
929 ompt_callbacks.ompt_callback(ompt_callback_masked)(
930 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
931 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
933 }
934#endif
935
937 if (KMP_MASTER_GTID(global_tid))
938 __kmp_pop_sync(global_tid, ct_master, loc);
939 }
940}
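/* Illustrative sketch, not part of the original source: a master region is
   lowered as a guarded pair that every thread executes but only the primary
   thread enters; note that __kmpc_end_master is called only on the path where
   __kmpc_master returned 1.

   if (__kmpc_master(&loc0, gtid)) {
     // master-region body
     __kmpc_end_master(&loc0, gtid);
   }
*/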
941
942/*!
943@ingroup WORK_SHARING
944@param loc source location information.
945@param global_tid global thread number.
946@param filter result of evaluating filter clause on thread global_tid, or zero
947if no filter clause present
948@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
949*/
950kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
951 int status = 0;
952 int tid;
953 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
954 __kmp_assert_valid_gtid(global_tid);
955
958
960
961 tid = __kmp_tid_from_gtid(global_tid);
962 if (tid == filter) {
963 KMP_COUNT_BLOCK(OMP_MASKED);
964 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
965 status = 1;
966 }
967
968#if OMPT_SUPPORT && OMPT_OPTIONAL
969 if (status) {
970 if (ompt_enabled.ompt_callback_masked) {
971 kmp_info_t *this_thr = __kmp_threads[global_tid];
972 kmp_team_t *team = this_thr->th.th_team;
973 ompt_callbacks.ompt_callback(ompt_callback_masked)(
974 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
975 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
977 }
978 }
979#endif
980
982#if KMP_USE_DYNAMIC_LOCK
983 if (status)
984 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
985 else
986 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
987#else
988 if (status)
989 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
990 else
991 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
992#endif
993 }
994
995 return status;
996}
997
998/*!
999@ingroup WORK_SHARING
1000@param loc source location information.
1001@param global_tid global thread number.
1002
1003Mark the end of a <tt>masked</tt> region. This should only be called by the
1004thread that executes the <tt>masked</tt> region.
1005*/
1006void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
1007 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
1008 __kmp_assert_valid_gtid(global_tid);
1010
1011#if OMPT_SUPPORT && OMPT_OPTIONAL
1012 kmp_info_t *this_thr = __kmp_threads[global_tid];
1013 kmp_team_t *team = this_thr->th.th_team;
1014 if (ompt_enabled.ompt_callback_masked) {
1015 int tid = __kmp_tid_from_gtid(global_tid);
1016 ompt_callbacks.ompt_callback(ompt_callback_masked)(
1017 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
1018 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1020 }
1021#endif
1022
1024 __kmp_pop_sync(global_tid, ct_masked, loc);
1025 }
1026}
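/* Illustrative sketch, not part of the original source: for
   "#pragma omp masked filter(2)" the compiler evaluates the filter expression
   and passes it to the runtime; only the thread whose team-local id equals the
   filter value executes the body and the matching end call.

   if (__kmpc_masked(&loc0, gtid, 2)) {
     // masked-region body
     __kmpc_end_masked(&loc0, gtid);
   }
*/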
1027
1028/*!
1029@ingroup WORK_SHARING
1030@param loc source location information.
1031@param gtid global thread number.
1032
1033Start execution of an <tt>ordered</tt> construct.
1034*/
1035void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
1036 int cid = 0;
1037 kmp_info_t *th;
1039
1040 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
1042
1045
1047
1048#if USE_ITT_BUILD
1049 __kmp_itt_ordered_prep(gtid);
1050// TODO: ordered_wait_id
1051#endif /* USE_ITT_BUILD */
1052
1053 th = __kmp_threads[gtid];
1054
1055#if OMPT_SUPPORT && OMPT_OPTIONAL
1056 kmp_team_t *team;
1057 ompt_wait_id_t lck;
1058 void *codeptr_ra;
1059 OMPT_STORE_RETURN_ADDRESS(gtid);
1060 if (ompt_enabled.enabled) {
1061 team = __kmp_team_from_gtid(gtid);
1062 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1063 /* OMPT state update */
1064 th->th.ompt_thread_info.wait_id = lck;
1065 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1066
1067 /* OMPT event callback */
1068 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1069 if (ompt_enabled.ompt_callback_mutex_acquire) {
1070 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1071 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1072 codeptr_ra);
1073 }
1074 }
1075#endif
1076
1077 if (th->th.th_dispatch->th_deo_fcn != 0)
1078 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1079 else
1080 __kmp_parallel_deo(&gtid, &cid, loc);
1081
1082#if OMPT_SUPPORT && OMPT_OPTIONAL
1083 if (ompt_enabled.enabled) {
1084 /* OMPT state update */
1085 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1086 th->th.ompt_thread_info.wait_id = 0;
1087
1088 /* OMPT event callback */
1089 if (ompt_enabled.ompt_callback_mutex_acquired) {
1090 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1091 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1092 }
1093 }
1094#endif
1095
1096#if USE_ITT_BUILD
1097 __kmp_itt_ordered_start(gtid);
1098#endif /* USE_ITT_BUILD */
1099}
1100
1101/*!
1102@ingroup WORK_SHARING
1103@param loc source location information.
1104@param gtid global thread number.
1105
1106End execution of an <tt>ordered</tt> construct.
1107*/
1108void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1109 int cid = 0;
1110 kmp_info_t *th;
1111
1112 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1114
1115#if USE_ITT_BUILD
1116 __kmp_itt_ordered_end(gtid);
1117// TODO: ordered_wait_id
1118#endif /* USE_ITT_BUILD */
1119
1120 th = __kmp_threads[gtid];
1121
1122 if (th->th.th_dispatch->th_dxo_fcn != 0)
1123 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1124 else
1125 __kmp_parallel_dxo(&gtid, &cid, loc);
1126
1127#if OMPT_SUPPORT && OMPT_OPTIONAL
1128 OMPT_STORE_RETURN_ADDRESS(gtid);
1129 if (ompt_enabled.ompt_callback_mutex_released) {
1130 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1131 ompt_mutex_ordered,
1132 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1133 ->t.t_ordered.dt.t_value,
1134 OMPT_LOAD_RETURN_ADDRESS(gtid));
1135 }
1136#endif
1137}
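/* Illustrative sketch, not part of the original source: inside a loop with an
   ordered clause, each iteration's ordered block is bracketed by the pair above
   so the blocks run in iteration order.

   __kmpc_ordered(&loc0, gtid);
   // code that must execute in iteration order
   __kmpc_end_ordered(&loc0, gtid);
*/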
1138
1139#if KMP_USE_DYNAMIC_LOCK
1140
1141static __forceinline void
1142__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1143 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1144 // Pointer to the allocated indirect lock is written to crit, while indexing
1145 // is ignored.
1146 void *idx;
1147 kmp_indirect_lock_t **lck;
1148 lck = (kmp_indirect_lock_t **)crit;
1149 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1150 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1151 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1152 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1153 KA_TRACE(20,
1154 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1155#if USE_ITT_BUILD
1156 __kmp_itt_critical_creating(ilk->lock, loc);
1157#endif
1158 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1159 if (status == 0) {
1160#if USE_ITT_BUILD
1161 __kmp_itt_critical_destroyed(ilk->lock);
1162#endif
1163 // We don't really need to destroy the unclaimed lock here since it will be
1164 // cleaned up at program exit.
1165 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1166 }
1167 KMP_DEBUG_ASSERT(*lck != NULL);
1168}
1169
1170// Fast-path acquire tas lock
1171#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1172 { \
1173 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1174 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1175 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1176 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1177 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1178 kmp_uint32 spins; \
1179 KMP_FSYNC_PREPARE(l); \
1180 KMP_INIT_YIELD(spins); \
1181 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1182 do { \
1183 if (TCR_4(__kmp_nth) > \
1184 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1185 KMP_YIELD(TRUE); \
1186 } else { \
1187 KMP_YIELD_SPIN(spins); \
1188 } \
1189 __kmp_spin_backoff(&backoff); \
1190 } while ( \
1191 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1192 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1193 } \
1194 KMP_FSYNC_ACQUIRED(l); \
1195 }
1196
1197// Fast-path test tas lock
1198#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1199 { \
1200 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1201 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1202 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1203 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1204 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1205 }
1206
1207// Fast-path release tas lock
1208#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1209 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1210
1211#if KMP_USE_FUTEX
1212
1213#include <sys/syscall.h>
1214#include <unistd.h>
1215#ifndef FUTEX_WAIT
1216#define FUTEX_WAIT 0
1217#endif
1218#ifndef FUTEX_WAKE
1219#define FUTEX_WAKE 1
1220#endif
1221
1222// Fast-path acquire futex lock
1223#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1224 { \
1225 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1226 kmp_int32 gtid_code = (gtid + 1) << 1; \
1227 KMP_MB(); \
1228 KMP_FSYNC_PREPARE(ftx); \
1229 kmp_int32 poll_val; \
1230 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1231 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1232 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1233 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1234 if (!cond) { \
1235 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1236 poll_val | \
1237 KMP_LOCK_BUSY(1, futex))) { \
1238 continue; \
1239 } \
1240 poll_val |= KMP_LOCK_BUSY(1, futex); \
1241 } \
1242 kmp_int32 rc; \
1243 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1244 NULL, NULL, 0)) != 0) { \
1245 continue; \
1246 } \
1247 gtid_code |= 1; \
1248 } \
1249 KMP_FSYNC_ACQUIRED(ftx); \
1250 }
1251
1252// Fast-path test futex lock
1253#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1254 { \
1255 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1256 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1257 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1258 KMP_FSYNC_ACQUIRED(ftx); \
1259 rc = TRUE; \
1260 } else { \
1261 rc = FALSE; \
1262 } \
1263 }
1264
1265// Fast-path release futex lock
1266#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1267 { \
1268 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1269 KMP_MB(); \
1270 KMP_FSYNC_RELEASING(ftx); \
1271 kmp_int32 poll_val = \
1272 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1273 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1274 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1275 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1276 } \
1277 KMP_MB(); \
1278 KMP_YIELD_OVERSUB(); \
1279 }
1280
1281#endif // KMP_USE_FUTEX
1282
1283#else // KMP_USE_DYNAMIC_LOCK
1284
1286 ident_t const *loc,
1287 kmp_int32 gtid) {
1289
1290 // Because of the double-check, the following load doesn't need to be volatile
1292
1293 if (lck == NULL) {
1294 void *idx;
1295
1296 // Allocate & initialize the lock.
1297 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1301#if USE_ITT_BUILD
1302 __kmp_itt_critical_creating(lck);
1303// __kmp_itt_critical_creating() should be called *before* the first usage
1304// of the underlying lock. It is the only place where we can guarantee it. There
1305// is a chance the lock will be destroyed with no usage, but that is not a
1306// problem, because this is not a real event seen by the user but rather sets a
1307// name for the object (lock). See more details in kmp_itt.h.
1308#endif /* USE_ITT_BUILD */
1309
1310 // Use a cmpxchg instruction to slam the start of the critical section with
1311 // the lock pointer. If another thread beat us to it, deallocate the lock,
1312 // and use the lock that the other thread allocated.
1313 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1314
1315 if (status == 0) {
1316// Deallocate the lock and reload the value.
1317#if USE_ITT_BUILD
1318 __kmp_itt_critical_destroyed(lck);
1319// Let ITT know the lock is destroyed and the same memory location may be reused
1320// for another purpose.
1321#endif /* USE_ITT_BUILD */
1323 __kmp_user_lock_free(&idx, gtid, lck);
1324 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1325 KMP_DEBUG_ASSERT(lck != NULL);
1326 }
1327 }
1328 return lck;
1329}
1330
1331#endif // KMP_USE_DYNAMIC_LOCK
1332
1333/*!
1334@ingroup WORK_SHARING
1335@param loc source location information.
1336@param global_tid global thread number.
1337@param crit identity of the critical section. This could be a pointer to a lock
1338associated with the critical section, or some other suitably unique value.
1339
1340Enter code protected by a `critical` construct.
1341This function blocks until the executing thread can enter the critical section.
1342*/
1343void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1344 kmp_critical_name *crit) {
1345#if KMP_USE_DYNAMIC_LOCK
1346#if OMPT_SUPPORT && OMPT_OPTIONAL
1347 OMPT_STORE_RETURN_ADDRESS(global_tid);
1348#endif // OMPT_SUPPORT
1349 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1350#else
1351 KMP_COUNT_BLOCK(OMP_CRITICAL);
1352#if OMPT_SUPPORT && OMPT_OPTIONAL
1353 ompt_state_t prev_state = ompt_state_undefined;
1355#endif
1357
1358 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1359 __kmp_assert_valid_gtid(global_tid);
1360
1361 // TODO: add THR_OVHD_STATE
1362
1363 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1365
1366 if ((__kmp_user_lock_kind == lk_tas) &&
1367 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1369 }
1370#if KMP_USE_FUTEX
1371 else if ((__kmp_user_lock_kind == lk_futex) &&
1372 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1374 }
1375#endif
1376 else { // ticket, queuing or drdpa
1378 }
1379
1381 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1382
1383 // since the critical directive binds to all threads, not just the current
1384 // team we have to check this even if we are in a serialized team.
1385 // also, even if we are the uber thread, we still have to conduct the lock,
1386 // as we have to contend with sibling threads.
1387
1388#if USE_ITT_BUILD
1389 __kmp_itt_critical_acquiring(lck);
1390#endif /* USE_ITT_BUILD */
1391#if OMPT_SUPPORT && OMPT_OPTIONAL
1392 OMPT_STORE_RETURN_ADDRESS(gtid);
1393 void *codeptr_ra = NULL;
1394 if (ompt_enabled.enabled) {
1395 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1396 /* OMPT state update */
1397 prev_state = ti.state;
1398 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1399 ti.state = ompt_state_wait_critical;
1400
1401 /* OMPT event callback */
1402 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1403 if (ompt_enabled.ompt_callback_mutex_acquire) {
1404 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1405 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1406 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1407 }
1408 }
1409#endif
1410 // Value of 'crit' should be good for using as a critical_id of the critical
1411 // section directive.
1413
1414#if USE_ITT_BUILD
1415 __kmp_itt_critical_acquired(lck);
1416#endif /* USE_ITT_BUILD */
1417#if OMPT_SUPPORT && OMPT_OPTIONAL
1418 if (ompt_enabled.enabled) {
1419 /* OMPT state update */
1420 ti.state = prev_state;
1421 ti.wait_id = 0;
1422
1423 /* OMPT event callback */
1424 if (ompt_enabled.ompt_callback_mutex_acquired) {
1425 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1426 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1427 }
1428 }
1429#endif
1431
1432 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1433 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1434#endif // KMP_USE_DYNAMIC_LOCK
1435}
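/* Illustrative sketch, not part of the original source: for
   "#pragma omp critical (update)" the compiler emits one zero-initialized
   kmp_critical_name per critical name and brackets the region with it;
   "crit_update" is a hypothetical name.

   static kmp_critical_name crit_update = {0};

   __kmpc_critical(&loc0, gtid, &crit_update);
   // critical-section body
   __kmpc_end_critical(&loc0, gtid, &crit_update);
*/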
1436
1437#if KMP_USE_DYNAMIC_LOCK
1438
1439// Converts the given hint to an internal lock implementation
1440static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1441#if KMP_USE_TSX
1442#define KMP_TSX_LOCK(seq) lockseq_##seq
1443#else
1444#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1445#endif
1446
1447#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1448#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1449#else
1450#define KMP_CPUINFO_RTM 0
1451#endif
1452
1453 // Hints that do not require further logic
1454 if (hint & kmp_lock_hint_hle)
1455 return KMP_TSX_LOCK(hle);
1456 if (hint & kmp_lock_hint_rtm)
1457 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1458 if (hint & kmp_lock_hint_adaptive)
1459 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1460
1461 // Rule out conflicting hints first by returning the default lock
1462 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1463 return __kmp_user_lock_seq;
1464 if ((hint & omp_lock_hint_speculative) &&
1465 (hint & omp_lock_hint_nonspeculative))
1466 return __kmp_user_lock_seq;
1467
1468 // Do not even consider speculation when it appears to be contended
1469 if (hint & omp_lock_hint_contended)
1470 return lockseq_queuing;
1471
1472 // Uncontended lock without speculation
1473 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1474 return lockseq_tas;
1475
1476 // Use RTM lock for speculation
1477 if (hint & omp_lock_hint_speculative)
1478 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1479
1480 return __kmp_user_lock_seq;
1481}
1482
1483#if OMPT_SUPPORT && OMPT_OPTIONAL
1484#if KMP_USE_DYNAMIC_LOCK
1485static kmp_mutex_impl_t
1486__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1487 if (user_lock) {
1488 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1489 case 0:
1490 break;
1491#if KMP_USE_FUTEX
1492 case locktag_futex:
1493 return kmp_mutex_impl_queuing;
1494#endif
1495 case locktag_tas:
1496 return kmp_mutex_impl_spin;
1497#if KMP_USE_TSX
1498 case locktag_hle:
1499 case locktag_rtm_spin:
1500 return kmp_mutex_impl_speculative;
1501#endif
1502 default:
1503 return kmp_mutex_impl_none;
1504 }
1505 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1506 }
1507 KMP_ASSERT(ilock);
1508 switch (ilock->type) {
1509#if KMP_USE_TSX
1510 case locktag_adaptive:
1511 case locktag_rtm_queuing:
1512 return kmp_mutex_impl_speculative;
1513#endif
1514 case locktag_nested_tas:
1515 return kmp_mutex_impl_spin;
1516#if KMP_USE_FUTEX
1517 case locktag_nested_futex:
1518#endif
1519 case locktag_ticket:
1520 case locktag_queuing:
1521 case locktag_drdpa:
1522 case locktag_nested_ticket:
1523 case locktag_nested_queuing:
1524 case locktag_nested_drdpa:
1525 return kmp_mutex_impl_queuing;
1526 default:
1527 return kmp_mutex_impl_none;
1528 }
1529}
1530#else
1531// For locks without dynamic binding
1532static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1533 switch (__kmp_user_lock_kind) {
1534 case lk_tas:
1535 return kmp_mutex_impl_spin;
1536#if KMP_USE_FUTEX
1537 case lk_futex:
1538#endif
1539 case lk_ticket:
1540 case lk_queuing:
1541 case lk_drdpa:
1542 return kmp_mutex_impl_queuing;
1543#if KMP_USE_TSX
1544 case lk_hle:
1545 case lk_rtm_queuing:
1546 case lk_rtm_spin:
1547 case lk_adaptive:
1548 return kmp_mutex_impl_speculative;
1549#endif
1550 default:
1551 return kmp_mutex_impl_none;
1552 }
1553}
1554#endif // KMP_USE_DYNAMIC_LOCK
1555#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1556
1557/*!
1558@ingroup WORK_SHARING
1559@param loc source location information.
1560@param global_tid global thread number.
1561@param crit identity of the critical section. This could be a pointer to a lock
1562associated with the critical section, or some other suitably unique value.
1563@param hint the lock hint.
1564
1565Enter code protected by a `critical` construct with a hint. The hint value is
1566used to suggest a lock implementation. This function blocks until the executing
1567thread can enter the critical section unless the hint suggests use of
1568speculative execution and the hardware supports it.
1569*/
1570void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1571 kmp_critical_name *crit, uint32_t hint) {
1572 KMP_COUNT_BLOCK(OMP_CRITICAL);
1574#if OMPT_SUPPORT && OMPT_OPTIONAL
1575 ompt_state_t prev_state = ompt_state_undefined;
1577 // This is the case, if called from __kmpc_critical:
1578 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1579 if (!codeptr)
1580 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1581#endif
1582
1583 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1584 __kmp_assert_valid_gtid(global_tid);
1585
1586 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1587 // Check if it is initialized.
1588 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1589 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1590 if (*lk == 0) {
1591 if (KMP_IS_D_LOCK(lockseq)) {
1593 (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
1594 KMP_GET_D_TAG(lockseq));
1595 } else {
1596 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1597 }
1598 }
1599 // Branch for accessing the actual lock object and set operation. This
1600 // branching is inevitable since this lock initialization does not follow the
1601 // normal dispatch path (lock table is not used).
1602 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1603 lck = (kmp_user_lock_p)lk;
1605 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1606 __kmp_map_hint_to_lock(hint));
1607 }
1608#if USE_ITT_BUILD
1609 __kmp_itt_critical_acquiring(lck);
1610#endif
1611#if OMPT_SUPPORT && OMPT_OPTIONAL
1612 if (ompt_enabled.enabled) {
1613 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1614 /* OMPT state update */
1615 prev_state = ti.state;
1616 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1617 ti.state = ompt_state_wait_critical;
1618
1619 /* OMPT event callback */
1620 if (ompt_enabled.ompt_callback_mutex_acquire) {
1621 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1622 ompt_mutex_critical, (unsigned int)hint,
1623 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1624 codeptr);
1625 }
1626 }
1627#endif
1628#if KMP_USE_INLINED_TAS
1629 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1630 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1631 } else
1632#elif KMP_USE_INLINED_FUTEX
1633 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1634 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1635 } else
1636#endif
1637 {
1638 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1639 }
1640 } else {
1641 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1642 lck = ilk->lock;
1644 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1645 __kmp_map_hint_to_lock(hint));
1646 }
1647#if USE_ITT_BUILD
1648 __kmp_itt_critical_acquiring(lck);
1649#endif
1650#if OMPT_SUPPORT && OMPT_OPTIONAL
1651 if (ompt_enabled.enabled) {
1652 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1653 /* OMPT state update */
1654 prev_state = ti.state;
1655 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1656 ti.state = ompt_state_wait_critical;
1657
1658 /* OMPT event callback */
1659 if (ompt_enabled.ompt_callback_mutex_acquire) {
1660 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1661 ompt_mutex_critical, (unsigned int)hint,
1662 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1663 codeptr);
1664 }
1665 }
1666#endif
1667 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1668 }
1670
1671#if USE_ITT_BUILD
1672 __kmp_itt_critical_acquired(lck);
1673#endif /* USE_ITT_BUILD */
1674#if OMPT_SUPPORT && OMPT_OPTIONAL
1675 if (ompt_enabled.enabled) {
1676 /* OMPT state update */
1677 ti.state = prev_state;
1678 ti.wait_id = 0;
1679
1680 /* OMPT event callback */
1681 if (ompt_enabled.ompt_callback_mutex_acquired) {
1682 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1683 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1684 }
1685 }
1686#endif
1687
1688 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1689 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1690} // __kmpc_critical_with_hint
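/* Illustrative sketch, not part of the original source: when the critical
   construct carries a hint clause, e.g.
   "#pragma omp critical (update) hint(omp_sync_hint_contended)", the compiler
   calls the hinted entry point instead, and __kmp_map_hint_to_lock() above
   chooses the lock implementation.

   __kmpc_critical_with_hint(&loc0, gtid, &crit_update, omp_lock_hint_contended);
*/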
1691
1692#endif // KMP_USE_DYNAMIC_LOCK
1693
1694/*!
1695@ingroup WORK_SHARING
1696@param loc source location information.
1697@param global_tid global thread number.
1698@param crit identity of the critical section. This could be a pointer to a lock
1699associated with the critical section, or some other suitably unique value.
1700
1701Leave a critical section, releasing any lock that was held during its execution.
1702*/
1703void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1704 kmp_critical_name *crit) {
1705 kmp_user_lock_p lck;
1706
1707 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1708
1709#if KMP_USE_DYNAMIC_LOCK
1710 int locktag = KMP_EXTRACT_D_TAG(crit);
1711 if (locktag) {
1713 KMP_ASSERT(lck != NULL);
1715 __kmp_pop_sync(global_tid, ct_critical, loc);
1716 }
1717#if USE_ITT_BUILD
1718 __kmp_itt_critical_releasing(lck);
1719#endif
1720#if KMP_USE_INLINED_TAS
1721 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1722 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1723 } else
1724#elif KMP_USE_INLINED_FUTEX
1725 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1726 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1727 } else
1728#endif
1729 {
1730 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1731 }
1732 } else {
1733 kmp_indirect_lock_t *ilk =
1734 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1735 KMP_ASSERT(ilk != NULL);
1736 lck = ilk->lock;
1738 __kmp_pop_sync(global_tid, ct_critical, loc);
1739 }
1740#if USE_ITT_BUILD
1741 __kmp_itt_critical_releasing(lck);
1742#endif
1743 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1744 }
1745
1746#else // KMP_USE_DYNAMIC_LOCK
1747
1748 if ((__kmp_user_lock_kind == lk_tas) &&
1749 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1751 }
1752#if KMP_USE_FUTEX
1753 else if ((__kmp_user_lock_kind == lk_futex) &&
1754 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1756 }
1757#endif
1758 else { // ticket, queuing or drdpa
1760 }
1761
1762 KMP_ASSERT(lck != NULL);
1763
1765 __kmp_pop_sync(global_tid, ct_critical, loc);
1766
1767#if USE_ITT_BUILD
1768 __kmp_itt_critical_releasing(lck);
1769#endif /* USE_ITT_BUILD */
1770 // Value of 'crit' should be good for using as a critical_id of the critical
1771 // section directive.
1773
1774#endif // KMP_USE_DYNAMIC_LOCK
1775
1776#if OMPT_SUPPORT && OMPT_OPTIONAL
1777 /* OMPT release event triggers after lock is released; place here to trigger
1778 * for all #if branches */
1779 OMPT_STORE_RETURN_ADDRESS(global_tid);
1780 if (ompt_enabled.ompt_callback_mutex_released) {
1781 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1782 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1783 OMPT_LOAD_RETURN_ADDRESS(0));
1784 }
1785#endif
1786
1788 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1789}
1790
1791/*!
1792@ingroup SYNCHRONIZATION
1793@param loc source location information
1794@param global_tid thread id.
1795@return one if the thread should execute the master block, zero otherwise
1796
1797Start execution of a combined barrier and master. The barrier is executed inside
1798this function.
1799*/
1800kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1801 int status;
1802 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1803 __kmp_assert_valid_gtid(global_tid);
1804
1807
1809
1811 __kmp_check_barrier(global_tid, ct_barrier, loc);
1812
1813#if OMPT_SUPPORT
1814 ompt_frame_t *ompt_frame;
1815 if (ompt_enabled.enabled) {
1816 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1817 if (ompt_frame->enter_frame.ptr == NULL)
1818 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1819 }
1820 OMPT_STORE_RETURN_ADDRESS(global_tid);
1821#endif
1822#if USE_ITT_NOTIFY
1823 __kmp_threads[global_tid]->th.th_ident = loc;
1824#endif
1825 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1826#if OMPT_SUPPORT && OMPT_OPTIONAL
1827 if (ompt_enabled.enabled) {
1828 ompt_frame->enter_frame = ompt_data_none;
1829 }
1830#endif
1831
1832 return (status != 0) ? 0 : 1;
1833}
1834
1835/*!
1836@ingroup SYNCHRONIZATION
1837@param loc source location information
1838@param global_tid thread id.
1839
1840Complete the execution of a combined barrier and master. This function should
1841only be called at the completion of the <tt>master</tt> code. Other threads will
1842still be waiting at the barrier and this call releases them.
1843*/
1844void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1845 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1846 __kmp_assert_valid_gtid(global_tid);
1848}
1849
1850/*!
1851@ingroup SYNCHRONIZATION
1852@param loc source location information
1853@param global_tid thread id.
1854@return one if the thread should execute the master block, zero otherwise
1855
1856Start execution of a combined barrier and master(nowait) construct.
1857The barrier is executed inside this function.
1858There is no equivalent "end" function, since the actions normally performed by
1858an "end" call are carried out inside this function.
1859*/
1860kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1861 kmp_int32 ret;
1862 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1863 __kmp_assert_valid_gtid(global_tid);
1864
1867
1869
1871 if (loc == 0) {
1872 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1873 }
1874 __kmp_check_barrier(global_tid, ct_barrier, loc);
1875 }
1876
1877#if OMPT_SUPPORT
1878 ompt_frame_t *ompt_frame;
1879 if (ompt_enabled.enabled) {
1880 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1881 if (ompt_frame->enter_frame.ptr == NULL)
1882 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1883 }
1884 OMPT_STORE_RETURN_ADDRESS(global_tid);
1885#endif
1886#if USE_ITT_NOTIFY
1887 __kmp_threads[global_tid]->th.th_ident = loc;
1888#endif
1889 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1890#if OMPT_SUPPORT && OMPT_OPTIONAL
1891 if (ompt_enabled.enabled) {
1892 ompt_frame->enter_frame = ompt_data_none;
1893 }
1894#endif
1895
1896 ret = __kmpc_master(loc, global_tid);
1897
1899 /* there's no __kmpc_end_master called; so the (stats) */
1900 /* actions of __kmpc_end_master are done here */
1901 if (ret) {
1902 /* only one thread should do the pop since only */
1903 /* one did the push (see __kmpc_master()) */
1904 __kmp_pop_sync(global_tid, ct_master, loc);
1905 }
1906 }
1907
1908 return (ret);
1909}
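/* For comparison, an illustrative sketch of the nowait form: a single call
 * both performs the barrier and selects the executing thread, and no "end"
 * call or closing barrier is generated afterwards (loc and gtid are assumed to
 * be set up as in the previous sketch):
@code
  if (__kmpc_barrier_master_nowait(&loc, gtid)) {
    // ... code executed only by the selected thread ...
  }
  // all threads continue immediately; no end call is emitted
@endcode
*/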
1910
1911/* The BARRIER for a SINGLE process section is always explicit */
1912/*!
1913@ingroup WORK_SHARING
1914@param loc source location information
1915@param global_tid global thread number
1916@return One if this thread should execute the single construct, zero otherwise.
1917
1918Test whether to execute a <tt>single</tt> construct.
1919There are no implicit barriers in the two "single" calls; rather, the compiler
1920should introduce an explicit barrier if one is required.
1921*/
1922
1923kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1924 __kmp_assert_valid_gtid(global_tid);
1925 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1926
1927 if (rc) {
1928 // We are going to execute the single statement, so we should count it.
1929 KMP_COUNT_BLOCK(OMP_SINGLE);
1930 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1931 }
1932
1933#if OMPT_SUPPORT && OMPT_OPTIONAL
1934 kmp_info_t *this_thr = __kmp_threads[global_tid];
1935 kmp_team_t *team = this_thr->th.th_team;
1936 int tid = __kmp_tid_from_gtid(global_tid);
1937
1938 if (ompt_enabled.enabled) {
1939 if (rc) {
1940 if (ompt_enabled.ompt_callback_work) {
1941 ompt_callbacks.ompt_callback(ompt_callback_work)(
1942 ompt_work_single_executor, ompt_scope_begin,
1943 &(team->t.ompt_team_info.parallel_data),
1944 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1946 }
1947 } else {
1948 if (ompt_enabled.ompt_callback_work) {
1949 ompt_callbacks.ompt_callback(ompt_callback_work)(
1950 ompt_work_single_other, ompt_scope_begin,
1951 &(team->t.ompt_team_info.parallel_data),
1952 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1954 ompt_callbacks.ompt_callback(ompt_callback_work)(
1955 ompt_work_single_other, ompt_scope_end,
1956 &(team->t.ompt_team_info.parallel_data),
1957 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1959 }
1960 }
1961 }
1962#endif
1963
1964 return rc;
1965}
1966
1967/*!
1968@ingroup WORK_SHARING
1969@param loc source location information
1970@param global_tid global thread number
1971
1972Mark the end of a <tt>single</tt> construct. This function should
1973only be called by the thread that executed the block of code protected
1974by the <tt>single</tt> construct.
1975*/
1976void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1977 __kmp_assert_valid_gtid(global_tid);
1978 __kmp_exit_single(global_tid);
1980
1981#if OMPT_SUPPORT && OMPT_OPTIONAL
1982 kmp_info_t *this_thr = __kmp_threads[global_tid];
1983 kmp_team_t *team = this_thr->th.th_team;
1984 int tid = __kmp_tid_from_gtid(global_tid);
1985
1986 if (ompt_enabled.ompt_callback_work) {
1987 ompt_callbacks.ompt_callback(ompt_callback_work)(
1988 ompt_work_single_executor, ompt_scope_end,
1989 &(team->t.ompt_team_info.parallel_data),
1990 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1992 }
1993#endif
1994}
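/* An illustrative sketch (not actual compiler output) of lowering
 * "#pragma omp single" onto the two calls above; because neither call contains
 * an implicit barrier, an explicit barrier is emitted when the construct has
 * no nowait clause (loc and gtid assumed):
@code
  if (__kmpc_single(&loc, gtid)) {
    // ... body of the single region, executed by exactly one thread ...
    __kmpc_end_single(&loc, gtid);
  }
  __kmpc_barrier(&loc, gtid); // omitted when "nowait" is specified
@endcode
*/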
1995
1996/*!
1997@ingroup WORK_SHARING
1998@param loc Source location
1999@param global_tid Global thread id
2000
2001Mark the end of a statically scheduled loop.
2002*/
2003void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
2005 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
2006
2007#if OMPT_SUPPORT && OMPT_OPTIONAL
2008 if (ompt_enabled.ompt_callback_work) {
2009 ompt_work_t ompt_work_type = ompt_work_loop_static;
2010 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
2012 // Determine workshare type
2013 if (loc != NULL) {
2014 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
2015 ompt_work_type = ompt_work_loop_static;
2016 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
2017 ompt_work_type = ompt_work_sections;
2018 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
2019 ompt_work_type = ompt_work_distribute;
2020 } else {
2021 // use default set above.
2022 // a warning about this case is provided in __kmpc_for_static_init
2023 }
2024 KMP_DEBUG_ASSERT(ompt_work_type);
2025 }
2026 ompt_callbacks.ompt_callback(ompt_callback_work)(
2027 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
2028 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
2029 }
2030#endif
2032 __kmp_pop_workshare(global_tid, ct_pdo, loc);
2033}
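/* An illustrative sketch (schedule constant and bounds handling simplified,
 * variable names assumed) of the usual pairing of this call with
 * __kmpc_for_static_init_4 when lowering a statically scheduled loop over
 * 0..N-1:
@code
  kmp_int32 last = 0, lower = 0, upper = N - 1, stride = 1;
  __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower, &upper,
                           &stride, 1, 1); // incr = 1, chunk = 1
  for (kmp_int32 i = lower; i <= upper; ++i) {
    // ... loop body, executed for this thread's chunk of iterations ...
  }
  __kmpc_for_static_fini(&loc, gtid);
@endcode
*/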
2034
2035// User routines which take C-style arguments (call by value)
2036// different from the Fortran equivalent routines
2037
2039 // !!!!! TODO: check the per-task binding
2041}
2042
2044 kmp_info_t *thread;
2045
2046 /* For the thread-private implementation of the internal controls */
2047 thread = __kmp_entry_thread();
2048
2050
2051 set__dynamic(thread, flag ? true : false);
2052}
2053
2055 kmp_info_t *thread;
2056
2057 /* For the thread-private internal controls implementation */
2058 thread = __kmp_entry_thread();
2059
2061
2063}
2064
2065void ompc_set_max_active_levels(int max_active_levels) {
2066 /* TO DO */
2067 /* we want per-task implementation of this internal control */
2068
2069 /* For the per-thread internal controls implementation */
2070 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2071}
2072
2073void ompc_set_schedule(omp_sched_t kind, int modifier) {
2074 // !!!!! TODO: check the per-task binding
2076}
2077
2080}
2081
2084}
2085
2086/* OpenMP 5.0 Affinity Format API */
2088 if (!__kmp_init_serial) {
2090 }
2092 format, KMP_STRLEN(format) + 1);
2093}
2094
2095size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2096 size_t format_size;
2097 if (!__kmp_init_serial) {
2099 }
2100 format_size = KMP_STRLEN(__kmp_affinity_format);
2101 if (buffer && size) {
2103 format_size + 1);
2104 }
2105 return format_size;
2106}
2107
2108void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2109 int gtid;
2110 if (!TCR_4(__kmp_init_middle)) {
2112 }
2114 gtid = __kmp_get_gtid();
2115#if KMP_AFFINITY_SUPPORTED
2116 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2117 __kmp_affinity.flags.reset) {
2119 }
2120#endif
2121 __kmp_aux_display_affinity(gtid, format);
2122}
2123
2124size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2125 char const *format) {
2126 int gtid;
2127 size_t num_required;
2128 kmp_str_buf_t capture_buf;
2129 if (!TCR_4(__kmp_init_middle)) {
2131 }
2133 gtid = __kmp_get_gtid();
2134#if KMP_AFFINITY_SUPPORTED
2135 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2136 __kmp_affinity.flags.reset) {
2138 }
2139#endif
2140 __kmp_str_buf_init(&capture_buf);
2141 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2142 if (buffer && buf_size) {
2143 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2144 capture_buf.used + 1);
2145 }
2146 __kmp_str_buf_free(&capture_buf);
2147 return num_required;
2148}
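/* A usage sketch for the user-level routine this entry point backs (the
 * mapping to omp_capture_affinity is assumed): query the required size first,
 * then allocate and capture; passing a NULL format uses the current
 * affinity-format setting.
@code
  size_t needed = omp_capture_affinity(NULL, 0, NULL);
  char *buf = (char *)malloc(needed + 1);
  if (buf) {
    omp_capture_affinity(buf, needed + 1, NULL);
    printf("%s\n", buf);
    free(buf);
  }
@endcode
*/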
2149
2150void kmpc_set_stacksize(int arg) {
2151 // __kmp_aux_set_stacksize initializes the library if needed
2153}
2154
2155void kmpc_set_stacksize_s(size_t arg) {
2156 // __kmp_aux_set_stacksize initializes the library if needed
2158}
2159
2160void kmpc_set_blocktime(int arg) {
2161 int gtid, tid, bt = arg;
2162 kmp_info_t *thread;
2163
2164 gtid = __kmp_entry_gtid();
2165 tid = __kmp_tid_from_gtid(gtid);
2166 thread = __kmp_thread_from_gtid(gtid);
2167
2169 __kmp_aux_set_blocktime(bt, thread, tid);
2170}
2171
2172void kmpc_set_library(int arg) {
2173 // __kmp_user_set_library initializes the library if needed
2175}
2176
2177void kmpc_set_defaults(char const *str) {
2178 // __kmp_aux_set_defaults initializes the library if needed
2180}
2181
2183 // ignore after initialization because some teams have already
2184 // allocated dispatch buffers
2186 arg <= KMP_MAX_DISP_NUM_BUFF) {
2188 }
2189}
2190
2191int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2192#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2193 return -1;
2194#else
2195 if (!TCR_4(__kmp_init_middle)) {
2197 }
2199 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2200#endif
2201}
2202
2204#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2205 return -1;
2206#else
2207 if (!TCR_4(__kmp_init_middle)) {
2209 }
2211 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2212#endif
2213}
2214
2215int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2216#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2217 return -1;
2218#else
2219 if (!TCR_4(__kmp_init_middle)) {
2221 }
2223 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2224#endif
2225}
2226
2227/* -------------------------------------------------------------------------- */
2228/*!
2229@ingroup THREADPRIVATE
2230@param loc source location information
2231@param gtid global thread number
2232@param cpy_size size of the cpy_data buffer
2233@param cpy_data pointer to data to be copied
2234@param cpy_func helper function to call for copying data
2235@param didit flag variable: 1=single thread; 0=not single thread
2236
2237__kmpc_copyprivate implements the interface for the private data broadcast
2238needed for the copyprivate clause associated with a single region in an
2239OpenMP<sup>*</sup> program (both C and Fortran).
2240All threads participating in the parallel region call this routine.
2241One of the threads (called the single thread) should have the <tt>didit</tt>
2242variable set to 1 and all other threads should have that variable set to 0.
2243All threads pass a pointer to a data buffer (cpy_data) that they have built.
2244
2245The OpenMP specification forbids the use of nowait on the single region when a
2246copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2247barrier internally to avoid race conditions, so the code generation for the
2248single region should avoid generating a barrier after the call to @ref
2249__kmpc_copyprivate.
2250
2251The <tt>gtid</tt> parameter is the global thread id for the current thread.
2252The <tt>loc</tt> parameter is a pointer to source location information.
2253
2254Internal implementation: the single thread first copies its descriptor
2255address (cpy_data) to a team-private location; then the other threads each
2256call the function pointed to by the parameter cpy_func, which carries out the
2257copy between the two threads' cpy_data buffers.
2258
2259The cpy_func routine used for the copy and the contents of the data area defined
2260by cpy_data and cpy_size may be built in any fashion that will allow the copy
2261to be done. For instance, the cpy_data buffer can hold the actual data to be
2262copied or it may hold a list of pointers to the data. The cpy_func routine must
2263interpret the cpy_data buffer appropriately.
2264
2265The interface to cpy_func is as follows:
2266@code
2267void cpy_func( void *destination, void *source )
2268@endcode
2269where void *destination is the cpy_data pointer for the thread being copied to
2270and void *source is the cpy_data pointer for the thread being copied from.
2271*/
2272void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2273 void *cpy_data, void (*cpy_func)(void *, void *),
2274 kmp_int32 didit) {
2275 void **data_ptr;
2276 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2278
2279 KMP_MB();
2280
2281 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2282
2284 if (loc == 0) {
2285 KMP_WARNING(ConstructIdentInvalid);
2286 }
2287 }
2288
2289 // ToDo: Optimize the following two barriers into some kind of split barrier
2290
2291 if (didit)
2292 *data_ptr = cpy_data;
2293
2294#if OMPT_SUPPORT
2295 ompt_frame_t *ompt_frame;
2296 if (ompt_enabled.enabled) {
2297 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2298 if (ompt_frame->enter_frame.ptr == NULL)
2299 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2300 }
2301 OMPT_STORE_RETURN_ADDRESS(gtid);
2302#endif
2303/* This barrier is not a barrier region boundary */
2304#if USE_ITT_NOTIFY
2305 __kmp_threads[gtid]->th.th_ident = loc;
2306#endif
2307 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2308
2309 if (!didit)
2310 (*cpy_func)(cpy_data, *data_ptr);
2311
2312 // Consider next barrier a user-visible barrier for barrier region boundaries
2313 // Nesting checks are already handled by the single construct checks
2314 {
2315#if OMPT_SUPPORT
2316 OMPT_STORE_RETURN_ADDRESS(gtid);
2317#endif
2318#if USE_ITT_NOTIFY
2319 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2320// tasks can overwrite the location)
2321#endif
2322 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2323#if OMPT_SUPPORT && OMPT_OPTIONAL
2324 if (ompt_enabled.enabled) {
2325 ompt_frame->enter_frame = ompt_data_none;
2326 }
2327#endif
2328 }
2329}
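/* An illustrative cpy_func (a hypothetical helper following the interface
 * documented above): each thread's cpy_data buffer is assumed to hold two
 * pointers to the variables named in the copyprivate clause, and the helper
 * copies the values from the single thread's variables into this thread's
 * variables.
@code
  static void example_cpy_func(void *destination, void *source) {
    void **dst = (void **)destination; // cpy_data of the thread being copied to
    void **src = (void **)source;      // cpy_data of the thread being copied from
    *(int *)dst[0] = *(int *)src[0];
    *(double *)dst[1] = *(double *)src[1];
  }
@endcode
*/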
2330
2331/* --------------------------------------------------------------------------*/
2332/*!
2333@ingroup THREADPRIVATE
2334@param loc source location information
2335@param gtid global thread number
2336@param cpy_data pointer to the data to be saved/copied or 0
2337@return the saved pointer to the data
2338
2339__kmpc_copyprivate_light is a lighter-weight version of __kmpc_copyprivate:
2340it only saves the pointer it is given (when that pointer is non-zero, i.e. on
2341the thread coming from the single region) and returns that pointer from every
2342call (the single thread itself does not need it). This version does not do any
2343actual data copying; the copy has to be done elsewhere, e.g. inline in the
2344generated code. For the same reason this function does not end with a barrier
2345the way __kmpc_copyprivate does, so the generated code must place a barrier
2346after all of the data has been copied.
2347*/
2348void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2349 void **data_ptr;
2350
2351 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2352
2353 KMP_MB();
2354
2355 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2356
2358 if (loc == 0) {
2359 KMP_WARNING(ConstructIdentInvalid);
2360 }
2361 }
2362
2363 // ToDo: Optimize the following barrier
2364
2365 if (cpy_data)
2366 *data_ptr = cpy_data;
2367
2368#if OMPT_SUPPORT
2369 ompt_frame_t *ompt_frame;
2370 if (ompt_enabled.enabled) {
2371 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2372 if (ompt_frame->enter_frame.ptr == NULL)
2373 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2374 OMPT_STORE_RETURN_ADDRESS(gtid);
2375 }
2376#endif
2377/* This barrier is not a barrier region boundary */
2378#if USE_ITT_NOTIFY
2379 __kmp_threads[gtid]->th.th_ident = loc;
2380#endif
2381 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2382
2383 return *data_ptr;
2384}
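/* An illustrative sketch (variable names assumed) of the code a compiler might
 * generate around this entry point: the thread coming from the single region
 * passes the address of its data, the others pass 0, every thread copies
 * inline from the returned pointer, and an explicit barrier follows because
 * this routine does not end with one.
@code
  void *src = __kmpc_copyprivate_light(&loc, gtid, didit ? &my_value : NULL);
  if (!didit)
    my_value = *(int *)src;   // inline copy emitted by the generated code
  __kmpc_barrier(&loc, gtid); // required: no trailing barrier in the light version
@endcode
*/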
2385
2386/* -------------------------------------------------------------------------- */
2387
2388#define INIT_LOCK __kmp_init_user_lock_with_checks
2389#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2390#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2391#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2392#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2393#define ACQUIRE_NESTED_LOCK_TIMED \
2394 __kmp_acquire_nested_user_lock_with_checks_timed
2395#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2396#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2397#define TEST_LOCK __kmp_test_user_lock_with_checks
2398#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2399#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2400#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2401
2402// TODO: Make check abort messages use location info & pass it into
2403// with_checks routines
2404
2405#if KMP_USE_DYNAMIC_LOCK
2406
2407// internal lock initializer
2408static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2409 kmp_dyna_lockseq_t seq) {
2410 if (KMP_IS_D_LOCK(seq)) {
2411 KMP_INIT_D_LOCK(lock, seq);
2412#if USE_ITT_BUILD
2413 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2414#endif
2415 } else {
2416 KMP_INIT_I_LOCK(lock, seq);
2417#if USE_ITT_BUILD
2418 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2419 __kmp_itt_lock_creating(ilk->lock, loc);
2420#endif
2421 }
2422}
2423
2424// internal nest lock initializer
2425static __forceinline void
2426__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2427 kmp_dyna_lockseq_t seq) {
2428#if KMP_USE_TSX
2429 // Don't have nested lock implementation for speculative locks
2430 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2431 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2432 seq = __kmp_user_lock_seq;
2433#endif
2434 switch (seq) {
2435 case lockseq_tas:
2436 seq = lockseq_nested_tas;
2437 break;
2438#if KMP_USE_FUTEX
2439 case lockseq_futex:
2440 seq = lockseq_nested_futex;
2441 break;
2442#endif
2443 case lockseq_ticket:
2444 seq = lockseq_nested_ticket;
2445 break;
2446 case lockseq_queuing:
2447 seq = lockseq_nested_queuing;
2448 break;
2449 case lockseq_drdpa:
2450 seq = lockseq_nested_drdpa;
2451 break;
2452 default:
2453 seq = lockseq_nested_queuing;
2454 }
2455 KMP_INIT_I_LOCK(lock, seq);
2456#if USE_ITT_BUILD
2457 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2458 __kmp_itt_lock_creating(ilk->lock, loc);
2459#endif
2460}
2461
2462/* initialize the lock with a hint */
2463void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2464 uintptr_t hint) {
2466 if (__kmp_env_consistency_check && user_lock == NULL) {
2467 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2468 }
2469
2470 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2471
2472#if OMPT_SUPPORT && OMPT_OPTIONAL
2473 // This is the case, if called from omp_init_lock_with_hint:
2474 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2475 if (!codeptr)
2476 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2477 if (ompt_enabled.ompt_callback_lock_init) {
2478 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2479 ompt_mutex_lock, (omp_lock_hint_t)hint,
2480 __ompt_get_mutex_impl_type(user_lock),
2481 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2482 }
2483#endif
2484}
2485
2486/* initialize the lock with a hint */
2488 void **user_lock, uintptr_t hint) {
2490 if (__kmp_env_consistency_check && user_lock == NULL) {
2491 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2492 }
2493
2494 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2495
2496#if OMPT_SUPPORT && OMPT_OPTIONAL
2497 // This is the case, if called from omp_init_lock_with_hint:
2498 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2499 if (!codeptr)
2500 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2501 if (ompt_enabled.ompt_callback_lock_init) {
2502 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2503 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2504 __ompt_get_mutex_impl_type(user_lock),
2505 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2506 }
2507#endif
2508}
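/* A usage sketch of the user-level API these hinted initializers serve (the
 * mapping from omp_init_lock_with_hint to this entry point is assumed):
@code
  omp_lock_t l;
  omp_init_lock_with_hint(&l, omp_lock_hint_contended);
  omp_set_lock(&l);
  // ... work protected by the lock ...
  omp_unset_lock(&l);
  omp_destroy_lock(&l);
@endcode
*/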
2509
2510#endif // KMP_USE_DYNAMIC_LOCK
2511
2512/* initialize the lock */
2513void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2514#if KMP_USE_DYNAMIC_LOCK
2515
2517 if (__kmp_env_consistency_check && user_lock == NULL) {
2518 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2519 }
2520 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2521
2522#if OMPT_SUPPORT && OMPT_OPTIONAL
2523 // This is the case, if called from omp_init_lock_with_hint:
2524 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2525 if (!codeptr)
2526 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2527 if (ompt_enabled.ompt_callback_lock_init) {
2528 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2529 ompt_mutex_lock, omp_lock_hint_none,
2530 __ompt_get_mutex_impl_type(user_lock),
2531 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2532 }
2533#endif
2534
2535#else // KMP_USE_DYNAMIC_LOCK
2536
2537 static char const *const func = "omp_init_lock";
2540
2542 if (user_lock == NULL) {
2543 KMP_FATAL(LockIsUninitialized, func);
2544 }
2545 }
2546
2548
2549 if ((__kmp_user_lock_kind == lk_tas) &&
2550 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2551 lck = (kmp_user_lock_p)user_lock;
2552 }
2553#if KMP_USE_FUTEX
2554 else if ((__kmp_user_lock_kind == lk_futex) &&
2555 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2556 lck = (kmp_user_lock_p)user_lock;
2557 }
2558#endif
2559 else {
2560 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2561 }
2562 INIT_LOCK(lck);
2564
2565#if OMPT_SUPPORT && OMPT_OPTIONAL
2566 // This is the case, if called from omp_init_lock_with_hint:
2567 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2568 if (!codeptr)
2569 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2570 if (ompt_enabled.ompt_callback_lock_init) {
2571 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2572 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2573 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2574 }
2575#endif
2576
2577#if USE_ITT_BUILD
2578 __kmp_itt_lock_creating(lck);
2579#endif /* USE_ITT_BUILD */
2580
2581#endif // KMP_USE_DYNAMIC_LOCK
2582} // __kmpc_init_lock
2583
2584/* initialize the lock */
2585void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2586#if KMP_USE_DYNAMIC_LOCK
2587
2589 if (__kmp_env_consistency_check && user_lock == NULL) {
2590 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2591 }
2592 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2593
2594#if OMPT_SUPPORT && OMPT_OPTIONAL
2595 // This is the case, if called from omp_init_lock_with_hint:
2596 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2597 if (!codeptr)
2598 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2599 if (ompt_enabled.ompt_callback_lock_init) {
2600 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2601 ompt_mutex_nest_lock, omp_lock_hint_none,
2602 __ompt_get_mutex_impl_type(user_lock),
2603 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2604 }
2605#endif
2606
2607#else // KMP_USE_DYNAMIC_LOCK
2608
2609 static char const *const func = "omp_init_nest_lock";
2612
2614 if (user_lock == NULL) {
2615 KMP_FATAL(LockIsUninitialized, func);
2616 }
2617 }
2618
2620
2621 if ((__kmp_user_lock_kind == lk_tas) &&
2622 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2624 lck = (kmp_user_lock_p)user_lock;
2625 }
2626#if KMP_USE_FUTEX
2627 else if ((__kmp_user_lock_kind == lk_futex) &&
2628 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2630 lck = (kmp_user_lock_p)user_lock;
2631 }
2632#endif
2633 else {
2634 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2635 }
2636
2639
2640#if OMPT_SUPPORT && OMPT_OPTIONAL
2641 // This is the case, if called from omp_init_lock_with_hint:
2642 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2643 if (!codeptr)
2644 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2645 if (ompt_enabled.ompt_callback_lock_init) {
2646 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2647 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2648 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2649 }
2650#endif
2651
2652#if USE_ITT_BUILD
2653 __kmp_itt_lock_creating(lck);
2654#endif /* USE_ITT_BUILD */
2655
2656#endif // KMP_USE_DYNAMIC_LOCK
2657} // __kmpc_init_nest_lock
2658
2659void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2660#if KMP_USE_DYNAMIC_LOCK
2661
2662#if USE_ITT_BUILD
2664 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2665 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2666 } else {
2667 lck = (kmp_user_lock_p)user_lock;
2668 }
2669 __kmp_itt_lock_destroyed(lck);
2670#endif
2671#if OMPT_SUPPORT && OMPT_OPTIONAL
2672 // This is the case, if called from omp_init_lock_with_hint:
2673 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2674 if (!codeptr)
2675 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2676 if (ompt_enabled.ompt_callback_lock_destroy) {
2677 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2678 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2679 }
2680#endif
2681 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2682#else
2684
2685 if ((__kmp_user_lock_kind == lk_tas) &&
2686 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2687 lck = (kmp_user_lock_p)user_lock;
2688 }
2689#if KMP_USE_FUTEX
2690 else if ((__kmp_user_lock_kind == lk_futex) &&
2691 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2692 lck = (kmp_user_lock_p)user_lock;
2693 }
2694#endif
2695 else {
2696 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2697 }
2698
2699#if OMPT_SUPPORT && OMPT_OPTIONAL
2700 // This is the case, if called from omp_init_lock_with_hint:
2701 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2702 if (!codeptr)
2703 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2704 if (ompt_enabled.ompt_callback_lock_destroy) {
2705 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2706 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2707 }
2708#endif
2709
2710#if USE_ITT_BUILD
2711 __kmp_itt_lock_destroyed(lck);
2712#endif /* USE_ITT_BUILD */
2714
2715 if ((__kmp_user_lock_kind == lk_tas) &&
2716 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2717 ;
2718 }
2719#if KMP_USE_FUTEX
2720 else if ((__kmp_user_lock_kind == lk_futex) &&
2721 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2722 ;
2723 }
2724#endif
2725 else {
2726 __kmp_user_lock_free(user_lock, gtid, lck);
2727 }
2728#endif // KMP_USE_DYNAMIC_LOCK
2729} // __kmpc_destroy_lock
2730
2731/* destroy the lock */
2732void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2733#if KMP_USE_DYNAMIC_LOCK
2734
2735#if USE_ITT_BUILD
2736 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2737 __kmp_itt_lock_destroyed(ilk->lock);
2738#endif
2739#if OMPT_SUPPORT && OMPT_OPTIONAL
2740 // This is the case, if called from omp_init_lock_with_hint:
2741 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2742 if (!codeptr)
2743 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2744 if (ompt_enabled.ompt_callback_lock_destroy) {
2745 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2746 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2747 }
2748#endif
2749 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2750
2751#else // KMP_USE_DYNAMIC_LOCK
2752
2754
2755 if ((__kmp_user_lock_kind == lk_tas) &&
2756 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2758 lck = (kmp_user_lock_p)user_lock;
2759 }
2760#if KMP_USE_FUTEX
2761 else if ((__kmp_user_lock_kind == lk_futex) &&
2762 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2764 lck = (kmp_user_lock_p)user_lock;
2765 }
2766#endif
2767 else {
2768 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2769 }
2770
2771#if OMPT_SUPPORT && OMPT_OPTIONAL
2772 // This is the case, if called from omp_init_lock_with_hint:
2773 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2774 if (!codeptr)
2775 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2776 if (ompt_enabled.ompt_callback_lock_destroy) {
2777 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2778 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2779 }
2780#endif
2781
2782#if USE_ITT_BUILD
2783 __kmp_itt_lock_destroyed(lck);
2784#endif /* USE_ITT_BUILD */
2785
2787
2788 if ((__kmp_user_lock_kind == lk_tas) &&
2789 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2791 ;
2792 }
2793#if KMP_USE_FUTEX
2794 else if ((__kmp_user_lock_kind == lk_futex) &&
2795 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2797 ;
2798 }
2799#endif
2800 else {
2801 __kmp_user_lock_free(user_lock, gtid, lck);
2802 }
2803#endif // KMP_USE_DYNAMIC_LOCK
2804} // __kmpc_destroy_nest_lock
2805
2806void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2807 KMP_COUNT_BLOCK(OMP_set_lock);
2808#if KMP_USE_DYNAMIC_LOCK
2809 int tag = KMP_EXTRACT_D_TAG(user_lock);
2810#if USE_ITT_BUILD
2811 __kmp_itt_lock_acquiring(
2813 user_lock); // itt function will get to the right lock object.
2814#endif
2815#if OMPT_SUPPORT && OMPT_OPTIONAL
2816 // This is the case, if called from omp_init_lock_with_hint:
2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818 if (!codeptr)
2819 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820 if (ompt_enabled.ompt_callback_mutex_acquire) {
2821 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2822 ompt_mutex_lock, omp_lock_hint_none,
2823 __ompt_get_mutex_impl_type(user_lock),
2824 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2825 }
2826#endif
2827#if KMP_USE_INLINED_TAS
2828 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2829 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2830 } else
2831#elif KMP_USE_INLINED_FUTEX
2832 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2833 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2834 } else
2835#endif
2836 {
2837 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2838 }
2839#if USE_ITT_BUILD
2840 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2841#endif
2842#if OMPT_SUPPORT && OMPT_OPTIONAL
2843 if (ompt_enabled.ompt_callback_mutex_acquired) {
2844 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2845 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2846 }
2847#endif
2848
2849#else // KMP_USE_DYNAMIC_LOCK
2850
2852
2853 if ((__kmp_user_lock_kind == lk_tas) &&
2854 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2855 lck = (kmp_user_lock_p)user_lock;
2856 }
2857#if KMP_USE_FUTEX
2858 else if ((__kmp_user_lock_kind == lk_futex) &&
2859 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2860 lck = (kmp_user_lock_p)user_lock;
2861 }
2862#endif
2863 else {
2864 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2865 }
2866
2867#if USE_ITT_BUILD
2868 __kmp_itt_lock_acquiring(lck);
2869#endif /* USE_ITT_BUILD */
2870#if OMPT_SUPPORT && OMPT_OPTIONAL
2871 // This is the case, if called from omp_init_lock_with_hint:
2872 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2873 if (!codeptr)
2874 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2875 if (ompt_enabled.ompt_callback_mutex_acquire) {
2876 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2877 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2878 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2879 }
2880#endif
2881
2882 ACQUIRE_LOCK(lck, gtid);
2883
2884#if USE_ITT_BUILD
2885 __kmp_itt_lock_acquired(lck);
2886#endif /* USE_ITT_BUILD */
2887
2888#if OMPT_SUPPORT && OMPT_OPTIONAL
2889 if (ompt_enabled.ompt_callback_mutex_acquired) {
2890 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2891 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2892 }
2893#endif
2894
2895#endif // KMP_USE_DYNAMIC_LOCK
2896}
2897
2898void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2899#if KMP_USE_DYNAMIC_LOCK
2900
2901#if USE_ITT_BUILD
2902 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2903#endif
2904#if OMPT_SUPPORT && OMPT_OPTIONAL
2905 // This is the case, if called from omp_init_lock_with_hint:
2906 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2907 if (!codeptr)
2908 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2909 if (ompt_enabled.enabled) {
2910 if (ompt_enabled.ompt_callback_mutex_acquire) {
2911 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2912 ompt_mutex_nest_lock, omp_lock_hint_none,
2913 __ompt_get_mutex_impl_type(user_lock),
2914 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2915 }
2916 }
2917#endif
2918 int acquire_status =
2919 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2920 (void)acquire_status;
2921#if USE_ITT_BUILD
2922 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2923#endif
2924
2925#if OMPT_SUPPORT && OMPT_OPTIONAL
2926 if (ompt_enabled.enabled) {
2927 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2928 if (ompt_enabled.ompt_callback_mutex_acquired) {
2929 // lock_first
2930 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2931 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2932 codeptr);
2933 }
2934 } else {
2935 if (ompt_enabled.ompt_callback_nest_lock) {
2936 // lock_next
2937 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2938 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2939 }
2940 }
2941 }
2942#endif
2943
2944#else // KMP_USE_DYNAMIC_LOCK
2945 int acquire_status;
2947
2948 if ((__kmp_user_lock_kind == lk_tas) &&
2949 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2951 lck = (kmp_user_lock_p)user_lock;
2952 }
2953#if KMP_USE_FUTEX
2954 else if ((__kmp_user_lock_kind == lk_futex) &&
2955 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2957 lck = (kmp_user_lock_p)user_lock;
2958 }
2959#endif
2960 else {
2961 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2962 }
2963
2964#if USE_ITT_BUILD
2965 __kmp_itt_lock_acquiring(lck);
2966#endif /* USE_ITT_BUILD */
2967#if OMPT_SUPPORT && OMPT_OPTIONAL
2968 // This is the case, if called from omp_init_lock_with_hint:
2969 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2970 if (!codeptr)
2971 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2972 if (ompt_enabled.enabled) {
2973 if (ompt_enabled.ompt_callback_mutex_acquire) {
2974 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2975 ompt_mutex_nest_lock, omp_lock_hint_none,
2976 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2977 codeptr);
2978 }
2979 }
2980#endif
2981
2982 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2983
2984#if USE_ITT_BUILD
2985 __kmp_itt_lock_acquired(lck);
2986#endif /* USE_ITT_BUILD */
2987
2988#if OMPT_SUPPORT && OMPT_OPTIONAL
2989 if (ompt_enabled.enabled) {
2990 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2991 if (ompt_enabled.ompt_callback_mutex_acquired) {
2992 // lock_first
2993 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2994 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2995 }
2996 } else {
2997 if (ompt_enabled.ompt_callback_nest_lock) {
2998 // lock_next
2999 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3000 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3001 }
3002 }
3003 }
3004#endif
3005
3006#endif // KMP_USE_DYNAMIC_LOCK
3007}
3008
3009void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3010#if KMP_USE_DYNAMIC_LOCK
3011
3012 int tag = KMP_EXTRACT_D_TAG(user_lock);
3013#if USE_ITT_BUILD
3014 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3015#endif
3016#if KMP_USE_INLINED_TAS
3017 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3018 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
3019 } else
3020#elif KMP_USE_INLINED_FUTEX
3021 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3022 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
3023 } else
3024#endif
3025 {
3026 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3027 }
3028
3029#if OMPT_SUPPORT && OMPT_OPTIONAL
3030 // This is the case, if called from omp_init_lock_with_hint:
3031 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3032 if (!codeptr)
3033 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3034 if (ompt_enabled.ompt_callback_mutex_released) {
3035 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3036 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3037 }
3038#endif
3039
3040#else // KMP_USE_DYNAMIC_LOCK
3041
3043
3044 /* Can't use serial interval since not block structured */
3045 /* release the lock */
3046
3047 if ((__kmp_user_lock_kind == lk_tas) &&
3048 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3049#if KMP_OS_LINUX && \
3050 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3051// "fast" path implemented to fix customer performance issue
3052#if USE_ITT_BUILD
3053 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3054#endif /* USE_ITT_BUILD */
3055 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
3056 KMP_MB();
3057
3058#if OMPT_SUPPORT && OMPT_OPTIONAL
3059 // This is the case, if called from omp_init_lock_with_hint:
3060 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3061 if (!codeptr)
3062 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3063 if (ompt_enabled.ompt_callback_mutex_released) {
3064 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3065 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3066 }
3067#endif
3068
3069 return;
3070#else
3071 lck = (kmp_user_lock_p)user_lock;
3072#endif
3073 }
3074#if KMP_USE_FUTEX
3075 else if ((__kmp_user_lock_kind == lk_futex) &&
3076 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3077 lck = (kmp_user_lock_p)user_lock;
3078 }
3079#endif
3080 else {
3081 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3082 }
3083
3084#if USE_ITT_BUILD
3085 __kmp_itt_lock_releasing(lck);
3086#endif /* USE_ITT_BUILD */
3087
3088 RELEASE_LOCK(lck, gtid);
3089
3090#if OMPT_SUPPORT && OMPT_OPTIONAL
3091 // This is the case, if called from omp_init_lock_with_hint:
3092 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3093 if (!codeptr)
3094 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3095 if (ompt_enabled.ompt_callback_mutex_released) {
3096 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3097 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3098 }
3099#endif
3100
3101#endif // KMP_USE_DYNAMIC_LOCK
3102}
3103
3104/* release the lock */
3105void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3106#if KMP_USE_DYNAMIC_LOCK
3107
3108#if USE_ITT_BUILD
3109 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3110#endif
3111 int release_status =
3112 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3113 (void)release_status;
3114
3115#if OMPT_SUPPORT && OMPT_OPTIONAL
3116 // This is the case, if called from omp_init_lock_with_hint:
3117 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3118 if (!codeptr)
3119 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3120 if (ompt_enabled.enabled) {
3121 if (release_status == KMP_LOCK_RELEASED) {
3122 if (ompt_enabled.ompt_callback_mutex_released) {
3123 // release_lock_last
3124 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3125 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3126 codeptr);
3127 }
3128 } else if (ompt_enabled.ompt_callback_nest_lock) {
3129 // release_lock_prev
3130 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3131 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3132 }
3133 }
3134#endif
3135
3136#else // KMP_USE_DYNAMIC_LOCK
3137
3139
3140 /* Can't use serial interval since not block structured */
3141
3142 if ((__kmp_user_lock_kind == lk_tas) &&
3143 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3145#if KMP_OS_LINUX && \
3146 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3147 // "fast" path implemented to fix customer performance issue
3148 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3149#if USE_ITT_BUILD
3150 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3151#endif /* USE_ITT_BUILD */
3152
3153#if OMPT_SUPPORT && OMPT_OPTIONAL
3154 int release_status = KMP_LOCK_STILL_HELD;
3155#endif
3156
3157 if (--(tl->lk.depth_locked) == 0) {
3158 TCW_4(tl->lk.poll, 0);
3159#if OMPT_SUPPORT && OMPT_OPTIONAL
3160 release_status = KMP_LOCK_RELEASED;
3161#endif
3162 }
3163 KMP_MB();
3164
3165#if OMPT_SUPPORT && OMPT_OPTIONAL
3166 // This is the case, if called from omp_init_lock_with_hint:
3167 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3168 if (!codeptr)
3169 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3170 if (ompt_enabled.enabled) {
3171 if (release_status == KMP_LOCK_RELEASED) {
3172 if (ompt_enabled.ompt_callback_mutex_released) {
3173 // release_lock_last
3174 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3175 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3176 }
3177 } else if (ompt_enabled.ompt_callback_nest_lock) {
3178 // release_lock_previous
3179 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3180 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3181 }
3182 }
3183#endif
3184
3185 return;
3186#else
3187 lck = (kmp_user_lock_p)user_lock;
3188#endif
3189 }
3190#if KMP_USE_FUTEX
3191 else if ((__kmp_user_lock_kind == lk_futex) &&
3192 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3194 lck = (kmp_user_lock_p)user_lock;
3195 }
3196#endif
3197 else {
3198 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3199 }
3200
3201#if USE_ITT_BUILD
3202 __kmp_itt_lock_releasing(lck);
3203#endif /* USE_ITT_BUILD */
3204
3205 int release_status;
3206 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3207#if OMPT_SUPPORT && OMPT_OPTIONAL
3208 // This is the case, if called from omp_init_lock_with_hint:
3209 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3210 if (!codeptr)
3211 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3212 if (ompt_enabled.enabled) {
3213 if (release_status == KMP_LOCK_RELEASED) {
3214 if (ompt_enabled.ompt_callback_mutex_released) {
3215 // release_lock_last
3216 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3217 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3218 }
3219 } else if (ompt_enabled.ompt_callback_nest_lock) {
3220 // release_lock_previous
3221 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3222 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3223 }
3224 }
3225#endif
3226
3227#endif // KMP_USE_DYNAMIC_LOCK
3228}
3229
3230/* try to acquire the lock */
3231int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3232 KMP_COUNT_BLOCK(OMP_test_lock);
3233
3234#if KMP_USE_DYNAMIC_LOCK
3235 int rc;
3236 int tag = KMP_EXTRACT_D_TAG(user_lock);
3237#if USE_ITT_BUILD
3238 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3239#endif
3240#if OMPT_SUPPORT && OMPT_OPTIONAL
3241 // This is the case, if called from omp_init_lock_with_hint:
3242 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3243 if (!codeptr)
3244 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3245 if (ompt_enabled.ompt_callback_mutex_acquire) {
3246 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3247 ompt_mutex_test_lock, omp_lock_hint_none,
3248 __ompt_get_mutex_impl_type(user_lock),
3249 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3250 }
3251#endif
3252#if KMP_USE_INLINED_TAS
3253 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3254 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3255 } else
3256#elif KMP_USE_INLINED_FUTEX
3257 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3258 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3259 } else
3260#endif
3261 {
3262 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3263 }
3264 if (rc) {
3265#if USE_ITT_BUILD
3266 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3267#endif
3268#if OMPT_SUPPORT && OMPT_OPTIONAL
3269 if (ompt_enabled.ompt_callback_mutex_acquired) {
3270 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3271 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3272 }
3273#endif
3274 return FTN_TRUE;
3275 } else {
3276#if USE_ITT_BUILD
3277 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3278#endif
3279 return FTN_FALSE;
3280 }
3281
3282#else // KMP_USE_DYNAMIC_LOCK
3283
3285 int rc;
3286
3287 if ((__kmp_user_lock_kind == lk_tas) &&
3288 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3289 lck = (kmp_user_lock_p)user_lock;
3290 }
3291#if KMP_USE_FUTEX
3292 else if ((__kmp_user_lock_kind == lk_futex) &&
3293 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3294 lck = (kmp_user_lock_p)user_lock;
3295 }
3296#endif
3297 else {
3298 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3299 }
3300
3301#if USE_ITT_BUILD
3302 __kmp_itt_lock_acquiring(lck);
3303#endif /* USE_ITT_BUILD */
3304#if OMPT_SUPPORT && OMPT_OPTIONAL
3305 // This is the case, if called from omp_init_lock_with_hint:
3306 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3307 if (!codeptr)
3308 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3309 if (ompt_enabled.ompt_callback_mutex_acquire) {
3310 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3311 ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3312 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3313 }
3314#endif
3315
3316 rc = TEST_LOCK(lck, gtid);
3317#if USE_ITT_BUILD
3318 if (rc) {
3319 __kmp_itt_lock_acquired(lck);
3320 } else {
3321 __kmp_itt_lock_cancelled(lck);
3322 }
3323#endif /* USE_ITT_BUILD */
3324#if OMPT_SUPPORT && OMPT_OPTIONAL
3325 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3326 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3327 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3328 }
3329#endif
3330
3331 return (rc ? FTN_TRUE : FTN_FALSE);
3332
3333 /* Can't use serial interval since not block structured */
3334
3335#endif // KMP_USE_DYNAMIC_LOCK
3336}
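/* A usage sketch of the user-level omp_test_lock() routine that this entry
 * point backs; the polling loop and the do_other_work() helper are
 * illustrative only:
@code
  extern omp_lock_t l;      // assumed to be initialized elsewhere
  while (!omp_test_lock(&l)) {
    do_other_work();        // hypothetical: make progress instead of blocking
  }
  // lock acquired
  omp_unset_lock(&l);
@endcode
*/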
3337
3338/* try to acquire the lock */
3339int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3340#if KMP_USE_DYNAMIC_LOCK
3341 int rc;
3342#if USE_ITT_BUILD
3343 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3344#endif
3345#if OMPT_SUPPORT && OMPT_OPTIONAL
3346 // This is the case, if called from omp_init_lock_with_hint:
3347 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3348 if (!codeptr)
3349 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3350 if (ompt_enabled.ompt_callback_mutex_acquire) {
3351 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3352 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3353 __ompt_get_mutex_impl_type(user_lock),
3354 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3355 }
3356#endif
3357 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3358#if USE_ITT_BUILD
3359 if (rc) {
3360 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3361 } else {
3362 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3363 }
3364#endif
3365#if OMPT_SUPPORT && OMPT_OPTIONAL
3366 if (ompt_enabled.enabled && rc) {
3367 if (rc == 1) {
3368 if (ompt_enabled.ompt_callback_mutex_acquired) {
3369 // lock_first
3370 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3371 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3372 codeptr);
3373 }
3374 } else {
3375 if (ompt_enabled.ompt_callback_nest_lock) {
3376 // lock_next
3377 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3378 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3379 }
3380 }
3381 }
3382#endif
3383 return rc;
3384
3385#else // KMP_USE_DYNAMIC_LOCK
3386
3388 int rc;
3389
3390 if ((__kmp_user_lock_kind == lk_tas) &&
3391 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3393 lck = (kmp_user_lock_p)user_lock;
3394 }
3395#if KMP_USE_FUTEX
3396 else if ((__kmp_user_lock_kind == lk_futex) &&
3397 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3399 lck = (kmp_user_lock_p)user_lock;
3400 }
3401#endif
3402 else {
3403 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3404 }
3405
3406#if USE_ITT_BUILD
3407 __kmp_itt_lock_acquiring(lck);
3408#endif /* USE_ITT_BUILD */
3409
3410#if OMPT_SUPPORT && OMPT_OPTIONAL
3411 // This is the case, if called from omp_init_lock_with_hint:
3412 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3413 if (!codeptr)
3414 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3415 if (ompt_enabled.enabled &&
3416 ompt_enabled.ompt_callback_mutex_acquire) {
3417 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3418 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3419 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3420 codeptr);
3421 }
3422#endif
3423
3424 rc = TEST_NESTED_LOCK(lck, gtid);
3425#if USE_ITT_BUILD
3426 if (rc) {
3427 __kmp_itt_lock_acquired(lck);
3428 } else {
3429 __kmp_itt_lock_cancelled(lck);
3430 }
3431#endif /* USE_ITT_BUILD */
3432#if OMPT_SUPPORT && OMPT_OPTIONAL
3433 if (ompt_enabled.enabled && rc) {
3434 if (rc == 1) {
3435 if (ompt_enabled.ompt_callback_mutex_acquired) {
3436 // lock_first
3437 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3438 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3439 }
3440 } else {
3441 if (ompt_enabled.ompt_callback_nest_lock) {
3442 // lock_next
3443 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3444 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3445 }
3446 }
3447 }
3448#endif
3449 return rc;
3450
3451 /* Can't use serial interval since not block structured */
3452
3453#endif // KMP_USE_DYNAMIC_LOCK
3454}
3455
3456// Interface to fast scalable reduce methods routines
3457
3458// keep the selected method in a thread-local structure for cross-function
3459// usage: it will be used in the __kmpc_end_reduce* functions;
3460// another solution would be to re-determine the method in the
3461// __kmpc_end_reduce* functions (a new prototype would then be required)
3462// AT: which solution is better?
3463#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3464 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3465
3466#define __KMP_GET_REDUCTION_METHOD(gtid) \
3467 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3468
3469// description of the packed_reduction_method variable: look at the macros in
3470// kmp.h
3471
3472// used in a critical section reduce block
3473static __forceinline void
3476
3477 // this lock was visible to a customer and to the threading profile tool as a
3478 // serial overhead span (although it's used for an internal purpose only)
3479 // why was it visible in previous implementation?
3480 // should we keep it visible in new reduce block?
3482
3483#if KMP_USE_DYNAMIC_LOCK
3484
3485 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3486 // Check if it is initialized.
3487 if (*lk == 0) {
3488 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3490 KMP_GET_D_TAG(__kmp_user_lock_seq));
3491 } else {
3492 __kmp_init_indirect_csptr(crit, loc, global_tid,
3493 KMP_GET_I_TAG(__kmp_user_lock_seq));
3494 }
3495 }
3496 // Branch for accessing the actual lock object and set operation. This
3497 // branching is inevitable since this lock initialization does not follow the
3498 // normal dispatch path (lock table is not used).
3499 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3500 lck = (kmp_user_lock_p)lk;
3501 KMP_DEBUG_ASSERT(lck != NULL);
3503 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3504 }
3505 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3506 } else {
3507 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3508 lck = ilk->lock;
3509 KMP_DEBUG_ASSERT(lck != NULL);
3511 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3512 }
3513 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3514 }
3515
3516#else // KMP_USE_DYNAMIC_LOCK
3517
3518 // We know that the fast reduction code is only emitted by Intel compilers
3519 // with 32 byte critical sections. If there isn't enough space, then we
3520 // have to use a pointer.
3523 } else {
3525 }
3526 KMP_DEBUG_ASSERT(lck != NULL);
3527
3529 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3530
3532
3533#endif // KMP_USE_DYNAMIC_LOCK
3534}
3535
3536// used in a critical section reduce block
3537static __forceinline void
3540
3542
3543#if KMP_USE_DYNAMIC_LOCK
3544
3545 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3548 __kmp_pop_sync(global_tid, ct_critical, loc);
3549 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3550 } else {
3551 kmp_indirect_lock_t *ilk =
3552 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3554 __kmp_pop_sync(global_tid, ct_critical, loc);
3555 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3556 }
3557
3558#else // KMP_USE_DYNAMIC_LOCK
3559
3560 // We know that the fast reduction code is only emitted by Intel compilers
3561 // with 32 byte critical sections. If there isn't enough space, then we have
3562 // to use a pointer.
3563 if (__kmp_base_user_lock_size > 32) {
3564 lck = *((kmp_user_lock_p *)crit);
3565 KMP_ASSERT(lck != NULL);
3566 } else {
3568 }
3569
3571 __kmp_pop_sync(global_tid, ct_critical, loc);
3572
3574
3575#endif // KMP_USE_DYNAMIC_LOCK
3576} // __kmp_end_critical_section_reduce_block
3577
3578static __forceinline int
3580 int *task_state) {
3581 kmp_team_t *team;
3582
3583 // Check whether we are inside a teams construct.
3584 if (th->th.th_teams_microtask) {
3585 *team_p = team = th->th.th_team;
3586 if (team->t.t_level == th->th.th_teams_level) {
3587 // This is reduction at teams construct.
3588 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3589 // Let's swap teams temporarily for the reduction.
3590 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3591 th->th.th_team = team->t.t_parent;
3592 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3593 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3594 *task_state = th->th.th_task_state;
3595 th->th.th_task_state = 0;
3596
3597 return 1;
3598 }
3599 }
3600 return 0;
3601}
3602
3603static __forceinline void
3605 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3606 th->th.th_info.ds.ds_tid = 0;
3607 th->th.th_team = team;
3608 th->th.th_team_nproc = team->t.t_nproc;
3609 th->th.th_task_team = team->t.t_task_team[task_state];
3610 __kmp_type_convert(task_state, &(th->th.th_task_state));
3611}
3612
3613/* 2.a.i. Reduce Block without a terminating barrier */
3614/*!
3615@ingroup SYNCHRONIZATION
3616@param loc source location information
3617@param global_tid global thread number
3618@param num_vars number of items (variables) to be reduced
3619@param reduce_size size of data in bytes to be reduced
3620@param reduce_data pointer to data to be reduced
3621@param reduce_func callback function providing reduction operation on two
3622operands and returning result of reduction in lhs_data
3623@param lck pointer to the unique lock data structure
3624@result 1 for the primary thread, 0 for all other team threads, 2 for all team
3625threads if atomic reduction needed
3626
3627The nowait version is used for a reduce clause with the nowait argument.
3628*/
3629kmp_int32
3630__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3631 size_t reduce_size, void *reduce_data,
3632 void (*reduce_func)(void *lhs_data, void *rhs_data),
3633 kmp_critical_name *lck) {
3634
3635 KMP_COUNT_BLOCK(REDUCE_nowait);
3636 int retval = 0;
3637 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3638 kmp_info_t *th;
3639 kmp_team_t *team;
3640 int teams_swapped = 0, task_state;
3641 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3642 __kmp_assert_valid_gtid(global_tid);
3643
3644 // why do we need this initialization here at all?
3645 // The reduction clause cannot be used as a stand-alone directive.
3646
3647 // do not call __kmp_serial_initialize(); it will be called by
3648 // __kmp_parallel_initialize() if needed
3649 // (possible detection of a false-positive race by the thread checker ???)
3652
3654
3655// check correctness of reduce block nesting
3656#if KMP_USE_DYNAMIC_LOCK
3658 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3659#else
3661 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3662#endif
3663
3664 th = __kmp_thread_from_gtid(global_tid);
3665 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3666
3667 // the packed_reduction_method value will be reused by the __kmp_end_reduce*
3668 // functions, so it must be kept in a variable
3669 // the variable should be either a construct-specific or thread-specific
3670 // property, not a team-specific property
3671 // (a thread can reach the next reduce block on the next construct, and the
3672 // reduce method may differ on that construct)
3673 // an ident_t "loc" parameter could be used as a construct-specific property
3674 // (but what if loc == 0?)
3675 // (if a construct-specific or team-specific variable were shared between
3676 // threads, unnecessary extra synchronization would be needed)
3677 // a thread-specific variable is better with respect to both issues above
3678 // (next construct and extra syncs)
3679 // a thread-specific "th_local.reduction_method" variable is used currently
3680 // each thread executes the 'determine' and 'set' lines (there is no need to
3681 // have only one thread do it, which would require extra synchronization)
3682
3683 packed_reduction_method = __kmp_determine_reduction_method(
3684 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3685 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3686
3687 OMPT_REDUCTION_DECL(th, global_tid);
3688 if (packed_reduction_method == critical_reduce_block) {
3689
3691
3693 retval = 1;
3694
3695 } else if (packed_reduction_method == empty_reduce_block) {
3696
3698
3699 // usage: if team size == 1, no synchronization is required ( Intel
3700 // platforms only )
3701 retval = 1;
3702
3703 } else if (packed_reduction_method == atomic_reduce_block) {
3704
3705 retval = 2;
3706
3707 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3708 // won't be called by the code gen)
3709 // (this is not ideal: the checking block has already been closed by
3710 // this 'pop',
3711 // but the atomic operation has not been executed yet; it will be executed
3712 // slightly later, literally on the next instruction)
3714 __kmp_pop_sync(global_tid, ct_reduce, loc);
3715
3716 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3717                                   tree_reduce_block)) {
3718
3719// AT: performance issue: a real barrier here
3720// AT: (if primary thread is slow, other threads are blocked here waiting for
3721// the primary thread to come and release them)
3722// AT: (it's not what a customer would expect when specifying the NOWAIT clause)
3723// AT: (specifying NOWAIT won't improve performance here; it will only
3724// be confusing to a customer)
3725// AT: another implementation of *barrier_gather*nowait() (or some other design)
3726// might go faster and be more in line with the sense of NOWAIT
3727// AT: TO DO: run the EPCC benchmark and compare times
3728
3729// this barrier should be invisible to a customer and to the threading profile
3730// tool (it's neither a terminating barrier nor customer's code, it's
3731// used for an internal purpose)
3732#if OMPT_SUPPORT
3733  // JP: can this barrier potentially lead to task scheduling?
3734  // JP: as long as there is a barrier in the implementation, OMPT should and
3735  // will provide the barrier events,
3736  // so we set up the necessary frame/return addresses.
3737 ompt_frame_t *ompt_frame;
3738 if (ompt_enabled.enabled) {
3739 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3740 if (ompt_frame->enter_frame.ptr == NULL)
3741 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3742 }
3743 OMPT_STORE_RETURN_ADDRESS(global_tid);
3744#endif
3745#if USE_ITT_NOTIFY
3746 __kmp_threads[global_tid]->th.th_ident = loc;
3747#endif
3748 retval =
3749 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3750 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3751 retval = (retval != 0) ? (0) : (1);
3752#if OMPT_SUPPORT && OMPT_OPTIONAL
3753 if (ompt_enabled.enabled) {
3754 ompt_frame->enter_frame = ompt_data_none;
3755 }
3756#endif
3757
3758 // all other workers except primary thread should do this pop here
3759    // (none of the other workers will get to __kmpc_end_reduce_nowait())
3760    if (__kmp_env_consistency_check) {
3761      if (retval == 0) {
3762 __kmp_pop_sync(global_tid, ct_reduce, loc);
3763 }
3764 }
3765
3766 } else {
3767
3768 // should never reach this block
3769 KMP_ASSERT(0); // "unexpected method"
3770 }
3771 if (teams_swapped) {
3772 __kmp_restore_swapped_teams(th, team, task_state);
3773 }
3774 KA_TRACE(
3775 10,
3776 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3777 global_tid, packed_reduction_method, retval));
3778
3779 return retval;
3780}
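/* Editor-added illustration (not part of kmp_csupport.cpp): a user-level
 * reduction with the nowait clause, which is the construct this entry point
 * serves. When built with an OpenMP compiler (e.g. clang -fopenmp), the
 * worksharing loop below is typically lowered to a call to
 * __kmpc_reduce_nowait(); depending on the return value the generated code
 * either combines the private copies and calls __kmpc_end_reduce_nowait()
 * (return value 1), combines them with atomics (return value 2), or does
 * nothing because the tree barrier already folded the values (return value 0).
 * The exact lowering is compiler-specific; this only shows the user-visible
 * pattern. */
#include <stdio.h>

int main(void) {
  long sum = 0;
#pragma omp parallel
  {
    /* each thread reduces its chunk into a private copy of 'sum'; the runtime
       combines the copies using the method selected by
       __kmp_determine_reduction_method() */
#pragma omp for reduction(+ : sum) nowait
    for (int i = 0; i < 1000; ++i)
      sum += i;
  }
  printf("sum = %ld\n", sum); /* expected: 499500 */
  return 0;
}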
3781
3782/*!
3783@ingroup SYNCHRONIZATION
3784@param loc source location information
3785@param global_tid global thread id.
3786@param lck pointer to the unique lock data structure
3787
3788Finish the execution of a reduce nowait.
3789*/
3790void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3791                              kmp_critical_name *lck) {
3792
3793 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3794
3795 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3796 __kmp_assert_valid_gtid(global_tid);
3797
3798 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3799
3800 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3801
3802 if (packed_reduction_method == critical_reduce_block) {
3803
3804    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3805    OMPT_REDUCTION_END;
3806
3807 } else if (packed_reduction_method == empty_reduce_block) {
3808
3809 // usage: if team size == 1, no synchronization is required ( on Intel
3810 // platforms only )
3811
3812    OMPT_REDUCTION_END;
3813
3814 } else if (packed_reduction_method == atomic_reduce_block) {
3815
3816 // neither primary thread nor other workers should get here
3817 // (code gen does not generate this call in case 2: atomic reduce block)
3819    // actually it would be better to remove this else-if entirely;
3820    // after removal this value would be checked by the 'else' and would assert
3820
3821 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3822                                   tree_reduce_block)) {
3823
3824 // only primary thread gets here
3825 // OMPT: tree reduction is annotated in the barrier code
3826
3827 } else {
3828
3829 // should never reach this block
3830 KMP_ASSERT(0); // "unexpected method"
3831 }
3832
3833  if (__kmp_env_consistency_check)
3834    __kmp_pop_sync(global_tid, ct_reduce, loc);
3835
3836 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3837 global_tid, packed_reduction_method));
3838
3839 return;
3840}
3841
3842/* 2.a.ii. Reduce Block with a terminating barrier */
3843
3844/*!
3845@ingroup SYNCHRONIZATION
3846@param loc source location information
3847@param global_tid global thread number
3848@param num_vars number of items (variables) to be reduced
3849@param reduce_size size of data in bytes to be reduced
3850@param reduce_data pointer to data to be reduced
3851@param reduce_func callback function providing reduction operation on two
3852operands and returning result of reduction in lhs_data
3853@param lck pointer to the unique lock data structure
3854@result 1 for the primary thread, 0 for all other team threads, 2 for all team
3855threads if atomic reduction needed
3856
3857A blocking reduce that includes an implicit barrier.
3858*/
3859kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3860                        size_t reduce_size, void *reduce_data,
3861                        void (*reduce_func)(void *lhs_data, void *rhs_data),
3862                        kmp_critical_name *lck) {
3863  KMP_COUNT_BLOCK(REDUCE_wait);
3864 int retval = 0;
3865 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3866 kmp_info_t *th;
3867 kmp_team_t *team;
3868 int teams_swapped = 0, task_state;
3869
3870 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3871 __kmp_assert_valid_gtid(global_tid);
3872
3873 // why do we need this initialization here at all?
3874 // Reduction clause can not be a stand-alone directive.
3875
3876 // do not call __kmp_serial_initialize(), it will be called by
3877 // __kmp_parallel_initialize() if needed
3878 // possible detection of false-positive race by the threadchecker ???
3879  if (!TCR_4(__kmp_init_parallel))
3880    __kmp_parallel_initialize();
3881
3882  __kmp_resume_if_soft_paused();
3883
3884// check correctness of reduce block nesting
3885#if KMP_USE_DYNAMIC_LOCK
3886  if (__kmp_env_consistency_check)
3887    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3888#else
3889  if (__kmp_env_consistency_check)
3890    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3891#endif
3892
3893 th = __kmp_thread_from_gtid(global_tid);
3894 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3895
3896 packed_reduction_method = __kmp_determine_reduction_method(
3897 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3898 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3899
3900 OMPT_REDUCTION_DECL(th, global_tid);
3901
3902 if (packed_reduction_method == critical_reduce_block) {
3903
3904    OMPT_REDUCTION_BEGIN;
3905    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3906    retval = 1;
3907
3908 } else if (packed_reduction_method == empty_reduce_block) {
3909
3910    OMPT_REDUCTION_BEGIN;
3911    // usage: if team size == 1, no synchronization is required ( Intel
3912 // platforms only )
3913 retval = 1;
3914
3915 } else if (packed_reduction_method == atomic_reduce_block) {
3916
3917 retval = 2;
3918
3919 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3920                                   tree_reduce_block)) {
3921
3922// case tree_reduce_block:
3923// this barrier should be visible to a customer and to the threading profile
3924// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3925#if OMPT_SUPPORT
3926 ompt_frame_t *ompt_frame;
3927 if (ompt_enabled.enabled) {
3928 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3929 if (ompt_frame->enter_frame.ptr == NULL)
3930 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3931 }
3932 OMPT_STORE_RETURN_ADDRESS(global_tid);
3933#endif
3934#if USE_ITT_NOTIFY
3935 __kmp_threads[global_tid]->th.th_ident =
3936 loc; // needed for correct notification of frames
3937#endif
3938 retval =
3939 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3940 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3941 retval = (retval != 0) ? (0) : (1);
3942#if OMPT_SUPPORT && OMPT_OPTIONAL
3943 if (ompt_enabled.enabled) {
3944 ompt_frame->enter_frame = ompt_data_none;
3945 }
3946#endif
3947
3948 // all other workers except primary thread should do this pop here
3949    // (no worker other than the primary thread will enter __kmpc_end_reduce())
3950    if (__kmp_env_consistency_check) {
3951      if (retval == 0) { // 0: all other workers; 1: primary thread
3952 __kmp_pop_sync(global_tid, ct_reduce, loc);
3953 }
3954 }
3955
3956 } else {
3957
3958 // should never reach this block
3959 KMP_ASSERT(0); // "unexpected method"
3960 }
3961 if (teams_swapped) {
3962 __kmp_restore_swapped_teams(th, team, task_state);
3963 }
3964
3965 KA_TRACE(10,
3966 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3967 global_tid, packed_reduction_method, retval));
3968 return retval;
3969}
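/* Editor-added illustration (not part of kmp_csupport.cpp): the shape of the
 * reduce_func callback that the compiler passes to __kmpc_reduce() and
 * __kmpc_reduce_nowait(). As documented above, reduce_data points to the
 * per-thread reduction data and reduce_func folds rhs_data into lhs_data.
 * The struct layout and the names red_data_t / example_reduce_func are
 * assumptions for illustration only; real compilers generate their own
 * layout (often a list of pointers to the private copies). */
typedef struct {
  long sum;    /* private copy for reduction(+ : sum)    */
  double vmax; /* private copy for reduction(max : vmax) */
} red_data_t;

static void example_reduce_func(void *lhs_data, void *rhs_data) {
  red_data_t *lhs = (red_data_t *)lhs_data;
  red_data_t *rhs = (red_data_t *)rhs_data;
  lhs->sum += rhs->sum; /* '+' combiner: result left in lhs_data */
  if (rhs->vmax > lhs->vmax)
    lhs->vmax = rhs->vmax; /* 'max' combiner */
}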
3970
3971/*!
3972@ingroup SYNCHRONIZATION
3973@param loc source location information
3974@param global_tid global thread id.
3975@param lck pointer to the unique lock data structure
3976
3977Finish the execution of a blocking reduce.
3978The <tt>lck</tt> pointer must be the same as that used in the corresponding
3979start function.
3980*/
3981void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3982                       kmp_critical_name *lck) {
3983
3984 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3985 kmp_info_t *th;
3986 kmp_team_t *team;
3987 int teams_swapped = 0, task_state;
3988
3989 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3990 __kmp_assert_valid_gtid(global_tid);
3991
3992 th = __kmp_thread_from_gtid(global_tid);
3993 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3994
3995 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3996
3997 // this barrier should be visible to a customer and to the threading profile
3998 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3999 OMPT_REDUCTION_DECL(th, global_tid);
4000
4001 if (packed_reduction_method == critical_reduce_block) {
4002    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
4003
4004    OMPT_REDUCTION_END;
4005
4006// TODO: implicit barrier: should be exposed
4007#if OMPT_SUPPORT
4008 ompt_frame_t *ompt_frame;
4009 if (ompt_enabled.enabled) {
4010 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4011 if (ompt_frame->enter_frame.ptr == NULL)
4012 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4013 }
4014 OMPT_STORE_RETURN_ADDRESS(global_tid);
4015#endif
4016#if USE_ITT_NOTIFY
4017 __kmp_threads[global_tid]->th.th_ident = loc;
4018#endif
4019 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4020#if OMPT_SUPPORT && OMPT_OPTIONAL
4021 if (ompt_enabled.enabled) {
4022 ompt_frame->enter_frame = ompt_data_none;
4023 }
4024#endif
4025
4026 } else if (packed_reduction_method == empty_reduce_block) {
4027
4028    OMPT_REDUCTION_END;
4029
4030// usage: if team size==1, no synchronization is required (Intel platforms only)
4031
4032// TODO: implicit barrier: should be exposed
4033#if OMPT_SUPPORT
4034 ompt_frame_t *ompt_frame;
4035 if (ompt_enabled.enabled) {
4036 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4037 if (ompt_frame->enter_frame.ptr == NULL)
4038 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4039 }
4040 OMPT_STORE_RETURN_ADDRESS(global_tid);
4041#endif
4042#if USE_ITT_NOTIFY
4043 __kmp_threads[global_tid]->th.th_ident = loc;
4044#endif
4045 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4046#if OMPT_SUPPORT && OMPT_OPTIONAL
4047 if (ompt_enabled.enabled) {
4048 ompt_frame->enter_frame = ompt_data_none;
4049 }
4050#endif
4051
4052 } else if (packed_reduction_method == atomic_reduce_block) {
4053
4054#if OMPT_SUPPORT
4055 ompt_frame_t *ompt_frame;
4056 if (ompt_enabled.enabled) {
4057 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4058 if (ompt_frame->enter_frame.ptr == NULL)
4059 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4060 }
4061 OMPT_STORE_RETURN_ADDRESS(global_tid);
4062#endif
4063// TODO: implicit barrier: should be exposed
4064#if USE_ITT_NOTIFY
4065 __kmp_threads[global_tid]->th.th_ident = loc;
4066#endif
4067 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4068#if OMPT_SUPPORT && OMPT_OPTIONAL
4069 if (ompt_enabled.enabled) {
4070 ompt_frame->enter_frame = ompt_data_none;
4071 }
4072#endif
4073
4074 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4075                                   tree_reduce_block)) {
4076
4077 // only primary thread executes here (primary releases all other workers)
4078 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4079 global_tid);
4080
4081 } else {
4082
4083 // should never reach this block
4084 KMP_ASSERT(0); // "unexpected method"
4085 }
4086 if (teams_swapped) {
4087 __kmp_restore_swapped_teams(th, team, task_state);
4088 }
4089
4090  if (__kmp_env_consistency_check)
4091    __kmp_pop_sync(global_tid, ct_reduce, loc);
4092
4093 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4094 global_tid, packed_reduction_method));
4095
4096 return;
4097}
4098
4099#undef __KMP_GET_REDUCTION_METHOD
4100#undef __KMP_SET_REDUCTION_METHOD
4101
4102/* end of interface to fast scalable reduce routines */
4103
4104kmp_uint64 __kmpc_get_taskid() {
4105
4106 kmp_int32 gtid;
4107 kmp_info_t *thread;
4108
4109 gtid = __kmp_get_gtid();
4110 if (gtid < 0) {
4111 return 0;
4112 }
4113 thread = __kmp_thread_from_gtid(gtid);
4114 return thread->th.th_current_task->td_task_id;
4115
4116} // __kmpc_get_taskid
4117
4118kmp_uint64 __kmpc_get_parent_taskid() {
4119
4120 kmp_int32 gtid;
4121 kmp_info_t *thread;
4122 kmp_taskdata_t *parent_task;
4123
4124 gtid = __kmp_get_gtid();
4125 if (gtid < 0) {
4126 return 0;
4127 }
4128 thread = __kmp_thread_from_gtid(gtid);
4129 parent_task = thread->th.th_current_task->td_parent;
4130 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4131
4132} // __kmpc_get_parent_taskid
4133
4134/*!
4135@ingroup WORK_SHARING
4136@param loc source location information.
4137@param gtid global thread number.
4138@param num_dims number of associated doacross loops.
4139@param dims info on loops bounds.
4140
4141Initialize doacross loop information.
4142Expect the compiler to send us inclusive bounds,
4143e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
4144*/
4145void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4146 const struct kmp_dim *dims) {
4147  __kmp_assert_valid_gtid(gtid);
4148  int j, idx;
4149 kmp_int64 last, trace_count;
4150 kmp_info_t *th = __kmp_threads[gtid];
4151 kmp_team_t *team = th->th.th_team;
4152 kmp_uint32 *flags;
4153 kmp_disp_t *pr_buf = th->th.th_dispatch;
4154 dispatch_shared_info_t *sh_buf;
4155
4156 KA_TRACE(
4157 20,
4158 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4159 gtid, num_dims, !team->t.t_serialized));
4160 KMP_DEBUG_ASSERT(dims != NULL);
4161 KMP_DEBUG_ASSERT(num_dims > 0);
4162
4163 if (team->t.t_serialized) {
4164 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4165 return; // no dependencies if team is serialized
4166 }
4167 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4168 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4169 // the next loop
4170 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4171
4172 // Save bounds info into allocated private buffer
4173 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4174  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4175      th, sizeof(kmp_int64) * (4 * num_dims + 1));
4176 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4177 pr_buf->th_doacross_info[0] =
4178 (kmp_int64)num_dims; // first element is number of dimensions
4179  // Also save the address of num_done so it can be accessed later without
4180  // knowing the buffer index
4181 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4182 pr_buf->th_doacross_info[2] = dims[0].lo;
4183 pr_buf->th_doacross_info[3] = dims[0].up;
4184 pr_buf->th_doacross_info[4] = dims[0].st;
4185 last = 5;
4186 for (j = 1; j < num_dims; ++j) {
4187 kmp_int64
4188 range_length; // To keep ranges of all dimensions but the first dims[0]
4189 if (dims[j].st == 1) { // most common case
4190 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4191 range_length = dims[j].up - dims[j].lo + 1;
4192 } else {
4193 if (dims[j].st > 0) {
4194 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4195 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4196 } else { // negative increment
4197 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4198 range_length =
4199 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4200 }
4201 }
4202 pr_buf->th_doacross_info[last++] = range_length;
4203 pr_buf->th_doacross_info[last++] = dims[j].lo;
4204 pr_buf->th_doacross_info[last++] = dims[j].up;
4205 pr_buf->th_doacross_info[last++] = dims[j].st;
4206 }
4207
4208 // Compute total trip count.
4209 // Start with range of dims[0] which we don't need to keep in the buffer.
4210 if (dims[0].st == 1) { // most common case
4211 trace_count = dims[0].up - dims[0].lo + 1;
4212 } else if (dims[0].st > 0) {
4213 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4214 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4215 } else { // negative increment
4216 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4217 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4218 }
4219 for (j = 1; j < num_dims; ++j) {
4220 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4221 }
4222 KMP_DEBUG_ASSERT(trace_count > 0);
4223
4224 // Check if shared buffer is not occupied by other loop (idx -
4225 // __kmp_dispatch_num_buffers)
4226 if (idx != sh_buf->doacross_buf_idx) {
4227 // Shared buffer is occupied, wait for it to be free
4228 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4229 __kmp_eq_4, NULL);
4230 }
4231#if KMP_32_BIT_ARCH
4232 // Check if we are the first thread. After the CAS the first thread gets 0,
4233 // others get 1 if initialization is in progress, allocated pointer otherwise.
4234 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4235  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4236      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4237#else
4238  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4239      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4240#endif
4241 if (flags == NULL) {
4242 // we are the first thread, allocate the array of flags
4243 size_t size =
4244 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4245 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4246 KMP_MB();
4247 sh_buf->doacross_flags = flags;
4248 } else if (flags == (kmp_uint32 *)1) {
4249#if KMP_32_BIT_ARCH
4250 // initialization is still in progress, need to wait
4251 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4252#else
4253 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4254#endif
4255 KMP_YIELD(TRUE);
4256 KMP_MB();
4257 } else {
4258 KMP_MB();
4259 }
4260 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4261 pr_buf->th_doacross_flags =
4262 sh_buf->doacross_flags; // save private copy in order to not
4263 // touch shared buffer on each iteration
4264 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4265}
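/* Editor-added illustration (not part of kmp_csupport.cpp): a doacross loop at
 * the user level. For a loop with an ordered(n) clause and depend(sink/source)
 * ordered directives, compilers typically emit __kmpc_doacross_init() before
 * the loop, __kmpc_doacross_wait() for each depend(sink : ...) clause,
 * __kmpc_doacross_post() for depend(source), and __kmpc_doacross_fini() after
 * the loop. Note the inclusive-bounds convention described above: for this
 * loop the runtime sees lo=1, up=N-1, st=1. The sink on iteration i=1 refers
 * to the non-existent iteration 0 and is simply ignored by the out-of-bounds
 * check in __kmpc_doacross_wait(). The name example_doacross is hypothetical. */
#define N 1024

double example_doacross(void) {
  static double a[N];
  a[0] = 1.0;
#pragma omp parallel for ordered(1)
  for (int i = 1; i < N; ++i) {
#pragma omp ordered depend(sink : i - 1) /* typically lowered to __kmpc_doacross_wait */
    a[i] = a[i - 1] * 0.5;
#pragma omp ordered depend(source)       /* typically lowered to __kmpc_doacross_post */
  }
  return a[N - 1];
}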
4266
4267void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4268  __kmp_assert_valid_gtid(gtid);
4269  kmp_int64 shft;
4270 size_t num_dims, i;
4271  kmp_uint32 flag;
4272  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4273 kmp_info_t *th = __kmp_threads[gtid];
4274 kmp_team_t *team = th->th.th_team;
4275 kmp_disp_t *pr_buf;
4276 kmp_int64 lo, up, st;
4277
4278 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4279 if (team->t.t_serialized) {
4280 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4281 return; // no dependencies if team is serialized
4282 }
4283
4284 // calculate sequential iteration number and check out-of-bounds condition
4285 pr_buf = th->th.th_dispatch;
4286 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4287 num_dims = (size_t)pr_buf->th_doacross_info[0];
4288 lo = pr_buf->th_doacross_info[2];
4289 up = pr_buf->th_doacross_info[3];
4290 st = pr_buf->th_doacross_info[4];
4291#if OMPT_SUPPORT && OMPT_OPTIONAL
4292 SimpleVLA<ompt_dependence_t> deps(num_dims);
4293#endif
4294 if (st == 1) { // most common case
4295 if (vec[0] < lo || vec[0] > up) {
4296 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4297 "bounds [%lld,%lld]\n",
4298 gtid, vec[0], lo, up));
4299 return;
4300 }
4301 iter_number = vec[0] - lo;
4302 } else if (st > 0) {
4303 if (vec[0] < lo || vec[0] > up) {
4304 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4305 "bounds [%lld,%lld]\n",
4306 gtid, vec[0], lo, up));
4307 return;
4308 }
4309 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4310 } else { // negative increment
4311 if (vec[0] > lo || vec[0] < up) {
4312 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4313 "bounds [%lld,%lld]\n",
4314 gtid, vec[0], lo, up));
4315 return;
4316 }
4317 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4318 }
4319#if OMPT_SUPPORT && OMPT_OPTIONAL
4320 deps[0].variable.value = iter_number;
4321 deps[0].dependence_type = ompt_dependence_type_sink;
4322#endif
4323 for (i = 1; i < num_dims; ++i) {
4324 kmp_int64 iter, ln;
4325 size_t j = i * 4;
4326 ln = pr_buf->th_doacross_info[j + 1];
4327 lo = pr_buf->th_doacross_info[j + 2];
4328 up = pr_buf->th_doacross_info[j + 3];
4329 st = pr_buf->th_doacross_info[j + 4];
4330 if (st == 1) {
4331 if (vec[i] < lo || vec[i] > up) {
4332 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4333 "bounds [%lld,%lld]\n",
4334 gtid, vec[i], lo, up));
4335 return;
4336 }
4337 iter = vec[i] - lo;
4338 } else if (st > 0) {
4339 if (vec[i] < lo || vec[i] > up) {
4340 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4341 "bounds [%lld,%lld]\n",
4342 gtid, vec[i], lo, up));
4343 return;
4344 }
4345 iter = (kmp_uint64)(vec[i] - lo) / st;
4346 } else { // st < 0
4347 if (vec[i] > lo || vec[i] < up) {
4348 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4349 "bounds [%lld,%lld]\n",
4350 gtid, vec[i], lo, up));
4351 return;
4352 }
4353 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4354 }
4355 iter_number = iter + ln * iter_number;
4356#if OMPT_SUPPORT && OMPT_OPTIONAL
4357 deps[i].variable.value = iter;
4358 deps[i].dependence_type = ompt_dependence_type_sink;
4359#endif
4360 }
4361 shft = iter_number % 32; // use 32-bit granularity
4362 iter_number >>= 5; // divided by 32
4363 flag = 1 << shft;
4364 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4365 KMP_YIELD(TRUE);
4366 }
4367 KMP_MB();
4368#if OMPT_SUPPORT && OMPT_OPTIONAL
4369 if (ompt_enabled.ompt_callback_dependences) {
4370 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4371 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4372 }
4373#endif
4374 KA_TRACE(20,
4375 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4376 gtid, (iter_number << 5) + shft));
4377}
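/* Editor-added illustration (not part of kmp_csupport.cpp): the flag indexing
 * used by __kmpc_doacross_wait() and __kmpc_doacross_post() above. The
 * collapsed iteration number selects one bit in a shared array of 32-bit
 * words: bit (iter % 32) of word (iter / 32). Wait spins until the bit is
 * set; post sets it with an atomic OR. The helper name doacross_bit_index is
 * an assumption for illustration. */
#include <stdint.h>

static inline void doacross_bit_index(int64_t iter_number, int64_t *word_idx,
                                      uint32_t *bit_mask) {
  int64_t shft = iter_number % 32; /* bit position within the 32-bit word   */
  *word_idx = iter_number >> 5;    /* word index: iteration number / 32     */
  *bit_mask = 1u << shft;          /* mask tested by wait, OR-ed in by post */
}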
4378
4379void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4380  __kmp_assert_valid_gtid(gtid);
4381  kmp_int64 shft;
4382 size_t num_dims, i;
4383  kmp_uint32 flag;
4384  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4385 kmp_info_t *th = __kmp_threads[gtid];
4386 kmp_team_t *team = th->th.th_team;
4387 kmp_disp_t *pr_buf;
4388 kmp_int64 lo, st;
4389
4390 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4391 if (team->t.t_serialized) {
4392 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4393 return; // no dependencies if team is serialized
4394 }
4395
4396 // calculate sequential iteration number (same as in "wait" but no
4397 // out-of-bounds checks)
4398 pr_buf = th->th.th_dispatch;
4399 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4400 num_dims = (size_t)pr_buf->th_doacross_info[0];
4401 lo = pr_buf->th_doacross_info[2];
4402 st = pr_buf->th_doacross_info[4];
4403#if OMPT_SUPPORT && OMPT_OPTIONAL
4404 SimpleVLA<ompt_dependence_t> deps(num_dims);
4405#endif
4406 if (st == 1) { // most common case
4407 iter_number = vec[0] - lo;
4408 } else if (st > 0) {
4409 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4410 } else { // negative increment
4411 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4412 }
4413#if OMPT_SUPPORT && OMPT_OPTIONAL
4414 deps[0].variable.value = iter_number;
4415 deps[0].dependence_type = ompt_dependence_type_source;
4416#endif
4417 for (i = 1; i < num_dims; ++i) {
4418 kmp_int64 iter, ln;
4419 size_t j = i * 4;
4420 ln = pr_buf->th_doacross_info[j + 1];
4421 lo = pr_buf->th_doacross_info[j + 2];
4422 st = pr_buf->th_doacross_info[j + 4];
4423 if (st == 1) {
4424 iter = vec[i] - lo;
4425 } else if (st > 0) {
4426 iter = (kmp_uint64)(vec[i] - lo) / st;
4427 } else { // st < 0
4428 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4429 }
4430 iter_number = iter + ln * iter_number;
4431#if OMPT_SUPPORT && OMPT_OPTIONAL
4432 deps[i].variable.value = iter;
4433 deps[i].dependence_type = ompt_dependence_type_source;
4434#endif
4435 }
4436#if OMPT_SUPPORT && OMPT_OPTIONAL
4437 if (ompt_enabled.ompt_callback_dependences) {
4438 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4439 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4440 }
4441#endif
4442 shft = iter_number % 32; // use 32-bit granularity
4443 iter_number >>= 5; // divided by 32
4444 flag = 1 << shft;
4445 KMP_MB();
4446 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4447 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4448 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4449 (iter_number << 5) + shft));
4450}
4451
4452void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4453  __kmp_assert_valid_gtid(gtid);
4454  kmp_int32 num_done;
4455 kmp_info_t *th = __kmp_threads[gtid];
4456 kmp_team_t *team = th->th.th_team;
4457 kmp_disp_t *pr_buf = th->th.th_dispatch;
4458
4459 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4460 if (team->t.t_serialized) {
4461 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4462 return; // nothing to do
4463 }
4464 num_done =
4465      KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4466  if (num_done == th->th.th_team_nproc) {
4467 // we are the last thread, need to free shared resources
4468 int idx = pr_buf->th_doacross_buf_idx - 1;
4469 dispatch_shared_info_t *sh_buf =
4470 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4471    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4472                     (kmp_int64)&sh_buf->doacross_num_done);
4473 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4474 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4475    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4476    sh_buf->doacross_flags = NULL;
4477 sh_buf->doacross_num_done = 0;
4478 sh_buf->doacross_buf_idx +=
4479 __kmp_dispatch_num_buffers; // free buffer for future re-use
4480 }
4481 // free private resources (need to keep buffer index forever)
4482 pr_buf->th_doacross_flags = NULL;
4483 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4484 pr_buf->th_doacross_info = NULL;
4485 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4486}
4487
4488/* OpenMP 5.1 Memory Management routines */
4489void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4490 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4491}
4492
4493void *omp_aligned_alloc(size_t align, size_t size,
4494 omp_allocator_handle_t allocator) {
4495 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4496}
4497
4498void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4499 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4500}
4501
4502void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4503 omp_allocator_handle_t allocator) {
4504 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4505}
4506
4507void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4508 omp_allocator_handle_t free_allocator) {
4509 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4510 free_allocator);
4511}
4512
4513void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4514 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4515}
4516/* end of OpenMP 5.1 Memory Management routines */
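/* Editor-added illustration (not part of kmp_csupport.cpp): user-level use of
 * the OpenMP memory-management entry points defined above. omp_default_mem_alloc
 * is a predefined allocator from <omp.h>; every call below resolves to the
 * __kmp_alloc family. The function name example_allocators is hypothetical. */
#include <omp.h>

int example_allocators(void) {
  double *a = (double *)omp_alloc(100 * sizeof(double), omp_default_mem_alloc);
  double *b = (double *)omp_aligned_alloc(64, 100 * sizeof(double),
                                          omp_default_mem_alloc); /* 64-byte aligned */
  double *c = (double *)omp_calloc(100, sizeof(double), omp_default_mem_alloc);
  if (!a || !b || !c)
    return 1;
  /* grow 'a'; both the old and the new block use the default allocator */
  a = (double *)omp_realloc(a, 200 * sizeof(double), omp_default_mem_alloc,
                            omp_default_mem_alloc);
  omp_free(a, omp_default_mem_alloc);
  omp_free(b, omp_default_mem_alloc);
  omp_free(c, omp_default_mem_alloc);
  return 0;
}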
4517
4518int __kmpc_get_target_offload(void) {
4519  if (!__kmp_init_serial) {
4520    __kmp_serial_initialize();
4521  }
4522 return __kmp_target_offload;
4523}
4524
4525int __kmpc_pause_resource(kmp_pause_status_t level) {
4526  if (!__kmp_init_serial) {
4527 return 1; // Can't pause if runtime is not initialized
4528 }
4529  return __kmp_pause_resource(level);
4530}
4531
4532void __kmpc_error(ident_t *loc, int severity, const char *message) {
4533 if (!__kmp_init_serial)
4534    __kmp_serial_initialize();
4535
4536 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4537
4538#if OMPT_SUPPORT
4539 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4540 ompt_callbacks.ompt_callback(ompt_callback_error)(
4541 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4542        OMPT_GET_RETURN_ADDRESS(0));
4543  }
4544#endif // OMPT_SUPPORT
4545
4546 char *src_loc;
4547 if (loc && loc->psource) {
4548 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4549 src_loc =
4550 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4551 __kmp_str_loc_free(&str_loc);
4552 } else {
4553 src_loc = __kmp_str_format("unknown");
4554 }
4555
4556 if (severity == severity_warning)
4557 KMP_WARNING(UserDirectedWarning, src_loc, message);
4558 else
4559 KMP_FATAL(UserDirectedError, src_loc, message);
4560
4561 __kmp_str_free(&src_loc);
4562}
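/* Editor-added illustration (not part of kmp_csupport.cpp): the user-level
 * error directive that reaches __kmpc_error(). Compilers that implement the
 * directive at execution time typically lower it to a call carrying
 * severity_warning or severity_fatal and the user-supplied message. The
 * function name example_error_directive is hypothetical. */
void example_error_directive(int bad_input) {
  if (bad_input) {
#pragma omp error at(execution) severity(warning) message("input out of range")
  }
}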
4563
4564// Mark begin of scope directive.
4565void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4566// reserved is for extension of scope directive and not used.
4567#if OMPT_SUPPORT && OMPT_OPTIONAL
4568 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4569 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4570 int tid = __kmp_tid_from_gtid(gtid);
4571 ompt_callbacks.ompt_callback(ompt_callback_work)(
4572 ompt_work_scope, ompt_scope_begin,
4573 &(team->t.ompt_team_info.parallel_data),
4574 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4575        OMPT_GET_RETURN_ADDRESS(0));
4576  }
4577#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4578}
4579
4580// Mark end of scope directive
4581void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4582// reserved is for extension of scope directive and not used.
4583#if OMPT_SUPPORT && OMPT_OPTIONAL
4584 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4585 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4586 int tid = __kmp_tid_from_gtid(gtid);
4587 ompt_callbacks.ompt_callback(ompt_callback_work)(
4588 ompt_work_scope, ompt_scope_end,
4589 &(team->t.ompt_team_info.parallel_data),
4590 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4591        OMPT_GET_RETURN_ADDRESS(0));
4592  }
4593#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4594}
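/* Editor-added illustration (not part of kmp_csupport.cpp): the scope construct
 * whose begin and end the two entry points above report to OMPT as
 * ompt_work_scope events. Whether a compiler emits __kmpc_scope/__kmpc_end_scope
 * for a given scope region is an implementation detail; the function name
 * example_scope is hypothetical. */
#include <omp.h>

int example_scope(void) {
  int total = 0;
#pragma omp parallel shared(total)
  {
    /* all threads of the team execute the scope region; the reduction clause
       combines their contributions, and an implicit barrier ends the region
       unless nowait is specified */
#pragma omp scope reduction(+ : total)
    { total += omp_get_thread_num(); }
  }
  return total;
}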
4595
4596#ifdef KMP_USE_VERSION_SYMBOLS
4597// For GOMP compatibility there are two versions of each omp_* API.
4598// One is the plain C symbol and one is the Fortran symbol with an appended
4599// underscore. When we implement a specific ompc_* version of an omp_*
4600// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4601// instead of the Fortran versions in kmp_ftn_entry.h
4602extern "C" {
4603// Have to undef these from omp.h so they aren't translated into
4604// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4605#ifdef omp_set_affinity_format
4606#undef omp_set_affinity_format
4607#endif
4608#ifdef omp_get_affinity_format
4609#undef omp_get_affinity_format
4610#endif
4611#ifdef omp_display_affinity
4612#undef omp_display_affinity
4613#endif
4614#ifdef omp_capture_affinity
4615#undef omp_capture_affinity
4616#endif
4617KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4618                        "OMP_5.0");
4619KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4620                        "OMP_5.0");
4621KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4622                        "OMP_5.0");
4623KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4624                        "OMP_5.0");
4625} // extern "C"
4626#endif