LLVM OpenMP 19.0.0git
kmp_lock.cpp
Go to the documentation of this file.
1/*
2 * kmp_lock.cpp -- lock-related functions
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include <stddef.h>
14#include <atomic>
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_io.h"
19#include "kmp_itt.h"
20#include "kmp_lock.h"
21#include "kmp_wait_release.h"
22#include "kmp_wrapper_getpid.h"
23
24#if KMP_USE_FUTEX
25#include <sys/syscall.h>
26#include <unistd.h>
27// We should really include <futex.h>, but that causes compatibility problems on
28// different Linux* OS distributions that either require that you include (or
29// break when you try to include) <pci/types.h>. Since all we need is the two
30// macros below (which are part of the kernel ABI, so can't change) we just
31// define the constants here and don't include <futex.h>
32#ifndef FUTEX_WAIT
33#define FUTEX_WAIT 0
34#endif
35#ifndef FUTEX_WAKE
36#define FUTEX_WAKE 1
37#endif
38#endif
39
40/* Implement spin locks for internal library use. */
41/* The algorithm implemented is Lamport's bakery lock [1974]. */
42
44 int i;
45 kmp_uint32 x, y;
46
47 /* Check to make sure unsigned arithmetic does wraps properly */
48 x = ~((kmp_uint32)0) - 2;
49 y = x - 2;
50
51 for (i = 0; i < 8; ++i, ++x, ++y) {
52 kmp_uint32 z = (x - y);
53 KMP_ASSERT(z == 2);
54 }
55
56 KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57}
58
59/* ------------------------------------------------------------------------ */
60/* test and set locks */
61
62// For the non-nested locks, we can only assume that the first 4 bytes were
63// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64// compiler only allocates a 4 byte pointer on IA-32 architecture. On
65// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
66//
67// gcc reserves >= 8 bytes for nested locks, so we can assume that the
68// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
69
71 return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
72}
73
75 return lck->lk.depth_locked != -1;
76}
77
78__forceinline static int
80 KMP_MB();
81
82#ifdef USE_LOCK_PROFILE
83 kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
84 if ((curr != 0) && (curr != gtid + 1))
85 __kmp_printf("LOCK CONTENTION: %p\n", lck);
86/* else __kmp_printf( "." );*/
87#endif /* USE_LOCK_PROFILE */
88
89 kmp_int32 tas_free = KMP_LOCK_FREE(tas);
90 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
91
92 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
93 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
96 }
97
98 kmp_uint32 spins;
99 kmp_uint64 time;
101 KMP_INIT_YIELD(spins);
102 KMP_INIT_BACKOFF(time);
104 do {
105#if !KMP_HAVE_UMWAIT
106 __kmp_spin_backoff(&backoff);
107#else
108 if (!__kmp_tpause_enabled)
109 __kmp_spin_backoff(&backoff);
110#endif
111 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
112 } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
113 !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
116}
117
119 int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
120 return retval;
121}
122
124 kmp_int32 gtid) {
125 char const *const func = "omp_set_lock";
126 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
128 KMP_FATAL(LockNestableUsedAsSimple, func);
129 }
130 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
131 KMP_FATAL(LockIsAlreadyOwned, func);
132 }
133 return __kmp_acquire_tas_lock(lck, gtid);
134}
135
137 kmp_int32 tas_free = KMP_LOCK_FREE(tas);
138 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
139 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
140 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
142 return TRUE;
143 }
144 return FALSE;
145}
146
148 kmp_int32 gtid) {
149 char const *const func = "omp_test_lock";
150 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
152 KMP_FATAL(LockNestableUsedAsSimple, func);
153 }
154 return __kmp_test_tas_lock(lck, gtid);
155}
156
158 KMP_MB(); /* Flush all pending memory write invalidates. */
159
161 KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
162 KMP_MB(); /* Flush all pending memory write invalidates. */
163
165 return KMP_LOCK_RELEASED;
166}
167
169 kmp_int32 gtid) {
170 char const *const func = "omp_unset_lock";
171 KMP_MB(); /* in case another processor initialized lock */
172 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
174 KMP_FATAL(LockNestableUsedAsSimple, func);
175 }
176 if (__kmp_get_tas_lock_owner(lck) == -1) {
177 KMP_FATAL(LockUnsettingFree, func);
178 }
179 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
180 (__kmp_get_tas_lock_owner(lck) != gtid)) {
181 KMP_FATAL(LockUnsettingSetByAnother, func);
182 }
183 return __kmp_release_tas_lock(lck, gtid);
184}
185
187 lck->lk.poll = KMP_LOCK_FREE(tas);
188}
189
191
193 char const *const func = "omp_destroy_lock";
194 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
196 KMP_FATAL(LockNestableUsedAsSimple, func);
197 }
198 if (__kmp_get_tas_lock_owner(lck) != -1) {
199 KMP_FATAL(LockStillOwned, func);
200 }
202}
203
204// nested test and set locks
205
207 KMP_DEBUG_ASSERT(gtid >= 0);
208
209 if (__kmp_get_tas_lock_owner(lck) == gtid) {
210 lck->lk.depth_locked += 1;
212 } else {
214 lck->lk.depth_locked = 1;
216 }
217}
218
220 kmp_int32 gtid) {
221 char const *const func = "omp_set_nest_lock";
223 KMP_FATAL(LockSimpleUsedAsNestable, func);
224 }
226}
227
229 int retval;
230
231 KMP_DEBUG_ASSERT(gtid >= 0);
232
233 if (__kmp_get_tas_lock_owner(lck) == gtid) {
234 retval = ++lck->lk.depth_locked;
235 } else if (!__kmp_test_tas_lock(lck, gtid)) {
236 retval = 0;
237 } else {
238 KMP_MB();
239 retval = lck->lk.depth_locked = 1;
240 }
241 return retval;
242}
243
245 kmp_int32 gtid) {
246 char const *const func = "omp_test_nest_lock";
248 KMP_FATAL(LockSimpleUsedAsNestable, func);
249 }
250 return __kmp_test_nested_tas_lock(lck, gtid);
251}
252
254 KMP_DEBUG_ASSERT(gtid >= 0);
255
256 KMP_MB();
257 if (--(lck->lk.depth_locked) == 0) {
259 return KMP_LOCK_RELEASED;
260 }
261 return KMP_LOCK_STILL_HELD;
262}
263
265 kmp_int32 gtid) {
266 char const *const func = "omp_unset_nest_lock";
267 KMP_MB(); /* in case another processor initialized lock */
269 KMP_FATAL(LockSimpleUsedAsNestable, func);
270 }
271 if (__kmp_get_tas_lock_owner(lck) == -1) {
272 KMP_FATAL(LockUnsettingFree, func);
273 }
274 if (__kmp_get_tas_lock_owner(lck) != gtid) {
275 KMP_FATAL(LockUnsettingSetByAnother, func);
276 }
278}
279
282 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
283}
284
287 lck->lk.depth_locked = 0;
288}
289
291 char const *const func = "omp_destroy_nest_lock";
293 KMP_FATAL(LockSimpleUsedAsNestable, func);
294 }
295 if (__kmp_get_tas_lock_owner(lck) != -1) {
296 KMP_FATAL(LockStillOwned, func);
297 }
299}
300
301#if KMP_USE_FUTEX
302
303/* ------------------------------------------------------------------------ */
304/* futex locks */
305
306// futex locks are really just test and set locks, with a different method
307// of handling contention. They take the same amount of space as test and
308// set locks, and are allocated the same way (i.e. use the area allocated by
309// the compiler for non-nested locks / allocate nested locks on the heap).
310
311static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
312 return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
313}
314
315static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
316 return lck->lk.depth_locked != -1;
317}
318
319__forceinline static int
320__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
321 kmp_int32 gtid_code = (gtid + 1) << 1;
322
323 KMP_MB();
324
325#ifdef USE_LOCK_PROFILE
326 kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
327 if ((curr != 0) && (curr != gtid_code))
328 __kmp_printf("LOCK CONTENTION: %p\n", lck);
329/* else __kmp_printf( "." );*/
330#endif /* USE_LOCK_PROFILE */
331
333 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
334 lck, lck->lk.poll, gtid));
335
336 kmp_int32 poll_val;
337
338 while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
339 &(lck->lk.poll), KMP_LOCK_FREE(futex),
340 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
341
342 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
343 KA_TRACE(
344 1000,
345 ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
346 lck, gtid, poll_val, cond));
347
348 // NOTE: if you try to use the following condition for this branch
349 //
350 // if ( poll_val & 1 == 0 )
351 //
352 // Then the 12.0 compiler has a bug where the following block will
353 // always be skipped, regardless of the value of the LSB of poll_val.
354 if (!cond) {
355 // Try to set the lsb in the poll to indicate to the owner
356 // thread that they need to wake this thread up.
357 if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
358 poll_val | KMP_LOCK_BUSY(1, futex))) {
359 KA_TRACE(
360 1000,
361 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
362 lck, lck->lk.poll, gtid));
363 continue;
364 }
365 poll_val |= KMP_LOCK_BUSY(1, futex);
366
367 KA_TRACE(1000,
368 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
369 lck->lk.poll, gtid));
370 }
371
372 KA_TRACE(
373 1000,
374 ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
375 lck, gtid, poll_val));
376
377 long rc;
378 if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
379 NULL, 0)) != 0) {
380 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
381 "failed (rc=%ld errno=%d)\n",
382 lck, gtid, poll_val, rc, errno));
383 continue;
384 }
385
386 KA_TRACE(1000,
387 ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
388 lck, gtid, poll_val));
389 // This thread has now done a successful futex wait call and was entered on
390 // the OS futex queue. We must now perform a futex wake call when releasing
391 // the lock, as we have no idea how many other threads are in the queue.
392 gtid_code |= 1;
393 }
394
396 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
397 lck->lk.poll, gtid));
399}
400
401int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
402 int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
403 return retval;
404}
405
406static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
407 kmp_int32 gtid) {
408 char const *const func = "omp_set_lock";
409 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
410 __kmp_is_futex_lock_nestable(lck)) {
411 KMP_FATAL(LockNestableUsedAsSimple, func);
412 }
413 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
414 KMP_FATAL(LockIsAlreadyOwned, func);
415 }
416 return __kmp_acquire_futex_lock(lck, gtid);
417}
418
419int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
420 if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
421 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
423 return TRUE;
424 }
425 return FALSE;
426}
427
428static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
429 kmp_int32 gtid) {
430 char const *const func = "omp_test_lock";
431 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
432 __kmp_is_futex_lock_nestable(lck)) {
433 KMP_FATAL(LockNestableUsedAsSimple, func);
434 }
435 return __kmp_test_futex_lock(lck, gtid);
436}
437
438int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
439 KMP_MB(); /* Flush all pending memory write invalidates. */
440
441 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
442 lck, lck->lk.poll, gtid));
443
445
446 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
447
448 KA_TRACE(1000,
449 ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
450 lck, gtid, poll_val));
451
452 if (KMP_LOCK_STRIP(poll_val) & 1) {
453 KA_TRACE(1000,
454 ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
455 lck, gtid));
456 syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
457 NULL, NULL, 0);
458 }
459
460 KMP_MB(); /* Flush all pending memory write invalidates. */
461
462 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
463 lck->lk.poll, gtid));
464
466 return KMP_LOCK_RELEASED;
467}
468
469static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
470 kmp_int32 gtid) {
471 char const *const func = "omp_unset_lock";
472 KMP_MB(); /* in case another processor initialized lock */
473 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
474 __kmp_is_futex_lock_nestable(lck)) {
475 KMP_FATAL(LockNestableUsedAsSimple, func);
476 }
477 if (__kmp_get_futex_lock_owner(lck) == -1) {
478 KMP_FATAL(LockUnsettingFree, func);
479 }
480 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
481 (__kmp_get_futex_lock_owner(lck) != gtid)) {
482 KMP_FATAL(LockUnsettingSetByAnother, func);
483 }
484 return __kmp_release_futex_lock(lck, gtid);
485}
486
487void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
488 TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
489}
490
491void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
492
493static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
494 char const *const func = "omp_destroy_lock";
495 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
496 __kmp_is_futex_lock_nestable(lck)) {
497 KMP_FATAL(LockNestableUsedAsSimple, func);
498 }
499 if (__kmp_get_futex_lock_owner(lck) != -1) {
500 KMP_FATAL(LockStillOwned, func);
501 }
502 __kmp_destroy_futex_lock(lck);
503}
504
505// nested futex locks
506
507int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
508 KMP_DEBUG_ASSERT(gtid >= 0);
509
510 if (__kmp_get_futex_lock_owner(lck) == gtid) {
511 lck->lk.depth_locked += 1;
513 } else {
514 __kmp_acquire_futex_lock_timed_template(lck, gtid);
515 lck->lk.depth_locked = 1;
517 }
518}
519
520static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
521 kmp_int32 gtid) {
522 char const *const func = "omp_set_nest_lock";
523 if (!__kmp_is_futex_lock_nestable(lck)) {
524 KMP_FATAL(LockSimpleUsedAsNestable, func);
525 }
526 return __kmp_acquire_nested_futex_lock(lck, gtid);
527}
528
529int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
530 int retval;
531
532 KMP_DEBUG_ASSERT(gtid >= 0);
533
534 if (__kmp_get_futex_lock_owner(lck) == gtid) {
535 retval = ++lck->lk.depth_locked;
536 } else if (!__kmp_test_futex_lock(lck, gtid)) {
537 retval = 0;
538 } else {
539 KMP_MB();
540 retval = lck->lk.depth_locked = 1;
541 }
542 return retval;
543}
544
545static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
546 kmp_int32 gtid) {
547 char const *const func = "omp_test_nest_lock";
548 if (!__kmp_is_futex_lock_nestable(lck)) {
549 KMP_FATAL(LockSimpleUsedAsNestable, func);
550 }
551 return __kmp_test_nested_futex_lock(lck, gtid);
552}
553
554int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
555 KMP_DEBUG_ASSERT(gtid >= 0);
556
557 KMP_MB();
558 if (--(lck->lk.depth_locked) == 0) {
559 __kmp_release_futex_lock(lck, gtid);
560 return KMP_LOCK_RELEASED;
561 }
562 return KMP_LOCK_STILL_HELD;
563}
564
565static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
566 kmp_int32 gtid) {
567 char const *const func = "omp_unset_nest_lock";
568 KMP_MB(); /* in case another processor initialized lock */
569 if (!__kmp_is_futex_lock_nestable(lck)) {
570 KMP_FATAL(LockSimpleUsedAsNestable, func);
571 }
572 if (__kmp_get_futex_lock_owner(lck) == -1) {
573 KMP_FATAL(LockUnsettingFree, func);
574 }
575 if (__kmp_get_futex_lock_owner(lck) != gtid) {
576 KMP_FATAL(LockUnsettingSetByAnother, func);
577 }
578 return __kmp_release_nested_futex_lock(lck, gtid);
579}
580
581void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
582 __kmp_init_futex_lock(lck);
583 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
584}
585
586void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
587 __kmp_destroy_futex_lock(lck);
588 lck->lk.depth_locked = 0;
589}
590
591static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
592 char const *const func = "omp_destroy_nest_lock";
593 if (!__kmp_is_futex_lock_nestable(lck)) {
594 KMP_FATAL(LockSimpleUsedAsNestable, func);
595 }
596 if (__kmp_get_futex_lock_owner(lck) != -1) {
597 KMP_FATAL(LockStillOwned, func);
598 }
599 __kmp_destroy_nested_futex_lock(lck);
600}
601
602#endif // KMP_USE_FUTEX
603
604/* ------------------------------------------------------------------------ */
605/* ticket (bakery) locks */
606
608 return std::atomic_load_explicit(&lck->lk.owner_id,
609 std::memory_order_relaxed) -
610 1;
611}
612
614 return std::atomic_load_explicit(&lck->lk.depth_locked,
615 std::memory_order_relaxed) != -1;
616}
617
618static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
619 return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
620 std::memory_order_acquire) == my_ticket;
621}
622
623__forceinline static int
625 kmp_int32 gtid) {
626 kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
627 &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
628
629#ifdef USE_LOCK_PROFILE
630 if (std::atomic_load_explicit(&lck->lk.now_serving,
631 std::memory_order_relaxed) != my_ticket)
632 __kmp_printf("LOCK CONTENTION: %p\n", lck);
633/* else __kmp_printf( "." );*/
634#endif /* USE_LOCK_PROFILE */
635
636 if (std::atomic_load_explicit(&lck->lk.now_serving,
637 std::memory_order_acquire) == my_ticket) {
639 }
640 KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
642}
643
646 return retval;
647}
648
650 kmp_int32 gtid) {
651 char const *const func = "omp_set_lock";
652
653 if (!std::atomic_load_explicit(&lck->lk.initialized,
654 std::memory_order_relaxed)) {
655 KMP_FATAL(LockIsUninitialized, func);
656 }
657 if (lck->lk.self != lck) {
658 KMP_FATAL(LockIsUninitialized, func);
659 }
661 KMP_FATAL(LockNestableUsedAsSimple, func);
662 }
663 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
664 KMP_FATAL(LockIsAlreadyOwned, func);
665 }
666
668
669 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
670 std::memory_order_relaxed);
672}
673
675 kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
676 std::memory_order_relaxed);
677
678 if (std::atomic_load_explicit(&lck->lk.now_serving,
679 std::memory_order_relaxed) == my_ticket) {
680 kmp_uint32 next_ticket = my_ticket + 1;
681 if (std::atomic_compare_exchange_strong_explicit(
682 &lck->lk.next_ticket, &my_ticket, next_ticket,
683 std::memory_order_acquire, std::memory_order_acquire)) {
684 return TRUE;
685 }
686 }
687 return FALSE;
688}
689
691 kmp_int32 gtid) {
692 char const *const func = "omp_test_lock";
693
694 if (!std::atomic_load_explicit(&lck->lk.initialized,
695 std::memory_order_relaxed)) {
696 KMP_FATAL(LockIsUninitialized, func);
697 }
698 if (lck->lk.self != lck) {
699 KMP_FATAL(LockIsUninitialized, func);
700 }
702 KMP_FATAL(LockNestableUsedAsSimple, func);
703 }
704
705 int retval = __kmp_test_ticket_lock(lck, gtid);
706
707 if (retval) {
708 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
709 std::memory_order_relaxed);
710 }
711 return retval;
712}
713
715 kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
716 std::memory_order_relaxed) -
717 std::atomic_load_explicit(&lck->lk.now_serving,
718 std::memory_order_relaxed);
719
720 std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
721 std::memory_order_release);
722
723 KMP_YIELD(distance >
725 return KMP_LOCK_RELEASED;
726}
727
729 kmp_int32 gtid) {
730 char const *const func = "omp_unset_lock";
731
732 if (!std::atomic_load_explicit(&lck->lk.initialized,
733 std::memory_order_relaxed)) {
734 KMP_FATAL(LockIsUninitialized, func);
735 }
736 if (lck->lk.self != lck) {
737 KMP_FATAL(LockIsUninitialized, func);
738 }
740 KMP_FATAL(LockNestableUsedAsSimple, func);
741 }
742 if (__kmp_get_ticket_lock_owner(lck) == -1) {
743 KMP_FATAL(LockUnsettingFree, func);
744 }
745 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
746 (__kmp_get_ticket_lock_owner(lck) != gtid)) {
747 KMP_FATAL(LockUnsettingSetByAnother, func);
748 }
749 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
750 return __kmp_release_ticket_lock(lck, gtid);
751}
752
754 lck->lk.location = NULL;
755 lck->lk.self = lck;
756 std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
757 std::memory_order_relaxed);
758 std::atomic_store_explicit(&lck->lk.now_serving, 0U,
759 std::memory_order_relaxed);
760 std::atomic_store_explicit(
761 &lck->lk.owner_id, 0,
762 std::memory_order_relaxed); // no thread owns the lock.
763 std::atomic_store_explicit(
764 &lck->lk.depth_locked, -1,
765 std::memory_order_relaxed); // -1 => not a nested lock.
766 std::atomic_store_explicit(&lck->lk.initialized, true,
767 std::memory_order_release);
768}
769
771 std::atomic_store_explicit(&lck->lk.initialized, false,
772 std::memory_order_release);
773 lck->lk.self = NULL;
774 lck->lk.location = NULL;
775 std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
776 std::memory_order_relaxed);
777 std::atomic_store_explicit(&lck->lk.now_serving, 0U,
778 std::memory_order_relaxed);
779 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
780 std::atomic_store_explicit(&lck->lk.depth_locked, -1,
781 std::memory_order_relaxed);
782}
783
785 char const *const func = "omp_destroy_lock";
786
787 if (!std::atomic_load_explicit(&lck->lk.initialized,
788 std::memory_order_relaxed)) {
789 KMP_FATAL(LockIsUninitialized, func);
790 }
791 if (lck->lk.self != lck) {
792 KMP_FATAL(LockIsUninitialized, func);
793 }
795 KMP_FATAL(LockNestableUsedAsSimple, func);
796 }
797 if (__kmp_get_ticket_lock_owner(lck) != -1) {
798 KMP_FATAL(LockStillOwned, func);
799 }
801}
802
803// nested ticket locks
804
806 KMP_DEBUG_ASSERT(gtid >= 0);
807
808 if (__kmp_get_ticket_lock_owner(lck) == gtid) {
809 std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
810 std::memory_order_relaxed);
812 } else {
814 std::atomic_store_explicit(&lck->lk.depth_locked, 1,
815 std::memory_order_relaxed);
816 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
817 std::memory_order_relaxed);
819 }
820}
821
823 kmp_int32 gtid) {
824 char const *const func = "omp_set_nest_lock";
825
826 if (!std::atomic_load_explicit(&lck->lk.initialized,
827 std::memory_order_relaxed)) {
828 KMP_FATAL(LockIsUninitialized, func);
829 }
830 if (lck->lk.self != lck) {
831 KMP_FATAL(LockIsUninitialized, func);
832 }
834 KMP_FATAL(LockSimpleUsedAsNestable, func);
835 }
837}
838
840 int retval;
841
842 KMP_DEBUG_ASSERT(gtid >= 0);
843
844 if (__kmp_get_ticket_lock_owner(lck) == gtid) {
845 retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
846 std::memory_order_relaxed) +
847 1;
848 } else if (!__kmp_test_ticket_lock(lck, gtid)) {
849 retval = 0;
850 } else {
851 std::atomic_store_explicit(&lck->lk.depth_locked, 1,
852 std::memory_order_relaxed);
853 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
854 std::memory_order_relaxed);
855 retval = 1;
856 }
857 return retval;
858}
859
861 kmp_int32 gtid) {
862 char const *const func = "omp_test_nest_lock";
863
864 if (!std::atomic_load_explicit(&lck->lk.initialized,
865 std::memory_order_relaxed)) {
866 KMP_FATAL(LockIsUninitialized, func);
867 }
868 if (lck->lk.self != lck) {
869 KMP_FATAL(LockIsUninitialized, func);
870 }
872 KMP_FATAL(LockSimpleUsedAsNestable, func);
873 }
875}
876
878 KMP_DEBUG_ASSERT(gtid >= 0);
879
880 if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
881 std::memory_order_relaxed) -
882 1) == 0) {
883 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
885 return KMP_LOCK_RELEASED;
886 }
887 return KMP_LOCK_STILL_HELD;
888}
889
891 kmp_int32 gtid) {
892 char const *const func = "omp_unset_nest_lock";
893
894 if (!std::atomic_load_explicit(&lck->lk.initialized,
895 std::memory_order_relaxed)) {
896 KMP_FATAL(LockIsUninitialized, func);
897 }
898 if (lck->lk.self != lck) {
899 KMP_FATAL(LockIsUninitialized, func);
900 }
902 KMP_FATAL(LockSimpleUsedAsNestable, func);
903 }
904 if (__kmp_get_ticket_lock_owner(lck) == -1) {
905 KMP_FATAL(LockUnsettingFree, func);
906 }
907 if (__kmp_get_ticket_lock_owner(lck) != gtid) {
908 KMP_FATAL(LockUnsettingSetByAnother, func);
909 }
911}
912
915 std::atomic_store_explicit(&lck->lk.depth_locked, 0,
916 std::memory_order_relaxed);
917 // >= 0 for nestable locks, -1 for simple locks
918}
919
922 std::atomic_store_explicit(&lck->lk.depth_locked, 0,
923 std::memory_order_relaxed);
924}
925
926static void
928 char const *const func = "omp_destroy_nest_lock";
929
930 if (!std::atomic_load_explicit(&lck->lk.initialized,
931 std::memory_order_relaxed)) {
932 KMP_FATAL(LockIsUninitialized, func);
933 }
934 if (lck->lk.self != lck) {
935 KMP_FATAL(LockIsUninitialized, func);
936 }
938 KMP_FATAL(LockSimpleUsedAsNestable, func);
939 }
940 if (__kmp_get_ticket_lock_owner(lck) != -1) {
941 KMP_FATAL(LockStillOwned, func);
942 }
944}
945
946// access functions to fields which don't exist for all lock kinds.
947
949 return lck->lk.location;
950}
951
953 const ident_t *loc) {
954 lck->lk.location = loc;
955}
956
958 return lck->lk.flags;
959}
960
962 kmp_lock_flags_t flags) {
963 lck->lk.flags = flags;
964}
965
966/* ------------------------------------------------------------------------ */
967/* queuing locks */
968
969/* First the states
970 (head,tail) = 0, 0 means lock is unheld, nobody on queue
971 UINT_MAX or -1, 0 means lock is held, nobody on queue
972 h, h means lock held or about to transition,
973 1 element on queue
974 h, t h <> t, means lock is held or about to
975 transition, >1 elements on queue
976
977 Now the transitions
978 Acquire(0,0) = -1 ,0
979 Release(0,0) = Error
980 Acquire(-1,0) = h ,h h > 0
981 Release(-1,0) = 0 ,0
982 Acquire(h,h) = h ,t h > 0, t > 0, h <> t
983 Release(h,h) = -1 ,0 h > 0
984 Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
985 Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
986
987 And pictorially
988
989 +-----+
990 | 0, 0|------- release -------> Error
991 +-----+
992 | ^
993 acquire| |release
994 | |
995 | |
996 v |
997 +-----+
998 |-1, 0|
999 +-----+
1000 | ^
1001 acquire| |release
1002 | |
1003 | |
1004 v |
1005 +-----+
1006 | h, h|
1007 +-----+
1008 | ^
1009 acquire| |release
1010 | |
1011 | |
1012 v |
1013 +-----+
1014 | h, t|----- acquire, release loopback ---+
1015 +-----+ |
1016 ^ |
1017 | |
1018 +------------------------------------+
1019 */
1020
1021#ifdef DEBUG_QUEUING_LOCKS
1022
1023/* Stuff for circular trace buffer */
1024#define TRACE_BUF_ELE 1024
1025static char traces[TRACE_BUF_ELE][128] = {0};
1026static int tc = 0;
1027#define TRACE_LOCK(X, Y) \
1028 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1029#define TRACE_LOCK_T(X, Y, Z) \
1030 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1031#define TRACE_LOCK_HT(X, Y, Z, Q) \
1032 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \
1033 Z, Q);
1034
1035static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1037 kmp_int32 tail_id) {
1038 kmp_int32 t, i;
1039
1040 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1041
1042 i = tc % TRACE_BUF_ELE;
1043 __kmp_printf_no_lock("%s\n", traces[i]);
1044 i = (i + 1) % TRACE_BUF_ELE;
1045 while (i != (tc % TRACE_BUF_ELE)) {
1046 __kmp_printf_no_lock("%s", traces[i]);
1047 i = (i + 1) % TRACE_BUF_ELE;
1048 }
1050
1051 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1052 "next_wait:%d, head_id:%d, tail_id:%d\n",
1053 gtid + 1, this_thr->th.th_spin_here,
1054 this_thr->th.th_next_waiting, head_id, tail_id);
1055
1056 __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1057
1058 if (lck->lk.head_id >= 1) {
1059 t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1060 while (t > 0) {
1061 __kmp_printf_no_lock("-> %d ", t);
1062 t = __kmp_threads[t - 1]->th.th_next_waiting;
1063 }
1064 }
1065 __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
1066 __kmp_printf_no_lock("\n\n");
1067}
1068
1069#endif /* DEBUG_QUEUING_LOCKS */
1070
1072 return TCR_4(lck->lk.owner_id) - 1;
1073}
1074
1076 return lck->lk.depth_locked != -1;
1077}
1078
1079/* Acquire a lock using a the queuing lock implementation */
1080template <bool takeTime>
1081/* [TLW] The unused template above is left behind because of what BEB believes
1082 is a potential compiler problem with __forceinline. */
1083__forceinline static int
1085 kmp_int32 gtid) {
1086 kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1087 volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1088 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1089 volatile kmp_uint32 *spin_here_p;
1090
1091#if OMPT_SUPPORT
1092 ompt_state_t prev_state = ompt_state_undefined;
1093#endif
1094
1095 KA_TRACE(1000,
1096 ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1097
1099 KMP_DEBUG_ASSERT(this_thr != NULL);
1100 spin_here_p = &this_thr->th.th_spin_here;
1101
1102#ifdef DEBUG_QUEUING_LOCKS
1103 TRACE_LOCK(gtid + 1, "acq ent");
1104 if (*spin_here_p)
1105 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1106 if (this_thr->th.th_next_waiting != 0)
1107 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1108#endif
1109 KMP_DEBUG_ASSERT(!*spin_here_p);
1110 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1111
1112 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1113 head_id_p that may follow, not just in execution order, but also in
1114 visibility order. This way, when a releasing thread observes the changes to
1115 the queue by this thread, it can rightly assume that spin_here_p has
1116 already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1117 not premature. If the releasing thread sets spin_here_p to FALSE before
1118 this thread sets it to TRUE, this thread will hang. */
1119 *spin_here_p = TRUE; /* before enqueuing to prevent race */
1120
1121 while (1) {
1122 kmp_int32 enqueued;
1125
1126 head = *head_id_p;
1127
1128 switch (head) {
1129
1130 case -1: {
1131#ifdef DEBUG_QUEUING_LOCKS
1132 tail = *tail_id_p;
1133 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1134#endif
1135 tail = 0; /* to make sure next link asynchronously read is not set
1136 accidentally; this assignment prevents us from entering the
1137 if ( t > 0 ) condition in the enqueued case below, which is not
1138 necessary for this state transition */
1139
1140 /* try (-1,0)->(tid,tid) */
1141 enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1142 KMP_PACK_64(-1, 0),
1143 KMP_PACK_64(gtid + 1, gtid + 1));
1144#ifdef DEBUG_QUEUING_LOCKS
1145 if (enqueued)
1146 TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1147#endif
1148 } break;
1149
1150 default: {
1151 tail = *tail_id_p;
1152 KMP_DEBUG_ASSERT(tail != gtid + 1);
1153
1154#ifdef DEBUG_QUEUING_LOCKS
1155 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1156#endif
1157
1158 if (tail == 0) {
1159 enqueued = FALSE;
1160 } else {
1161 /* try (h,t) or (h,h)->(h,tid) */
1162 enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1163
1164#ifdef DEBUG_QUEUING_LOCKS
1165 if (enqueued)
1166 TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1167#endif
1168 }
1169 } break;
1170
1171 case 0: /* empty queue */
1172 {
1173 kmp_int32 grabbed_lock;
1174
1175#ifdef DEBUG_QUEUING_LOCKS
1176 tail = *tail_id_p;
1177 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1178#endif
1179 /* try (0,0)->(-1,0) */
1180
1181 /* only legal transition out of head = 0 is head = -1 with no change to
1182 * tail */
1183 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1184
1185 if (grabbed_lock) {
1186
1187 *spin_here_p = FALSE;
1188
1189 KA_TRACE(
1190 1000,
1191 ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1192 lck, gtid));
1193#ifdef DEBUG_QUEUING_LOCKS
1194 TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1195#endif
1196
1197#if OMPT_SUPPORT
1198 if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1199 /* change the state before clearing wait_id */
1200 this_thr->th.ompt_thread_info.state = prev_state;
1201 this_thr->th.ompt_thread_info.wait_id = 0;
1202 }
1203#endif
1204
1206 return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1207 }
1208 enqueued = FALSE;
1209 } break;
1210 }
1211
1212#if OMPT_SUPPORT
1213 if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1214 /* this thread will spin; set wait_id before entering wait state */
1215 prev_state = this_thr->th.ompt_thread_info.state;
1216 this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1217 this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1218 }
1219#endif
1220
1221 if (enqueued) {
1222 if (tail > 0) {
1223 kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1224 KMP_ASSERT(tail_thr != NULL);
1225 tail_thr->th.th_next_waiting = gtid + 1;
1226 /* corresponding wait for this write in release code */
1227 }
1228 KA_TRACE(1000,
1229 ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1230 lck, gtid));
1231
1232 KMP_MB();
1233 // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1234 KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1235 // Synchronize writes to both runtime thread structures
1236 // and writes in user code.
1237 KMP_MB();
1238
1239#ifdef DEBUG_QUEUING_LOCKS
1240 TRACE_LOCK(gtid + 1, "acq spin");
1241
1242 if (this_thr->th.th_next_waiting != 0)
1243 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1244#endif
1245 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1246 KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1247 "waiting on queue\n",
1248 lck, gtid));
1249
1250#ifdef DEBUG_QUEUING_LOCKS
1251 TRACE_LOCK(gtid + 1, "acq exit 2");
1252#endif
1253
1254#if OMPT_SUPPORT
1255 /* change the state before clearing wait_id */
1256 this_thr->th.ompt_thread_info.state = prev_state;
1257 this_thr->th.ompt_thread_info.wait_id = 0;
1258#endif
1259
1260 /* got lock, we were dequeued by the thread that released lock */
1262 }
1263
1264 /* Yield if number of threads > number of logical processors */
1265 /* ToDo: Not sure why this should only be in oversubscription case,
1266 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1268
1269#ifdef DEBUG_QUEUING_LOCKS
1270 TRACE_LOCK(gtid + 1, "acq retry");
1271#endif
1272 }
1273 KMP_ASSERT2(0, "should not get here");
1275}
1276
1278 KMP_DEBUG_ASSERT(gtid >= 0);
1279
1280 int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1281 return retval;
1282}
1283
1285 kmp_int32 gtid) {
1286 char const *const func = "omp_set_lock";
1287 if (lck->lk.initialized != lck) {
1288 KMP_FATAL(LockIsUninitialized, func);
1289 }
1291 KMP_FATAL(LockNestableUsedAsSimple, func);
1292 }
1293 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1294 KMP_FATAL(LockIsAlreadyOwned, func);
1295 }
1296
1298
1299 lck->lk.owner_id = gtid + 1;
1301}
1302
1304 volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1306#ifdef KMP_DEBUG
1307 kmp_info_t *this_thr;
1308#endif
1309
1310 KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1311 KMP_DEBUG_ASSERT(gtid >= 0);
1312#ifdef KMP_DEBUG
1313 this_thr = __kmp_thread_from_gtid(gtid);
1314 KMP_DEBUG_ASSERT(this_thr != NULL);
1315 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1316#endif
1317
1318 head = *head_id_p;
1319
1320 if (head == 0) { /* nobody on queue, nobody holding */
1321 /* try (0,0)->(-1,0) */
1322 if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1323 KA_TRACE(1000,
1324 ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1326 return TRUE;
1327 }
1328 }
1329
1330 KA_TRACE(1000,
1331 ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1332 return FALSE;
1333}
1334
1336 kmp_int32 gtid) {
1337 char const *const func = "omp_test_lock";
1338 if (lck->lk.initialized != lck) {
1339 KMP_FATAL(LockIsUninitialized, func);
1340 }
1342 KMP_FATAL(LockNestableUsedAsSimple, func);
1343 }
1344
1345 int retval = __kmp_test_queuing_lock(lck, gtid);
1346
1347 if (retval) {
1348 lck->lk.owner_id = gtid + 1;
1349 }
1350 return retval;
1351}
1352
1354 volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1355 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1356
1357 KA_TRACE(1000,
1358 ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1359 KMP_DEBUG_ASSERT(gtid >= 0);
1360#if KMP_DEBUG || DEBUG_QUEUING_LOCKS
1361 kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1362#endif
1363 KMP_DEBUG_ASSERT(this_thr != NULL);
1364#ifdef DEBUG_QUEUING_LOCKS
1365 TRACE_LOCK(gtid + 1, "rel ent");
1366
1367 if (this_thr->th.th_spin_here)
1368 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1369 if (this_thr->th.th_next_waiting != 0)
1370 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1371#endif
1372 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1373 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1374
1376
1377 while (1) {
1378 kmp_int32 dequeued;
1381
1382 head = *head_id_p;
1383
1384#ifdef DEBUG_QUEUING_LOCKS
1385 tail = *tail_id_p;
1386 TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1387 if (head == 0)
1388 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1389#endif
1391 0); /* holding the lock, head must be -1 or queue head */
1392
1393 if (head == -1) { /* nobody on queue */
1394 /* try (-1,0)->(0,0) */
1395 if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1396 KA_TRACE(
1397 1000,
1398 ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1399 lck, gtid));
1400#ifdef DEBUG_QUEUING_LOCKS
1401 TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1402#endif
1403
1404#if OMPT_SUPPORT
1405/* nothing to do - no other thread is trying to shift blame */
1406#endif
1407 return KMP_LOCK_RELEASED;
1408 }
1409 dequeued = FALSE;
1410 } else {
1411 KMP_MB();
1412 tail = *tail_id_p;
1413 if (head == tail) { /* only one thread on the queue */
1414#ifdef DEBUG_QUEUING_LOCKS
1415 if (head <= 0)
1416 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1417#endif
1419
1420 /* try (h,h)->(-1,0) */
1421 dequeued = KMP_COMPARE_AND_STORE_REL64(
1422 RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1423 KMP_PACK_64(-1, 0));
1424#ifdef DEBUG_QUEUING_LOCKS
1425 TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1426#endif
1427
1428 } else {
1429 volatile kmp_int32 *waiting_id_p;
1430 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1431 KMP_DEBUG_ASSERT(head_thr != NULL);
1432 waiting_id_p = &head_thr->th.th_next_waiting;
1433
1434/* Does this require synchronous reads? */
1435#ifdef DEBUG_QUEUING_LOCKS
1436 if (head <= 0 || tail <= 0)
1437 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1438#endif
1439 KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1440
1441 /* try (h,t)->(h',t) or (t,t) */
1442 KMP_MB();
1443 /* make sure enqueuing thread has time to update next waiting thread
1444 * field */
1445 *head_id_p =
1446 KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
1447#ifdef DEBUG_QUEUING_LOCKS
1448 TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1449#endif
1450 dequeued = TRUE;
1451 }
1452 }
1453
1454 if (dequeued) {
1455 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1456 KMP_DEBUG_ASSERT(head_thr != NULL);
1457
1458/* Does this require synchronous reads? */
1459#ifdef DEBUG_QUEUING_LOCKS
1460 if (head <= 0 || tail <= 0)
1461 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1462#endif
1463 KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1464
1465 /* For clean code only. Thread not released until next statement prevents
1466 race with acquire code. */
1467 head_thr->th.th_next_waiting = 0;
1468#ifdef DEBUG_QUEUING_LOCKS
1469 TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1470#endif
1471
1472 KMP_MB();
1473 /* reset spin value */
1474 head_thr->th.th_spin_here = FALSE;
1475
1476 KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1477 "dequeuing\n",
1478 lck, gtid));
1479#ifdef DEBUG_QUEUING_LOCKS
1480 TRACE_LOCK(gtid + 1, "rel exit 2");
1481#endif
1482 return KMP_LOCK_RELEASED;
1483 }
1484 /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1485 threads */
1486
1487#ifdef DEBUG_QUEUING_LOCKS
1488 TRACE_LOCK(gtid + 1, "rel retry");
1489#endif
1490
1491 } /* while */
1492 KMP_ASSERT2(0, "should not get here");
1493 return KMP_LOCK_RELEASED;
1494}
1495
1497 kmp_int32 gtid) {
1498 char const *const func = "omp_unset_lock";
1499 KMP_MB(); /* in case another processor initialized lock */
1500 if (lck->lk.initialized != lck) {
1501 KMP_FATAL(LockIsUninitialized, func);
1502 }
1504 KMP_FATAL(LockNestableUsedAsSimple, func);
1505 }
1506 if (__kmp_get_queuing_lock_owner(lck) == -1) {
1507 KMP_FATAL(LockUnsettingFree, func);
1508 }
1509 if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1510 KMP_FATAL(LockUnsettingSetByAnother, func);
1511 }
1512 lck->lk.owner_id = 0;
1513 return __kmp_release_queuing_lock(lck, gtid);
1514}
1515
1517 lck->lk.location = NULL;
1518 lck->lk.head_id = 0;
1519 lck->lk.tail_id = 0;
1520 lck->lk.next_ticket = 0;
1521 lck->lk.now_serving = 0;
1522 lck->lk.owner_id = 0; // no thread owns the lock.
1523 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1524 lck->lk.initialized = lck;
1525
1526 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1527}
1528
1530 lck->lk.initialized = NULL;
1531 lck->lk.location = NULL;
1532 lck->lk.head_id = 0;
1533 lck->lk.tail_id = 0;
1534 lck->lk.next_ticket = 0;
1535 lck->lk.now_serving = 0;
1536 lck->lk.owner_id = 0;
1537 lck->lk.depth_locked = -1;
1538}
1539
1541 char const *const func = "omp_destroy_lock";
1542 if (lck->lk.initialized != lck) {
1543 KMP_FATAL(LockIsUninitialized, func);
1544 }
1546 KMP_FATAL(LockNestableUsedAsSimple, func);
1547 }
1548 if (__kmp_get_queuing_lock_owner(lck) != -1) {
1549 KMP_FATAL(LockStillOwned, func);
1550 }
1552}
1553
1554// nested queuing locks
1555
1557 KMP_DEBUG_ASSERT(gtid >= 0);
1558
1559 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1560 lck->lk.depth_locked += 1;
1562 } else {
1563 __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1564 KMP_MB();
1565 lck->lk.depth_locked = 1;
1566 KMP_MB();
1567 lck->lk.owner_id = gtid + 1;
1569 }
1570}
1571
1572static int
1574 kmp_int32 gtid) {
1575 char const *const func = "omp_set_nest_lock";
1576 if (lck->lk.initialized != lck) {
1577 KMP_FATAL(LockIsUninitialized, func);
1578 }
1580 KMP_FATAL(LockSimpleUsedAsNestable, func);
1581 }
1583}
1584
1586 int retval;
1587
1588 KMP_DEBUG_ASSERT(gtid >= 0);
1589
1590 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1591 retval = ++lck->lk.depth_locked;
1592 } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1593 retval = 0;
1594 } else {
1595 KMP_MB();
1596 retval = lck->lk.depth_locked = 1;
1597 KMP_MB();
1598 lck->lk.owner_id = gtid + 1;
1599 }
1600 return retval;
1601}
1602
1604 kmp_int32 gtid) {
1605 char const *const func = "omp_test_nest_lock";
1606 if (lck->lk.initialized != lck) {
1607 KMP_FATAL(LockIsUninitialized, func);
1608 }
1610 KMP_FATAL(LockSimpleUsedAsNestable, func);
1611 }
1612 return __kmp_test_nested_queuing_lock(lck, gtid);
1613}
1614
1616 KMP_DEBUG_ASSERT(gtid >= 0);
1617
1618 KMP_MB();
1619 if (--(lck->lk.depth_locked) == 0) {
1620 KMP_MB();
1621 lck->lk.owner_id = 0;
1623 return KMP_LOCK_RELEASED;
1624 }
1625 return KMP_LOCK_STILL_HELD;
1626}
1627
1628static int
1630 kmp_int32 gtid) {
1631 char const *const func = "omp_unset_nest_lock";
1632 KMP_MB(); /* in case another processor initialized lock */
1633 if (lck->lk.initialized != lck) {
1634 KMP_FATAL(LockIsUninitialized, func);
1635 }
1637 KMP_FATAL(LockSimpleUsedAsNestable, func);
1638 }
1639 if (__kmp_get_queuing_lock_owner(lck) == -1) {
1640 KMP_FATAL(LockUnsettingFree, func);
1641 }
1642 if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1643 KMP_FATAL(LockUnsettingSetByAnother, func);
1644 }
1646}
1647
1650 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1651}
1652
1655 lck->lk.depth_locked = 0;
1656}
1657
1658static void
1660 char const *const func = "omp_destroy_nest_lock";
1661 if (lck->lk.initialized != lck) {
1662 KMP_FATAL(LockIsUninitialized, func);
1663 }
1665 KMP_FATAL(LockSimpleUsedAsNestable, func);
1666 }
1667 if (__kmp_get_queuing_lock_owner(lck) != -1) {
1668 KMP_FATAL(LockStillOwned, func);
1669 }
1671}
1672
1673// access functions to fields which don't exist for all lock kinds.
1674
1676 return lck->lk.location;
1677}
1678
1680 const ident_t *loc) {
1681 lck->lk.location = loc;
1682}
1683
1685 return lck->lk.flags;
1686}
1687
1689 kmp_lock_flags_t flags) {
1690 lck->lk.flags = flags;
1691}
1692
1693#if KMP_USE_ADAPTIVE_LOCKS
1694
1695/* RTM Adaptive locks */
1696
1697#if KMP_HAVE_RTM_INTRINSICS
1698#include <immintrin.h>
1699#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1700
1701#else
1702
1703// Values from the status register after failed speculation.
1704#define _XBEGIN_STARTED (~0u)
1705#define _XABORT_EXPLICIT (1 << 0)
1706#define _XABORT_RETRY (1 << 1)
1707#define _XABORT_CONFLICT (1 << 2)
1708#define _XABORT_CAPACITY (1 << 3)
1709#define _XABORT_DEBUG (1 << 4)
1710#define _XABORT_NESTED (1 << 5)
1711#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1712
1713// Aborts for which it's worth trying again immediately
1714#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1715
1716#define STRINGIZE_INTERNAL(arg) #arg
1717#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1718
1719// Access to RTM instructions
1720/*A version of XBegin which returns -1 on speculation, and the value of EAX on
1721 an abort. This is the same definition as the compiler intrinsic that will be
1722 supported at some point. */
// Start an RTM transaction using the raw XBEGIN encoding (C7 F8 + rel32),
// for toolchains without the _xbegin intrinsic. Returns -1 when speculation
// has started; on abort the CPU resumes at the fallback address and the
// abort status (delivered in EAX) is returned instead.
static __inline int _xbegin() {
  // Default return: -1 == started speculating (the mov below is skipped on
  // the non-abort path).
  int res = -1;

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86_64
  _asm {
        // XBEGIN with a rel32 of 2: the abort handler address is just past
        // the 2-byte 'jmp L2', i.e. the 'mov res, eax' below.
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp L2
        // Abort path only: capture the transaction status register from EAX.
        mov res, eax
    L2:
  }
#else /* IA32 */
  _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp L2
        mov res, eax
    L2:
  }
#endif // KMP_ARCH_X86_64
#else
  /* Note that %eax must be noted as killed (clobbered), because the XSR is
     returned in %eax(%rax) on abort. Other register values are restored, so
     don't need to be killed.

     We must also mark 'res' as an input and an output, since otherwise
     'res=-1' may be dropped as being dead, whereas we do need the assignment on
     the successful (i.e., non-abort) path. */
  __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n"
                   " .long 1f-1b-6\n"
                   " jmp 2f\n"
                   "1: movl %%eax,%0\n"
                   "2:"
                   : "+r"(res)::"memory", "%eax");
#endif // KMP_OS_WINDOWS
  return res;
}
1769
1770/* Transaction end */
// Commit the current RTM transaction. Emitted as the raw XEND encoding
// (bytes 0F 01 D5) for toolchains without the _xend intrinsic; the "memory"
// clobber keeps the compiler from moving memory accesses across the commit.
static __inline void _xend() {
#if KMP_OS_WINDOWS
  __asm {
    _emit 0x0f
    _emit 0x01
    _emit 0xd5
  }
#else
  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
#endif
}
1782
1783/* This is a macro, the argument must be a single byte constant which can be
1784 evaluated by the inline assembler, since it is emitted as a byte into the
1785 assembly code. */
1786// clang-format off
1787#if KMP_OS_WINDOWS
1788#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1789#else
1790#define _xabort(ARG) \
1791 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1792#endif
1793// clang-format on
#endif // KMP_HAVE_RTM_INTRINSICS
1795
1796// Statistics is collected for testing purpose
1797#if KMP_DEBUG_ADAPTIVE_LOCKS
1798
1799// We accumulate speculative lock statistics when the lock is destroyed. We
1800// keep locks that haven't been destroyed in the liveLocks list so that we can
1801// grab their statistics too.
1802static kmp_adaptive_lock_statistics_t destroyedStats;
1803
1804// To hold the list of live locks.
1805static kmp_adaptive_lock_info_t liveLocks;
1806
1807// A lock so we can safely update the list of locks.
1808static kmp_bootstrap_lock_t chain_lock =
1810
1811// Initialize the list of stats.
1812void __kmp_init_speculative_stats() {
1813 kmp_adaptive_lock_info_t *lck = &liveLocks;
1814
1815 memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1816 sizeof(lck->stats));
1817 lck->stats.next = lck;
1818 lck->stats.prev = lck;
1819
1820 KMP_ASSERT(lck->stats.next->stats.prev == lck);
1821 KMP_ASSERT(lck->stats.prev->stats.next == lck);
1822
1823 __kmp_init_bootstrap_lock(&chain_lock);
1824}
1825
1826// Insert the lock into the circular list
// Splice lck into the circular list of live locks, right after the liveLocks
// sentinel, under chain_lock.
static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
  __kmp_acquire_bootstrap_lock(&chain_lock);

  // Point the new node at its neighbours first...
  lck->stats.next = liveLocks.stats.next;
  lck->stats.prev = &liveLocks;

  // ...then make the neighbours point back at it.
  liveLocks.stats.next = lck;
  lck->stats.next->stats.prev = lck;

  // Sanity check: lck is now linked consistently in both directions.
  KMP_ASSERT(lck->stats.next->stats.prev == lck);
  KMP_ASSERT(lck->stats.prev->stats.next == lck);

  __kmp_release_bootstrap_lock(&chain_lock);
}
1841
1842static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1843 KMP_ASSERT(lck->stats.next->stats.prev == lck);
1844 KMP_ASSERT(lck->stats.prev->stats.next == lck);
1845
1846 kmp_adaptive_lock_info_t *n = lck->stats.next;
1847 kmp_adaptive_lock_info_t *p = lck->stats.prev;
1848
1849 n->stats.prev = p;
1850 p->stats.next = n;
1851}
1852
// Clear a lock's speculation counters and register it on the live-lock list
// so its statistics can still be harvested while it exists.
static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
  memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
         sizeof(lck->stats));
  __kmp_remember_lock(lck);
}
1858
1859static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1860 kmp_adaptive_lock_info_t *lck) {
1861 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1862
1863 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1864 t->successfulSpeculations += s->successfulSpeculations;
1865 t->hardFailedSpeculations += s->hardFailedSpeculations;
1866 t->softFailedSpeculations += s->softFailedSpeculations;
1867 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1868 t->lemmingYields += s->lemmingYields;
1869}
1870
// Bank a dying lock's statistics: fold them into destroyedStats and drop the
// lock from the live list, all under chain_lock. Called from
// __kmp_destroy_adaptive_lock.
static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
  __kmp_acquire_bootstrap_lock(&chain_lock);

  __kmp_add_stats(&destroyedStats, lck);
  __kmp_forget_lock(lck);

  __kmp_release_bootstrap_lock(&chain_lock);
}
1879
1880static float percent(kmp_uint32 count, kmp_uint32 total) {
1881 return (total == 0) ? 0.0 : (100.0 * count) / total;
1882}
1883
// Dump the aggregated speculative-lock statistics: totals from already
// destroyed locks (destroyedStats) plus everything still on the live list.
// Output goes to stdout when __kmp_speculative_statsfile is "-", otherwise to
// a file whose name is built from that string (it is passed to KMP_SNPRINTF
// as a format, with the pid as argument).
void __kmp_print_speculative_stats() {
  kmp_adaptive_lock_statistics_t total = destroyedStats;
  kmp_adaptive_lock_info_t *lck;

  // Walk the circular list, folding each live lock's counters into 'total'.
  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
    __kmp_add_stats(&total, lck);
  }
  kmp_adaptive_lock_statistics_t *t = &total;
  kmp_uint32 totalSections =
      t->nonSpeculativeAcquires + t->successfulSpeculations;
  kmp_uint32 totalSpeculations = t->successfulSpeculations +
                                 t->hardFailedSpeculations +
                                 t->softFailedSpeculations;
  // totalSections is unsigned, so this is effectively a == 0 check: nothing
  // to report.
  if (totalSections <= 0)
    return;

  kmp_safe_raii_file_t statsFile;
  if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
    statsFile.set_stdout();
  } else {
    size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
    // NOTE(review): VLA sized from the statsfile string; assumes the name is
    // reasonably short -- confirm an upper bound is enforced where it is set.
    char buffer[buffLen];
    KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
                 (kmp_int32)getpid());
    statsFile.open(buffer, "w");
  }

  fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
  fprintf(statsFile,
          " Lock parameters: \n"
          "   max_soft_retries               : %10d\n"
          "   max_badness                    : %10d\n",
          __kmp_adaptive_backoff_params.max_soft_retries,
          __kmp_adaptive_backoff_params.max_badness);
  fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
          t->nonSpeculativeAcquireAttempts);
  fprintf(statsFile, " Total critical sections          : %10d\n",
          totalSections);
  fprintf(statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
          t->successfulSpeculations,
          percent(t->successfulSpeculations, totalSections));
  fprintf(statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
          t->nonSpeculativeAcquires,
          percent(t->nonSpeculativeAcquires, totalSections));
  fprintf(statsFile, " Lemming yields                   : %10d\n\n",
          t->lemmingYields);

  fprintf(statsFile, " Speculative acquire attempts     : %10d\n",
          totalSpeculations);
  fprintf(statsFile, " Successes                        : %10d (%5.1f%%)\n",
          t->successfulSpeculations,
          percent(t->successfulSpeculations, totalSpeculations));
  fprintf(statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
          t->softFailedSpeculations,
          percent(t->softFailedSpeculations, totalSpeculations));
  fprintf(statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
          t->hardFailedSpeculations,
          percent(t->hardFailedSpeculations, totalSpeculations));
}
1943
1944#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1945#else
1946#define KMP_INC_STAT(lck, stat)
1947
1948#endif // KMP_DEBUG_ADAPTIVE_LOCKS
1949
// Return true iff the queuing lock looks free (head_id == 0). Used by the
// adaptive lock both to decide when to speculate and, inside a transaction,
// to pull the lock word into the transactional read-set (see
// __kmp_test_adaptive_lock_only).
static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
  // It is enough to check that the head_id is zero.
  // We don't also need to check the tail.
  bool res = lck->lk.head_id == 0;

// We need a fence here, since we must ensure that no memory operations
// from later in this thread float above that read.
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
  _mm_mfence();
#else
  __sync_synchronize();
#endif

  return res;
}
1965
1966// Functions for manipulating the badness
// A speculative acquisition just succeeded, so the lock is behaving well:
// clear the badness mask entirely. (KMP_INC_STAT is a no-op unless
// KMP_DEBUG_ADAPTIVE_LOCKS is enabled.)
static __inline void
__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
  // Reset the badness to zero so we eagerly try to speculate again
  lck->lk.adaptive.badness = 0;
  KMP_INC_STAT(lck, successfulSpeculations);
}
1973
1974// Create a bit mask with one more set bit.
1975static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1976 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1977 if (newBadness > lck->lk.adaptive.max_badness) {
1978 return;
1979 } else {
1980 lck->lk.adaptive.badness = newBadness;
1981 }
1982}
1983
1984// Check whether speculation should be attempted.
1986static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1987 kmp_int32 gtid) {
1988 kmp_uint32 badness = lck->lk.adaptive.badness;
1989 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1990 int res = (attempts & badness) == 0;
1991 return res;
1992}
1993
1994// Attempt to acquire only the speculative lock.
1995// Does not back off to the non-speculative lock.
// Try to acquire the lock purely speculatively, retrying soft aborts up to
// max_soft_retries times. Returns 1 with an RTM transaction open on success,
// 0 (after stepping the badness mask) on failure. Never falls back to the
// real lock.
static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
                                         kmp_int32 gtid) {
  int retries = lck->lk.adaptive.max_soft_retries;

  // We don't explicitly count the start of speculation, rather we record the
  // results (success, hard fail, soft fail). The sum of all of those is the
  // total number of times we started speculation since all speculations must
  // end one of those ways.
  do {
    kmp_uint32 status = _xbegin();
    // Switch this in to disable actual speculation but exercise at least some
    // of the rest of the code. Useful for debugging...
    // kmp_uint32 status = _XABORT_NESTED;

    if (status == _XBEGIN_STARTED) {
      /* We have successfully started speculation. Check that no-one acquired
         the lock for real between when we last looked and now. This also gets
         the lock cache line into our read-set, which we need so that we'll
         abort if anyone later claims it for real. */
      if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
        // Lock is now visibly acquired, so someone beat us to it. Abort the
        // transaction so we'll restart from _xbegin with the failure status.
        _xabort(0x01);
        KMP_ASSERT2(0, "should not get here");
      }
      return 1; // Lock has been acquired (speculatively)
    } else {
      // We have aborted, update the statistics
      if (status & SOFT_ABORT_MASK) {
        KMP_INC_STAT(lck, softFailedSpeculations);
        // and loop round to retry.
      } else {
        KMP_INC_STAT(lck, hardFailedSpeculations);
        // Give up if we had a hard failure.
        break;
      }
    }
  } while (retries--); // Loop while we have retries, and didn't fail hard.

  // Either we had a hard failure or we didn't succeed softly after
  // the full set of attempts, so back off the badness.
  __kmp_step_badness(lck);
  return 0;
}
2041
2042// Attempt to acquire the speculative lock, or back off to the non-speculative
2043// one if the speculative lock cannot be acquired.
2044// We can succeed speculatively, non-speculatively, or fail.
2045static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2046 // First try to acquire the lock speculatively
2047 if (__kmp_should_speculate(lck, gtid) &&
2048 __kmp_test_adaptive_lock_only(lck, gtid))
2049 return 1;
2050
2051 // Speculative acquisition failed, so try to acquire it non-speculatively.
2052 // Count the non-speculative acquire attempt
2053 lck->lk.adaptive.acquire_attempts++;
2054
2055 // Use base, non-speculative lock.
2056 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2057 KMP_INC_STAT(lck, nonSpeculativeAcquires);
2058 return 1; // Lock is acquired (non-speculatively)
2059 } else {
2060 return 0; // Failed to acquire the lock, it's already visibly locked.
2061 }
2062}
2063
2064static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2065 kmp_int32 gtid) {
2066 char const *const func = "omp_test_lock";
2067 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2068 KMP_FATAL(LockIsUninitialized, func);
2069 }
2070
2071 int retval = __kmp_test_adaptive_lock(lck, gtid);
2072
2073 if (retval) {
2074 lck->lk.qlk.owner_id = gtid + 1;
2075 }
2076 return retval;
2077}
2078
2079// Block until we can acquire a speculative, adaptive lock. We check whether we
2080// should be trying to speculate. If we should be, we check the real lock to see
2081// if it is free, and, if not, pause without attempting to acquire it until it
2082// is. Then we try the speculative acquire. This means that although we suffer
// from lemmings a little (because we can't acquire the lock speculatively
2084// until the queue of threads waiting has cleared), we don't get into a state
2085// where we can never acquire the lock speculatively (because we force the queue
2086// to clear by preventing new arrivals from entering the queue). This does mean
2087// that when we're trying to break lemmings, the lock is no longer fair. However
2088// OpenMP makes no guarantee that its locks are fair, so this isn't a real
2089// problem.
// Blocking acquire of an adaptive lock: speculate when the heuristic allows,
// waiting (yielding) for the underlying queue to drain first if the lock is
// visibly held; otherwise fall back to a blocking acquire of the underlying
// queuing lock.
static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
                                        kmp_int32 gtid) {
  if (__kmp_should_speculate(lck, gtid)) {
    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
      // We tried speculation and failed, so give up.
    } else {
      // We can't try speculation until the lock is free, so we pause here
      // (without suspending on the queueing lock, to allow it to drain, then
      // try again. All other threads will also see the same result for
      // shouldSpeculate, so will be doing the same if they try to claim the
      // lock from now on.
      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
        KMP_INC_STAT(lck, lemmingYields);
        KMP_YIELD(TRUE);
      }

      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
    }
  }

  // Speculative acquisition failed, so acquire it non-speculatively.
  // Count the non-speculative acquire attempt
  lck->lk.adaptive.acquire_attempts++;

  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
  // We have acquired the base lock, so count that.
  KMP_INC_STAT(lck, nonSpeculativeAcquires);
}
2121
2122static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2123 kmp_int32 gtid) {
2124 char const *const func = "omp_set_lock";
2125 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2126 KMP_FATAL(LockIsUninitialized, func);
2127 }
2128 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2129 KMP_FATAL(LockIsAlreadyOwned, func);
2130 }
2131
2132 __kmp_acquire_adaptive_lock(lck, gtid);
2133
2134 lck->lk.qlk.owner_id = gtid + 1;
2135}
2136
// Release an adaptive lock. Whether we are inside a transaction is inferred
// from the underlying lock word: if it still looks free, this thread must be
// speculating, so commit the transaction; otherwise release the real lock.
static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
                                       kmp_int32 gtid) {
  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
          lck))) { // If the lock doesn't look claimed we must be speculating.
    // (Or the user's code is buggy and they're releasing without locking;
    // if we had XTEST we'd be able to check that case...)
    _xend(); // Exit speculation
    __kmp_update_badness_after_success(lck);
  } else { // Since the lock *is* visibly locked we're not speculating,
    // so should use the underlying lock's release scheme.
    __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
  }
  return KMP_LOCK_RELEASED;
}
2152
2153static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2154 kmp_int32 gtid) {
2155 char const *const func = "omp_unset_lock";
2156 KMP_MB(); /* in case another processor initialized lock */
2157 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2158 KMP_FATAL(LockIsUninitialized, func);
2159 }
2160 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2161 KMP_FATAL(LockUnsettingFree, func);
2162 }
2163 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2164 KMP_FATAL(LockUnsettingSetByAnother, func);
2165 }
2166 lck->lk.qlk.owner_id = 0;
2167 __kmp_release_adaptive_lock(lck, gtid);
2168 return KMP_LOCK_RELEASED;
2169}
2170
2171static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2172 __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2173 lck->lk.adaptive.badness = 0;
2174 lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2175 lck->lk.adaptive.max_soft_retries =
2176 __kmp_adaptive_backoff_params.max_soft_retries;
2177 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2178#if KMP_DEBUG_ADAPTIVE_LOCKS
2179 __kmp_zero_speculative_stats(&lck->lk.adaptive);
2180#endif
2181 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2182}
2183
// Tear down an adaptive lock. Under KMP_DEBUG_ADAPTIVE_LOCKS, first bank its
// speculation statistics; only the underlying queuing lock needs destroying.
static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
#endif
  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
  // Nothing needed for the speculative part.
}
2191
2192static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2193 char const *const func = "omp_destroy_lock";
2194 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2195 KMP_FATAL(LockIsUninitialized, func);
2196 }
2197 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2198 KMP_FATAL(LockStillOwned, func);
2199 }
2200 __kmp_destroy_adaptive_lock(lck);
2201}
2202
2203#endif // KMP_USE_ADAPTIVE_LOCKS
2204
2205/* ------------------------------------------------------------------------ */
2206/* DRDPA ticket locks */
2207/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2208
2210 return lck->lk.owner_id - 1;
2211}
2212
2214 return lck->lk.depth_locked != -1;
2215}
2216
2217__forceinline static int
2219 kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2220 kmp_uint64 mask = lck->lk.mask; // atomic load
2221 std::atomic<kmp_uint64> *polls = lck->lk.polls;
2222
2223#ifdef USE_LOCK_PROFILE
2224 if (polls[ticket & mask] != ticket)
2225 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2226/* else __kmp_printf( "." );*/
2227#endif /* USE_LOCK_PROFILE */
2228
2229 // Now spin-wait, but reload the polls pointer and mask, in case the
2230 // polling area has been reconfigured. Unless it is reconfigured, the
2231 // reloads stay in L1 cache and are cheap.
2232 //
2233 // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2234 // The current implementation of KMP_WAIT doesn't allow for mask
2235 // and poll to be re-read every spin iteration.
2236 kmp_uint32 spins;
2237 kmp_uint64 time;
2239 KMP_INIT_YIELD(spins);
2240 KMP_INIT_BACKOFF(time);
2241 while (polls[ticket & mask] < ticket) { // atomic load
2242 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
2243 // Re-read the mask and the poll pointer from the lock structure.
2244 //
2245 // Make certain that "mask" is read before "polls" !!!
2246 //
    // If another thread reconfigures the polling area and updates their
2248 // values, and we get the new value of mask and the old polls pointer, we
2249 // could access memory beyond the end of the old polling area.
2250 mask = lck->lk.mask; // atomic load
2251 polls = lck->lk.polls; // atomic load
2252 }
2253
2254 // Critical section starts here
2256 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2257 ticket, lck));
2258 lck->lk.now_serving = ticket; // non-volatile store
2259
2260 // Deallocate a garbage polling area if we know that we are the last
2261 // thread that could possibly access it.
2262 //
2263 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2264 // ticket.
2265 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2266 __kmp_free(lck->lk.old_polls);
2267 lck->lk.old_polls = NULL;
2268 lck->lk.cleanup_ticket = 0;
2269 }
2270
2271 // Check to see if we should reconfigure the polling area.
2272 // If there is still a garbage polling area to be deallocated from a
2273 // previous reconfiguration, let a later thread reconfigure it.
2274 if (lck->lk.old_polls == NULL) {
2275 bool reconfigure = false;
2276 std::atomic<kmp_uint64> *old_polls = polls;
2277 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2278
2279 if (TCR_4(__kmp_nth) >
2281 // We are in oversubscription mode. Contract the polling area
2282 // down to a single location, if that hasn't been done already.
2283 if (num_polls > 1) {
2284 reconfigure = true;
2285 num_polls = TCR_4(lck->lk.num_polls);
2286 mask = 0;
2287 num_polls = 1;
2288 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2289 sizeof(*polls));
2290 polls[0] = ticket;
2291 }
2292 } else {
2293 // We are in under/fully subscribed mode. Check the number of
2294 // threads waiting on the lock. The size of the polling area
2295 // should be at least the number of threads waiting.
2296 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2297 if (num_waiting > num_polls) {
2298 kmp_uint32 old_num_polls = num_polls;
2299 reconfigure = true;
2300 do {
2301 mask = (mask << 1) | 1;
2302 num_polls *= 2;
2303 } while (num_polls <= num_waiting);
2304
2305 // Allocate the new polling area, and copy the relevant portion
2306 // of the old polling area to the new area. __kmp_allocate()
2307 // zeroes the memory it allocates, and most of the old area is
2308 // just zero padding, so we only copy the release counters.
2309 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2310 sizeof(*polls));
2311 kmp_uint32 i;
2312 for (i = 0; i < old_num_polls; i++) {
2313 polls[i].store(old_polls[i]);
2314 }
2315 }
2316 }
2317
2318 if (reconfigure) {
2319 // Now write the updated fields back to the lock structure.
2320 //
2321 // Make certain that "polls" is written before "mask" !!!
2322 //
2323 // If another thread picks up the new value of mask and the old polls
    // pointer, it could access memory beyond the end of the old polling
2325 // area.
2326 //
2327 // On x86, we need memory fences.
2328 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2329 "lock %p to %d polls\n",
2330 ticket, lck, num_polls));
2331
2332 lck->lk.old_polls = old_polls;
2333 lck->lk.polls = polls; // atomic store
2334
2335 KMP_MB();
2336
2337 lck->lk.num_polls = num_polls;
2338 lck->lk.mask = mask; // atomic store
2339
2340 KMP_MB();
2341
2342 // Only after the new polling area and mask have been flushed
2343 // to main memory can we update the cleanup ticket field.
2344 //
2345 // volatile load / non-volatile store
2346 lck->lk.cleanup_ticket = lck->lk.next_ticket;
2347 }
2348 }
2350}
2351
2353 int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2354 return retval;
2355}
2356
2358 kmp_int32 gtid) {
2359 char const *const func = "omp_set_lock";
2360 if (lck->lk.initialized != lck) {
2361 KMP_FATAL(LockIsUninitialized, func);
2362 }
2364 KMP_FATAL(LockNestableUsedAsSimple, func);
2365 }
2366 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2367 KMP_FATAL(LockIsAlreadyOwned, func);
2368 }
2369
2371
2372 lck->lk.owner_id = gtid + 1;
2374}
2375
2377 // First get a ticket, then read the polls pointer and the mask.
2378 // The polls pointer must be read before the mask!!! (See above)
2379 kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2380 std::atomic<kmp_uint64> *polls = lck->lk.polls;
2381 kmp_uint64 mask = lck->lk.mask; // atomic load
2382 if (polls[ticket & mask] == ticket) {
2383 kmp_uint64 next_ticket = ticket + 1;
2384 if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2385 next_ticket)) {
2387 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2388 ticket, lck));
2389 lck->lk.now_serving = ticket; // non-volatile store
2390
2391 // Since no threads are waiting, there is no possibility that we would
2392 // want to reconfigure the polling area. We might have the cleanup ticket
2393 // value (which says that it is now safe to deallocate old_polls), but
2394 // we'll let a later thread which calls __kmp_acquire_lock do that - this
2395 // routine isn't supposed to block, and we would risk blocks if we called
2396 // __kmp_free() to do the deallocation.
2397 return TRUE;
2398 }
2399 }
2400 return FALSE;
2401}
2402
2404 kmp_int32 gtid) {
2405 char const *const func = "omp_test_lock";
2406 if (lck->lk.initialized != lck) {
2407 KMP_FATAL(LockIsUninitialized, func);
2408 }
2410 KMP_FATAL(LockNestableUsedAsSimple, func);
2411 }
2412
2413 int retval = __kmp_test_drdpa_lock(lck, gtid);
2414
2415 if (retval) {
2416 lck->lk.owner_id = gtid + 1;
2417 }
2418 return retval;
2419}
2420
2422 // Read the ticket value from the lock data struct, then the polls pointer and
2423 // the mask. The polls pointer must be read before the mask!!! (See above)
2424 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2425 std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2426 kmp_uint64 mask = lck->lk.mask; // atomic load
2427 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2428 ticket - 1, lck));
2430 polls[ticket & mask] = ticket; // atomic store
2431 return KMP_LOCK_RELEASED;
2432}
2433
2435 kmp_int32 gtid) {
2436 char const *const func = "omp_unset_lock";
2437 KMP_MB(); /* in case another processor initialized lock */
2438 if (lck->lk.initialized != lck) {
2439 KMP_FATAL(LockIsUninitialized, func);
2440 }
2442 KMP_FATAL(LockNestableUsedAsSimple, func);
2443 }
2444 if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2445 KMP_FATAL(LockUnsettingFree, func);
2446 }
2447 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2448 (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2449 KMP_FATAL(LockUnsettingSetByAnother, func);
2450 }
2451 lck->lk.owner_id = 0;
2452 return __kmp_release_drdpa_lock(lck, gtid);
2453}
2454
2456 lck->lk.location = NULL;
2457 lck->lk.mask = 0;
2458 lck->lk.num_polls = 1;
2459 lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2460 lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2461 lck->lk.cleanup_ticket = 0;
2462 lck->lk.old_polls = NULL;
2463 lck->lk.next_ticket = 0;
2464 lck->lk.now_serving = 0;
2465 lck->lk.owner_id = 0; // no thread owns the lock.
2466 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2467 lck->lk.initialized = lck;
2468
2469 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2470}
2471
2473 lck->lk.initialized = NULL;
2474 lck->lk.location = NULL;
2475 if (lck->lk.polls.load() != NULL) {
2476 __kmp_free(lck->lk.polls.load());
2477 lck->lk.polls = NULL;
2478 }
2479 if (lck->lk.old_polls != NULL) {
2480 __kmp_free(lck->lk.old_polls);
2481 lck->lk.old_polls = NULL;
2482 }
2483 lck->lk.mask = 0;
2484 lck->lk.num_polls = 0;
2485 lck->lk.cleanup_ticket = 0;
2486 lck->lk.next_ticket = 0;
2487 lck->lk.now_serving = 0;
2488 lck->lk.owner_id = 0;
2489 lck->lk.depth_locked = -1;
2490}
2491
2493 char const *const func = "omp_destroy_lock";
2494 if (lck->lk.initialized != lck) {
2495 KMP_FATAL(LockIsUninitialized, func);
2496 }
2498 KMP_FATAL(LockNestableUsedAsSimple, func);
2499 }
2500 if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2501 KMP_FATAL(LockStillOwned, func);
2502 }
2504}
2505
2506// nested drdpa ticket locks
2507
2509 KMP_DEBUG_ASSERT(gtid >= 0);
2510
2511 if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2512 lck->lk.depth_locked += 1;
2514 } else {
2516 KMP_MB();
2517 lck->lk.depth_locked = 1;
2518 KMP_MB();
2519 lck->lk.owner_id = gtid + 1;
2521 }
2522}
2523
2525 kmp_int32 gtid) {
2526 char const *const func = "omp_set_nest_lock";
2527 if (lck->lk.initialized != lck) {
2528 KMP_FATAL(LockIsUninitialized, func);
2529 }
2531 KMP_FATAL(LockSimpleUsedAsNestable, func);
2532 }
2534}
2535
2537 int retval;
2538
2539 KMP_DEBUG_ASSERT(gtid >= 0);
2540
2541 if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2542 retval = ++lck->lk.depth_locked;
2543 } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2544 retval = 0;
2545 } else {
2546 KMP_MB();
2547 retval = lck->lk.depth_locked = 1;
2548 KMP_MB();
2549 lck->lk.owner_id = gtid + 1;
2550 }
2551 return retval;
2552}
2553
2555 kmp_int32 gtid) {
2556 char const *const func = "omp_test_nest_lock";
2557 if (lck->lk.initialized != lck) {
2558 KMP_FATAL(LockIsUninitialized, func);
2559 }
2561 KMP_FATAL(LockSimpleUsedAsNestable, func);
2562 }
2563 return __kmp_test_nested_drdpa_lock(lck, gtid);
2564}
2565
2567 KMP_DEBUG_ASSERT(gtid >= 0);
2568
2569 KMP_MB();
2570 if (--(lck->lk.depth_locked) == 0) {
2571 KMP_MB();
2572 lck->lk.owner_id = 0;
2574 return KMP_LOCK_RELEASED;
2575 }
2576 return KMP_LOCK_STILL_HELD;
2577}
2578
2580 kmp_int32 gtid) {
2581 char const *const func = "omp_unset_nest_lock";
2582 KMP_MB(); /* in case another processor initialized lock */
2583 if (lck->lk.initialized != lck) {
2584 KMP_FATAL(LockIsUninitialized, func);
2585 }
2587 KMP_FATAL(LockSimpleUsedAsNestable, func);
2588 }
2589 if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2590 KMP_FATAL(LockUnsettingFree, func);
2591 }
2592 if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2593 KMP_FATAL(LockUnsettingSetByAnother, func);
2594 }
2596}
2597
2600 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2601}
2602
2605 lck->lk.depth_locked = 0;
2606}
2607
2609 char const *const func = "omp_destroy_nest_lock";
2610 if (lck->lk.initialized != lck) {
2611 KMP_FATAL(LockIsUninitialized, func);
2612 }
2614 KMP_FATAL(LockSimpleUsedAsNestable, func);
2615 }
2616 if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2617 KMP_FATAL(LockStillOwned, func);
2618 }
2620}
2621
2622// access functions to fields which don't exist for all lock kinds.
2623
2625 return lck->lk.location;
2626}
2627
2629 const ident_t *loc) {
2630 lck->lk.location = loc;
2631}
2632
2634 return lck->lk.flags;
2635}
2636
2638 kmp_lock_flags_t flags) {
2639 lck->lk.flags = flags;
2640}
2641
2642// Time stamp counter
2643#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2644#define __kmp_tsc() __kmp_hardware_timestamp()
2645// Runtime's default backoff parameters
2647#else
2648// Use nanoseconds for other platforms
2649extern kmp_uint64 __kmp_now_nsec();
2651#define __kmp_tsc() __kmp_now_nsec()
2652#endif
2653
2654// A useful predicate for dealing with timestamps that may wrap.
2655// Is a before b? Since the timestamps may wrap, this is asking whether it's
2656// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2657// Times where going clockwise is less distance than going anti-clockwise
2658// are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0),
2659// then a > b (true) does not mean a reached b; whereas signed(a) = -2,
2660// signed(b) = 0 captures the actual difference
2661static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2662 return ((kmp_int64)b - (kmp_int64)a) > 0;
2663}
2664
2665// Truncated binary exponential backoff function
2667 // We could flatten this loop, but making it a nested loop gives better result
2668 kmp_uint32 i;
2669 for (i = boff->step; i > 0; i--) {
2670 kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2671#if KMP_HAVE_UMWAIT
2672 if (__kmp_umwait_enabled) {
2673 __kmp_tpause(0, boff->min_tick);
2674 } else {
2675#endif
2676 do {
2677 KMP_CPU_PAUSE();
2678 } while (before(__kmp_tsc(), goal));
2679#if KMP_HAVE_UMWAIT
2680 }
2681#endif
2682 }
2683 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2684}
2685
2686#if KMP_USE_DYNAMIC_LOCK
2687
2688// Direct lock initializers. It simply writes a tag to the low 8 bits of the
2689// lock word.
// Initialize a direct (inlined) dynamic lock: write the tag for the requested
// lock sequence into the lock word.
static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
                                   kmp_dyna_lockseq_t seq) {
  TCW_4(((kmp_base_tas_lock_t *)lck)->poll, KMP_GET_D_TAG(seq));
  KA_TRACE(
      20,
      ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
}
2697
2698#if KMP_USE_TSX
2699
2700// HLE lock functions - imported from the testbed runtime.
2701#define HLE_ACQUIRE ".byte 0xf2;"
2702#define HLE_RELEASE ".byte 0xf3;"
2703
// Atomically exchange *p and v, returning the old value of *p. The xchg is
// prefixed with XACQUIRE (0xf2, HLE_ACQUIRE above) so the processor may
// elide the lock acquisition (Hardware Lock Elision).
static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
  return v;
}
2708
2709static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2710
// Checked destroy for HLE locks; no additional consistency checks are
// currently performed.
static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
  TCW_4(*lck, 0);
}
2714
2715static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2716 // Use gtid for KMP_LOCK_BUSY if necessary
2717 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2718 int delay = 1;
2719 do {
2720 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2721 for (int i = delay; i != 0; --i)
2722 KMP_CPU_PAUSE();
2723 delay = ((delay << 1) | 1) & 7;
2724 }
2725 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2726 }
2727}
2728
// Checked acquire for HLE locks; currently defers to the unchecked version.
static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                               kmp_int32 gtid) {
  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
}
2733
// Release an HLE lock: store the "free" value with an XRELEASE-prefixed
// (0xf3, HLE_RELEASE above) mov, which commits the elided region or releases
// the real lock.
static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
  __asm__ volatile(HLE_RELEASE "movl %1,%0"
                   : "=m"(*lck)
                   : "r"(KMP_LOCK_FREE(hle))
                   : "memory");
  return KMP_LOCK_RELEASED;
}
2741
// Checked release for HLE locks; currently defers to the unchecked version.
static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                              kmp_int32 gtid) {
  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
}
2746
// Try to acquire an HLE lock with a single elided exchange; succeeds iff the
// previous value of the lock word was "free".
static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
}
2750
// Checked test for HLE locks; currently defers to the unchecked version.
static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                           kmp_int32 gtid) {
  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
}
2755
2756static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2758}
2759
2760static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2762}
2763
2764static void
2765__kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2767}
2768
2770static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2771 kmp_int32 gtid) {
2772 unsigned retries = 3, status;
2773 do {
2774 status = _xbegin();
2775 if (status == _XBEGIN_STARTED) {
2776 if (__kmp_is_unlocked_queuing_lock(lck))
2777 return;
2778 _xabort(0xff);
2779 }
2780 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2781 // Wait until lock becomes free
2782 while (!__kmp_is_unlocked_queuing_lock(lck)) {
2783 KMP_YIELD(TRUE);
2784 }
2785 } else if (!(status & _XABORT_RETRY))
2786 break;
2787 } while (retries--);
2788
2789 // Fall-back non-speculative lock (xchg)
2791}
2792
// Checked acquire for RTM queuing locks; currently defers to the unchecked
// version.
static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                       kmp_int32 gtid) {
  __kmp_acquire_rtm_queuing_lock(lck, gtid);
}
2797
2799static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2800 kmp_int32 gtid) {
2801 if (__kmp_is_unlocked_queuing_lock(lck)) {
2802 // Releasing from speculation
2803 _xend();
2804 } else {
2805 // Releasing from a real lock
2807 }
2808 return KMP_LOCK_RELEASED;
2809}
2810
// Checked release for RTM queuing locks; currently defers to the unchecked
// version.
static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                      kmp_int32 gtid) {
  return __kmp_release_rtm_queuing_lock(lck, gtid);
}
2815
// Try to acquire an RTM queuing lock speculatively. Attempt to start a
// transaction up to four times; if one starts while the underlying queuing
// lock is free, report success with the transaction left open (the matching
// release path commits it with _xend). Otherwise fall back to a real,
// non-speculative test of the queuing lock.
static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
                                       kmp_int32 gtid) {
  unsigned retries = 3, status;
  do {
    status = _xbegin();
    if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
      return 1;
    }
    // Give up early if the hardware indicates a retry is unlikely to succeed.
    if (!(status & _XABORT_RETRY))
      break;
  } while (retries--);

  return __kmp_test_queuing_lock(lck, gtid);
}
2831
// Checked test for RTM queuing locks; currently defers to the unchecked
// version.
static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                   kmp_int32 gtid) {
  return __kmp_test_rtm_queuing_lock(lck, gtid);
}
2836
2837// Reuse kmp_tas_lock_t for TSX lock which use RTM with fall-back spin lock.
2838typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2839
// Destroy an RTM spin lock by resetting the poll word with a release store.
static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
  KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
}
2843
// Checked destroy for RTM spin locks; currently defers to the unchecked
// version.
static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
  __kmp_destroy_rtm_spin_lock(lck);
}
2847
2849static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2850 kmp_int32 gtid) {
2851 unsigned retries = 3, status;
2852 kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2853 kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2854 do {
2855 status = _xbegin();
2856 if (status == _XBEGIN_STARTED) {
2857 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2859 _xabort(0xff);
2860 }
2861 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2862 // Wait until lock becomes free
2863 while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2864 KMP_YIELD(TRUE);
2865 }
2866 } else if (!(status & _XABORT_RETRY))
2867 break;
2868 } while (retries--);
2869
2870 // Fall-back spin lock
2873 while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2874 !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2875 __kmp_spin_backoff(&backoff);
2876 }
2879}
2880
// Checked acquire for RTM spin locks; currently defers to the unchecked
// version.
static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
                                                   kmp_int32 gtid) {
  return __kmp_acquire_rtm_spin_lock(lck, gtid);
}
2885
2887static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2888 kmp_int32 gtid) {
2889 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2890 // Releasing from speculation
2891 _xend();
2892 } else {
2893 // Releasing from a real lock
2895 KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2896 }
2897 return KMP_LOCK_RELEASED;
2898}
2899
// Checked release for RTM spin locks; currently defers to the unchecked
// version.
static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
                                                   kmp_int32 gtid) {
  return __kmp_release_rtm_spin_lock(lck, gtid);
}
2904
2906static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2907 unsigned retries = 3, status;
2908 kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2909 kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2910 do {
2911 status = _xbegin();
2912 if (status == _XBEGIN_STARTED &&
2913 KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2914 return TRUE;
2915 }
2916 if (!(status & _XABORT_RETRY))
2917 break;
2918 } while (retries--);
2919
2920 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2921 __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2923 return TRUE;
2924 }
2925 return FALSE;
2926}
2927
// Checked test for RTM spin locks; currently defers to the unchecked version.
static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
                                                kmp_int32 gtid) {
  return __kmp_test_rtm_spin_lock(lck, gtid);
}
2932
2933#endif // KMP_USE_TSX
2934
2935// Entry functions for indirect locks (first element of direct lock jump tables)
2936static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2937 kmp_dyna_lockseq_t tag);
2938static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2939static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2940static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2941static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2942static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2943 kmp_int32);
2944static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2945 kmp_int32);
2946static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2947 kmp_int32);
2948
2949// Lock function definitions for the union parameter type
2950#define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2951
2952#define expand1(lk, op) \
2953 static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \
2954 __kmp_##op##_##lk##_##lock(&lock->lk); \
2955 }
2956#define expand2(lk, op) \
2957 static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \
2958 kmp_int32 gtid) { \
2959 return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \
2960 }
2961#define expand3(lk, op) \
2962 static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \
2963 kmp_lock_flags_t flags) { \
2964 __kmp_set_##lk##_lock_flags(&lock->lk, flags); \
2965 }
2966#define expand4(lk, op) \
2967 static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \
2968 const ident_t *loc) { \
2969 __kmp_set_##lk##_lock_location(&lock->lk, loc); \
2970 }
2971
2972KMP_FOREACH_LOCK_KIND(expand1, init)
2973KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2974KMP_FOREACH_LOCK_KIND(expand1, destroy)
2975KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2976KMP_FOREACH_LOCK_KIND(expand2, acquire)
2977KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2978KMP_FOREACH_LOCK_KIND(expand2, release)
2979KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2980KMP_FOREACH_LOCK_KIND(expand2, test)
2981KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2982KMP_FOREACH_LOCK_KIND(expand3, )
2983KMP_FOREACH_LOCK_KIND(expand4, )
2984
2985#undef expand1
2986#undef expand2
2987#undef expand3
2988#undef expand4
2989
2990// Jump tables for the indirect lock functions
2991// Only fill in the odd entries, that avoids the need to shift out the low bit
2992
2993// init functions
2994#define expand(l, op) 0, __kmp_init_direct_lock,
2995void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2996 __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2997#undef expand
2998
2999// destroy functions
3000#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3001static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
3002 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3003#undef expand
3004#define expand(l, op) \
3005 0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
3006static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
3007 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3008#undef expand
3009
3010// set/acquire functions
3011#define expand(l, op) \
3012 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3013static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
3014 __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
3015#undef expand
3016#define expand(l, op) \
3017 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3018static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3019 __kmp_set_indirect_lock_with_checks, 0,
3020 KMP_FOREACH_D_LOCK(expand, acquire)};
3021#undef expand
3022
3023// unset/release and test functions
3024#define expand(l, op) \
3025 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3026static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
3027 __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
3028static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
3029 __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3030#undef expand
3031#define expand(l, op) \
3032 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3033static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3034 __kmp_unset_indirect_lock_with_checks, 0,
3035 KMP_FOREACH_D_LOCK(expand, release)};
3036static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3037 __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3038#undef expand
3039
3040// Exposes only one set of jump tables (*lock or *lock_with_checks).
3041void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
3042int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
3043int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
3044int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
3045
3046// Jump tables for the indirect lock functions
3047#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3048void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
3049 KMP_FOREACH_I_LOCK(expand, init)};
3050#undef expand
3051
3052#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3053static void (*indirect_destroy[])(kmp_user_lock_p) = {
3054 KMP_FOREACH_I_LOCK(expand, destroy)};
3055#undef expand
3056#define expand(l, op) \
3057 (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3058static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
3059 KMP_FOREACH_I_LOCK(expand, destroy)};
3060#undef expand
3061
3062// set/acquire functions
3063#define expand(l, op) \
3064 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3065static int (*indirect_set[])(kmp_user_lock_p,
3066 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3067#undef expand
3068#define expand(l, op) \
3069 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3070static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
3071 KMP_FOREACH_I_LOCK(expand, acquire)};
3072#undef expand
3073
3074// unset/release and test functions
3075#define expand(l, op) \
3076 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3077static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3078 KMP_FOREACH_I_LOCK(expand, release)};
3079static int (*indirect_test[])(kmp_user_lock_p,
3080 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3081#undef expand
3082#define expand(l, op) \
3083 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3084static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3085 KMP_FOREACH_I_LOCK(expand, release)};
3086static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3087 KMP_FOREACH_I_LOCK(expand, test)};
3088#undef expand
3089
3090// Exposes only one jump tables (*lock or *lock_with_checks).
3091void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
3092int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
3093int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
3094int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3095
3096// Lock index table.
3097kmp_indirect_lock_table_t __kmp_i_lock_table;
3098
3099// Size of indirect locks.
3100static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3101
3102// Jump tables for lock accessor/modifier.
3103void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3104 const ident_t *) = {0};
3105void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3106 kmp_lock_flags_t) = {0};
3107const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3108 kmp_user_lock_p) = {0};
3109kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3110 kmp_user_lock_p) = {0};
3111
3112// Use different lock pools for different lock types.
3113static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3114
3115// User lock allocator for dynamically dispatched indirect locks. Every entry of
3116// the indirect lock table holds the address and type of the allocated indirect
3117// lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3118// full. A destroyed indirect lock object is returned to the reusable pool of
3119// locks, unique to each lock type.
3120kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3121 kmp_int32 gtid,
3122 kmp_indirect_locktag_t tag) {
3123 kmp_indirect_lock_t *lck;
3124 kmp_lock_index_t idx, table_idx;
3125
3127
3128 if (__kmp_indirect_lock_pool[tag] != NULL) {
3129 // Reuse the allocated and destroyed lock object
3130 lck = __kmp_indirect_lock_pool[tag];
3131 if (OMP_LOCK_T_SIZE < sizeof(void *))
3132 idx = lck->lock->pool.index;
3133 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3134 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3135 lck));
3136 } else {
3137 kmp_uint32 row, col;
3138 kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
3139 idx = 0;
3140 // Find location in list of lock tables to put new lock
3141 while (1) {
3142 table_idx = lock_table->next; // index within this table
3143 idx += lock_table->next; // global index within list of tables
3144 if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
3145 row = table_idx / KMP_I_LOCK_CHUNK;
3146 col = table_idx % KMP_I_LOCK_CHUNK;
3147 // Allocate a new row of locks if necessary
3148 if (!lock_table->table[row]) {
3149 lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
3150 sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
3151 }
3152 break;
3153 }
3154 // Allocate a new lock table if necessary with double the capacity
3155 if (!lock_table->next_table) {
3156 kmp_indirect_lock_table_t *next_table =
3157 (kmp_indirect_lock_table_t *)__kmp_allocate(
3158 sizeof(kmp_indirect_lock_table_t));
3159 next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
3160 sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
3161 next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
3162 next_table->next = 0;
3163 next_table->next_table = nullptr;
3164 lock_table->next_table = next_table;
3165 }
3166 lock_table = lock_table->next_table;
3167 KMP_ASSERT(lock_table);
3168 }
3169 lock_table->next++;
3170
3171 lck = &lock_table->table[row][col];
3172 // Allocate a new base lock object
3173 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3174 KA_TRACE(20,
3175 ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3176 }
3177
3179
3180 lck->type = tag;
3181
3182 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3183 *(kmp_lock_index_t *)&(((kmp_base_tas_lock_t *)user_lock)->poll) =
3184 idx << 1; // indirect lock word must be even
3185 } else {
3186 *((kmp_indirect_lock_t **)user_lock) = lck;
3187 }
3188
3189 return lck;
3190}
3191
3192// User lock lookup for dynamically dispatched locks.
3193static __forceinline kmp_indirect_lock_t *
3194__kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3196 kmp_indirect_lock_t *lck = NULL;
3197 if (user_lock == NULL) {
3198 KMP_FATAL(LockIsUninitialized, func);
3199 }
3200 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3201 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3202 lck = __kmp_get_i_lock(idx);
3203 } else {
3204 lck = *((kmp_indirect_lock_t **)user_lock);
3205 }
3206 if (lck == NULL) {
3207 KMP_FATAL(LockIsUninitialized, func);
3208 }
3209 return lck;
3210 } else {
3211 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3212 return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
3213 } else {
3214 return *((kmp_indirect_lock_t **)user_lock);
3215 }
3216 }
3217}
3218
3219static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3220 kmp_dyna_lockseq_t seq) {
3221#if KMP_USE_ADAPTIVE_LOCKS
3222 if (seq == lockseq_adaptive && !__kmp_cpuinfo.flags.rtm) {
3223 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3224 seq = lockseq_queuing;
3225 }
3226#endif
3227#if KMP_USE_TSX
3228 if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.flags.rtm) {
3229 seq = lockseq_queuing;
3230 }
3231#endif
3232 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3233 kmp_indirect_lock_t *l =
3234 __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3235 KMP_I_LOCK_FUNC(l, init)(l->lock);
3236 KA_TRACE(
3237 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3238 seq));
3239}
3240
3241static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3243 kmp_indirect_lock_t *l =
3244 __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3245 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3246 kmp_indirect_locktag_t tag = l->type;
3247
3249
3250 // Use the base lock's space to keep the pool chain.
3251 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3252 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3253 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3254 }
3255 __kmp_indirect_lock_pool[tag] = l;
3256
3258}
3259
3260static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3261 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3262 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3263}
3264
3265static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3266 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3267 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3268}
3269
3270static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3271 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3272 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3273}
3274
3275static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3276 kmp_int32 gtid) {
3277 kmp_indirect_lock_t *l =
3278 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3279 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3280}
3281
3282static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3283 kmp_int32 gtid) {
3284 kmp_indirect_lock_t *l =
3285 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3286 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3287}
3288
3289static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3290 kmp_int32 gtid) {
3291 kmp_indirect_lock_t *l =
3292 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3293 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3294}
3295
3296kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3297
3298// This is used only in kmp_error.cpp when consistency checking is on.
3300 switch (seq) {
3301 case lockseq_tas:
3302 case lockseq_nested_tas:
3304#if KMP_USE_FUTEX
3305 case lockseq_futex:
3306 case lockseq_nested_futex:
3307 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3308#endif
3309 case lockseq_ticket:
3310 case lockseq_nested_ticket:
3312 case lockseq_queuing:
3313 case lockseq_nested_queuing:
3314#if KMP_USE_ADAPTIVE_LOCKS
3315 case lockseq_adaptive:
3316#endif
3318 case lockseq_drdpa:
3319 case lockseq_nested_drdpa:
3321 default:
3322 return 0;
3323 }
3324}
3325
3326// Initializes data for dynamic user locks.
3327void __kmp_init_dynamic_user_locks() {
3328 // Initialize jump table for the lock functions
3330 __kmp_direct_set = direct_set_check;
3331 __kmp_direct_unset = direct_unset_check;
3332 __kmp_direct_test = direct_test_check;
3333 __kmp_direct_destroy = direct_destroy_check;
3334 __kmp_indirect_set = indirect_set_check;
3335 __kmp_indirect_unset = indirect_unset_check;
3336 __kmp_indirect_test = indirect_test_check;
3337 __kmp_indirect_destroy = indirect_destroy_check;
3338 } else {
3339 __kmp_direct_set = direct_set;
3340 __kmp_direct_unset = direct_unset;
3341 __kmp_direct_test = direct_test;
3342 __kmp_direct_destroy = direct_destroy;
3343 __kmp_indirect_set = indirect_set;
3344 __kmp_indirect_unset = indirect_unset;
3345 __kmp_indirect_test = indirect_test;
3346 __kmp_indirect_destroy = indirect_destroy;
3347 }
3348 // If the user locks have already been initialized, then return. Allow the
3349 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3350 // new lock tables if they have already been allocated.
3352 return;
3353
3354 // Initialize lock index table
3355 __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
3356 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
3357 sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
3358 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3359 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3360 __kmp_i_lock_table.next = 0;
3361 __kmp_i_lock_table.next_table = nullptr;
3362
3363 // Indirect lock size
3364 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3365 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3366#if KMP_USE_ADAPTIVE_LOCKS
3367 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3368#endif
3369 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3370#if KMP_USE_TSX
3371 __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
3372#endif
3373 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3374#if KMP_USE_FUTEX
3375 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3376#endif
3377 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3378 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3379 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3380
3381// Initialize lock accessor/modifier
3382#define fill_jumps(table, expand, sep) \
3383 { \
3384 table[locktag##sep##ticket] = expand(ticket); \
3385 table[locktag##sep##queuing] = expand(queuing); \
3386 table[locktag##sep##drdpa] = expand(drdpa); \
3387 }
3388
3389#if KMP_USE_ADAPTIVE_LOCKS
3390#define fill_table(table, expand) \
3391 { \
3392 fill_jumps(table, expand, _); \
3393 table[locktag_adaptive] = expand(queuing); \
3394 fill_jumps(table, expand, _nested_); \
3395 }
3396#else
3397#define fill_table(table, expand) \
3398 { \
3399 fill_jumps(table, expand, _); \
3400 fill_jumps(table, expand, _nested_); \
3401 }
3402#endif // KMP_USE_ADAPTIVE_LOCKS
3403
3404#define expand(l) \
3405 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3406 fill_table(__kmp_indirect_set_location, expand);
3407#undef expand
3408#define expand(l) \
3409 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3410 fill_table(__kmp_indirect_set_flags, expand);
3411#undef expand
3412#define expand(l) \
3413 (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3414 fill_table(__kmp_indirect_get_location, expand);
3415#undef expand
3416#define expand(l) \
3417 (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3418 fill_table(__kmp_indirect_get_flags, expand);
3419#undef expand
3420
3422}
3423
3424// Clean up the lock table.
3425void __kmp_cleanup_indirect_user_locks() {
3426 int k;
3427
3428 // Clean up locks in the pools first (they were already destroyed before going
3429 // into the pools).
3430 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3431 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3432 while (l != NULL) {
3433 kmp_indirect_lock_t *ll = l;
3434 l = (kmp_indirect_lock_t *)l->lock->pool.next;
3435 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3436 ll));
3437 __kmp_free(ll->lock);
3438 ll->lock = NULL;
3439 }
3440 __kmp_indirect_lock_pool[k] = NULL;
3441 }
3442 // Clean up the remaining undestroyed locks.
3443 kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
3444 while (ptr) {
3445 for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
3446 if (!ptr->table[row])
3447 continue;
3448 for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
3449 kmp_indirect_lock_t *l = &ptr->table[row][col];
3450 if (l->lock) {
3451 // Locks not destroyed explicitly need to be destroyed here.
3452 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3453 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
3454 "from table\n",
3455 l));
3456 __kmp_free(l->lock);
3457 }
3458 }
3459 __kmp_free(ptr->table[row]);
3460 }
3461 kmp_indirect_lock_table_t *next_table = ptr->next_table;
3462 if (ptr != &__kmp_i_lock_table)
3463 __kmp_free(ptr);
3464 ptr = next_table;
3465 }
3466
3468}
3469
3471int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3472
3473#else // KMP_USE_DYNAMIC_LOCK
3474
3477}
3478
3481}
3482
#if KMP_USE_FUTEX
// "With checks" variant of the futex lock initializer; forwards directly to
// the plain initializer.
static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}

// "With checks" variant of the nested futex lock initializer; forwards
// directly to the plain nested initializer.
static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}
#endif
3492
3494 return lck == lck->lk.self;
3495}
3496
3499}
3500
3503}
3504
3506 return lck == lck->lk.initialized;
3507}
3508
3511}
3512
3513static void
3516}
3517
#if KMP_USE_ADAPTIVE_LOCKS
// "With checks" variant of the adaptive lock initializer; forwards directly
// to the plain initializer.
static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  __kmp_init_adaptive_lock(lck);
}
#endif
3523
3525 return lck == lck->lk.initialized;
3526}
3527
3530}
3531
3534}
3535
3536/* user locks
3537 * They are implemented as a table of function pointers which are set to the
3538 * lock functions of the appropriate kind, once that has been determined. */
3539
3541
3544
3547 kmp_int32 gtid) = NULL;
3548
3550 kmp_int32 gtid) = NULL;
3552 kmp_int32 gtid) = NULL;
3557 kmp_int32 gtid) = NULL;
3558
3560 kmp_int32 gtid) = NULL;
3562 kmp_int32 gtid) = NULL;
3565
3567const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3569 const ident_t *loc) = NULL;
3572 kmp_lock_flags_t flags) = NULL;
3573
3575 switch (user_lock_kind) {
3576 case lk_default:
3577 default:
3578 KMP_ASSERT(0);
3579
3580 case lk_tas: {
3583
3586
3590 } else {
3591 KMP_BIND_USER_LOCK(tas);
3593 }
3594
3597
3599
3601
3603 (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3604
3606
3609 } break;
3610
3611#if KMP_USE_FUTEX
3612
3613 case lk_futex: {
3614 __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
3615 __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
3616
3618 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
3619
3623 } else {
3624 KMP_BIND_USER_LOCK(futex);
3626 }
3627
3629 (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
3630
3632
3634
3636 (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3637
3639
3642 } break;
3643
3644#endif // KMP_USE_FUTEX
3645
3646 case lk_ticket: {
3649
3652
3656 } else {
3657 KMP_BIND_USER_LOCK(ticket);
3659 }
3660
3663
3666
3669
3672
3675
3678 } break;
3679
3680 case lk_queuing: {
3683
3686
3690 } else {
3691 KMP_BIND_USER_LOCK(queuing);
3693 }
3694
3697
3700
3703
3706
3709
3712 } break;
3713
3714#if KMP_USE_ADAPTIVE_LOCKS
3715 case lk_adaptive: {
3716 __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
3717 __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
3718
3721
3724 } else {
3725 KMP_BIND_USER_LOCK(adaptive);
3726 }
3727
3729 (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
3730
3733
3736
3739
3742
3745
3746 } break;
3747#endif // KMP_USE_ADAPTIVE_LOCKS
3748
3749 case lk_drdpa: {
3752
3755
3759 } else {
3760 KMP_BIND_USER_LOCK(drdpa);
3762 }
3763
3766
3769
3772
3775
3778
3781 } break;
3782 }
3783}
3784
3785// ----------------------------------------------------------------------------
3786// User lock table & lock allocation
3787
3790
3791// Lock block-allocation support.
3793int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3794
3796 // Assume that kmp_global_lock is held upon entry/exit.
3797 kmp_lock_index_t index;
3800 kmp_user_lock_p *table;
3801 // Reallocate lock table.
3803 size = 1024;
3804 } else {
3806 }
3807 table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3808 KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3811 // We cannot free the previous table now, since it may be in use by other
3812 // threads. So save the pointer to the previous table in the first
3813 // element of the new table. All the tables will be organized into a list,
3814 // and could be freed when library shutting down.
3817 }
3823 return index;
3824}
3825
3827 // Assume that kmp_global_lock is held upon entry/exit.
3828 static int last_index = 0;
3829 if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3830 // Restart the index.
3831 last_index = 0;
3832 // Need to allocate a new block.
3834 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3835 char *buffer =
3836 (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3837 // Set up the new block.
3838 kmp_block_of_locks *new_block =
3839 (kmp_block_of_locks *)(&buffer[space_for_locks]);
3840 new_block->next_block = __kmp_lock_blocks;
3841 new_block->locks = (void *)buffer;
3842 // Publish the new block.
3843 KMP_MB();
3844 __kmp_lock_blocks = new_block;
3845 }
3847 ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3848 last_index++;
3849 return ret;
3850}
3851
3852// Get memory for a lock. It may be freshly allocated memory or reused memory
3853// from lock pool.
3855 kmp_lock_flags_t flags) {
3857 kmp_lock_index_t index;
3858 KMP_DEBUG_ASSERT(user_lock);
3859
3861
3862 if (__kmp_lock_pool == NULL) {
3863 // Lock pool is empty. Allocate new memory.
3864
3865 if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3867 } else {
3869 }
3870
3871 // Insert lock in the table so that it can be freed in __kmp_cleanup,
3872 // and debugger has info on all allocated locks.
3874 } else {
3875 // Pick up lock from pool.
3877 index = __kmp_lock_pool->pool.index;
3879 }
3880
3881 // We could potentially differentiate between nested and regular locks
3882 // here, and do the lock table lookup for regular locks only.
3883 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3884 *((kmp_lock_index_t *)user_lock) = index;
3885 } else {
3886 *((kmp_user_lock_p *)user_lock) = lck;
3887 }
3888
3889 // mark the lock if it is critical section lock.
3891
3892 __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper
3893
3894 return lck;
3895}
3896
3897// Put lock's memory to pool for reusing.
3898void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3900 KMP_DEBUG_ASSERT(user_lock != NULL);
3901 KMP_DEBUG_ASSERT(lck != NULL);
3902
3904
3905 lck->pool.next = __kmp_lock_pool;
3907 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3908 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3909 KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3910 lck->pool.index = index;
3911 }
3912
3914}
3915
3916kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3917 kmp_user_lock_p lck = NULL;
3918
3920 if (user_lock == NULL) {
3921 KMP_FATAL(LockIsUninitialized, func);
3922 }
3923 }
3924
3925 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3926 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3928 if (!(0 < index && index < __kmp_user_lock_table.used)) {
3929 KMP_FATAL(LockIsUninitialized, func);
3930 }
3931 }
3932 KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3935 } else {
3936 lck = *((kmp_user_lock_p *)user_lock);
3937 }
3938
3940 if (lck == NULL) {
3941 KMP_FATAL(LockIsUninitialized, func);
3942 }
3943 }
3944
3945 return lck;
3946}
3947
3949 // Reset lock pool. Don't worry about lock in the pool--we will free them when
3950 // iterating through lock table (it includes all the locks, dead or alive).
3951 __kmp_lock_pool = NULL;
3952
3953#define IS_CRITICAL(lck) \
3954 ((__kmp_get_user_lock_flags_ != NULL) && \
3955 ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3956
3957 // Loop through lock table, free all locks.
3958 // Do not free item [0], it is reserved for lock tables list.
3959 //
3960 // FIXME - we are iterating through a list of (pointers to) objects of type
3961 // union kmp_user_lock, but we have no way of knowing whether the base type is
3962 // currently "pool" or whatever the global user lock type is.
3963 //
3964 // We are relying on the fact that for all of the user lock types
3965 // (except "tas"), the first field in the lock struct is the "initialized"
3966 // field, which is set to the address of the lock object itself when
3967 // the lock is initialized. When the union is of type "pool", the
3968 // first field is a pointer to the next object in the free list, which
3969 // will not be the same address as the object itself.
3970 //
3971 // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3972 // for "pool" objects on the free list. This must happen as the "location"
3973 // field of real user locks overlaps the "index" field of "pool" objects.
3974 //
3975 // It would be better to run through the free list, and remove all "pool"
3976 // objects from the lock table before executing this loop. However,
3977 // "pool" objects do not always have their index field set (only on
3978 // lin_32e), and I don't want to search the lock table for the address
3979 // of every "pool" object on the free list.
3980 while (__kmp_user_lock_table.used > 1) {
3981 const ident *loc;
3982
3983 // reduce __kmp_user_lock_table.used before freeing the lock,
3984 // so that state of locks is consistent
3987
3988 if ((__kmp_is_user_lock_initialized_ != NULL) &&
3990 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
3991 // it is NOT a critical section (user is not responsible for destroying
3992 // criticals) AND we know source location to report.
3994 ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
3995 (loc->psource != NULL)) {
3996 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
3997 KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
3998 __kmp_str_loc_free(&str_loc);
3999 }
4000
4001#ifdef KMP_DEBUG
4002 if (IS_CRITICAL(lck)) {
4003 KA_TRACE(
4004 20,
4005 ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
4006 lck, *(void **)lck));
4007 } else {
4008 KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
4009 *(void **)lck));
4010 }
4011#endif // KMP_DEBUG
4012
4013 // Cleanup internal lock dynamic resources (for drdpa locks particularly).
4015 }
4016
4017 // Free the lock if block allocation of locks is not used.
4018 if (__kmp_lock_blocks == NULL) {
4019 __kmp_free(lck);
4020 }
4021 }
4022
4023#undef IS_CRITICAL
4024
4025 // delete lock table(s).
4029
4030 while (table_ptr != NULL) {
4031 // In the first element we saved the pointer to the previous
4032 // (smaller) lock table.
4033 kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
4034 __kmp_free(table_ptr);
4035 table_ptr = next;
4036 }
4037
4038 // Free buffers allocated for blocks of locks.
4040 __kmp_lock_blocks = NULL;
4041
4042 while (block_ptr != NULL) {
4043 kmp_block_of_locks_t *next = block_ptr->next_block;
4044 __kmp_free(block_ptr->locks);
4045 // *block_ptr itself was allocated at the end of the locks vector.
4046 block_ptr = next;
4047 }
4048
4050}
4051
4052#endif // KMP_USE_DYNAMIC_LOCK
This class safely opens and closes a C-style FILE* object using RAII semantics.
Definition: kmp.h:4689
void set_stdout()
Set the FILE* object to stdout and output there No open call should happen before this call.
Definition: kmp.h:4737
void open(const char *filename, const char *mode, const char *env_var=nullptr)
Open filename using mode.
Definition: kmp.h:4711
int64_t kmp_int64
Definition: common.h:10
void
Definition: ittnotify.h:3324
void const char const char int ITT_FORMAT __itt_group_sync s
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int mask
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t count
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync p
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id head
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d int
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id tail
#define __kmp_free(ptr)
Definition: kmp.h:3756
volatile int __kmp_init_user_locks
Definition: kmp_global.cpp:57
#define KMP_CPU_PAUSE()
Definition: kmp.h:1564
#define KMP_PACK_64(HIGH_32, LOW_32)
Definition: kmp.h:291
#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time)
Definition: kmp.h:1640
int __kmp_xproc
Definition: kmp_global.cpp:122
kmp_lock_t __kmp_global_lock
#define __kmp_entry_gtid()
Definition: kmp.h:3601
volatile int __kmp_nth
Definition: kmp_global.cpp:456
kmp_info_t ** __kmp_threads
Definition: kmp_global.cpp:450
#define KMP_YIELD_OVERSUB()
Definition: kmp.h:1589
#define KMP_INIT_YIELD(count)
Definition: kmp.h:1567
#define KMP_INIT_BACKOFF(time)
Definition: kmp.h:1570
#define KMP_YIELD(cond)
Definition: kmp.h:1582
#define __kmp_allocate(size)
Definition: kmp.h:3754
#define TRUE
Definition: kmp.h:1324
#define FALSE
Definition: kmp.h:1323
int __kmp_env_consistency_check
Definition: kmp_global.cpp:423
int __kmp_avail_proc
Definition: kmp_global.cpp:123
static kmp_info_t * __kmp_thread_from_gtid(int gtid)
Definition: kmp.h:3634
union KMP_ALIGN_CACHE kmp_info kmp_info_t
KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86 KMP_ARCH_X86<<, 2i, 1, KMP_ARCH_X86) ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, > KMP_ARCH_X86 KMP_ARCH_X86 kmp_uint32
#define KA_TRACE(d, x)
Definition: kmp_debug.h:157
#define KMP_ASSERT(cond)
Definition: kmp_debug.h:59
#define KMP_DEBUG_ASSERT(cond)
Definition: kmp_debug.h:61
#define KMP_ASSERT2(cond, msg)
Definition: kmp_debug.h:60
unsigned long long kmp_uint64
static volatile kmp_i18n_cat_status_t status
Definition: kmp_i18n.cpp:48
static kmp_bootstrap_lock_t lock
Definition: kmp_i18n.cpp:57
#define KMP_WARNING(...)
Definition: kmp_i18n.h:144
#define KMP_FATAL(...)
Definition: kmp_i18n.h:146
void __kmp_printf(char const *format,...)
Definition: kmp_io.cpp:186
void __kmp_printf_no_lock(char const *format,...)
Definition: kmp_io.cpp:197
#define KMP_FSYNC_PREPARE(obj)
Definition: kmp_itt.h:332
#define KMP_FSYNC_RELEASING(obj)
Definition: kmp_itt.h:335
#define KMP_FSYNC_ACQUIRED(obj)
Definition: kmp_itt.h:334
kmp_backoff_t __kmp_spin_backoff_params
Definition: kmp_lock.cpp:2650
size_t __kmp_base_user_lock_size
Definition: kmp_lock.cpp:3542
static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck)
Definition: kmp_lock.cpp:290
static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2554
void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:1516
static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:3532
static bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck)
Definition: kmp_lock.cpp:74
enum kmp_lock_kind __kmp_user_lock_kind
Definition: kmp_lock.cpp:3540
int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:118
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags)
Definition: kmp_lock.cpp:3854
static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:3505
size_t __kmp_user_lock_size
Definition: kmp_lock.cpp:3543
int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:206
int(* __kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:3556
static const ident_t * __kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:2624
static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, const ident_t *loc)
Definition: kmp_lock.cpp:2628
void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:770
static __forceinline int __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1084
static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck, kmp_lock_flags_t flags)
Definition: kmp_lock.cpp:1688
static void __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:3514
void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck)
Definition: kmp_lock.cpp:280
int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2352
void(* __kmp_set_user_lock_location_)(kmp_user_lock_p lck, const ident_t *loc)
Definition: kmp_lock.cpp:3568
static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:3501
int(* __kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:3561
int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:253
static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:3493
int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1615
int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2376
void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:2598
int(* __kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:3559
void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:2603
void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind)
Definition: kmp_lock.cpp:3574
kmp_int32(* __kmp_get_user_lock_owner_)(kmp_user_lock_p lck)
Definition: kmp_lock.cpp:3545
static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:2209
static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:607
kmp_block_of_locks * __kmp_lock_blocks
Definition: kmp_lock.cpp:3792
static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1603
int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1353
void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck)
Definition: kmp_lock.cpp:190
kmp_uint64 __kmp_now_nsec()
static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:2633
void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:913
static bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:1075
static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2357
static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:3497
static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:2608
int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2508
static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1496
int(* __kmp_is_user_lock_initialized_)(kmp_user_lock_p lck)
Definition: kmp_lock.cpp:3566
int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:839
static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:3509
static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:1071
void(* __kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck)
Definition: kmp_lock.cpp:3555
static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:822
static const ident_t * __kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:948
void __kmp_init_tas_lock(kmp_tas_lock_t *lck)
Definition: kmp_lock.cpp:186
static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck, const ident_t *loc)
Definition: kmp_lock.cpp:1679
#define __kmp_tsc()
Definition: kmp_lock.cpp:2651
static int __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1573
int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:157
int(* __kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:3546
kmp_lock_table_t __kmp_user_lock_table
Definition: kmp_lock.cpp:3788
static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:3528
void(* __kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck)
Definition: kmp_lock.cpp:3563
void(* __kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck)
Definition: kmp_lock.cpp:3553
int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1585
static void __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:927
static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:860
static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2579
void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:753
int __kmp_num_locks_in_block
Definition: kmp_lock.cpp:3793
static const ident_t * __kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:1675
static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket)
Definition: kmp_lock.cpp:618
int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2421
int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1303
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck)
Definition: kmp_lock.cpp:3898
static __forceinline int __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2218
static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:1540
static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:147
static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:2492
static __forceinline int __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:79
static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:649
void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:1648
int(* __kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:3549
static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck)
Definition: kmp_lock.cpp:3475
const ident_t *(* __kmp_get_user_lock_location_)(kmp_user_lock_p lck)
Definition: kmp_lock.cpp:3567
static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck)
Definition: kmp_lock.cpp:3479
int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2566
#define IS_CRITICAL(lck)
static bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:613
static kmp_user_lock_p __kmp_lock_block_allocate()
Definition: kmp_lock.cpp:3826
static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags)
Definition: kmp_lock.cpp:2637
int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1556
kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func)
Definition: kmp_lock.cpp:3916
int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:674
static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck, kmp_lock_flags_t flags)
Definition: kmp_lock.cpp:961
static bool before(kmp_uint64 a, kmp_uint64 b)
Definition: kmp_lock.cpp:2661
void(* __kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck)
Definition: kmp_lock.cpp:3564
kmp_user_lock_p __kmp_lock_pool
Definition: kmp_lock.cpp:3789
int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:644
static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:890
int(* __kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:3551
static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:784
static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck)
Definition: kmp_lock.cpp:192
void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:2472
void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck)
Definition: kmp_lock.cpp:285
void __kmp_cleanup_user_locks(void)
Definition: kmp_lock.cpp:3948
void(* __kmp_destroy_user_lock_)(kmp_user_lock_p lck)
Definition: kmp_lock.cpp:3554
static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:244
void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:1653
static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:690
static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2403
int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2536
void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:2455
void __kmp_spin_backoff(kmp_backoff_t *boff)
Definition: kmp_lock.cpp:2666
void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:1529
void __kmp_validate_locks(void)
Definition: kmp_lock.cpp:43
int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1277
static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:219
int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:877
int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:805
static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:728
static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:123
static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1284
static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2434
int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:228
kmp_lock_flags_t(* __kmp_get_user_lock_flags_)(kmp_user_lock_p lck)
Definition: kmp_lock.cpp:3570
static bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:2213
static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck)
Definition: kmp_lock.cpp:3795
static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck, const ident_t *loc)
Definition: kmp_lock.cpp:952
static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:264
static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:1684
static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:168
static __forceinline int __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:624
static int __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1629
static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:2524
static void __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck)
Definition: kmp_lock.cpp:1659
static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:957
static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck)
Definition: kmp_lock.cpp:70
int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:136
int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:714
static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck)
Definition: kmp_lock.cpp:3524
static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.cpp:1335
void(* __kmp_set_user_lock_flags_)(kmp_user_lock_p lck, kmp_lock_flags_t flags)
Definition: kmp_lock.cpp:3571
void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck)
Definition: kmp_lock.cpp:920
static void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck)
Definition: kmp_lock.h:535
struct kmp_base_tas_lock kmp_base_tas_lock_t
Definition: kmp_lock.h:135
#define KMP_BIND_NESTED_USER_LOCK(kind)
Definition: kmp_lock.h:932
static int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.h:559
enum kmp_lock_kind kmp_lock_kind_t
Definition: kmp_lock.h:605
union kmp_user_lock * kmp_user_lock_p
Definition: kmp_lock.h:623
union kmp_ticket_lock kmp_ticket_lock_t
Definition: kmp_lock.h:280
static void __kmp_destroy_user_lock(kmp_user_lock_p lck)
Definition: kmp_lock.h:735
#define KMP_LOCK_RELEASED
Definition: kmp_lock.h:164
kmp_lock_kind
Definition: kmp_lock.h:586
@ lk_default
Definition: kmp_lock.h:587
@ lk_ticket
Definition: kmp_lock.h:597
@ lk_drdpa
Definition: kmp_lock.h:599
@ lk_queuing
Definition: kmp_lock.h:598
@ lk_tas
Definition: kmp_lock.h:588
static int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck)
Definition: kmp_lock.h:527
#define KMP_LOCK_FREE(type)
Definition: kmp_lock.h:1284
#define KMP_LOCK_STRIP(v)
Definition: kmp_lock.h:1285
struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t
Definition: kmp_lock.h:483
#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind)
Definition: kmp_lock.h:934
#define OMP_LOCK_T_SIZE
Definition: kmp_lock.h:57
#define KMP_LOCK_ACQUIRED_NEXT
Definition: kmp_lock.h:167
#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind)
Definition: kmp_lock.h:930
kmp_uint32 kmp_lock_flags_t
Definition: kmp_lock.h:68
union kmp_drdpa_lock kmp_drdpa_lock_t
Definition: kmp_lock.h:493
static void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid)
Definition: kmp_lock.h:567
kmp_uint32 kmp_lock_index_t
Definition: kmp_lock.h:73
#define KMP_LOCK_ACQUIRED_FIRST
Definition: kmp_lock.h:166
struct kmp_base_queuing_lock kmp_base_queuing_lock_t
Definition: kmp_lock.h:384
#define KMP_BIND_USER_LOCK(kind)
Definition: kmp_lock.h:929
static kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck)
Definition: kmp_lock.h:632
static void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck)
Definition: kmp_lock.h:539
union kmp_tas_lock kmp_tas_lock_t
Definition: kmp_lock.h:143
static void __kmp_set_user_lock_flags(kmp_user_lock_p lck, kmp_lock_flags_t flags)
Definition: kmp_lock.h:904
#define KMP_LOCK_STILL_HELD
Definition: kmp_lock.h:165
#define KMP_LOCK_BUSY(v, type)
Definition: kmp_lock.h:1283
#define KMP_BOOTSTRAP_LOCK_INITIALIZER(lock)
Definition: kmp_lock.h:523
struct kmp_base_ticket_lock kmp_base_ticket_lock_t
Definition: kmp_lock.h:270
union kmp_queuing_lock kmp_queuing_lock_t
Definition: kmp_lock.h:396
static const ident_t * __kmp_get_user_lock_location(kmp_user_lock_p lck)
Definition: kmp_lock.h:881
#define KMP_XCHG_FIXED32(p, v)
Definition: kmp_os.h:877
#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv)
Definition: kmp_os.h:859
#define KMP_WAIT
Definition: kmp_os.h:1191
#define KMP_COMPARE_AND_STORE_REL32(p, cv, sv)
Definition: kmp_os.h:816
#define KMP_ATOMIC_ST_REL(p, v)
Definition: kmp_os.h:1259
#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv)
Definition: kmp_os.h:856
#define KMP_COMPARE_AND_STORE_RET32(p, cv, sv)
Definition: kmp_os.h:829
#define KMP_NEQ
Definition: kmp_os.h:1194
#define RCAST(type, var)
Definition: kmp_os.h:291
#define KMP_WAIT_PTR
Definition: kmp_os.h:1192
#define CCAST(type, var)
Definition: kmp_os.h:290
#define KMP_MB()
Definition: kmp_os.h:1064
#define KMP_EQ
Definition: kmp_os.h:1193
bool __kmp_atomic_compare_store_acq(std::atomic< T > *p, T expected, T desired)
Definition: kmp_os.h:1280
#define TCR_4(a)
Definition: kmp_os.h:1135
#define KMP_ATOMIC_LD_RLX(p)
Definition: kmp_os.h:1258
#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv)
Definition: kmp_os.h:813
#define TCW_4(a, b)
Definition: kmp_os.h:1136
#define TCR_8(a)
Definition: kmp_os.h:1139
#define KMP_ATTRIBUTE_TARGET_RTM
Definition: kmp_os.h:375
#define KMP_ATOMIC_INC(p)
Definition: kmp_os.h:1267
#define KMP_SNPRINTF
#define KMP_MEMCPY
#define KMP_STRLEN
kmp_str_loc_t __kmp_str_loc_init(char const *psource, bool init_fname)
Definition: kmp_str.cpp:347
void __kmp_str_loc_free(kmp_str_loc_t *loc)
Definition: kmp_str.cpp:393
#define i
Definition: kmp_stub.cpp:87
int a
int32_t kmp_int32
int test(unsigned upper_bound)
volatile int release
omp_lock_t lck
Definition: omp_lock.c:7
void func(int *num_exec)
#define res
ompt_callbacks_active_t ompt_enabled
return ret
static id loc
#define delay(t)
Definition: ompt-signal.h:6
char * psource
The ident structure that describes a source location.
Definition: kmp.h:247
kmp_uint32 max_backoff
Definition: kmp_lock.h:1292
kmp_uint32 step
Definition: kmp_lock.h:1291
kmp_uint32 min_tick
Definition: kmp_lock.h:1293
struct kmp_block_of_locks * next_block
Definition: kmp_lock.h:972
kmp_lock_index_t index
Definition: kmp_lock.h:79
union kmp_user_lock * next
Definition: kmp_lock.h:78
kmp_lock_index_t allocated
Definition: kmp_lock.h:962
kmp_user_lock_p * table
Definition: kmp_lock.h:963
kmp_lock_index_t used
Definition: kmp_lock.h:961
int line
Definition: kmp_str.h:97
char * file
Definition: kmp_str.h:95
kmp_lock_pool_t pool
Definition: kmp_lock.h:620