1/*
2 * kmp_lock.cpp -- lock-related functions
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include <stddef.h>
14#include <atomic>
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_io.h"
19#include "kmp_itt.h"
20#include "kmp_lock.h"
21#include "kmp_wait_release.h"
22#include "kmp_wrapper_getpid.h"
23
24#if KMP_USE_FUTEX
25#include <sys/syscall.h>
26#include <unistd.h>
27// We should really include <futex.h>, but that causes compatibility problems on
28// different Linux* OS distributions that either require that you include (or
29// break when you try to include) <pci/types.h>. Since all we need is the two
30// macros below (which are part of the kernel ABI, so can't change) we just
31// define the constants here and don't include <futex.h>
32#ifndef FUTEX_WAIT
33#define FUTEX_WAIT 0
34#endif
35#ifndef FUTEX_WAKE
36#define FUTEX_WAKE 1
37#endif
38#endif
39
40/* Implement spin locks for internal library use. */
41/* The algorithm implemented is Lamport's bakery lock [1974]. */
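/* Illustrative sketch (not part of the runtime sources): in C++11 atomics, the
   bakery/ticket idea used by the ticket locks further below is simply
   "take a number, wait until it is served":

     std::atomic<unsigned> next_ticket{0}, now_serving{0};
     unsigned my = next_ticket.fetch_add(1, std::memory_order_relaxed);
     while (now_serving.load(std::memory_order_acquire) != my) {
       // spin; a real implementation pauses/yields here
     }
     // ...critical section...
     now_serving.fetch_add(1, std::memory_order_release);
*/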
42
43void __kmp_validate_locks(void) {
44  int i;
45 kmp_uint32 x, y;
46
47  /* Check to make sure unsigned arithmetic wraps properly */
48 x = ~((kmp_uint32)0) - 2;
49 y = x - 2;
50
51 for (i = 0; i < 8; ++i, ++x, ++y) {
52 kmp_uint32 z = (x - y);
53 KMP_ASSERT(z == 2);
54 }
55
56 KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57}
58
59/* ------------------------------------------------------------------------ */
60/* test and set locks */
61
62// For the non-nested locks, we can only assume that the first 4 bytes were
63// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64// compiler only allocates a 4 byte pointer on IA-32 architecture. On
65// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
66//
67// gcc reserves >= 8 bytes for nested locks, so we can assume that the
68// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
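/* Illustrative sketch (not from the runtime sources) of the
   test-and-test-and-set pattern the TAS lock below follows, written with
   C++11 atomics: read the lock word cheaply first and only attempt the more
   expensive CAS when it looks free.

     std::atomic<int> poll{0};            // 0 == free, gtid + 1 == owner
     void acquire(int gtid) {
       for (;;) {
         int expected = 0;
         if (poll.load(std::memory_order_relaxed) == 0 &&
             poll.compare_exchange_strong(expected, gtid + 1,
                                          std::memory_order_acquire))
           return;
         // back off / yield here under contention
       }
     }
*/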
69
70static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
71  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
72}
73
74static inline bool __kmp_is_nestable_tas_lock(kmp_tas_lock_t *lck) {
75 return lck->lk.depth_locked != -1;
76}
77
78__forceinline static int
79__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
80 KMP_MB();
81
82#ifdef USE_LOCK_PROFILE
83 kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
84 if ((curr != 0) && (curr != gtid + 1))
85 __kmp_printf("LOCK CONTENTION: %p\n", lck);
86/* else __kmp_printf( "." );*/
87#endif /* USE_LOCK_PROFILE */
88
89 kmp_int32 tas_free = KMP_LOCK_FREE(tas);
90 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
91
92 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
93 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
94    KMP_FSYNC_ACQUIRED(lck);
95    return KMP_LOCK_ACQUIRED_FIRST;
96  }
97
98 kmp_uint32 spins;
99  kmp_uint64 time;
100  KMP_FSYNC_PREPARE(lck);
101  KMP_INIT_YIELD(spins);
102  KMP_INIT_BACKOFF(time);
103  kmp_backoff_t backoff = __kmp_spin_backoff_params;
104  do {
105#if !KMP_HAVE_UMWAIT
106 __kmp_spin_backoff(&backoff);
107#else
108 if (!__kmp_tpause_enabled)
109 __kmp_spin_backoff(&backoff);
110#endif
111 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
112 } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
113 !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
114  KMP_FSYNC_ACQUIRED(lck);
115  return KMP_LOCK_ACQUIRED_FIRST;
116}
117
118int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
119 int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
120 return retval;
121}
122
123static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
124 kmp_int32 gtid) {
125 char const *const func = "omp_set_lock";
126  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
127      __kmp_is_nestable_tas_lock(lck)) {
128 KMP_FATAL(LockNestableUsedAsSimple, func);
129 }
130 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
131 KMP_FATAL(LockIsAlreadyOwned, func);
132 }
133 return __kmp_acquire_tas_lock(lck, gtid);
134}
135
136int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
137 kmp_int32 tas_free = KMP_LOCK_FREE(tas);
138 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
139 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
140 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
142 return TRUE;
143 }
144 return FALSE;
145}
146
147static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
148 kmp_int32 gtid) {
149 char const *const func = "omp_test_lock";
150  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
151      __kmp_is_nestable_tas_lock(lck)) {
152 KMP_FATAL(LockNestableUsedAsSimple, func);
153 }
154 return __kmp_test_tas_lock(lck, gtid);
155}
156
157int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
158 KMP_MB(); /* Flush all pending memory write invalidates. */
159
160  KMP_FSYNC_RELEASING(lck);
161 KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
162 KMP_MB(); /* Flush all pending memory write invalidates. */
163
165 return KMP_LOCK_RELEASED;
166}
167
168static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
169 kmp_int32 gtid) {
170 char const *const func = "omp_unset_lock";
171 KMP_MB(); /* in case another processor initialized lock */
172  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
173      __kmp_is_nestable_tas_lock(lck)) {
174 KMP_FATAL(LockNestableUsedAsSimple, func);
175 }
176 if (__kmp_get_tas_lock_owner(lck) == -1) {
177 KMP_FATAL(LockUnsettingFree, func);
178 }
179 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
180 (__kmp_get_tas_lock_owner(lck) != gtid)) {
181 KMP_FATAL(LockUnsettingSetByAnother, func);
182 }
183 return __kmp_release_tas_lock(lck, gtid);
184}
185
186void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
187 lck->lk.poll = KMP_LOCK_FREE(tas);
188}
189
190void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
191
192static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
193 char const *const func = "omp_destroy_lock";
194  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
195      __kmp_is_nestable_tas_lock(lck)) {
196 KMP_FATAL(LockNestableUsedAsSimple, func);
197 }
198 if (__kmp_get_tas_lock_owner(lck) != -1) {
199 KMP_FATAL(LockStillOwned, func);
200 }
201  __kmp_destroy_tas_lock(lck);
202}
203
204// nested test and set locks
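// Illustrative user-level view (not from this file): a nested lock counts
// re-acquisitions by the owning thread, which is what depth_locked tracks:
//
//   omp_set_nest_lock(&l);   // depth 1, lock acquired
//   omp_set_nest_lock(&l);   // same thread: depth 2, no blocking
//   omp_unset_nest_lock(&l); // depth 1, still held
//   omp_unset_nest_lock(&l); // depth 0, lock released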
205
207 KMP_DEBUG_ASSERT(gtid >= 0);
208
209 if (__kmp_get_tas_lock_owner(lck) == gtid) {
210 lck->lk.depth_locked += 1;
212 } else {
214 lck->lk.depth_locked = 1;
216 }
217}
218
220 kmp_int32 gtid) {
221 char const *const func = "omp_set_nest_lock";
223 KMP_FATAL(LockSimpleUsedAsNestable, func);
224 }
226}
227
229 int retval;
230
231 KMP_DEBUG_ASSERT(gtid >= 0);
232
233 if (__kmp_get_tas_lock_owner(lck) == gtid) {
234 retval = ++lck->lk.depth_locked;
235 } else if (!__kmp_test_tas_lock(lck, gtid)) {
236 retval = 0;
237 } else {
238 KMP_MB();
239 retval = lck->lk.depth_locked = 1;
240 }
241 return retval;
242}
243
245 kmp_int32 gtid) {
246 char const *const func = "omp_test_nest_lock";
248 KMP_FATAL(LockSimpleUsedAsNestable, func);
249 }
250 return __kmp_test_nested_tas_lock(lck, gtid);
251}
252
254 KMP_DEBUG_ASSERT(gtid >= 0);
255
256 KMP_MB();
257 if (--(lck->lk.depth_locked) == 0) {
259 return KMP_LOCK_RELEASED;
260 }
261 return KMP_LOCK_STILL_HELD;
262}
263
265 kmp_int32 gtid) {
266 char const *const func = "omp_unset_nest_lock";
267 KMP_MB(); /* in case another processor initialized lock */
269 KMP_FATAL(LockSimpleUsedAsNestable, func);
270 }
271 if (__kmp_get_tas_lock_owner(lck) == -1) {
272 KMP_FATAL(LockUnsettingFree, func);
273 }
274 if (__kmp_get_tas_lock_owner(lck) != gtid) {
275 KMP_FATAL(LockUnsettingSetByAnother, func);
276 }
278}
279
282 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
283}
284
287 lck->lk.depth_locked = 0;
288}
289
291 char const *const func = "omp_destroy_nest_lock";
293 KMP_FATAL(LockSimpleUsedAsNestable, func);
294 }
295 if (__kmp_get_tas_lock_owner(lck) != -1) {
296 KMP_FATAL(LockStillOwned, func);
297 }
299}
300
301#if KMP_USE_FUTEX
302
303/* ------------------------------------------------------------------------ */
304/* futex locks */
305
306// futex locks are really just test and set locks, with a different method
307// of handling contention. They take the same amount of space as test and
308// set locks, and are allocated the same way (i.e. use the area allocated by
309// the compiler for non-nested locks / allocate nested locks on the heap).
310
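/* Illustrative sketch (simplified, not the exact code below): the low bit of
   lk.poll means "there may be sleeping waiters", so the release path only
   pays for a futex system call when that bit is set:

     kmp_int32 old = KMP_XCHG_FIXED32(&lck->lk.poll, KMP_LOCK_FREE(futex));
     if (KMP_LOCK_STRIP(old) & 1)   // a waiter set the bit before sleeping
       syscall(__NR_futex, &lck->lk.poll, FUTEX_WAKE, 1, NULL, NULL, 0);
*/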
311static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
312 return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
313}
314
315static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
316 return lck->lk.depth_locked != -1;
317}
318
319__forceinline static int
320__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
321 kmp_int32 gtid_code = (gtid + 1) << 1;
322
323 KMP_MB();
324
325#ifdef USE_LOCK_PROFILE
326 kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
327 if ((curr != 0) && (curr != gtid_code))
328 __kmp_printf("LOCK CONTENTION: %p\n", lck);
329/* else __kmp_printf( "." );*/
330#endif /* USE_LOCK_PROFILE */
331
333 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
334 lck, lck->lk.poll, gtid));
335
336 kmp_int32 poll_val;
337
338 while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
339 &(lck->lk.poll), KMP_LOCK_FREE(futex),
340 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
341
342 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
343 KA_TRACE(
344 1000,
345 ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
346 lck, gtid, poll_val, cond));
347
348 // NOTE: if you try to use the following condition for this branch
349 //
350 // if ( poll_val & 1 == 0 )
351 //
352 // Then the 12.0 compiler has a bug where the following block will
353 // always be skipped, regardless of the value of the LSB of poll_val.
354 if (!cond) {
355 // Try to set the lsb in the poll to indicate to the owner
356 // thread that they need to wake this thread up.
357 if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
358 poll_val | KMP_LOCK_BUSY(1, futex))) {
359 KA_TRACE(
360 1000,
361 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
362 lck, lck->lk.poll, gtid));
363 continue;
364 }
365 poll_val |= KMP_LOCK_BUSY(1, futex);
366
367 KA_TRACE(1000,
368 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
369 lck->lk.poll, gtid));
370 }
371
372 KA_TRACE(
373 1000,
374 ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
375 lck, gtid, poll_val));
376
377 long rc;
378 if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
379 NULL, 0)) != 0) {
380 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
381 "failed (rc=%ld errno=%d)\n",
382 lck, gtid, poll_val, rc, errno));
383 continue;
384 }
385
386 KA_TRACE(1000,
387 ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
388 lck, gtid, poll_val));
389 // This thread has now done a successful futex wait call and was entered on
390 // the OS futex queue. We must now perform a futex wake call when releasing
391 // the lock, as we have no idea how many other threads are in the queue.
392 gtid_code |= 1;
393 }
394
396 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
397 lck->lk.poll, gtid));
399}
400
401int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
402 int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
403 return retval;
404}
405
406static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
407 kmp_int32 gtid) {
408 char const *const func = "omp_set_lock";
409 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
410 __kmp_is_futex_lock_nestable(lck)) {
411 KMP_FATAL(LockNestableUsedAsSimple, func);
412 }
413 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
414 KMP_FATAL(LockIsAlreadyOwned, func);
415 }
416 return __kmp_acquire_futex_lock(lck, gtid);
417}
418
419int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
420 if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
421 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
423 return TRUE;
424 }
425 return FALSE;
426}
427
428static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
429 kmp_int32 gtid) {
430 char const *const func = "omp_test_lock";
431 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
432 __kmp_is_futex_lock_nestable(lck)) {
433 KMP_FATAL(LockNestableUsedAsSimple, func);
434 }
435 return __kmp_test_futex_lock(lck, gtid);
436}
437
438int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
439 KMP_MB(); /* Flush all pending memory write invalidates. */
440
441 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
442 lck, lck->lk.poll, gtid));
443
445
446 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
447
448 KA_TRACE(1000,
449 ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
450 lck, gtid, poll_val));
451
452 if (KMP_LOCK_STRIP(poll_val) & 1) {
453 KA_TRACE(1000,
454 ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
455 lck, gtid));
456 syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
457 NULL, NULL, 0);
458 }
459
460 KMP_MB(); /* Flush all pending memory write invalidates. */
461
462 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
463 lck->lk.poll, gtid));
464
466 return KMP_LOCK_RELEASED;
467}
468
469static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
470 kmp_int32 gtid) {
471 char const *const func = "omp_unset_lock";
472 KMP_MB(); /* in case another processor initialized lock */
473 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
474 __kmp_is_futex_lock_nestable(lck)) {
475 KMP_FATAL(LockNestableUsedAsSimple, func);
476 }
477 if (__kmp_get_futex_lock_owner(lck) == -1) {
478 KMP_FATAL(LockUnsettingFree, func);
479 }
480 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
481 (__kmp_get_futex_lock_owner(lck) != gtid)) {
482 KMP_FATAL(LockUnsettingSetByAnother, func);
483 }
484 return __kmp_release_futex_lock(lck, gtid);
485}
486
487void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
488 TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
489}
490
491void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
492
493static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
494 char const *const func = "omp_destroy_lock";
495 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
496 __kmp_is_futex_lock_nestable(lck)) {
497 KMP_FATAL(LockNestableUsedAsSimple, func);
498 }
499 if (__kmp_get_futex_lock_owner(lck) != -1) {
500 KMP_FATAL(LockStillOwned, func);
501 }
502 __kmp_destroy_futex_lock(lck);
503}
504
505// nested futex locks
506
507int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
508 KMP_DEBUG_ASSERT(gtid >= 0);
509
510 if (__kmp_get_futex_lock_owner(lck) == gtid) {
511 lck->lk.depth_locked += 1;
513 } else {
514 __kmp_acquire_futex_lock_timed_template(lck, gtid);
515 lck->lk.depth_locked = 1;
517 }
518}
519
520static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
521 kmp_int32 gtid) {
522 char const *const func = "omp_set_nest_lock";
523 if (!__kmp_is_futex_lock_nestable(lck)) {
524 KMP_FATAL(LockSimpleUsedAsNestable, func);
525 }
526 return __kmp_acquire_nested_futex_lock(lck, gtid);
527}
528
529int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
530 int retval;
531
532 KMP_DEBUG_ASSERT(gtid >= 0);
533
534 if (__kmp_get_futex_lock_owner(lck) == gtid) {
535 retval = ++lck->lk.depth_locked;
536 } else if (!__kmp_test_futex_lock(lck, gtid)) {
537 retval = 0;
538 } else {
539 KMP_MB();
540 retval = lck->lk.depth_locked = 1;
541 }
542 return retval;
543}
544
545static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
546 kmp_int32 gtid) {
547 char const *const func = "omp_test_nest_lock";
548 if (!__kmp_is_futex_lock_nestable(lck)) {
549 KMP_FATAL(LockSimpleUsedAsNestable, func);
550 }
551 return __kmp_test_nested_futex_lock(lck, gtid);
552}
553
554int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
555 KMP_DEBUG_ASSERT(gtid >= 0);
556
557 KMP_MB();
558 if (--(lck->lk.depth_locked) == 0) {
559 __kmp_release_futex_lock(lck, gtid);
560 return KMP_LOCK_RELEASED;
561 }
562 return KMP_LOCK_STILL_HELD;
563}
564
565static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
566 kmp_int32 gtid) {
567 char const *const func = "omp_unset_nest_lock";
568 KMP_MB(); /* in case another processor initialized lock */
569 if (!__kmp_is_futex_lock_nestable(lck)) {
570 KMP_FATAL(LockSimpleUsedAsNestable, func);
571 }
572 if (__kmp_get_futex_lock_owner(lck) == -1) {
573 KMP_FATAL(LockUnsettingFree, func);
574 }
575 if (__kmp_get_futex_lock_owner(lck) != gtid) {
576 KMP_FATAL(LockUnsettingSetByAnother, func);
577 }
578 return __kmp_release_nested_futex_lock(lck, gtid);
579}
580
581void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
582 __kmp_init_futex_lock(lck);
583 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
584}
585
586void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
587 __kmp_destroy_futex_lock(lck);
588 lck->lk.depth_locked = 0;
589}
590
591static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
592 char const *const func = "omp_destroy_nest_lock";
593 if (!__kmp_is_futex_lock_nestable(lck)) {
594 KMP_FATAL(LockSimpleUsedAsNestable, func);
595 }
596 if (__kmp_get_futex_lock_owner(lck) != -1) {
597 KMP_FATAL(LockStillOwned, func);
598 }
599 __kmp_destroy_nested_futex_lock(lck);
600}
601
602#endif // KMP_USE_FUTEX
603
604/* ------------------------------------------------------------------------ */
605/* ticket (bakery) locks */
606
608 return std::atomic_load_explicit(&lck->lk.owner_id,
609 std::memory_order_relaxed) -
610 1;
611}
612
614 return std::atomic_load_explicit(&lck->lk.depth_locked,
615 std::memory_order_relaxed) != -1;
616}
617
618static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
619 return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
620 std::memory_order_acquire) == my_ticket;
621}
622
623__forceinline static int
625 kmp_int32 gtid) {
626 kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
627 &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
628
629#ifdef USE_LOCK_PROFILE
630 if (std::atomic_load_explicit(&lck->lk.now_serving,
631 std::memory_order_relaxed) != my_ticket)
632 __kmp_printf("LOCK CONTENTION: %p\n", lck);
633/* else __kmp_printf( "." );*/
634#endif /* USE_LOCK_PROFILE */
635
636 if (std::atomic_load_explicit(&lck->lk.now_serving,
637 std::memory_order_acquire) == my_ticket) {
639 }
640 KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
642}
643
646 return retval;
647}
648
650 kmp_int32 gtid) {
651 char const *const func = "omp_set_lock";
652
653 if (!std::atomic_load_explicit(&lck->lk.initialized,
654 std::memory_order_relaxed)) {
655 KMP_FATAL(LockIsUninitialized, func);
656 }
657 if (lck->lk.self != lck) {
658 KMP_FATAL(LockIsUninitialized, func);
659 }
661 KMP_FATAL(LockNestableUsedAsSimple, func);
662 }
663 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
664 KMP_FATAL(LockIsAlreadyOwned, func);
665 }
666
668
669 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
670 std::memory_order_relaxed);
672}
673
675 kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
676 std::memory_order_relaxed);
677
678 if (std::atomic_load_explicit(&lck->lk.now_serving,
679 std::memory_order_relaxed) == my_ticket) {
680 kmp_uint32 next_ticket = my_ticket + 1;
681 if (std::atomic_compare_exchange_strong_explicit(
682 &lck->lk.next_ticket, &my_ticket, next_ticket,
683 std::memory_order_acquire, std::memory_order_acquire)) {
684 return TRUE;
685 }
686 }
687 return FALSE;
688}
689
691 kmp_int32 gtid) {
692 char const *const func = "omp_test_lock";
693
694 if (!std::atomic_load_explicit(&lck->lk.initialized,
695 std::memory_order_relaxed)) {
696 KMP_FATAL(LockIsUninitialized, func);
697 }
698 if (lck->lk.self != lck) {
699 KMP_FATAL(LockIsUninitialized, func);
700 }
702 KMP_FATAL(LockNestableUsedAsSimple, func);
703 }
704
705 int retval = __kmp_test_ticket_lock(lck, gtid);
706
707 if (retval) {
708 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
709 std::memory_order_relaxed);
710 }
711 return retval;
712}
713
715 std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
716 std::memory_order_release);
717
718 return KMP_LOCK_RELEASED;
719}
720
722 kmp_int32 gtid) {
723 char const *const func = "omp_unset_lock";
724
725 if (!std::atomic_load_explicit(&lck->lk.initialized,
726 std::memory_order_relaxed)) {
727 KMP_FATAL(LockIsUninitialized, func);
728 }
729 if (lck->lk.self != lck) {
730 KMP_FATAL(LockIsUninitialized, func);
731 }
733 KMP_FATAL(LockNestableUsedAsSimple, func);
734 }
735 if (__kmp_get_ticket_lock_owner(lck) == -1) {
736 KMP_FATAL(LockUnsettingFree, func);
737 }
738 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
739 (__kmp_get_ticket_lock_owner(lck) != gtid)) {
740 KMP_FATAL(LockUnsettingSetByAnother, func);
741 }
742 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
743 return __kmp_release_ticket_lock(lck, gtid);
744}
745
747 lck->lk.location = NULL;
748 lck->lk.self = lck;
749 std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
750 std::memory_order_relaxed);
751 std::atomic_store_explicit(&lck->lk.now_serving, 0U,
752 std::memory_order_relaxed);
753 std::atomic_store_explicit(
754 &lck->lk.owner_id, 0,
755 std::memory_order_relaxed); // no thread owns the lock.
756 std::atomic_store_explicit(
757 &lck->lk.depth_locked, -1,
758 std::memory_order_relaxed); // -1 => not a nested lock.
759 std::atomic_store_explicit(&lck->lk.initialized, true,
760 std::memory_order_release);
761}
762
764 std::atomic_store_explicit(&lck->lk.initialized, false,
765 std::memory_order_release);
766 lck->lk.self = NULL;
767 lck->lk.location = NULL;
768 std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
769 std::memory_order_relaxed);
770 std::atomic_store_explicit(&lck->lk.now_serving, 0U,
771 std::memory_order_relaxed);
772 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
773 std::atomic_store_explicit(&lck->lk.depth_locked, -1,
774 std::memory_order_relaxed);
775}
776
778 char const *const func = "omp_destroy_lock";
779
780 if (!std::atomic_load_explicit(&lck->lk.initialized,
781 std::memory_order_relaxed)) {
782 KMP_FATAL(LockIsUninitialized, func);
783 }
784 if (lck->lk.self != lck) {
785 KMP_FATAL(LockIsUninitialized, func);
786 }
788 KMP_FATAL(LockNestableUsedAsSimple, func);
789 }
790 if (__kmp_get_ticket_lock_owner(lck) != -1) {
791 KMP_FATAL(LockStillOwned, func);
792 }
794}
795
796// nested ticket locks
797
799 KMP_DEBUG_ASSERT(gtid >= 0);
800
801 if (__kmp_get_ticket_lock_owner(lck) == gtid) {
802 std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
803 std::memory_order_relaxed);
805 } else {
807 std::atomic_store_explicit(&lck->lk.depth_locked, 1,
808 std::memory_order_relaxed);
809 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
810 std::memory_order_relaxed);
812 }
813}
814
816 kmp_int32 gtid) {
817 char const *const func = "omp_set_nest_lock";
818
819 if (!std::atomic_load_explicit(&lck->lk.initialized,
820 std::memory_order_relaxed)) {
821 KMP_FATAL(LockIsUninitialized, func);
822 }
823 if (lck->lk.self != lck) {
824 KMP_FATAL(LockIsUninitialized, func);
825 }
827 KMP_FATAL(LockSimpleUsedAsNestable, func);
828 }
830}
831
833 int retval;
834
835 KMP_DEBUG_ASSERT(gtid >= 0);
836
837 if (__kmp_get_ticket_lock_owner(lck) == gtid) {
838 retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
839 std::memory_order_relaxed) +
840 1;
841 } else if (!__kmp_test_ticket_lock(lck, gtid)) {
842 retval = 0;
843 } else {
844 std::atomic_store_explicit(&lck->lk.depth_locked, 1,
845 std::memory_order_relaxed);
846 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
847 std::memory_order_relaxed);
848 retval = 1;
849 }
850 return retval;
851}
852
854 kmp_int32 gtid) {
855 char const *const func = "omp_test_nest_lock";
856
857 if (!std::atomic_load_explicit(&lck->lk.initialized,
858 std::memory_order_relaxed)) {
859 KMP_FATAL(LockIsUninitialized, func);
860 }
861 if (lck->lk.self != lck) {
862 KMP_FATAL(LockIsUninitialized, func);
863 }
865 KMP_FATAL(LockSimpleUsedAsNestable, func);
866 }
868}
869
871 KMP_DEBUG_ASSERT(gtid >= 0);
872
873 if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
874 std::memory_order_relaxed) -
875 1) == 0) {
876 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
878 return KMP_LOCK_RELEASED;
879 }
880 return KMP_LOCK_STILL_HELD;
881}
882
884 kmp_int32 gtid) {
885 char const *const func = "omp_unset_nest_lock";
886
887 if (!std::atomic_load_explicit(&lck->lk.initialized,
888 std::memory_order_relaxed)) {
889 KMP_FATAL(LockIsUninitialized, func);
890 }
891 if (lck->lk.self != lck) {
892 KMP_FATAL(LockIsUninitialized, func);
893 }
895 KMP_FATAL(LockSimpleUsedAsNestable, func);
896 }
897 if (__kmp_get_ticket_lock_owner(lck) == -1) {
898 KMP_FATAL(LockUnsettingFree, func);
899 }
900 if (__kmp_get_ticket_lock_owner(lck) != gtid) {
901 KMP_FATAL(LockUnsettingSetByAnother, func);
902 }
904}
905
908 std::atomic_store_explicit(&lck->lk.depth_locked, 0,
909 std::memory_order_relaxed);
910 // >= 0 for nestable locks, -1 for simple locks
911}
912
915 std::atomic_store_explicit(&lck->lk.depth_locked, 0,
916 std::memory_order_relaxed);
917}
918
919static void
921 char const *const func = "omp_destroy_nest_lock";
922
923 if (!std::atomic_load_explicit(&lck->lk.initialized,
924 std::memory_order_relaxed)) {
925 KMP_FATAL(LockIsUninitialized, func);
926 }
927 if (lck->lk.self != lck) {
928 KMP_FATAL(LockIsUninitialized, func);
929 }
931 KMP_FATAL(LockSimpleUsedAsNestable, func);
932 }
933 if (__kmp_get_ticket_lock_owner(lck) != -1) {
934 KMP_FATAL(LockStillOwned, func);
935 }
937}
938
939// access functions to fields which don't exist for all lock kinds.
940
942 return lck->lk.location;
943}
944
946 const ident_t *loc) {
947 lck->lk.location = loc;
948}
949
953
955 kmp_lock_flags_t flags) {
956 lck->lk.flags = flags;
957}
958
959/* ------------------------------------------------------------------------ */
960/* queuing locks */
961
962/* First the states
963 (head,tail) = 0, 0 means lock is unheld, nobody on queue
964 UINT_MAX or -1, 0 means lock is held, nobody on queue
965 h, h means lock held or about to transition,
966 1 element on queue
967 h, t h <> t, means lock is held or about to
968 transition, >1 elements on queue
969
970 Now the transitions
971 Acquire(0,0) = -1 ,0
972 Release(0,0) = Error
973 Acquire(-1,0) = h ,h h > 0
974 Release(-1,0) = 0 ,0
975 Acquire(h,h) = h ,t h > 0, t > 0, h <> t
976 Release(h,h) = -1 ,0 h > 0
977 Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
978 Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
979
980 And pictorially
981
982 +-----+
983 | 0, 0|------- release -------> Error
984 +-----+
985 | ^
986 acquire| |release
987 | |
988 | |
989 v |
990 +-----+
991 |-1, 0|
992 +-----+
993 | ^
994 acquire| |release
995 | |
996 | |
997 v |
998 +-----+
999 | h, h|
1000 +-----+
1001 | ^
1002 acquire| |release
1003 | |
1004 | |
1005 v |
1006 +-----+
1007 | h, t|----- acquire, release loopback ---+
1008 +-----+ |
1009 ^ |
1010 | |
1011 +------------------------------------+
1012 */
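/* Worked example (illustrative) of the transitions above, for three threads
   whose gtid+1 values are 2, 3 and 4 contending on an initially free lock:

     (0,0)  -- T1 acquires -->  (-1,0)   T1 holds, queue empty
     (-1,0) -- T2 acquires -->  (3,3)    T1 still holds, T2 queued (head==tail)
     (3,3)  -- T3 acquires -->  (3,4)    T2 and T3 queued
     (3,4)  -- T1 releases -->  (4,4)    T2 dequeued, T2 now holds the lock
     (4,4)  -- T2 releases -->  (-1,0)   T3 dequeued, T3 now holds the lock
     (-1,0) -- T3 releases -->  (0,0)    lock is free again
*/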
1013
1014#ifdef DEBUG_QUEUING_LOCKS
1015
1016/* Stuff for circular trace buffer */
1017#define TRACE_BUF_ELE 1024
1018static char traces[TRACE_BUF_ELE][128] = {0};
1019static int tc = 0;
1020#define TRACE_LOCK(X, Y) \
1021 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1022#define TRACE_LOCK_T(X, Y, Z) \
1023 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1024#define TRACE_LOCK_HT(X, Y, Z, Q) \
1025 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \
1026 Z, Q);
1027
1028static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1030 kmp_int32 tail_id) {
1031 kmp_int32 t, i;
1032
1033 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1034
1035 i = tc % TRACE_BUF_ELE;
1036 __kmp_printf_no_lock("%s\n", traces[i]);
1037 i = (i + 1) % TRACE_BUF_ELE;
1038 while (i != (tc % TRACE_BUF_ELE)) {
1039 __kmp_printf_no_lock("%s", traces[i]);
1040 i = (i + 1) % TRACE_BUF_ELE;
1041 }
1043
1044 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1045 "next_wait:%d, head_id:%d, tail_id:%d\n",
1046 gtid + 1, this_thr->th.th_spin_here,
1047 this_thr->th.th_next_waiting, head_id, tail_id);
1048
1049 __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1050
1051 if (lck->lk.head_id >= 1) {
1052 t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1053 while (t > 0) {
1054 __kmp_printf_no_lock("-> %d ", t);
1055 t = __kmp_threads[t - 1]->th.th_next_waiting;
1056 }
1057 }
1058 __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
1059 __kmp_printf_no_lock("\n\n");
1060}
1061
1062#endif /* DEBUG_QUEUING_LOCKS */
1063
1065 return TCR_4(lck->lk.owner_id) - 1;
1066}
1067
1069 return lck->lk.depth_locked != -1;
1070}
1071
1072/* Acquire a lock using the queuing lock implementation */
1073template <bool takeTime>
1074/* [TLW] The unused template above is left behind because of what BEB believes
1075 is a potential compiler problem with __forceinline. */
1076__forceinline static int
1077__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1078 kmp_int32 gtid) {
1079 kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1080 volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1081 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1082 volatile kmp_uint32 *spin_here_p;
1083
1084#if OMPT_SUPPORT
1085 ompt_state_t prev_state = ompt_state_undefined;
1086#endif
1087
1088 KA_TRACE(1000,
1089 ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1090
1092 KMP_DEBUG_ASSERT(this_thr != NULL);
1093 spin_here_p = &this_thr->th.th_spin_here;
1094
1095#ifdef DEBUG_QUEUING_LOCKS
1096 TRACE_LOCK(gtid + 1, "acq ent");
1097 if (*spin_here_p)
1098 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1099 if (this_thr->th.th_next_waiting != 0)
1100 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1101#endif
1102 KMP_DEBUG_ASSERT(!*spin_here_p);
1103 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1104
1105 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1106 head_id_p that may follow, not just in execution order, but also in
1107 visibility order. This way, when a releasing thread observes the changes to
1108 the queue by this thread, it can rightly assume that spin_here_p has
1109 already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1110 not premature. If the releasing thread sets spin_here_p to FALSE before
1111 this thread sets it to TRUE, this thread will hang. */
1112 *spin_here_p = TRUE; /* before enqueuing to prevent race */
1113
1114 while (1) {
1115 kmp_int32 enqueued;
1116    kmp_int32 head;
1117    kmp_int32 tail;
1118
1119 head = *head_id_p;
1120
1121 switch (head) {
1122
1123 case -1: {
1124#ifdef DEBUG_QUEUING_LOCKS
1125 tail = *tail_id_p;
1126 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1127#endif
1128 tail = 0; /* to make sure next link asynchronously read is not set
1129 accidentally; this assignment prevents us from entering the
1130 if ( t > 0 ) condition in the enqueued case below, which is not
1131 necessary for this state transition */
1132
1133 /* try (-1,0)->(tid,tid) */
1134 enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1135 KMP_PACK_64(-1, 0),
1136 KMP_PACK_64(gtid + 1, gtid + 1));
1137#ifdef DEBUG_QUEUING_LOCKS
1138 if (enqueued)
1139 TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1140#endif
1141 } break;
1142
1143 default: {
1144 tail = *tail_id_p;
1145 KMP_DEBUG_ASSERT(tail != gtid + 1);
1146
1147#ifdef DEBUG_QUEUING_LOCKS
1148 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1149#endif
1150
1151 if (tail == 0) {
1152 enqueued = FALSE;
1153 } else {
1154 /* try (h,t) or (h,h)->(h,tid) */
1155 enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1156
1157#ifdef DEBUG_QUEUING_LOCKS
1158 if (enqueued)
1159 TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1160#endif
1161 }
1162 } break;
1163
1164 case 0: /* empty queue */
1165 {
1166 kmp_int32 grabbed_lock;
1167
1168#ifdef DEBUG_QUEUING_LOCKS
1169 tail = *tail_id_p;
1170 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1171#endif
1172 /* try (0,0)->(-1,0) */
1173
1174 /* only legal transition out of head = 0 is head = -1 with no change to
1175 * tail */
1176 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1177
1178 if (grabbed_lock) {
1179
1180 *spin_here_p = FALSE;
1181
1182 KA_TRACE(
1183 1000,
1184 ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1185 lck, gtid));
1186#ifdef DEBUG_QUEUING_LOCKS
1187 TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1188#endif
1189
1190#if OMPT_SUPPORT
1191 if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1192 /* change the state before clearing wait_id */
1193 this_thr->th.ompt_thread_info.state = prev_state;
1194 this_thr->th.ompt_thread_info.wait_id = 0;
1195 }
1196#endif
1197
1199 return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1200 }
1201 enqueued = FALSE;
1202 } break;
1203 }
1204
1205#if OMPT_SUPPORT
1206 if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1207 /* this thread will spin; set wait_id before entering wait state */
1208 prev_state = this_thr->th.ompt_thread_info.state;
1209 this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1210 this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1211 }
1212#endif
1213
1214 if (enqueued) {
1215 if (tail > 0) {
1216 kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1217 KMP_ASSERT(tail_thr != NULL);
1218 tail_thr->th.th_next_waiting = gtid + 1;
1219 /* corresponding wait for this write in release code */
1220 }
1221 KA_TRACE(1000,
1222 ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1223 lck, gtid));
1224
1225 KMP_MB();
1226 // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1227 KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1228 // Synchronize writes to both runtime thread structures
1229 // and writes in user code.
1230 KMP_MB();
1231
1232#ifdef DEBUG_QUEUING_LOCKS
1233 TRACE_LOCK(gtid + 1, "acq spin");
1234
1235 if (this_thr->th.th_next_waiting != 0)
1236 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1237#endif
1238 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1239 KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1240 "waiting on queue\n",
1241 lck, gtid));
1242
1243#ifdef DEBUG_QUEUING_LOCKS
1244 TRACE_LOCK(gtid + 1, "acq exit 2");
1245#endif
1246
1247#if OMPT_SUPPORT
1248 /* change the state before clearing wait_id */
1249 this_thr->th.ompt_thread_info.state = prev_state;
1250 this_thr->th.ompt_thread_info.wait_id = 0;
1251#endif
1252
1253 /* got lock, we were dequeued by the thread that released lock */
1255 }
1256
1257 /* Yield if number of threads > number of logical processors */
1258 /* ToDo: Not sure why this should only be in oversubscription case,
1259 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1260    KMP_YIELD_OVERSUB();
1261
1262#ifdef DEBUG_QUEUING_LOCKS
1263 TRACE_LOCK(gtid + 1, "acq retry");
1264#endif
1265 }
1266 KMP_ASSERT2(0, "should not get here");
1267  return KMP_LOCK_ACQUIRED_FIRST;
1268}
1269
1271 KMP_DEBUG_ASSERT(gtid >= 0);
1272
1274 return retval;
1275}
1276
1278 kmp_int32 gtid) {
1279 char const *const func = "omp_set_lock";
1280 if (lck->lk.initialized != lck) {
1281 KMP_FATAL(LockIsUninitialized, func);
1282 }
1284 KMP_FATAL(LockNestableUsedAsSimple, func);
1285 }
1286 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1287 KMP_FATAL(LockIsAlreadyOwned, func);
1288 }
1289
1291
1292 lck->lk.owner_id = gtid + 1;
1294}
1295
1297 volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1299#ifdef KMP_DEBUG
1300 kmp_info_t *this_thr;
1301#endif
1302
1303 KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1304 KMP_DEBUG_ASSERT(gtid >= 0);
1305#ifdef KMP_DEBUG
1306 this_thr = __kmp_thread_from_gtid(gtid);
1307 KMP_DEBUG_ASSERT(this_thr != NULL);
1308 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1309#endif
1310
1311 head = *head_id_p;
1312
1313 if (head == 0) { /* nobody on queue, nobody holding */
1314 /* try (0,0)->(-1,0) */
1315 if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1316 KA_TRACE(1000,
1317 ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1319 return TRUE;
1320 }
1321 }
1322
1323 KA_TRACE(1000,
1324 ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1325 return FALSE;
1326}
1327
1329 kmp_int32 gtid) {
1330 char const *const func = "omp_test_lock";
1331 if (lck->lk.initialized != lck) {
1332 KMP_FATAL(LockIsUninitialized, func);
1333 }
1335 KMP_FATAL(LockNestableUsedAsSimple, func);
1336 }
1337
1338 int retval = __kmp_test_queuing_lock(lck, gtid);
1339
1340 if (retval) {
1341 lck->lk.owner_id = gtid + 1;
1342 }
1343 return retval;
1344}
1345
1347 volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1348 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1349
1350 KA_TRACE(1000,
1351 ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1352 KMP_DEBUG_ASSERT(gtid >= 0);
1353#if KMP_DEBUG || DEBUG_QUEUING_LOCKS
1354 kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1355#endif
1356 KMP_DEBUG_ASSERT(this_thr != NULL);
1357#ifdef DEBUG_QUEUING_LOCKS
1358 TRACE_LOCK(gtid + 1, "rel ent");
1359
1360 if (this_thr->th.th_spin_here)
1361 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1362 if (this_thr->th.th_next_waiting != 0)
1363 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1364#endif
1365 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1366 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1367
1369
1370 while (1) {
1371 kmp_int32 dequeued;
1374
1375 head = *head_id_p;
1376
1377#ifdef DEBUG_QUEUING_LOCKS
1378 tail = *tail_id_p;
1379 TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1380 if (head == 0)
1381 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1382#endif
1383    KMP_DEBUG_ASSERT(head !=
1384                     0); /* holding the lock, head must be -1 or queue head */
1385
1386 if (head == -1) { /* nobody on queue */
1387 /* try (-1,0)->(0,0) */
1388 if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1389 KA_TRACE(
1390 1000,
1391 ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1392 lck, gtid));
1393#ifdef DEBUG_QUEUING_LOCKS
1394 TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1395#endif
1396
1397#if OMPT_SUPPORT
1398/* nothing to do - no other thread is trying to shift blame */
1399#endif
1400 return KMP_LOCK_RELEASED;
1401 }
1402 dequeued = FALSE;
1403 } else {
1404 KMP_MB();
1405 tail = *tail_id_p;
1406 if (head == tail) { /* only one thread on the queue */
1407#ifdef DEBUG_QUEUING_LOCKS
1408 if (head <= 0)
1409 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1410#endif
1412
1413 /* try (h,h)->(-1,0) */
1414 dequeued = KMP_COMPARE_AND_STORE_REL64(
1415 RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1416 KMP_PACK_64(-1, 0));
1417#ifdef DEBUG_QUEUING_LOCKS
1418 TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1419#endif
1420
1421 } else {
1422 volatile kmp_int32 *waiting_id_p;
1423 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1424 KMP_DEBUG_ASSERT(head_thr != NULL);
1425 waiting_id_p = &head_thr->th.th_next_waiting;
1426
1427/* Does this require synchronous reads? */
1428#ifdef DEBUG_QUEUING_LOCKS
1429 if (head <= 0 || tail <= 0)
1430 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1431#endif
1432 KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1433
1434 /* try (h,t)->(h',t) or (t,t) */
1435 KMP_MB();
1436 /* make sure enqueuing thread has time to update next waiting thread
1437 * field */
1438 *head_id_p =
1439 KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
1440#ifdef DEBUG_QUEUING_LOCKS
1441 TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1442#endif
1443 dequeued = TRUE;
1444 }
1445 }
1446
1447 if (dequeued) {
1448 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1449 KMP_DEBUG_ASSERT(head_thr != NULL);
1450
1451/* Does this require synchronous reads? */
1452#ifdef DEBUG_QUEUING_LOCKS
1453 if (head <= 0 || tail <= 0)
1454 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1455#endif
1456 KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1457
1458 /* For clean code only. Thread not released until next statement prevents
1459 race with acquire code. */
1460 head_thr->th.th_next_waiting = 0;
1461#ifdef DEBUG_QUEUING_LOCKS
1462 TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1463#endif
1464
1465 KMP_MB();
1466 /* reset spin value */
1467 head_thr->th.th_spin_here = FALSE;
1468
1469 KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1470 "dequeuing\n",
1471 lck, gtid));
1472#ifdef DEBUG_QUEUING_LOCKS
1473 TRACE_LOCK(gtid + 1, "rel exit 2");
1474#endif
1475 return KMP_LOCK_RELEASED;
1476 }
1477 /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1478 threads */
1479
1480#ifdef DEBUG_QUEUING_LOCKS
1481 TRACE_LOCK(gtid + 1, "rel retry");
1482#endif
1483
1484 } /* while */
1485 KMP_ASSERT2(0, "should not get here");
1486 return KMP_LOCK_RELEASED;
1487}
1488
1490 kmp_int32 gtid) {
1491 char const *const func = "omp_unset_lock";
1492 KMP_MB(); /* in case another processor initialized lock */
1493 if (lck->lk.initialized != lck) {
1494 KMP_FATAL(LockIsUninitialized, func);
1495 }
1497 KMP_FATAL(LockNestableUsedAsSimple, func);
1498 }
1499 if (__kmp_get_queuing_lock_owner(lck) == -1) {
1500 KMP_FATAL(LockUnsettingFree, func);
1501 }
1502 if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1503 KMP_FATAL(LockUnsettingSetByAnother, func);
1504 }
1505 lck->lk.owner_id = 0;
1506 return __kmp_release_queuing_lock(lck, gtid);
1507}
1508
1510 lck->lk.location = NULL;
1511 lck->lk.head_id = 0;
1512 lck->lk.tail_id = 0;
1513 lck->lk.next_ticket = 0;
1514 lck->lk.now_serving = 0;
1515 lck->lk.owner_id = 0; // no thread owns the lock.
1516 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1517 lck->lk.initialized = lck;
1518
1519 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1520}
1521
1523 lck->lk.initialized = NULL;
1524 lck->lk.location = NULL;
1525 lck->lk.head_id = 0;
1526 lck->lk.tail_id = 0;
1527 lck->lk.next_ticket = 0;
1528 lck->lk.now_serving = 0;
1529 lck->lk.owner_id = 0;
1530 lck->lk.depth_locked = -1;
1531}
1532
1534 char const *const func = "omp_destroy_lock";
1535 if (lck->lk.initialized != lck) {
1536 KMP_FATAL(LockIsUninitialized, func);
1537 }
1539 KMP_FATAL(LockNestableUsedAsSimple, func);
1540 }
1541 if (__kmp_get_queuing_lock_owner(lck) != -1) {
1542 KMP_FATAL(LockStillOwned, func);
1543 }
1545}
1546
1547// nested queuing locks
1548
1550 KMP_DEBUG_ASSERT(gtid >= 0);
1551
1552 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1553 lck->lk.depth_locked += 1;
1555 } else {
1557 KMP_MB();
1558 lck->lk.depth_locked = 1;
1559 KMP_MB();
1560 lck->lk.owner_id = gtid + 1;
1562 }
1563}
1564
1565static int
1567 kmp_int32 gtid) {
1568 char const *const func = "omp_set_nest_lock";
1569 if (lck->lk.initialized != lck) {
1570 KMP_FATAL(LockIsUninitialized, func);
1571 }
1573 KMP_FATAL(LockSimpleUsedAsNestable, func);
1574 }
1576}
1577
1579 int retval;
1580
1581 KMP_DEBUG_ASSERT(gtid >= 0);
1582
1583 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1584 retval = ++lck->lk.depth_locked;
1585 } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1586 retval = 0;
1587 } else {
1588 KMP_MB();
1589 retval = lck->lk.depth_locked = 1;
1590 KMP_MB();
1591 lck->lk.owner_id = gtid + 1;
1592 }
1593 return retval;
1594}
1595
1597 kmp_int32 gtid) {
1598 char const *const func = "omp_test_nest_lock";
1599 if (lck->lk.initialized != lck) {
1600 KMP_FATAL(LockIsUninitialized, func);
1601 }
1603 KMP_FATAL(LockSimpleUsedAsNestable, func);
1604 }
1605 return __kmp_test_nested_queuing_lock(lck, gtid);
1606}
1607
1609 KMP_DEBUG_ASSERT(gtid >= 0);
1610
1611 KMP_MB();
1612 if (--(lck->lk.depth_locked) == 0) {
1613 KMP_MB();
1614 lck->lk.owner_id = 0;
1616 return KMP_LOCK_RELEASED;
1617 }
1618 return KMP_LOCK_STILL_HELD;
1619}
1620
1621static int
1623 kmp_int32 gtid) {
1624 char const *const func = "omp_unset_nest_lock";
1625 KMP_MB(); /* in case another processor initialized lock */
1626 if (lck->lk.initialized != lck) {
1627 KMP_FATAL(LockIsUninitialized, func);
1628 }
1630 KMP_FATAL(LockSimpleUsedAsNestable, func);
1631 }
1632 if (__kmp_get_queuing_lock_owner(lck) == -1) {
1633 KMP_FATAL(LockUnsettingFree, func);
1634 }
1635 if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1636 KMP_FATAL(LockUnsettingSetByAnother, func);
1637 }
1639}
1640
1643 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1644}
1645
1650
1651static void
1653 char const *const func = "omp_destroy_nest_lock";
1654 if (lck->lk.initialized != lck) {
1655 KMP_FATAL(LockIsUninitialized, func);
1656 }
1658 KMP_FATAL(LockSimpleUsedAsNestable, func);
1659 }
1660 if (__kmp_get_queuing_lock_owner(lck) != -1) {
1661 KMP_FATAL(LockStillOwned, func);
1662 }
1664}
1665
1666// access functions to fields which don't exist for all lock kinds.
1667
1669 return lck->lk.location;
1670}
1671
1673 const ident_t *loc) {
1674 lck->lk.location = loc;
1675}
1676
1680
1682 kmp_lock_flags_t flags) {
1683 lck->lk.flags = flags;
1684}
1685
1686#if KMP_USE_ADAPTIVE_LOCKS
1687
1688/* RTM Adaptive locks */
1689
1690#if KMP_HAVE_RTM_INTRINSICS
1691#include <immintrin.h>
1692#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1693
1694#else
1695
1696// Values from the status register after failed speculation.
1697#define _XBEGIN_STARTED (~0u)
1698#define _XABORT_EXPLICIT (1 << 0)
1699#define _XABORT_RETRY (1 << 1)
1700#define _XABORT_CONFLICT (1 << 2)
1701#define _XABORT_CAPACITY (1 << 3)
1702#define _XABORT_DEBUG (1 << 4)
1703#define _XABORT_NESTED (1 << 5)
1704#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1705
1706// Aborts for which it's worth trying again immediately
1707#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1708
1709#define STRINGIZE_INTERNAL(arg) #arg
1710#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1711
1712// Access to RTM instructions
1713/*A version of XBegin which returns -1 on speculation, and the value of EAX on
1714 an abort. This is the same definition as the compiler intrinsic that will be
1715 supported at some point. */
1716static __inline int _xbegin() {
1717 int res = -1;
1718
1719#if KMP_OS_WINDOWS
1720#if KMP_ARCH_X86_64
1721 _asm {
1722 _emit 0xC7
1723 _emit 0xF8
1724 _emit 2
1725 _emit 0
1726 _emit 0
1727 _emit 0
1728 jmp L2
1729 mov res, eax
1730 L2:
1731 }
1732#else /* IA32 */
1733 _asm {
1734 _emit 0xC7
1735 _emit 0xF8
1736 _emit 2
1737 _emit 0
1738 _emit 0
1739 _emit 0
1740 jmp L2
1741 mov res, eax
1742 L2:
1743 }
1744#endif // KMP_ARCH_X86_64
1745#else
1746 /* Note that %eax must be noted as killed (clobbered), because the XSR is
1747 returned in %eax(%rax) on abort. Other register values are restored, so
1748 don't need to be killed.
1749
1750 We must also mark 'res' as an input and an output, since otherwise
1751 'res=-1' may be dropped as being dead, whereas we do need the assignment on
1752 the successful (i.e., non-abort) path. */
1753 __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n"
1754 " .long 1f-1b-6\n"
1755 " jmp 2f\n"
1756 "1: movl %%eax,%0\n"
1757 "2:"
1758 : "+r"(res)::"memory", "%eax");
1759#endif // KMP_OS_WINDOWS
1760 return res;
1761}
1762
1763/* Transaction end */
1764static __inline void _xend() {
1765#if KMP_OS_WINDOWS
1766 __asm {
1767 _emit 0x0f
1768 _emit 0x01
1769 _emit 0xd5
1770 }
1771#else
1772 __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1773#endif
1774}
1775
1776/* This is a macro, the argument must be a single byte constant which can be
1777 evaluated by the inline assembler, since it is emitted as a byte into the
1778 assembly code. */
1779// clang-format off
1780#if KMP_OS_WINDOWS
1781#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1782#else
1783#define _xabort(ARG) \
1784 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1785#endif
1786// clang-format on
1787#endif // KMP_HAVE_RTM_INTRINSICS
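/* Illustrative usage pattern (not code from this file) for the RTM primitives
   above; "lock_is_visibly_held" stands for a check such as
   !__kmp_is_unlocked_queuing_lock(). The adaptive lock below splits _xbegin
   and _xend across its acquire and release paths:

     unsigned status = _xbegin();
     if (status == _XBEGIN_STARTED) {
       if (lock_is_visibly_held)   // brings the lock word into the read-set
         _xabort(0x01);            // owned for real: abort the transaction
       // ...speculative critical section...
       _xend();                    // commit
     } else if (status & SOFT_ABORT_MASK) {
       // transient abort (retry/conflict/explicit): worth trying again
     } else {
       // hard abort (e.g. capacity): fall back to the real lock
     }
*/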
1788
1789// Statistics are collected for testing purposes
1790#if KMP_DEBUG_ADAPTIVE_LOCKS
1791
1792// We accumulate speculative lock statistics when the lock is destroyed. We
1793// keep locks that haven't been destroyed in the liveLocks list so that we can
1794// grab their statistics too.
1795static kmp_adaptive_lock_statistics_t destroyedStats;
1796
1797// To hold the list of live locks.
1798static kmp_adaptive_lock_info_t liveLocks;
1799
1800// A lock so we can safely update the list of locks.
1801static kmp_bootstrap_lock_t chain_lock =
1803
1804// Initialize the list of stats.
1805void __kmp_init_speculative_stats() {
1806 kmp_adaptive_lock_info_t *lck = &liveLocks;
1807
1808 memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1809 sizeof(lck->stats));
1810 lck->stats.next = lck;
1811 lck->stats.prev = lck;
1812
1813 KMP_ASSERT(lck->stats.next->stats.prev == lck);
1814 KMP_ASSERT(lck->stats.prev->stats.next == lck);
1815
1816 __kmp_init_bootstrap_lock(&chain_lock);
1817}
1818
1819// Insert the lock into the circular list
1820static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1821 __kmp_acquire_bootstrap_lock(&chain_lock);
1822
1823 lck->stats.next = liveLocks.stats.next;
1824 lck->stats.prev = &liveLocks;
1825
1826 liveLocks.stats.next = lck;
1827 lck->stats.next->stats.prev = lck;
1828
1829 KMP_ASSERT(lck->stats.next->stats.prev == lck);
1830 KMP_ASSERT(lck->stats.prev->stats.next == lck);
1831
1832 __kmp_release_bootstrap_lock(&chain_lock);
1833}
1834
1835static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1836 KMP_ASSERT(lck->stats.next->stats.prev == lck);
1837 KMP_ASSERT(lck->stats.prev->stats.next == lck);
1838
1839 kmp_adaptive_lock_info_t *n = lck->stats.next;
1840 kmp_adaptive_lock_info_t *p = lck->stats.prev;
1841
1842 n->stats.prev = p;
1843 p->stats.next = n;
1844}
1845
1846static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1847 memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1848 sizeof(lck->stats));
1849 __kmp_remember_lock(lck);
1850}
1851
1852static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1853 kmp_adaptive_lock_info_t *lck) {
1854 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1855
1856 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1857 t->successfulSpeculations += s->successfulSpeculations;
1858 t->hardFailedSpeculations += s->hardFailedSpeculations;
1859 t->softFailedSpeculations += s->softFailedSpeculations;
1860 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1861 t->lemmingYields += s->lemmingYields;
1862}
1863
1864static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1865 __kmp_acquire_bootstrap_lock(&chain_lock);
1866
1867 __kmp_add_stats(&destroyedStats, lck);
1868 __kmp_forget_lock(lck);
1869
1870 __kmp_release_bootstrap_lock(&chain_lock);
1871}
1872
1873static float percent(kmp_uint32 count, kmp_uint32 total) {
1874 return (total == 0) ? 0.0 : (100.0 * count) / total;
1875}
1876
1877void __kmp_print_speculative_stats() {
1878 kmp_adaptive_lock_statistics_t total = destroyedStats;
1879 kmp_adaptive_lock_info_t *lck;
1880
1881 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1882 __kmp_add_stats(&total, lck);
1883 }
1884 kmp_adaptive_lock_statistics_t *t = &total;
1885 kmp_uint32 totalSections =
1886 t->nonSpeculativeAcquires + t->successfulSpeculations;
1887 kmp_uint32 totalSpeculations = t->successfulSpeculations +
1888 t->hardFailedSpeculations +
1889 t->softFailedSpeculations;
1890 if (totalSections <= 0)
1891 return;
1892
1893 kmp_safe_raii_file_t statsFile;
1894 if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
1895 statsFile.set_stdout();
1896 } else {
1897 size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1898 char buffer[buffLen];
1899 KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1900 (kmp_int32)getpid());
1901 statsFile.open(buffer, "w");
1902 }
1903
1904 fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1905 fprintf(statsFile,
1906 " Lock parameters: \n"
1907 " max_soft_retries : %10d\n"
1908 " max_badness : %10d\n",
1909 __kmp_adaptive_backoff_params.max_soft_retries,
1910 __kmp_adaptive_backoff_params.max_badness);
1911 fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1912 t->nonSpeculativeAcquireAttempts);
1913 fprintf(statsFile, " Total critical sections : %10d\n",
1914 totalSections);
1915 fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n",
1916 t->successfulSpeculations,
1917 percent(t->successfulSpeculations, totalSections));
1918 fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
1919 t->nonSpeculativeAcquires,
1920 percent(t->nonSpeculativeAcquires, totalSections));
1921 fprintf(statsFile, " Lemming yields : %10d\n\n",
1922 t->lemmingYields);
1923
1924 fprintf(statsFile, " Speculative acquire attempts : %10d\n",
1925 totalSpeculations);
1926 fprintf(statsFile, " Successes : %10d (%5.1f%%)\n",
1927 t->successfulSpeculations,
1928 percent(t->successfulSpeculations, totalSpeculations));
1929 fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n",
1930 t->softFailedSpeculations,
1931 percent(t->softFailedSpeculations, totalSpeculations));
1932 fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n",
1933 t->hardFailedSpeculations,
1934 percent(t->hardFailedSpeculations, totalSpeculations));
1935}
1936
1937#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1938#else
1939#define KMP_INC_STAT(lck, stat)
1940
1941#endif // KMP_DEBUG_ADAPTIVE_LOCKS
1942
1943static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
1944 // It is enough to check that the head_id is zero.
1945 // We don't also need to check the tail.
1946 bool res = lck->lk.head_id == 0;
1947
1948// We need a fence here, since we must ensure that no memory operations
1949// from later in this thread float above that read.
1950#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
1951 _mm_mfence();
1952#else
1953 __sync_synchronize();
1954#endif
1955
1956 return res;
1957}
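/* Note (illustrative): the compiler-specific fences above act as a full
   memory barrier; the portable C++11 spelling of the same intent would be
   std::atomic_thread_fence(std::memory_order_seq_cst). */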
1958
1959// Functions for manipulating the badness
1960static __inline void
1961__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1962 // Reset the badness to zero so we eagerly try to speculate again
1963 lck->lk.adaptive.badness = 0;
1964 KMP_INC_STAT(lck, successfulSpeculations);
1965}
1966
1967// Create a bit mask with one more set bit.
1968static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1969 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1970 if (newBadness > lck->lk.adaptive.max_badness) {
1971 return;
1972 } else {
1973 lck->lk.adaptive.badness = newBadness;
1974 }
1975}
1976
1977// Check whether speculation should be attempted.
1979static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1980 kmp_int32 gtid) {
1981 kmp_uint32 badness = lck->lk.adaptive.badness;
1982 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1983 int res = (attempts & badness) == 0;
1984 return res;
1985}
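/* Example of the gating above (illustrative): badness is a low-bit mask that
   grows 0 -> 1 -> 3 -> 7 -> ... after failed speculations, so with
   badness == 3 only every 4th acquire attempt (attempts & 3 == 0) retries
   speculation; a successful speculation resets badness to 0. */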
1986
1987// Attempt to acquire only the speculative lock.
1988// Does not back off to the non-speculative lock.
1990static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
1991 kmp_int32 gtid) {
1992 int retries = lck->lk.adaptive.max_soft_retries;
1993
1994 // We don't explicitly count the start of speculation, rather we record the
1995 // results (success, hard fail, soft fail). The sum of all of those is the
1996 // total number of times we started speculation since all speculations must
1997 // end one of those ways.
1998 do {
1999 kmp_uint32 status = _xbegin();
2000 // Switch this in to disable actual speculation but exercise at least some
2001 // of the rest of the code. Useful for debugging...
2002 // kmp_uint32 status = _XABORT_NESTED;
2003
2004 if (status == _XBEGIN_STARTED) {
2005 /* We have successfully started speculation. Check that no-one acquired
2006 the lock for real between when we last looked and now. This also gets
2007 the lock cache line into our read-set, which we need so that we'll
2008 abort if anyone later claims it for real. */
2009 if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2010 // Lock is now visibly acquired, so someone beat us to it. Abort the
2011 // transaction so we'll restart from _xbegin with the failure status.
2012 _xabort(0x01);
2013 KMP_ASSERT2(0, "should not get here");
2014 }
2015 return 1; // Lock has been acquired (speculatively)
2016 } else {
2017 // We have aborted, update the statistics
2018 if (status & SOFT_ABORT_MASK) {
2019 KMP_INC_STAT(lck, softFailedSpeculations);
2020 // and loop round to retry.
2021 } else {
2022 KMP_INC_STAT(lck, hardFailedSpeculations);
2023 // Give up if we had a hard failure.
2024 break;
2025 }
2026 }
2027 } while (retries--); // Loop while we have retries, and didn't fail hard.
2028
2029 // Either we had a hard failure or we didn't succeed softly after
2030 // the full set of attempts, so back off the badness.
2031 __kmp_step_badness(lck);
2032 return 0;
2033}
2034
2035// Attempt to acquire the speculative lock, or back off to the non-speculative
2036// one if the speculative lock cannot be acquired.
2037// We can succeed speculatively, non-speculatively, or fail.
2038static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2039 // First try to acquire the lock speculatively
2040 if (__kmp_should_speculate(lck, gtid) &&
2041 __kmp_test_adaptive_lock_only(lck, gtid))
2042 return 1;
2043
2044 // Speculative acquisition failed, so try to acquire it non-speculatively.
2045 // Count the non-speculative acquire attempt
2046 lck->lk.adaptive.acquire_attempts++;
2047
2048 // Use base, non-speculative lock.
2049 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2050 KMP_INC_STAT(lck, nonSpeculativeAcquires);
2051 return 1; // Lock is acquired (non-speculatively)
2052 } else {
2053 return 0; // Failed to acquire the lock, it's already visibly locked.
2054 }
2055}
2056
2057static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2058 kmp_int32 gtid) {
2059 char const *const func = "omp_test_lock";
2060 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2061 KMP_FATAL(LockIsUninitialized, func);
2062 }
2063
2064 int retval = __kmp_test_adaptive_lock(lck, gtid);
2065
2066 if (retval) {
2067 lck->lk.qlk.owner_id = gtid + 1;
2068 }
2069 return retval;
2070}
2071
2072// Block until we can acquire a speculative, adaptive lock. We check whether we
2073// should be trying to speculate. If we should be, we check the real lock to see
2074// if it is free, and, if not, pause without attempting to acquire it until it
2075// is. Then we try the speculative acquire. This means that although we suffer
2076// from lemmings a little (because we can't acquire the lock speculatively
2077// until the queue of threads waiting has cleared), we don't get into a state
2078// where we can never acquire the lock speculatively (because we force the queue
2079// to clear by preventing new arrivals from entering the queue). This does mean
2080// that when we're trying to break lemmings, the lock is no longer fair. However,
2081// OpenMP makes no guarantee that its locks are fair, so this isn't a real
2082// problem.
2083static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2084 kmp_int32 gtid) {
2085 if (__kmp_should_speculate(lck, gtid)) {
2086 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2087 if (__kmp_test_adaptive_lock_only(lck, gtid))
2088 return;
2089 // We tried speculation and failed, so give up.
2090 } else {
2091 // We can't try speculation until the lock is free, so we pause here
2092 // (without suspending on the queuing lock) to allow it to drain, then
2093 // try again. All other threads will also see the same result for
2094 // shouldSpeculate, so will be doing the same if they try to claim the
2095 // lock from now on.
2096 while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2097 KMP_INC_STAT(lck, lemmingYields);
2098 KMP_YIELD(TRUE);
2099 }
2100
2101 if (__kmp_test_adaptive_lock_only(lck, gtid))
2102 return;
2103 }
2104 }
2105
2106 // Speculative acquisition failed, so acquire it non-speculatively.
2107 // Count the non-speculative acquire attempt
2108 lck->lk.adaptive.acquire_attempts++;
2109
2111 // We have acquired the base lock, so count that.
2112 KMP_INC_STAT(lck, nonSpeculativeAcquires);
2113}
2114
2115static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2116 kmp_int32 gtid) {
2117 char const *const func = "omp_set_lock";
2118 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2119 KMP_FATAL(LockIsUninitialized, func);
2120 }
2121 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2122 KMP_FATAL(LockIsAlreadyOwned, func);
2123 }
2124
2125 __kmp_acquire_adaptive_lock(lck, gtid);
2126
2127 lck->lk.qlk.owner_id = gtid + 1;
2128}
2129
2131static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2132 kmp_int32 gtid) {
2133 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2134 lck))) { // If the lock doesn't look claimed we must be speculating.
2135 // (Or the user's code is buggy and they're releasing without locking;
2136 // if we had XTEST we'd be able to check that case...)
2137 _xend(); // Exit speculation
2138 __kmp_update_badness_after_success(lck);
2139 } else { // Since the lock *is* visibly locked we're not speculating,
2140 // so should use the underlying lock's release scheme.
2141 __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2142 }
2143 return KMP_LOCK_RELEASED;
2144}
2145
2146static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2147 kmp_int32 gtid) {
2148 char const *const func = "omp_unset_lock";
2149 KMP_MB(); /* in case another processor initialized lock */
2150 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2151 KMP_FATAL(LockIsUninitialized, func);
2152 }
2153 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2154 KMP_FATAL(LockUnsettingFree, func);
2155 }
2156 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2157 KMP_FATAL(LockUnsettingSetByAnother, func);
2158 }
2159 lck->lk.qlk.owner_id = 0;
2160 __kmp_release_adaptive_lock(lck, gtid);
2161 return KMP_LOCK_RELEASED;
2162}
2163
2164static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2165 __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2166 lck->lk.adaptive.badness = 0;
2167 lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2168 lck->lk.adaptive.max_soft_retries =
2169 __kmp_adaptive_backoff_params.max_soft_retries;
2170 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2171#if KMP_DEBUG_ADAPTIVE_LOCKS
2172 __kmp_zero_speculative_stats(&lck->lk.adaptive);
2173#endif
2174 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2175}
2176
2177static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2178#if KMP_DEBUG_ADAPTIVE_LOCKS
2179 __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2180#endif
2181 __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2182 // Nothing needed for the speculative part.
2183}
2184
2185static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2186 char const *const func = "omp_destroy_lock";
2187 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2188 KMP_FATAL(LockIsUninitialized, func);
2189 }
2190 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2191 KMP_FATAL(LockStillOwned, func);
2192 }
2193 __kmp_destroy_adaptive_lock(lck);
2194}
2195
2196#endif // KMP_USE_ADAPTIVE_LOCKS
2197
2198/* ------------------------------------------------------------------------ */
2199/* DRDPA ticket locks */
2200/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2201
2203 return lck->lk.owner_id - 1;
2204}
2205
2207 return lck->lk.depth_locked != -1;
2208}
2209
2210__forceinline static int
2212 kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2213 kmp_uint64 mask = lck->lk.mask; // atomic load
2214 std::atomic<kmp_uint64> *polls = lck->lk.polls;
2215
2216#ifdef USE_LOCK_PROFILE
2217 if (polls[ticket & mask] != ticket)
2218 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2219/* else __kmp_printf( "." );*/
2220#endif /* USE_LOCK_PROFILE */
2221
2222 // Now spin-wait, but reload the polls pointer and mask, in case the
2223 // polling area has been reconfigured. Unless it is reconfigured, the
2224 // reloads stay in L1 cache and are cheap.
2225 //
2226 // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2227 // The current implementation of KMP_WAIT doesn't allow for mask
2228 // and poll to be re-read every spin iteration.
2229 kmp_uint32 spins;
2230 kmp_uint64 time;
2232 KMP_INIT_YIELD(spins);
2233 KMP_INIT_BACKOFF(time);
2234 while (polls[ticket & mask] < ticket) { // atomic load
2235 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
2236 // Re-read the mask and the poll pointer from the lock structure.
2237 //
2238 // Make certain that "mask" is read before "polls" !!!
2239 //
2240 // If another thread reconfigures the polling area and updates these
2241 // values, and we get the new value of mask and the old polls pointer, we
2242 // could access memory beyond the end of the old polling area.
2243 mask = lck->lk.mask; // atomic load
2244 polls = lck->lk.polls; // atomic load
2245 }
2246
2247 // Critical section starts here
2249 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2250 ticket, lck));
2251 lck->lk.now_serving = ticket; // non-volatile store
2252
2253 // Deallocate a garbage polling area if we know that we are the last
2254 // thread that could possibly access it.
2255 //
2256 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2257 // ticket.
2258 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2259 __kmp_free(lck->lk.old_polls);
2260 lck->lk.old_polls = NULL;
2261 lck->lk.cleanup_ticket = 0;
2262 }
2263
2264 // Check to see if we should reconfigure the polling area.
2265 // If there is still a garbage polling area to be deallocated from a
2266 // previous reconfiguration, let a later thread reconfigure it.
2267 if (lck->lk.old_polls == NULL) {
2268 bool reconfigure = false;
2269 std::atomic<kmp_uint64> *old_polls = polls;
2270 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2271
2272 if (TCR_4(__kmp_nth) >
2274 // We are in oversubscription mode. Contract the polling area
2275 // down to a single location, if that hasn't been done already.
2276 if (num_polls > 1) {
2277 reconfigure = true;
2278 num_polls = TCR_4(lck->lk.num_polls);
2279 mask = 0;
2280 num_polls = 1;
2281 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2282 sizeof(*polls));
2283 polls[0] = ticket;
2284 }
2285 } else {
2286 // We are in under/fully subscribed mode. Check the number of
2287 // threads waiting on the lock. The size of the polling area
2288 // should be at least the number of threads waiting.
2289 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2290 if (num_waiting > num_polls) {
2291 kmp_uint32 old_num_polls = num_polls;
2292 reconfigure = true;
2293 do {
2294 mask = (mask << 1) | 1;
2295 num_polls *= 2;
2296 } while (num_polls <= num_waiting);
2297
2298 // Allocate the new polling area, and copy the relevant portion
2299 // of the old polling area to the new area. __kmp_allocate()
2300 // zeroes the memory it allocates, and most of the old area is
2301 // just zero padding, so we only copy the release counters.
2302 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2303 sizeof(*polls));
2304 kmp_uint32 i;
2305 for (i = 0; i < old_num_polls; i++) {
2306 polls[i].store(old_polls[i]);
2307 }
2308 }
2309 }
2310
2311 if (reconfigure) {
2312 // Now write the updated fields back to the lock structure.
2313 //
2314 // Make certain that "polls" is written before "mask" !!!
2315 //
2316 // If another thread picks up the new value of mask and the old polls
2317 // pointer, it could access memory beyond the end of the old polling
2318 // area.
2319 //
2320 // On x86, we need memory fences.
2321 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2322 "lock %p to %d polls\n",
2323 ticket, lck, num_polls));
2324
2325 lck->lk.old_polls = old_polls;
2326 lck->lk.polls = polls; // atomic store
2327
2328 KMP_MB();
2329
2330 lck->lk.num_polls = num_polls;
2331 lck->lk.mask = mask; // atomic store
2332
2333 KMP_MB();
2334
2335 // Only after the new polling area and mask have been flushed
2336 // to main memory can we update the cleanup ticket field.
2337 //
2338 // volatile load / non-volatile store
2339 lck->lk.cleanup_ticket = lck->lk.next_ticket;
2340 }
2341 }
2343}
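// Illustrative sketch (not part of the original file): the reconfiguration
// above grows the polling area to the next power of two strictly greater than
// the number of waiters, keeping the invariant mask == num_polls - 1. The
// helper below is a hypothetical model of that sizing rule only; the real code
// starts from the current num_polls rather than from 1.
static inline kmp_uint32 __kmp_example_drdpa_polls_needed(kmp_uint64 num_waiting) {
  kmp_uint32 num_polls = 1;
  while (num_polls <= num_waiting) // same growth condition as the loop above
    num_polls *= 2;
  return num_polls; // the matching mask would be num_polls - 1
}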
2344
2346 int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2347 return retval;
2348}
2349
2351 kmp_int32 gtid) {
2352 char const *const func = "omp_set_lock";
2353 if (lck->lk.initialized != lck) {
2354 KMP_FATAL(LockIsUninitialized, func);
2355 }
2357 KMP_FATAL(LockNestableUsedAsSimple, func);
2358 }
2359 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2360 KMP_FATAL(LockIsAlreadyOwned, func);
2361 }
2362
2364
2365 lck->lk.owner_id = gtid + 1;
2367}
2368
2370 // First get a ticket, then read the polls pointer and the mask.
2371 // The polls pointer must be read before the mask!!! (See above)
2372 kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2373 std::atomic<kmp_uint64> *polls = lck->lk.polls;
2374 kmp_uint64 mask = lck->lk.mask; // atomic load
2375 if (polls[ticket & mask] == ticket) {
2376 kmp_uint64 next_ticket = ticket + 1;
2377 if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2378 next_ticket)) {
2380 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2381 ticket, lck));
2382 lck->lk.now_serving = ticket; // non-volatile store
2383
2384 // Since no threads are waiting, there is no possibility that we would
2385 // want to reconfigure the polling area. We might have the cleanup ticket
2386 // value (which says that it is now safe to deallocate old_polls), but
2387 // we'll let a later thread which calls __kmp_acquire_lock do that - this
2388 // routine isn't supposed to block, and we would risk blocks if we called
2389 // __kmp_free() to do the deallocation.
2390 return TRUE;
2391 }
2392 }
2393 return FALSE;
2394}
2395
2397 kmp_int32 gtid) {
2398 char const *const func = "omp_test_lock";
2399 if (lck->lk.initialized != lck) {
2400 KMP_FATAL(LockIsUninitialized, func);
2401 }
2403 KMP_FATAL(LockNestableUsedAsSimple, func);
2404 }
2405
2406 int retval = __kmp_test_drdpa_lock(lck, gtid);
2407
2408 if (retval) {
2409 lck->lk.owner_id = gtid + 1;
2410 }
2411 return retval;
2412}
2413
2415 // Read the ticket value from the lock data struct, then the polls pointer and
2416 // the mask. The polls pointer must be read before the mask!!! (See above)
2417 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2418 std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2419 kmp_uint64 mask = lck->lk.mask; // atomic load
2420 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2421 ticket - 1, lck));
2423 polls[ticket & mask] = ticket; // atomic store
2424 return KMP_LOCK_RELEASED;
2425}
2426
2428 kmp_int32 gtid) {
2429 char const *const func = "omp_unset_lock";
2430 KMP_MB(); /* in case another processor initialized lock */
2431 if (lck->lk.initialized != lck) {
2432 KMP_FATAL(LockIsUninitialized, func);
2433 }
2435 KMP_FATAL(LockNestableUsedAsSimple, func);
2436 }
2437 if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2438 KMP_FATAL(LockUnsettingFree, func);
2439 }
2440 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2441 (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2442 KMP_FATAL(LockUnsettingSetByAnother, func);
2443 }
2444 lck->lk.owner_id = 0;
2445 return __kmp_release_drdpa_lock(lck, gtid);
2446}
2447
2449 lck->lk.location = NULL;
2450 lck->lk.mask = 0;
2451 lck->lk.num_polls = 1;
2452 lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2453 lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2454 lck->lk.cleanup_ticket = 0;
2455 lck->lk.old_polls = NULL;
2456 lck->lk.next_ticket = 0;
2457 lck->lk.now_serving = 0;
2458 lck->lk.owner_id = 0; // no thread owns the lock.
2459 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2460 lck->lk.initialized = lck;
2461
2462 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2463}
2464
2466 lck->lk.initialized = NULL;
2467 lck->lk.location = NULL;
2468 if (lck->lk.polls.load() != NULL) {
2469 __kmp_free(lck->lk.polls.load());
2470 lck->lk.polls = NULL;
2471 }
2472 if (lck->lk.old_polls != NULL) {
2473 __kmp_free(lck->lk.old_polls);
2474 lck->lk.old_polls = NULL;
2475 }
2476 lck->lk.mask = 0;
2477 lck->lk.num_polls = 0;
2478 lck->lk.cleanup_ticket = 0;
2479 lck->lk.next_ticket = 0;
2480 lck->lk.now_serving = 0;
2481 lck->lk.owner_id = 0;
2482 lck->lk.depth_locked = -1;
2483}
2484
2486 char const *const func = "omp_destroy_lock";
2487 if (lck->lk.initialized != lck) {
2488 KMP_FATAL(LockIsUninitialized, func);
2489 }
2491 KMP_FATAL(LockNestableUsedAsSimple, func);
2492 }
2493 if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2494 KMP_FATAL(LockStillOwned, func);
2495 }
2497}
2498
2499// nested drdpa ticket locks
2500
2502 KMP_DEBUG_ASSERT(gtid >= 0);
2503
2504 if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2505 lck->lk.depth_locked += 1;
2507 } else {
2509 KMP_MB();
2510 lck->lk.depth_locked = 1;
2511 KMP_MB();
2512 lck->lk.owner_id = gtid + 1;
2514 }
2515}
2516
2518 kmp_int32 gtid) {
2519 char const *const func = "omp_set_nest_lock";
2520 if (lck->lk.initialized != lck) {
2521 KMP_FATAL(LockIsUninitialized, func);
2522 }
2524 KMP_FATAL(LockSimpleUsedAsNestable, func);
2525 }
2527}
2528
2530 int retval;
2531
2532 KMP_DEBUG_ASSERT(gtid >= 0);
2533
2534 if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2535 retval = ++lck->lk.depth_locked;
2536 } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2537 retval = 0;
2538 } else {
2539 KMP_MB();
2540 retval = lck->lk.depth_locked = 1;
2541 KMP_MB();
2542 lck->lk.owner_id = gtid + 1;
2543 }
2544 return retval;
2545}
2546
2548 kmp_int32 gtid) {
2549 char const *const func = "omp_test_nest_lock";
2550 if (lck->lk.initialized != lck) {
2551 KMP_FATAL(LockIsUninitialized, func);
2552 }
2554 KMP_FATAL(LockSimpleUsedAsNestable, func);
2555 }
2556 return __kmp_test_nested_drdpa_lock(lck, gtid);
2557}
2558
2560 KMP_DEBUG_ASSERT(gtid >= 0);
2561
2562 KMP_MB();
2563 if (--(lck->lk.depth_locked) == 0) {
2564 KMP_MB();
2565 lck->lk.owner_id = 0;
2567 return KMP_LOCK_RELEASED;
2568 }
2569 return KMP_LOCK_STILL_HELD;
2570}
2571
2573 kmp_int32 gtid) {
2574 char const *const func = "omp_unset_nest_lock";
2575 KMP_MB(); /* in case another processor initialized lock */
2576 if (lck->lk.initialized != lck) {
2577 KMP_FATAL(LockIsUninitialized, func);
2578 }
2580 KMP_FATAL(LockSimpleUsedAsNestable, func);
2581 }
2582 if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2583 KMP_FATAL(LockUnsettingFree, func);
2584 }
2585 if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2586 KMP_FATAL(LockUnsettingSetByAnother, func);
2587 }
2589}
2590
2593 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2594}
2595
2600
2602 char const *const func = "omp_destroy_nest_lock";
2603 if (lck->lk.initialized != lck) {
2604 KMP_FATAL(LockIsUninitialized, func);
2605 }
2607 KMP_FATAL(LockSimpleUsedAsNestable, func);
2608 }
2609 if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2610 KMP_FATAL(LockStillOwned, func);
2611 }
2613}
2614
2615// access functions to fields which don't exist for all lock kinds.
2616
2618 return lck->lk.location;
2619}
2620
2622 const ident_t *loc) {
2623 lck->lk.location = loc;
2624}
2625
2629
2631 kmp_lock_flags_t flags) {
2632 lck->lk.flags = flags;
2633}
2634
2635// Time stamp counter
2636#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2637#define __kmp_tsc() __kmp_hardware_timestamp()
2638// Runtime's default backoff parameters
2640#else
2641// Use nanoseconds for other platforms
2642extern kmp_uint64 __kmp_now_nsec();
2644#define __kmp_tsc() __kmp_now_nsec()
2645#endif
2646
2647// A useful predicate for dealing with timestamps that may wrap.
2648// Is a before b? Since the timestamps may wrap, this is asking whether it's
2649// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2650// Times where going clockwise is less distance than going anti-clockwise
2651// are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0),
2652// then a > b (true) does not mean a reached b; whereas signed(a) = -2,
2653// signed(b) = 0 captures the actual difference
2654static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2655 return ((kmp_int64)b - (kmp_int64)a) > 0;
2656}
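// Illustrative sketch (not part of the original file): a check showing why the
// signed difference is used. With "now" at the top of the 64-bit range and a
// goal two ticks later (which wraps to a small value), an unsigned comparison
// would call the goal already reached, while before() still reports it as
// ahead. The helper name is hypothetical and exists only for illustration.
static inline void __kmp_example_before_wrap_check() {
  kmp_uint64 now = ~(kmp_uint64)0; // "current" timestamp at the wrap point
  kmp_uint64 goal = now + 2;       // two ticks ahead; wraps around to 1
  KMP_DEBUG_ASSERT(before(now, goal));  // wrap-aware: the goal is still ahead
  KMP_DEBUG_ASSERT(!before(goal, now)); // and "now" is not ahead of the goal
}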
2657
2658// Truncated binary exponential backoff function
2660 // We could flatten this loop, but making it a nested loop gives better results
2661 kmp_uint32 i;
2662 for (i = boff->step; i > 0; i--) {
2663 kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2664#if KMP_HAVE_UMWAIT
2665 if (__kmp_umwait_enabled) {
2666 __kmp_tpause(0, boff->min_tick);
2667 } else {
2668#endif
2669 do {
2670 KMP_CPU_PAUSE();
2671 } while (before(__kmp_tsc(), goal));
2672#if KMP_HAVE_UMWAIT
2673 }
2674#endif
2675 }
2676 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2677}
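// Illustrative sketch (not part of the original file): the step update above
// is a truncated binary exponential backoff. Assuming max_backoff is a power
// of two and the step starts at 1, successive values follow 3, 7, 15, ... and
// then saturate at max_backoff - 1. The helper name is hypothetical.
static inline kmp_uint32 __kmp_example_next_backoff_step(kmp_uint32 step,
                                                         kmp_uint32 max_backoff) {
  return (step << 1 | 1) & (max_backoff - 1); // same update as __kmp_spin_backoff
}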
2678
2679#if KMP_USE_DYNAMIC_LOCK
2680
2681// Direct lock initializer. It simply writes a tag to the low 8 bits of the
2682// lock word.
2683static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2684 kmp_dyna_lockseq_t seq) {
2685 TCW_4(((kmp_base_tas_lock_t *)lck)->poll, KMP_GET_D_TAG(seq));
2686 KA_TRACE(
2687 20,
2688 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2689}
2690
2691#if KMP_USE_TSX
2692
2693// HLE lock functions - imported from the testbed runtime.
2694#define HLE_ACQUIRE ".byte 0xf2;"
2695#define HLE_RELEASE ".byte 0xf3;"
2696
2697static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2698 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2699 return v;
2700}
2701
2702static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2703
2704static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2705 TCW_4(*lck, 0);
2706}
2707
2708static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2709 // Use gtid for KMP_LOCK_BUSY if necessary
2710 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2711 int delay = 1;
2712 do {
2713 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2714 for (int i = delay; i != 0; --i)
2715 KMP_CPU_PAUSE();
2716 delay = ((delay << 1) | 1) & 7;
2717 }
2718 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2719 }
2720}
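// Illustrative sketch (not part of the original file): the wait loop above
// bounds its busy-wait with ((delay << 1) | 1) & 7, so successive iterations
// pause 1, 3, 7, 7, ... times before re-reading the lock word. The helper name
// is hypothetical, for illustration only.
static inline int __kmp_example_next_hle_delay(int delay) {
  return ((delay << 1) | 1) & 7; // same bounded growth as __kmp_acquire_hle_lock
}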
2721
2722static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2723 kmp_int32 gtid) {
2724 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2725}
2726
2727static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2728 __asm__ volatile(HLE_RELEASE "movl %1,%0"
2729 : "=m"(*lck)
2730 : "r"(KMP_LOCK_FREE(hle))
2731 : "memory");
2732 return KMP_LOCK_RELEASED;
2733}
2734
2735static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2736 kmp_int32 gtid) {
2737 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2738}
2739
2740static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2741 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2742}
2743
2744static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2745 kmp_int32 gtid) {
2746 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2747}
2748
2749static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2751}
2752
2753static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2755}
2756
2757static void
2758__kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2760}
2761
2763static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2764 kmp_int32 gtid) {
2765 unsigned retries = 3, status;
2766 do {
2767 status = _xbegin();
2768 if (status == _XBEGIN_STARTED) {
2769 if (__kmp_is_unlocked_queuing_lock(lck))
2770 return;
2771 _xabort(0xff);
2772 }
2773 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2774 // Wait until lock becomes free
2775 while (!__kmp_is_unlocked_queuing_lock(lck)) {
2776 KMP_YIELD(TRUE);
2777 }
2778 } else if (!(status & _XABORT_RETRY))
2779 break;
2780 } while (retries--);
2781
2782 // Fall-back non-speculative lock (xchg)
2784}
2785
2786static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2787 kmp_int32 gtid) {
2788 __kmp_acquire_rtm_queuing_lock(lck, gtid);
2789}
2790
2792static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2793 kmp_int32 gtid) {
2794 if (__kmp_is_unlocked_queuing_lock(lck)) {
2795 // Releasing from speculation
2796 _xend();
2797 } else {
2798 // Releasing from a real lock
2800 }
2801 return KMP_LOCK_RELEASED;
2802}
2803
2804static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2805 kmp_int32 gtid) {
2806 return __kmp_release_rtm_queuing_lock(lck, gtid);
2807}
2808
2810static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2811 kmp_int32 gtid) {
2812 unsigned retries = 3, status;
2813 do {
2814 status = _xbegin();
2815 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2816 return 1;
2817 }
2818 if (!(status & _XABORT_RETRY))
2819 break;
2820 } while (retries--);
2821
2822 return __kmp_test_queuing_lock(lck, gtid);
2823}
2824
2825static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2826 kmp_int32 gtid) {
2827 return __kmp_test_rtm_queuing_lock(lck, gtid);
2828}
2829
2830// Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back spin lock.
2831typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2832
2833static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
2834 KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
2835}
2836
2837static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
2838 __kmp_destroy_rtm_spin_lock(lck);
2839}
2840
2842static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2843 kmp_int32 gtid) {
2844 unsigned retries = 3, status;
2845 kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2846 kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2847 do {
2848 status = _xbegin();
2849 if (status == _XBEGIN_STARTED) {
2850 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2852 _xabort(0xff);
2853 }
2854 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2855 // Wait until lock becomes free
2856 while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2857 KMP_YIELD(TRUE);
2858 }
2859 } else if (!(status & _XABORT_RETRY))
2860 break;
2861 } while (retries--);
2862
2863 // Fall-back spin lock
2866 while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2867 !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2868 __kmp_spin_backoff(&backoff);
2869 }
2872}
2873
2874static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2875 kmp_int32 gtid) {
2876 return __kmp_acquire_rtm_spin_lock(lck, gtid);
2877}
2878
2880static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2881 kmp_int32 gtid) {
2882 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2883 // Releasing from speculation
2884 _xend();
2885 } else {
2886 // Releasing from a real lock
2888 KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2889 }
2890 return KMP_LOCK_RELEASED;
2891}
2892
2893static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2894 kmp_int32 gtid) {
2895 return __kmp_release_rtm_spin_lock(lck, gtid);
2896}
2897
2899static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2900 unsigned retries = 3, status;
2901 kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2902 kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2903 do {
2904 status = _xbegin();
2905 if (status == _XBEGIN_STARTED &&
2906 KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2907 return TRUE;
2908 }
2909 if (!(status & _XABORT_RETRY))
2910 break;
2911 } while (retries--);
2912
2913 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2914 __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2916 return TRUE;
2917 }
2918 return FALSE;
2919}
2920
2921static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2922 kmp_int32 gtid) {
2923 return __kmp_test_rtm_spin_lock(lck, gtid);
2924}
2925
2926#endif // KMP_USE_TSX
2927
2928// Entry functions for indirect locks (first element of direct lock jump tables)
2929static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2930 kmp_dyna_lockseq_t tag);
2931static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2932static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2933static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2934static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2935static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2936 kmp_int32);
2937static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2938 kmp_int32);
2939static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2940 kmp_int32);
2941
2942// Lock function definitions for the union parameter type
2943#define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2944
2945#define expand1(lk, op) \
2946 static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \
2947 __kmp_##op##_##lk##_##lock(&lock->lk); \
2948 }
2949#define expand2(lk, op) \
2950 static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \
2951 kmp_int32 gtid) { \
2952 return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \
2953 }
2954#define expand3(lk, op) \
2955 static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \
2956 kmp_lock_flags_t flags) { \
2957 __kmp_set_##lk##_lock_flags(&lock->lk, flags); \
2958 }
2959#define expand4(lk, op) \
2960 static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \
2961 const ident_t *loc) { \
2962 __kmp_set_##lk##_lock_location(&lock->lk, loc); \
2963 }
2964
2965KMP_FOREACH_LOCK_KIND(expand1, init)
2966KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2967KMP_FOREACH_LOCK_KIND(expand1, destroy)
2968KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2969KMP_FOREACH_LOCK_KIND(expand2, acquire)
2970KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2971KMP_FOREACH_LOCK_KIND(expand2, release)
2972KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2973KMP_FOREACH_LOCK_KIND(expand2, test)
2974KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2975KMP_FOREACH_LOCK_KIND(expand3, )
2976KMP_FOREACH_LOCK_KIND(expand4, )
2977
2978#undef expand1
2979#undef expand2
2980#undef expand3
2981#undef expand4
2982
2983// Jump tables for the direct lock functions
2984// Only fill in the odd entries; that avoids the need to shift out the low bit
2985
2986// init functions
2987#define expand(l, op) 0, __kmp_init_direct_lock,
2988void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2989 __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2990#undef expand
2991
2992// destroy functions
2993#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
2994static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
2995 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2996#undef expand
2997#define expand(l, op) \
2998 0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
2999static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
3000 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3001#undef expand
3002
3003// set/acquire functions
3004#define expand(l, op) \
3005 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3006static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
3007 __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
3008#undef expand
3009#define expand(l, op) \
3010 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3011static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3012 __kmp_set_indirect_lock_with_checks, 0,
3013 KMP_FOREACH_D_LOCK(expand, acquire)};
3014#undef expand
3015
3016// unset/release and test functions
3017#define expand(l, op) \
3018 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3019static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
3020 __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
3021static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
3022 __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3023#undef expand
3024#define expand(l, op) \
3025 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3026static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3027 __kmp_unset_indirect_lock_with_checks, 0,
3028 KMP_FOREACH_D_LOCK(expand, release)};
3029static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3030 __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3031#undef expand
3032
3033// Exposes only one set of jump tables (*lock or *lock_with_checks).
3034void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
3035int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
3036int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
3037int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
3038
3039// Jump tables for the indirect lock functions
3040#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3041void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
3042 KMP_FOREACH_I_LOCK(expand, init)};
3043#undef expand
3044
3045#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3046static void (*indirect_destroy[])(kmp_user_lock_p) = {
3047 KMP_FOREACH_I_LOCK(expand, destroy)};
3048#undef expand
3049#define expand(l, op) \
3050 (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3051static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
3052 KMP_FOREACH_I_LOCK(expand, destroy)};
3053#undef expand
3054
3055// set/acquire functions
3056#define expand(l, op) \
3057 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3058static int (*indirect_set[])(kmp_user_lock_p,
3059 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3060#undef expand
3061#define expand(l, op) \
3062 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3063static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
3064 KMP_FOREACH_I_LOCK(expand, acquire)};
3065#undef expand
3066
3067// unset/release and test functions
3068#define expand(l, op) \
3069 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3070static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3071 KMP_FOREACH_I_LOCK(expand, release)};
3072static int (*indirect_test[])(kmp_user_lock_p,
3073 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3074#undef expand
3075#define expand(l, op) \
3076 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3077static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3078 KMP_FOREACH_I_LOCK(expand, release)};
3079static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3080 KMP_FOREACH_I_LOCK(expand, test)};
3081#undef expand
3082
3083// Exposes only one set of jump tables (*lock or *lock_with_checks).
3084void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
3085int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
3086int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
3087int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3088
3089// Lock index table.
3090kmp_indirect_lock_table_t __kmp_i_lock_table;
3091
3092// Size of indirect locks.
3093static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3094
3095// Jump tables for lock accessor/modifier.
3096void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3097 const ident_t *) = {0};
3098void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3099 kmp_lock_flags_t) = {0};
3100const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3101 kmp_user_lock_p) = {0};
3102kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3103 kmp_user_lock_p) = {0};
3104
3105// Use different lock pools for different lock types.
3106static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3107
3108// User lock allocator for dynamically dispatched indirect locks. Every entry of
3109// the indirect lock table holds the address and type of the allocated indirect
3110// lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3111// full. A destroyed indirect lock object is returned to the reusable pool of
3112// locks, unique to each lock type.
3113kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3114 kmp_int32 gtid,
3115 kmp_indirect_locktag_t tag) {
3116 kmp_indirect_lock_t *lck;
3117 kmp_lock_index_t idx, table_idx;
3118
3120
3121 if (__kmp_indirect_lock_pool[tag] != NULL) {
3122 // Reuse the allocated and destroyed lock object
3123 lck = __kmp_indirect_lock_pool[tag];
3124 if (OMP_LOCK_T_SIZE < sizeof(void *))
3125 idx = lck->lock->pool.index;
3126 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3127 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3128 lck));
3129 } else {
3130 kmp_uint32 row, col;
3131 kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
3132 idx = 0;
3133 // Find location in list of lock tables to put new lock
3134 while (1) {
3135 table_idx = lock_table->next; // index within this table
3136 idx += lock_table->next; // global index within list of tables
3137 if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
3138 row = table_idx / KMP_I_LOCK_CHUNK;
3139 col = table_idx % KMP_I_LOCK_CHUNK;
3140 // Allocate a new row of locks if necessary
3141 if (!lock_table->table[row]) {
3142 lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
3143 sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
3144 }
3145 break;
3146 }
3147 // Allocate a new lock table if necessary with double the capacity
3148 if (!lock_table->next_table) {
3149 kmp_indirect_lock_table_t *next_table =
3150 (kmp_indirect_lock_table_t *)__kmp_allocate(
3151 sizeof(kmp_indirect_lock_table_t));
3152 next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
3153 sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
3154 next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
3155 next_table->next = 0;
3156 next_table->next_table = nullptr;
3157 lock_table->next_table = next_table;
3158 }
3159 lock_table = lock_table->next_table;
3160 KMP_ASSERT(lock_table);
3161 }
3162 lock_table->next++;
3163
3164 lck = &lock_table->table[row][col];
3165 // Allocate a new base lock object
3166 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3167 KA_TRACE(20,
3168 ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3169 }
3170
3172
3173 lck->type = tag;
3174
3175 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3176 *(kmp_lock_index_t *)&(((kmp_base_tas_lock_t *)user_lock)->poll) =
3177 idx << 1; // indirect lock word must be even
3178 } else {
3179 *((kmp_indirect_lock_t **)user_lock) = lck;
3180 }
3181
3182 return lck;
3183}
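// Illustrative sketch (not part of the original file): when the OpenMP lock
// type is narrower than a pointer, the table index is stored shifted left by
// one so the lock word stays even, while direct locks carry an odd tag in the
// low bits; that parity is what distinguishes the two kinds. A hypothetical
// decode of the index stored above simply shifts it back:
static inline kmp_lock_index_t
__kmp_example_decode_indirect_index(kmp_lock_index_t lock_word) {
  return lock_word >> 1; // inverse of the "idx << 1" store above
}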
3184
3185// User lock lookup for dynamically dispatched locks.
3186static __forceinline kmp_indirect_lock_t *
3187__kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3189 kmp_indirect_lock_t *lck = NULL;
3190 if (user_lock == NULL) {
3191 KMP_FATAL(LockIsUninitialized, func);
3192 }
3193 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3194 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3195 lck = __kmp_get_i_lock(idx);
3196 } else {
3197 lck = *((kmp_indirect_lock_t **)user_lock);
3198 }
3199 if (lck == NULL) {
3200 KMP_FATAL(LockIsUninitialized, func);
3201 }
3202 return lck;
3203 } else {
3204 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3205 return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
3206 } else {
3207 return *((kmp_indirect_lock_t **)user_lock);
3208 }
3209 }
3210}
3211
3212static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3213 kmp_dyna_lockseq_t seq) {
3214#if KMP_USE_ADAPTIVE_LOCKS
3215 if (seq == lockseq_adaptive && !__kmp_cpuinfo.flags.rtm) {
3216 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3217 seq = lockseq_queuing;
3218 }
3219#endif
3220#if KMP_USE_TSX
3221 if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.flags.rtm) {
3222 seq = lockseq_queuing;
3223 }
3224#endif
3225 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3226 kmp_indirect_lock_t *l =
3227 __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3228 KMP_I_LOCK_FUNC(l, init)(l->lock);
3229 KA_TRACE(
3230 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3231 seq));
3232}
3233
3234static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3236 kmp_indirect_lock_t *l =
3237 __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3238 if (l == nullptr)
3239 return; // avoid segv if lock already destroyed
3240 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3241 kmp_indirect_locktag_t tag = l->type;
3242
3244
3245 // Use the base lock's space to keep the pool chain.
3246 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3247 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3248 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3249 }
3250 __kmp_indirect_lock_pool[tag] = l;
3251
3253}
3254
3255static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3256 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3257 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3258}
3259
3260static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3261 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3262 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3263}
3264
3265static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3266 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3267 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3268}
3269
3270static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3271 kmp_int32 gtid) {
3272 kmp_indirect_lock_t *l =
3273 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3274 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3275}
3276
3277static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3278 kmp_int32 gtid) {
3279 kmp_indirect_lock_t *l =
3280 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3281 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3282}
3283
3284static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3285 kmp_int32 gtid) {
3286 kmp_indirect_lock_t *l =
3287 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3288 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3289}
3290
3291kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3292
3293// This is used only in kmp_error.cpp when consistency checking is on.
3295 switch (seq) {
3296 case lockseq_tas:
3297 case lockseq_nested_tas:
3299#if KMP_USE_FUTEX
3300 case lockseq_futex:
3301 case lockseq_nested_futex:
3302 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3303#endif
3304 case lockseq_ticket:
3305 case lockseq_nested_ticket:
3307 case lockseq_queuing:
3308 case lockseq_nested_queuing:
3309#if KMP_USE_ADAPTIVE_LOCKS
3310 case lockseq_adaptive:
3311#endif
3313 case lockseq_drdpa:
3314 case lockseq_nested_drdpa:
3316 default:
3317 return 0;
3318 }
3319}
3320
3321// Initializes data for dynamic user locks.
3322void __kmp_init_dynamic_user_locks() {
3323 // Initialize jump table for the lock functions
3325 __kmp_direct_set = direct_set_check;
3326 __kmp_direct_unset = direct_unset_check;
3327 __kmp_direct_test = direct_test_check;
3328 __kmp_direct_destroy = direct_destroy_check;
3329 __kmp_indirect_set = indirect_set_check;
3330 __kmp_indirect_unset = indirect_unset_check;
3331 __kmp_indirect_test = indirect_test_check;
3332 __kmp_indirect_destroy = indirect_destroy_check;
3333 } else {
3334 __kmp_direct_set = direct_set;
3335 __kmp_direct_unset = direct_unset;
3336 __kmp_direct_test = direct_test;
3337 __kmp_direct_destroy = direct_destroy;
3338 __kmp_indirect_set = indirect_set;
3339 __kmp_indirect_unset = indirect_unset;
3340 __kmp_indirect_test = indirect_test;
3341 __kmp_indirect_destroy = indirect_destroy;
3342 }
3343 // If the user locks have already been initialized, then return. Allow the
3344 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3345 // new lock tables if they have already been allocated.
3347 return;
3348
3349 // Initialize lock index table
3350 __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
3351 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
3352 sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
3353 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3354 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3355 __kmp_i_lock_table.next = 0;
3356 __kmp_i_lock_table.next_table = nullptr;
3357
3358 // Indirect lock size
3359 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3360 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3361#if KMP_USE_ADAPTIVE_LOCKS
3362 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3363#endif
3364 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3365#if KMP_USE_TSX
3366 __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
3367#endif
3368 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3369#if KMP_USE_FUTEX
3370 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3371#endif
3372 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3373 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3374 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3375
3376// Initialize lock accessor/modifier
3377#define fill_jumps(table, expand, sep) \
3378 { \
3379 table[locktag##sep##ticket] = expand(ticket); \
3380 table[locktag##sep##queuing] = expand(queuing); \
3381 table[locktag##sep##drdpa] = expand(drdpa); \
3382 }
3383
3384#if KMP_USE_ADAPTIVE_LOCKS
3385#define fill_table(table, expand) \
3386 { \
3387 fill_jumps(table, expand, _); \
3388 table[locktag_adaptive] = expand(queuing); \
3389 fill_jumps(table, expand, _nested_); \
3390 }
3391#else
3392#define fill_table(table, expand) \
3393 { \
3394 fill_jumps(table, expand, _); \
3395 fill_jumps(table, expand, _nested_); \
3396 }
3397#endif // KMP_USE_ADAPTIVE_LOCKS
3398
3399#define expand(l) \
3400 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3401 fill_table(__kmp_indirect_set_location, expand);
3402#undef expand
3403#define expand(l) \
3404 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3405 fill_table(__kmp_indirect_set_flags, expand);
3406#undef expand
3407#define expand(l) \
3408 (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3409 fill_table(__kmp_indirect_get_location, expand);
3410#undef expand
3411#define expand(l) \
3412 (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3413 fill_table(__kmp_indirect_get_flags, expand);
3414#undef expand
3415
3417}
3418
3419// Clean up the lock table.
3420void __kmp_cleanup_indirect_user_locks() {
3421 int k;
3422
3423 // Clean up locks in the pools first (they were already destroyed before going
3424 // into the pools).
3425 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3426 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3427 while (l != NULL) {
3428 kmp_indirect_lock_t *ll = l;
3429 l = (kmp_indirect_lock_t *)l->lock->pool.next;
3430 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3431 ll));
3432 __kmp_free(ll->lock);
3433 ll->lock = NULL;
3434 }
3435 __kmp_indirect_lock_pool[k] = NULL;
3436 }
3437 // Clean up the remaining undestroyed locks.
3438 kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
3439 while (ptr) {
3440 for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
3441 if (!ptr->table[row])
3442 continue;
3443 for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
3444 kmp_indirect_lock_t *l = &ptr->table[row][col];
3445 if (l->lock) {
3446 // Locks not destroyed explicitly need to be destroyed here.
3447 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3448 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
3449 "from table\n",
3450 l));
3451 __kmp_free(l->lock);
3452 }
3453 }
3454 __kmp_free(ptr->table[row]);
3455 }
3456 __kmp_free(ptr->table);
3457 kmp_indirect_lock_table_t *next_table = ptr->next_table;
3458 if (ptr != &__kmp_i_lock_table)
3459 __kmp_free(ptr);
3460 ptr = next_table;
3461 }
3462
3464}
3465
3467int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3468
3469#else // KMP_USE_DYNAMIC_LOCK
3470
3474
3478
3479#if KMP_USE_FUTEX
3480static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3481 __kmp_init_futex_lock(lck);
3482}
3483
3484static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3485 __kmp_init_nested_futex_lock(lck);
3486}
3487#endif
3488
3490 return lck == lck->lk.self;
3491}
3492
3496
3500
3502 return lck == lck->lk.initialized;
3503}
3504
3508
3509static void
3513
3514#if KMP_USE_ADAPTIVE_LOCKS
3515static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
3516 __kmp_init_adaptive_lock(lck);
3517}
3518#endif
3519
3521 return lck == lck->lk.initialized;
3522}
3523
3527
3531
3532/* user locks
3533 * They are implemented as a table of function pointers which are set to the
3534 * lock functions of the appropriate kind, once that has been determined. */
3535
3537
3540
3543 kmp_int32 gtid) = NULL;
3544
3546 kmp_int32 gtid) = NULL;
3548 kmp_int32 gtid) = NULL;
3553 kmp_int32 gtid) = NULL;
3554
3556 kmp_int32 gtid) = NULL;
3558 kmp_int32 gtid) = NULL;
3561
3563const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3565 const ident_t *loc) = NULL;
3568 kmp_lock_flags_t flags) = NULL;
3569
3571 switch (user_lock_kind) {
3572 case lk_default:
3573 default:
3574 KMP_ASSERT(0);
3575
3576 case lk_tas: {
3579
3582
3586 } else {
3587 KMP_BIND_USER_LOCK(tas);
3589 }
3590
3593
3595
3597
3599 (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3600
3602
3605 } break;
3606
3607#if KMP_USE_FUTEX
3608
3609 case lk_futex: {
3610 __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
3611 __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
3612
3614 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
3615
3619 } else {
3620 KMP_BIND_USER_LOCK(futex);
3622 }
3623
3625 (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
3626
3628
3630
3632 (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3633
3635
3638 } break;
3639
3640#endif // KMP_USE_FUTEX
3641
3642 case lk_ticket: {
3645
3648
3652 } else {
3653 KMP_BIND_USER_LOCK(ticket);
3655 }
3656
3659
3662
3665
3668
3671
3674 } break;
3675
3676 case lk_queuing: {
3679
3682
3686 } else {
3687 KMP_BIND_USER_LOCK(queuing);
3689 }
3690
3693
3696
3699
3702
3705
3708 } break;
3709
3710#if KMP_USE_ADAPTIVE_LOCKS
3711 case lk_adaptive: {
3712 __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
3713 __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
3714
3717
3720 } else {
3721 KMP_BIND_USER_LOCK(adaptive);
3722 }
3723
3725 (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
3726
3729
3732
3735
3738
3741
3742 } break;
3743#endif // KMP_USE_ADAPTIVE_LOCKS
3744
3745 case lk_drdpa: {
3748
3751
3755 } else {
3756 KMP_BIND_USER_LOCK(drdpa);
3758 }
3759
3762
3765
3768
3771
3774
3777 } break;
3778 }
3779}
3780
3781// ----------------------------------------------------------------------------
3782// User lock table & lock allocation
3783
3786
3787// Lock block-allocation support.
3789int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3790
3792 // Assume that kmp_global_lock is held upon entry/exit.
3793 kmp_lock_index_t index;
3794 if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3796 kmp_user_lock_p *table;
3797 // Reallocate lock table.
3798 if (__kmp_user_lock_table.allocated == 0) {
3799 size = 1024;
3800 } else {
3801 size = __kmp_user_lock_table.allocated * 2;
3802 }
3803 table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3804 KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3805 sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3806 table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3807 // We cannot free the previous table now, since it may be in use by other
3808 // threads. So save the pointer to the previous table in the first
3809 // element of the new table. All the tables will be organized into a list,
3810 // and can be freed when the library is shutting down.
3811 __kmp_user_lock_table.table = table;
3812 __kmp_user_lock_table.allocated = size;
3813 }
3815 __kmp_user_lock_table.allocated);
3816 index = __kmp_user_lock_table.used;
3817 __kmp_user_lock_table.table[index] = lck;
3818 ++__kmp_user_lock_table.used;
3819 return index;
3820}
3821
3823 // Assume that kmp_global_lock is held upon entry/exit.
3824 static int last_index = 0;
3825 if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3826 // Restart the index.
3827 last_index = 0;
3828 // Need to allocate a new block.
3830 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3831 char *buffer =
3832 (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3833 // Set up the new block.
3834 kmp_block_of_locks *new_block =
3835 (kmp_block_of_locks *)(&buffer[space_for_locks]);
3836 new_block->next_block = __kmp_lock_blocks;
3837 new_block->locks = (void *)buffer;
3838 // Publish the new block.
3839 KMP_MB();
3840 __kmp_lock_blocks = new_block;
3841 }
3843 ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3844 last_index++;
3845 return ret;
3846}
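// Illustrative sketch (not part of the original file): each block is one
// allocation laid out as [__kmp_num_locks_in_block locks][kmp_block_of_locks],
// so the bookkeeping header sits just past the lock storage it describes. The
// helper below only computes that layout; the name is hypothetical.
static inline size_t __kmp_example_lock_block_bytes(size_t lock_size,
                                                    int locks_per_block) {
  return lock_size * locks_per_block + sizeof(kmp_block_of_locks);
}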
3847
3848// Get memory for a lock. It may be freshly allocated memory or reused memory
3849// from lock pool.
3851 kmp_lock_flags_t flags) {
3853 kmp_lock_index_t index;
3854 KMP_DEBUG_ASSERT(user_lock);
3855
3857
3858 if (__kmp_lock_pool == NULL) {
3859 // Lock pool is empty. Allocate new memory.
3860
3861 if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3863 } else {
3865 }
3866
3867 // Insert lock in the table so that it can be freed in __kmp_cleanup,
3868 // and debugger has info on all allocated locks.
3870 } else {
3871 // Pick up lock from pool.
3873 index = __kmp_lock_pool->pool.index;
3874 __kmp_lock_pool = __kmp_lock_pool->pool.next;
3875 }
3876
3877 // We could potentially differentiate between nested and regular locks
3878 // here, and do the lock table lookup for regular locks only.
3879 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3880 *((kmp_lock_index_t *)user_lock) = index;
3881 } else {
3882 *((kmp_user_lock_p *)user_lock) = lck;
3883 }
3884
3885 // mark the lock if it is critical section lock.
3887
3888 __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper
3889
3890 return lck;
3891}
3892
3893// Put the lock's memory back into the pool for reuse.
3894void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3896 KMP_DEBUG_ASSERT(user_lock != NULL);
3897 KMP_DEBUG_ASSERT(lck != NULL);
3898
3900
3901 lck->pool.next = __kmp_lock_pool;
3903 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3904 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3905 KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3906 lck->pool.index = index;
3907 }
3908
3910}
3911
3912kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3913 kmp_user_lock_p lck = NULL;
3914
3916 if (user_lock == NULL) {
3917 KMP_FATAL(LockIsUninitialized, func);
3918 }
3919 }
3920
3921 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3922 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3924 if (!(0 < index && index < __kmp_user_lock_table.used)) {
3925 KMP_FATAL(LockIsUninitialized, func);
3926 }
3927 }
3928 KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3930 lck = __kmp_user_lock_table.table[index];
3931 } else {
3932 lck = *((kmp_user_lock_p *)user_lock);
3933 }
3934
3936 if (lck == NULL) {
3937 KMP_FATAL(LockIsUninitialized, func);
3938 }
3939 }
3940
3941 return lck;
3942}
3943
3945 // Reset lock pool. Don't worry about locks in the pool--we will free them when
3946 // iterating through the lock table (it includes all the locks, dead or alive).
3947 __kmp_lock_pool = NULL;
3948
3949#define IS_CRITICAL(lck) \
3950 ((__kmp_get_user_lock_flags_ != NULL) && \
3951 ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3952
3953 // Loop through lock table, free all locks.
3954 // Do not free item [0], it is reserved for lock tables list.
3955 //
3956 // FIXME - we are iterating through a list of (pointers to) objects of type
3957 // union kmp_user_lock, but we have no way of knowing whether the base type is
3958 // currently "pool" or whatever the global user lock type is.
3959 //
3960 // We are relying on the fact that for all of the user lock types
3961 // (except "tas"), the first field in the lock struct is the "initialized"
3962 // field, which is set to the address of the lock object itself when
3963 // the lock is initialized. When the union is of type "pool", the
3964 // first field is a pointer to the next object in the free list, which
3965 // will not be the same address as the object itself.
3966 //
3967 // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3968 // for "pool" objects on the free list. This must happen as the "location"
3969 // field of real user locks overlaps the "index" field of "pool" objects.
3970 //
3971 // It would be better to run through the free list, and remove all "pool"
3972 // objects from the lock table before executing this loop. However,
3973 // "pool" objects do not always have their index field set (only on
3974 // lin_32e), and I don't want to search the lock table for the address
3975 // of every "pool" object on the free list.
3976 while (__kmp_user_lock_table.used > 1) {
3977 const ident *loc;
3978
3979 // reduce __kmp_user_lock_table.used before freeing the lock,
3980    // so that the state of locks is consistent
3981    kmp_user_lock_p lck =
3982        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];
3983
3984    if ((__kmp_is_user_lock_initialized_ != NULL) &&
3985        (*__kmp_is_user_lock_initialized_)(lck)) {
3986      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
3987 // it is NOT a critical section (user is not responsible for destroying
3988      // criticals) AND we know the source location to report.
3989      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
3990          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
3991 (loc->psource != NULL)) {
3992 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
3993 KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
3994 __kmp_str_loc_free(&str_loc);
3995 }
3996
3997#ifdef KMP_DEBUG
3998 if (IS_CRITICAL(lck)) {
3999 KA_TRACE(
4000 20,
4001 ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
4002 lck, *(void **)lck));
4003 } else {
4004 KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
4005 *(void **)lck));
4006 }
4007#endif // KMP_DEBUG
4008
4009      // Cleanup internal lock dynamic resources (particularly for drdpa locks).
4010      __kmp_destroy_user_lock(lck);
4011    }
4012
4013 // Free the lock if block allocation of locks is not used.
4014 if (__kmp_lock_blocks == NULL) {
4015 __kmp_free(lck);
4016 }
4017 }
4018
4019#undef IS_CRITICAL
4020
4021 // delete lock table(s).
4022 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4023 __kmp_user_lock_table.table = NULL;
4024 __kmp_user_lock_table.allocated = 0;
4025
4026 while (table_ptr != NULL) {
4027 // In the first element we saved the pointer to the previous
4028 // (smaller) lock table.
4029 kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
4030 __kmp_free(table_ptr);
4031 table_ptr = next;
4032 }
4033
4034  // Free buffers allocated for blocks of locks.
4035  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4036  __kmp_lock_blocks = NULL;
4037
4038 while (block_ptr != NULL) {
4039 kmp_block_of_locks_t *next = block_ptr->next_block;
4040 __kmp_free(block_ptr->locks);
4041 // *block_ptr itself was allocated at the end of the locks vector.
4042 block_ptr = next;
4043 }
4044
4045  TCW_4(__kmp_init_user_locks, FALSE);
4046}
4047
4048#endif // KMP_USE_DYNAMIC_LOCK
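The long comment in __kmp_cleanup_user_locks relies on a layout property of the user lock union: an initialized lock stores its own address in its first ("initialized") field, while a pooled entry stores a free-list pointer there, which is never the object's own address. A minimal standalone sketch of that discrimination trick, using a hypothetical demo_lock union rather than the runtime's types:

#include <assert.h>

/* Hypothetical union mirroring the "initialized" / "pool.next" overlap. */
typedef union demo_lock demo_lock_t;
union demo_lock {
  struct {
    demo_lock_t *initialized; /* set to the lock's own address on init */
  } lk;
  struct {
    demo_lock_t *next; /* free-list link while the object sits in the pool */
  } pool;
};

static void demo_init(demo_lock_t *lck) { lck->lk.initialized = lck; }

static int demo_is_initialized(const demo_lock_t *lck) {
  /* A pooled object points at some other element (or NULL), never at itself. */
  return lck->lk.initialized == lck;
}

int main(void) {
  demo_lock_t a, b;
  demo_init(&a);
  assert(demo_is_initialized(&a));

  /* Return b to a free list headed by a: its first word now holds pool.next. */
  b.pool.next = &a;
  assert(!demo_is_initialized(&b));
  return 0;
}

Because the two union members occupy the same first word, reading lk.initialized after writing pool.next observes the free-list pointer, which is exactly the overlap the cleanup comment describes (and why the initialized check deliberately fails for pooled entries).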