// LLVM OpenMP
// kmp_alloc.cpp
// (Documentation-site navigation text removed: "Go to the documentation of this file.")
1/*
2 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp.h"
14#include "kmp_io.h"
15#include "kmp_wrapper_malloc.h"
16
17#if KMP_HWLOC_ENABLED
18#if HWLOC_API_VERSION > 0x00020300
19#define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
20#elif HWLOC_API_VERSION == 0x00020300
21#define KMP_HWLOC_LOCATION_TYPE_CPUSET \
22 hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
23#else
// Fallback for hwloc versions older than 2.3 that lack the memory-attribute
// API: declare just the attribute ids this file references so the code
// compiles; presumably they are used only as opaque selectors — TODO confirm.
enum hwloc_memattr_id_e {
  HWLOC_MEMATTR_ID_BANDWIDTH,
  HWLOC_MEMATTR_ID_CAPACITY
};
28#endif
29#endif // KMP_HWLOC_ENABLED
30
31// Disable bget when it is not used
32#if KMP_USE_BGET
33
34/* Thread private buffer management code */
35
36typedef int (*bget_compact_t)(size_t, int);
37typedef void *(*bget_acquire_t)(size_t);
38typedef void (*bget_release_t)(void *);
39
40/* NOTE: bufsize must be a signed datatype */
41
42#if KMP_OS_WINDOWS
43#if KMP_ARCH_X86 || KMP_ARCH_ARM
44typedef kmp_int32 bufsize;
45#else
46typedef kmp_int64 bufsize;
47#endif
48#else
49typedef ssize_t bufsize;
50#endif // KMP_OS_WINDOWS
51
52/* The three modes of operation are, fifo search, lifo search, and best-fit */
53
54typedef enum bget_mode {
55 bget_mode_fifo = 0,
56 bget_mode_lifo = 1,
57 bget_mode_best = 2
58} bget_mode_t;
59
60static void bpool(kmp_info_t *th, void *buffer, bufsize len);
61static void *bget(kmp_info_t *th, bufsize size);
62static void *bgetz(kmp_info_t *th, bufsize size);
63static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
64static void brel(kmp_info_t *th, void *buf);
65static void bectl(kmp_info_t *th, bget_compact_t compact,
66 bget_acquire_t acquire, bget_release_t release,
67 bufsize pool_incr);
68
69/* BGET CONFIGURATION */
70/* Buffer allocation size quantum: all buffers allocated are a
71 multiple of this size. This MUST be a power of two. */
72
73/* On some architectures, malloc() does not ensure 16 byte alignment,
74 Solaris/sparc and x86 among them. */
75
76#if KMP_ARCH_X86 || KMP_ARCH_SPARC || !KMP_HAVE_QUAD
77
78#define SizeQuant 8
79#define AlignType double
80
81#else
82
83#define SizeQuant 16
84#define AlignType _Quad
85
86#endif
87
88// Define this symbol to enable the bstats() function which calculates the
89// total free space in the buffer pool, the largest available buffer, and the
90// total space currently allocated.
91#define BufStats 1
92
93#ifdef KMP_DEBUG
94
95// Define this symbol to enable the bpoold() function which dumps the buffers
96// in a buffer pool.
97#define BufDump 1
98
99// Define this symbol to enable the bpoolv() function for validating a buffer
100// pool.
101#define BufValid 1
102
103// Define this symbol to enable the bufdump() function which allows dumping the
104// contents of an allocated or free buffer.
105#define DumpData 1
106
107#ifdef NOT_USED_NOW
108
109// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
110// who attempt to use pointers into released buffers.
111#define FreeWipe 1
112
113// Use a best fit algorithm when searching for space for an allocation request.
114// This uses memory more efficiently, but allocation will be much slower.
115#define BestFit 1
116
117#endif /* NOT_USED_NOW */
118#endif /* KMP_DEBUG */
119
120static bufsize bget_bin_size[] = {
121 0,
122 // 1 << 6, /* .5 Cache line */
123 1 << 7, /* 1 Cache line, new */
124 1 << 8, /* 2 Cache lines */
125 1 << 9, /* 4 Cache lines, new */
126 1 << 10, /* 8 Cache lines */
127 1 << 11, /* 16 Cache lines, new */
128 1 << 12, 1 << 13, /* new */
129 1 << 14, 1 << 15, /* new */
130 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
131 1 << 21, /* 2MB */
132 1 << 22, /* 4MB */
133 1 << 23, /* 8MB */
134 1 << 24, /* 16MB */
135 1 << 25, /* 32MB */
136};
137
138#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
139
140struct bfhead;
141
142// Declare the interface, including the requested buffer size type, bufsize.
143
144/* Queue links */
145typedef struct qlinks {
146 struct bfhead *flink; /* Forward link */
147 struct bfhead *blink; /* Backward link */
148} qlinks_t;
149
150/* Header in allocated and free buffers */
151typedef struct bhead2 {
152 kmp_info_t *bthr; /* The thread which owns the buffer pool */
153 bufsize prevfree; /* Relative link back to previous free buffer in memory or
154 0 if previous buffer is allocated. */
155 bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
156} bhead2_t;
157
158/* Make sure the bhead structure is a multiple of SizeQuant in size. */
159typedef union bhead {
160 KMP_ALIGN(SizeQuant)
161 AlignType b_align;
162 char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
163 bhead2_t bb;
164} bhead_t;
165#define BH(p) ((bhead_t *)(p))
166
167/* Header in directly allocated buffers (by acqfcn) */
168typedef struct bdhead {
169 bufsize tsize; /* Total size, including overhead */
170 bhead_t bh; /* Common header */
171} bdhead_t;
172#define BDH(p) ((bdhead_t *)(p))
173
174/* Header in free buffers */
175typedef struct bfhead {
176 bhead_t bh; /* Common allocated/free header */
177 qlinks_t ql; /* Links on free list */
178} bfhead_t;
179#define BFH(p) ((bfhead_t *)(p))
180
181typedef struct thr_data {
182 bfhead_t freelist[MAX_BGET_BINS];
183#if BufStats
184 size_t totalloc; /* Total space currently allocated */
185 long numget, numrel; /* Number of bget() and brel() calls */
186 long numpblk; /* Number of pool blocks */
187 long numpget, numprel; /* Number of block gets and rels */
188 long numdget, numdrel; /* Number of direct gets and rels */
189#endif /* BufStats */
190
191 /* Automatic expansion block management functions */
192 bget_compact_t compfcn;
193 bget_acquire_t acqfcn;
194 bget_release_t relfcn;
195
196 bget_mode_t mode; /* what allocation mode to use? */
197
198 bufsize exp_incr; /* Expansion block size */
199 bufsize pool_len; /* 0: no bpool calls have been made
200 -1: not all pool blocks are the same size
201 >0: (common) block size for all bpool calls made so far
202 */
203 bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
204} thr_data_t;
205
206/* Minimum allocation quantum: */
207#define QLSize (sizeof(qlinks_t))
208#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
209#define MaxSize \
210 (bufsize)( \
211 ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
212// Maximum for the requested size.
213
214/* End sentinel: value placed in bsize field of dummy block delimiting
215 end of pool block. The most negative number which will fit in a
216 bufsize, defined in a way that the compiler will accept. */
217
218#define ESent \
219 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
220
221/* Thread Data management routines */
222static int bget_get_bin(bufsize size) {
223 // binary chop bins
224 int lo = 0, hi = MAX_BGET_BINS - 1;
225
227
228 while ((hi - lo) > 1) {
229 int mid = (lo + hi) >> 1;
230 if (size < bget_bin_size[mid])
231 hi = mid - 1;
232 else
233 lo = mid;
234 }
235
236 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
237
238 return lo;
239}
240
/* Allocate (on first call) or reuse this thread's bget bookkeeping block,
   zero it, reset every freelist bin to an empty self-linked ring, clear the
   deferred-release list, and initialize the cross-thread release lock when
   the compare-and-swap path is not in use. */
static void set_thr_data(kmp_info_t *th) {
  int i;
  thr_data_t *data;

  // Reuse an existing bookkeeping block if the thread already has one;
  // otherwise allocate a fresh one.
  data = (thr_data_t *)((!th->th.th_local.bget_data)
                            ? __kmp_allocate(sizeof(*data))
                            : th->th.th_local.bget_data);

  memset(data, '\0', sizeof(*data));

  // An empty bin is a doubly-linked ring whose head points at itself.
  for (i = 0; i < MAX_BGET_BINS; ++i) {
    data->freelist[i].ql.flink = &data->freelist[i];
    data->freelist[i].ql.blink = &data->freelist[i];
  }

  th->th.th_local.bget_data = data;
  th->th.th_local.bget_list = 0; // no buffers queued for deferred release
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_init_lock(&th->th.th_local.bget_lock);
#else
  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}
266
267static thr_data_t *get_thr_data(kmp_info_t *th) {
268 thr_data_t *data;
269
270 data = (thr_data_t *)th->th.th_local.bget_data;
271
273
274 return data;
275}
276
277/* Walk the free list and release the enqueued buffers */
278static void __kmp_bget_dequeue(kmp_info_t *th) {
279 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
280
281 if (p != 0) {
282#if USE_CMP_XCHG_FOR_BGET
283 {
284 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
285 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
286 CCAST(void *, old_value), nullptr)) {
288 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
289 }
290 p = CCAST(void *, old_value);
291 }
292#else /* ! USE_CMP_XCHG_FOR_BGET */
293#ifdef USE_QUEUING_LOCK_FOR_BGET
294 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
295#else
296 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
297#endif /* USE_QUEUING_LOCK_FOR_BGET */
298
299 p = (void *)th->th.th_local.bget_list;
300 th->th.th_local.bget_list = 0;
301
302#ifdef USE_QUEUING_LOCK_FOR_BGET
303 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
304#else
305 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
306#endif
307#endif /* USE_CMP_XCHG_FOR_BGET */
308
309 /* Check again to make sure the list is not empty */
310 while (p != 0) {
311 void *buf = p;
312 bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
313
314 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
315 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
316 (kmp_uintptr_t)th); // clear possible mark
317 KMP_DEBUG_ASSERT(b->ql.blink == 0);
318
319 p = (void *)b->ql.flink;
320
321 brel(th, buf);
322 }
323 }
324}
325
326/* Chain together the free buffers by using the thread owner field */
327static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
328#ifdef USE_QUEUING_LOCK_FOR_BGET
329 ,
330 kmp_int32 rel_gtid
331#endif
332) {
333 bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
334
335 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
336 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
337 (kmp_uintptr_t)th); // clear possible mark
338
339 b->ql.blink = 0;
340
341 KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
343
344#if USE_CMP_XCHG_FOR_BGET
345 {
346 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
347 /* the next pointer must be set before setting bget_list to buf to avoid
348 exposing a broken list to other threads, even for an instant. */
349 b->ql.flink = BFH(CCAST(void *, old_value));
350
351 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
352 CCAST(void *, old_value), buf)) {
354 old_value = TCR_PTR(th->th.th_local.bget_list);
355 /* the next pointer must be set before setting bget_list to buf to avoid
356 exposing a broken list to other threads, even for an instant. */
357 b->ql.flink = BFH(CCAST(void *, old_value));
358 }
359 }
360#else /* ! USE_CMP_XCHG_FOR_BGET */
361#ifdef USE_QUEUING_LOCK_FOR_BGET
362 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
363#else
364 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
365#endif
366
367 b->ql.flink = BFH(th->th.th_local.bget_list);
368 th->th.th_local.bget_list = (void *)buf;
369
370#ifdef USE_QUEUING_LOCK_FOR_BGET
371 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
372#else
373 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
374#endif
375#endif /* USE_CMP_XCHG_FOR_BGET */
376}
377
/* insert buffer back onto a new freelist */
/* Append free buffer b at the tail of the freelist bin matching its size.
   b must be SizeQuant-aligned and carry a SizeQuant-multiple (positive)
   size.  Link order matters: b's links are set before the ring is touched. */
static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
  int bin;

  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);

  bin = bget_get_bin(b->bh.bb.bsize);

  // The target bin head must currently be a consistent doubly-linked ring.
  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
                   &thr->freelist[bin]);
  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
                   &thr->freelist[bin]);

  // Splice b in just before the bin head (i.e. at the tail of the ring).
  b->ql.flink = &thr->freelist[bin];
  b->ql.blink = thr->freelist[bin].ql.blink;

  thr->freelist[bin].ql.blink = b;
  b->ql.blink->ql.flink = b;
}
398
399/* unlink the buffer from the old freelist */
400static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
401 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
402 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
403
404 b->ql.blink->ql.flink = b->ql.flink;
405 b->ql.flink->ql.blink = b->ql.blink;
406}
407
/* GET STATS -- check info on free list */
/* Walk every freelist bin and report, through the out-parameters, the total
   free space and the size of the largest single free buffer (both net of
   the bhead_t header overhead). */
static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
  thr_data_t *thr = get_thr_data(th);
  int bin;

  *total_free = *max_free = 0;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b, *best;

    // NOTE: despite the name, "best" tracks the SMALLEST buffer in the bin
    // (mirrors the best-fit scan in bget()); max_free is updated from it
    // below, so max_free ends up as the largest per-bin minimum.
    best = &thr->freelist[bin];
    b = best->ql.flink;

    while (b != &thr->freelist[bin]) {
      *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
      if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
        best = b;

      /* Link to next buffer */
      b = b->ql.flink;
    }

    // For an empty bin "best" is the dummy ring head; its bsize field is
    // whatever the zeroed header holds, so the comparison is benign.
    if (*max_free < best->bh.bb.bsize)
      *max_free = best->bh.bb.bsize;
  }

  // Report the usable size (exclude the header) of the largest free buffer.
  if (*max_free > (bufsize)sizeof(bhead_t))
    *max_free -= sizeof(bhead_t);
}
437
/* BGET -- Allocate a buffer. */
/* Allocate requested_size usable bytes from thread th's pool.
   Strategy: drain the deferred-release queue, round the request up to
   SizeQuant (at least SizeQ for the freelist links) plus header overhead,
   then search the freelist bins (FIFO, LIFO or best-fit per thr->mode),
   optionally retrying after the user compaction callback.  On miss, fall
   back to the acquire callback: either a direct oversized allocation (marked
   with bsize == 0) or a new expansion block followed by one recursive retry.
   Returns NULL when size is invalid or no memory can be obtained. */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b;
  void *buf;
  int compactseq = 0; /* sequence number passed to the compact callback */
  int use_blink = 0; /* nonzero => walk bins backwards (LIFO mode) */
  /* For BestFit */
  bfhead_t *best;

  // Reject negative sizes and requests that would overflow once the header
  // is added.
  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
    size = SizeQ;
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  // Round up to the allocation quantum (SizeQuant is a power of two).
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);

  use_blink = (thr->mode == bget_mode_lifo);

  /* If a compact function was provided in the call to bectl(), wrap
     a loop around the allocation process to allow compaction to
     intervene in case we don't find a suitable buffer in the chain. */

  for (;;) {
    int bin;

    for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
      /* Link to next buffer */
      b = (use_blink ? thr->freelist[bin].ql.blink
                     : thr->freelist[bin].ql.flink);

      if (thr->mode == bget_mode_best) {
        best = &thr->freelist[bin];

        /* Scan the free list searching for the first buffer big enough
           to hold the requested size buffer. */
        while (b != &thr->freelist[bin]) {
          if (b->bh.bb.bsize >= (bufsize)size) {
            if ((best == &thr->freelist[bin]) ||
                (b->bh.bb.bsize < best->bh.bb.bsize)) {
              best = b;
            }
          }

          /* Link to next buffer */
          b = (use_blink ? b->ql.blink : b->ql.flink);
        }
        b = best;
      }

      while (b != &thr->freelist[bin]) {
        if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {

          // Buffer is big enough to satisfy the request. Allocate it to the
          // caller. We must decide whether the buffer is large enough to split
          // into the part given to the caller and a free buffer that remains
          // on the free list, or whether the entire buffer should be removed
          // from the free list and given to the caller in its entirety. We
          // only split the buffer if enough room remains for a header plus the
          // minimum quantum of allocation.
          if ((b->bh.bb.bsize - (bufsize)size) >
              (bufsize)(SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            // Carve the allocation from the HIGH end of the free block; ba
            // is the new allocated header, bn the block following it.
            ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
            bn = BH(((char *)ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize)size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr,
                    th); // not an allocated address (do not mark it)
            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            // unlink buffer from old freelist, and reinsert into new freelist
            __kmp_bget_remove_from_freelist(b);
            __kmp_bget_insert_into_freelist(thr, b);
#if BufStats
            thr->totalloc += (size_t)size;
            thr->numget++; /* Increment number of bget() calls */
#endif
            buf = (void *)((((char *)ba) + sizeof(bhead_t)));
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          } else {
            bhead_t *ba;

            ba = BH(((char *)b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split. Give the whole
               shebang to the caller and remove it from the free list. */

            __kmp_bget_remove_from_freelist(b);
#if BufStats
            thr->totalloc += (size_t)b->bh.bb.bsize;
            thr->numget++; /* Increment number of bget() calls */
#endif
            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf = (void *)&(b->ql);
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          }
        }

        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
    }

    /* We failed to find a buffer. If there's a compact function defined,
       notify it of the size requested. If it returns TRUE, try the allocation
       again. */

    if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
      break;
    }
  }

  /* No buffer available with requested size free. */

  /* Don't give up yet -- look in the reserve supply. */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
      bdhead_t *bdh;

      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      /* richryan */
      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {

        // Mark the buffer special by setting size field of its header to zero.
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
        // because direct buffer never goes to free list
        bdh->bh.bb.prevfree = 0;
        bdh->tsize = size;
#if BufStats
        thr->totalloc += (size_t)size;
        thr->numget++; /* Increment number of bget() calls */
        thr->numdget++; /* Direct bget() call count */
#endif
        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      }

    } else {

      /* Try to obtain a new expansion block */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      /* richryan */
      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        bpool(th, newpool, thr->exp_incr);
        buf = bget(
            th, requested_size); /* This can't, I say, can't get into a loop. */
        return buf;
      }
    }
  }

  /* Still no buffer available */

  return NULL;
}
646
647/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
648 the entire contents of the buffer to zero, not just the
649 region requested by the caller. */
650
651static void *bgetz(kmp_info_t *th, bufsize size) {
652 char *buf = (char *)bget(th, size);
653
654 if (buf != NULL) {
655 bhead_t *b;
656 bufsize rsize;
657
658 b = BH(buf - sizeof(bhead_t));
659 rsize = -(b->bb.bsize);
660 if (rsize == 0) {
661 bdhead_t *bd;
662
663 bd = BDH(buf - sizeof(bdhead_t));
664 rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
665 } else {
666 rsize -= sizeof(bhead_t);
667 }
668
669 KMP_DEBUG_ASSERT(rsize >= size);
670
671 (void)memset(buf, 0, (bufsize)rsize);
672 }
673 return ((void *)buf);
674}
675
676/* BGETR -- Reallocate a buffer. This is a minimal implementation,
677 simply in terms of brel() and bget(). It could be
678 enhanced to allow the buffer to grow into adjacent free
679 blocks and to avoid moving data unnecessarily. */
680
681static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
682 void *nbuf;
683 bufsize osize; /* Old size of buffer */
684 bhead_t *b;
685
686 nbuf = bget(th, size);
687 if (nbuf == NULL) { /* Acquire new buffer */
688 return NULL;
689 }
690 if (buf == NULL) {
691 return nbuf;
692 }
693 b = BH(((char *)buf) - sizeof(bhead_t));
694 osize = -b->bb.bsize;
695 if (osize == 0) {
696 /* Buffer acquired directly through acqfcn. */
697 bdhead_t *bd;
698
699 bd = BDH(((char *)buf) - sizeof(bdhead_t));
700 osize = bd->tsize - (bufsize)sizeof(bdhead_t);
701 } else {
702 osize -= sizeof(bhead_t);
703 }
704
705 KMP_DEBUG_ASSERT(osize > 0);
706
707 (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
708 (size_t)((size < osize) ? size : osize));
709 brel(th, buf);
710
711 return nbuf;
712}
713
714/* BREL -- Release a buffer. */
715static void brel(kmp_info_t *th, void *buf) {
716 thr_data_t *thr = get_thr_data(th);
717 bfhead_t *b, *bn;
718 kmp_info_t *bth;
719
720 KMP_DEBUG_ASSERT(buf != NULL);
721 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
722
723 b = BFH(((char *)buf) - sizeof(bhead_t));
724
725 if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
726 bdhead_t *bdh;
727
728 bdh = BDH(((char *)buf) - sizeof(bdhead_t));
729 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
730#if BufStats
731 thr->totalloc -= (size_t)bdh->tsize;
732 thr->numdrel++; /* Number of direct releases */
733 thr->numrel++; /* Increment number of brel() calls */
734#endif /* BufStats */
735#ifdef FreeWipe
736 (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
737#endif /* FreeWipe */
738
739 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
740
741 KMP_DEBUG_ASSERT(thr->relfcn != 0);
742 (*thr->relfcn)((void *)bdh); /* Release it directly. */
743 return;
744 }
745
746 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
747 ~1); // clear possible mark before comparison
748 if (bth != th) {
749 /* Add this buffer to be released by the owning thread later */
750 __kmp_bget_enqueue(bth, buf
751#ifdef USE_QUEUING_LOCK_FOR_BGET
752 ,
754#endif
755 );
756 return;
757 }
758
759 /* Buffer size must be negative, indicating that the buffer is allocated. */
760 if (b->bh.bb.bsize >= 0) {
761 bn = NULL;
762 }
763 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
764
765 /* Back pointer in next buffer must be zero, indicating the same thing: */
766
767 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
768
769#if BufStats
770 thr->numrel++; /* Increment number of brel() calls */
771 thr->totalloc += (size_t)b->bh.bb.bsize;
772#endif
773
774 /* If the back link is nonzero, the previous buffer is free. */
775
776 if (b->bh.bb.prevfree != 0) {
777 /* The previous buffer is free. Consolidate this buffer with it by adding
778 the length of this buffer to the previous free buffer. Note that we
779 subtract the size in the buffer being released, since it's negative to
780 indicate that the buffer is allocated. */
781 bufsize size = b->bh.bb.bsize;
782
783 /* Make the previous buffer the one we're working on. */
784 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
785 b->bh.bb.prevfree);
786 b = BFH(((char *)b) - b->bh.bb.prevfree);
787 b->bh.bb.bsize -= size;
788
789 /* unlink the buffer from the old freelist */
790 __kmp_bget_remove_from_freelist(b);
791 } else {
792 /* The previous buffer isn't allocated. Mark this buffer size as positive
793 (i.e. free) and fall through to place the buffer on the free list as an
794 isolated free block. */
795 b->bh.bb.bsize = -b->bh.bb.bsize;
796 }
797
798 /* insert buffer back onto a new freelist */
799 __kmp_bget_insert_into_freelist(thr, b);
800
801 /* Now we look at the next buffer in memory, located by advancing from
802 the start of this buffer by its size, to see if that buffer is
803 free. If it is, we combine this buffer with the next one in
804 memory, dechaining the second buffer from the free list. */
805 bn = BFH(((char *)b) + b->bh.bb.bsize);
806 if (bn->bh.bb.bsize > 0) {
807
808 /* The buffer is free. Remove it from the free list and add
809 its size to that of our buffer. */
810 KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
811 bn->bh.bb.bsize);
812
813 __kmp_bget_remove_from_freelist(bn);
814
815 b->bh.bb.bsize += bn->bh.bb.bsize;
816
817 /* unlink the buffer from the old freelist, and reinsert it into the new
818 * freelist */
819 __kmp_bget_remove_from_freelist(b);
820 __kmp_bget_insert_into_freelist(thr, b);
821
822 /* Finally, advance to the buffer that follows the newly
823 consolidated free block. We must set its backpointer to the
824 head of the consolidated free block. We know the next block
825 must be an allocated block because the process of recombination
826 guarantees that two free blocks will never be contiguous in
827 memory. */
828 bn = BFH(((char *)b) + b->bh.bb.bsize);
829 }
830#ifdef FreeWipe
831 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
832 (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
833#endif
834 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
835
836 /* The next buffer is allocated. Set the backpointer in it to point
837 to this buffer; the previous free buffer in memory. */
838
839 bn->bh.bb.prevfree = b->bh.bb.bsize;
840
841 /* If a block-release function is defined, and this free buffer
842 constitutes the entire block, release it. Note that pool_len
843 is defined in such a way that the test will fail unless all
844 pool blocks are the same size. */
845 if (thr->relfcn != 0 &&
846 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
847#if BufStats
848 if (thr->numpblk !=
849 1) { /* Do not release the last buffer until finalization time */
850#endif
851
852 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
853 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
854 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
855 b->bh.bb.bsize);
856
857 /* Unlink the buffer from the free list */
858 __kmp_bget_remove_from_freelist(b);
859
860 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
861
862 (*thr->relfcn)(b);
863#if BufStats
864 thr->numprel++; /* Nr of expansion block releases */
865 thr->numpblk--; /* Total number of blocks */
866 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
867
868 // avoid leaving stale last_pool pointer around if it is being dealloced
869 if (thr->last_pool == b)
870 thr->last_pool = 0;
871 } else {
872 thr->last_pool = b;
873 }
874#endif /* BufStats */
875 }
876}
877
878/* BECTL -- Establish automatic pool expansion control */
879static void bectl(kmp_info_t *th, bget_compact_t compact,
880 bget_acquire_t acquire, bget_release_t release,
881 bufsize pool_incr) {
882 thr_data_t *thr = get_thr_data(th);
883
884 thr->compfcn = compact;
885 thr->acqfcn = acquire;
886 thr->relfcn = release;
887 thr->exp_incr = pool_incr;
888}
889
/* BPOOL -- Add a region of memory to the buffer pool. */
/* Turn [buf, buf+len) into one large free buffer on thread th's freelists:
   round len down to the allocation quantum, track whether all pool blocks
   share one size (pool_len), and bracket the region with a zero prevfree at
   the front and an ESent dummy "allocated" header at the back so coalescing
   never walks off either end. */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  /* int bin = 0; */
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

#ifdef SizeQuant
  len &= ~((bufsize)(SizeQuant - 1)); // round down to the quantum
#endif
  // pool_len: 0 = no pools yet; -1 = mixed sizes; >0 = common size.
  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }
#if BufStats
  thr->numpget++; /* Number of block acquisitions */
  thr->numpblk++; /* Number of blocks total */
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

  /* Since the block is initially occupied by a single free buffer,
     it had better not be (much) larger than the largest buffer
     whose size we can store in bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear the backpointer at the start of the block to indicate that
     there is no free block prior to this one. That blocks
     recombination when the first block in memory is released. */
  b->bh.bb.prevfree = 0;

  /* Create a dummy allocated buffer at the end of the pool. This dummy
     buffer is seen when a buffer at the end of the pool is released and
     blocks recombination of the last buffer with the dummy buffer at
     the end. The length in the dummy buffer is set to the largest
     negative number to denote the end of the pool for diagnostic
     routines (this specific value is not counted on by the actual
     allocation and release functions). */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Set the owner of this buffer */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1)); // mark the buffer as allocated address

  /* Chain the new block to the free list. */
  __kmp_bget_insert_into_freelist(thr, b);

#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));
#endif
  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}
951
952/* BFREED -- Dump the free lists for this thread. */
953static void bfreed(kmp_info_t *th) {
954 int bin = 0, count = 0;
955 int gtid = __kmp_gtid_from_thread(th);
956 thr_data_t *thr = get_thr_data(th);
957
958#if BufStats
959 __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
960 " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
961 " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
962 " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
963 " drel=%" KMP_INT64_SPEC "\n",
964 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
965 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
966 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
967 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
968#endif
969
970 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
971 bfhead_t *b;
972
973 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
974 b = b->ql.flink) {
975 bufsize bs = b->bh.bb.bsize;
976
977 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
978 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
979 KMP_DEBUG_ASSERT(bs > 0);
980
981 count += 1;
982
984 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
985 (long)bs);
986#ifdef FreeWipe
987 {
988 char *lerr = ((char *)b) + sizeof(bfhead_t);
989 if ((bs > sizeof(bfhead_t)) &&
990 ((*lerr != 0x55) ||
991 (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
992 0))) {
993 __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
994 "free block have been overstored.)\n",
995 gtid);
996 }
997 }
998#endif
999 }
1000 }
1001
1002 if (count == 0)
1003 __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
1004}
1005
1007 KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
1008
1009 set_thr_data(th);
1010
1011 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
1012 (bufsize)__kmp_malloc_pool_incr);
1013}
1014
1016 thr_data_t *thr;
1017 bfhead_t *b;
1018
1019 KMP_DEBUG_ASSERT(th != 0);
1020
1021#if BufStats
1022 thr = (thr_data_t *)th->th.th_local.bget_data;
1023 KMP_DEBUG_ASSERT(thr != NULL);
1024 b = thr->last_pool;
1025
1026 /* If a block-release function is defined, and this free buffer constitutes
1027 the entire block, release it. Note that pool_len is defined in such a way
1028 that the test will fail unless all pool blocks are the same size. */
1029
1030 // Deallocate the last pool if one exists because we no longer do it in brel()
1031 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1032 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
1033 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1034 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1035 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
1036 b->bh.bb.bsize);
1037
1038 /* Unlink the buffer from the free list */
1039 __kmp_bget_remove_from_freelist(b);
1040
1041 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
1042
1043 (*thr->relfcn)(b);
1044 thr->numprel++; /* Nr of expansion block releases */
1045 thr->numpblk--; /* Total number of blocks */
1046 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1047 }
1048#endif /* BufStats */
1049
1050 /* Deallocate bget_data */
1051 if (th->th.th_local.bget_data != NULL) {
1052 __kmp_free(th->th.th_local.bget_data);
1053 th->th.th_local.bget_data = NULL;
1054 }
1055}
1056
1057void kmpc_set_poolsize(size_t size) {
1058 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1059 (bget_release_t)free, (bufsize)size);
1060}
1061
1062size_t kmpc_get_poolsize(void) {
1063 thr_data_t *p;
1064
1065 p = get_thr_data(__kmp_get_thread());
1066
1067 return p->exp_incr;
1068}
1069
1070void kmpc_set_poolmode(int mode) {
1071 thr_data_t *p;
1072
1073 if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1074 mode == bget_mode_best) {
1075 p = get_thr_data(__kmp_get_thread());
1076 p->mode = (bget_mode_t)mode;
1077 }
1078}
1079
1080int kmpc_get_poolmode(void) {
1081 thr_data_t *p;
1082
1083 p = get_thr_data(__kmp_get_thread());
1084
1085 return p->mode;
1086}
1087
// Report pool accounting values for the calling thread; the two results
// from bcheck() are narrowed from bufsize to size_t for the caller.
// NOTE(review): 'th' is the calling thread's kmp_info_t — its declaration
// is not visible in this excerpt; confirm it comes from __kmp_get_thread().
void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
  bufsize a, b;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  // bcheck() writes the two pool statistics into a and b.
  bcheck(th, &a, &b);

  *maxmem = a;
  *allmem = b;
}
1099
// Dump the calling thread's free lists (plus statistics when BufStats is
// enabled) via bfreed(), after draining cross-thread deferred releases.
// NOTE(review): 'th' is the calling thread's kmp_info_t — its declaration
// is not visible in this excerpt; confirm it comes from __kmp_get_thread().
void kmpc_poolprint(void) {

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bfreed(th);
}
1107
1108#endif // #if KMP_USE_BGET
1109
1110void *kmpc_malloc(size_t size) {
1111 void *ptr;
1112 ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1113 if (ptr != NULL) {
1114 // save allocated pointer just before one returned to user
1115 *(void **)ptr = ptr;
1116 ptr = (void **)ptr + 1;
1117 }
1118 return ptr;
1119}
1120
1121#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
1122
1123void *kmpc_aligned_malloc(size_t size, size_t alignment) {
1124 void *ptr;
1125 void *ptr_allocated;
1126 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1127 if (!IS_POWER_OF_TWO(alignment)) {
1128 // AC: do we need to issue a warning here?
1129 errno = EINVAL;
1130 return NULL;
1131 }
1132 size = size + sizeof(void *) + alignment;
1133 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1134 if (ptr_allocated != NULL) {
1135 // save allocated pointer just before one returned to user
1136 ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
1137 ~(alignment - 1));
1138 *((void **)ptr - 1) = ptr_allocated;
1139 } else {
1140 ptr = NULL;
1141 }
1142 return ptr;
1143}
1144
1145void *kmpc_calloc(size_t nelem, size_t elsize) {
1146 void *ptr;
1147 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
1148 if (ptr != NULL) {
1149 // save allocated pointer just before one returned to user
1150 *(void **)ptr = ptr;
1151 ptr = (void **)ptr + 1;
1152 }
1153 return ptr;
1154}
1155
1156void *kmpc_realloc(void *ptr, size_t size) {
1157 void *result = NULL;
1158 if (ptr == NULL) {
1159 // If pointer is NULL, realloc behaves like malloc.
1160 result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1161 // save allocated pointer just before one returned to user
1162 if (result != NULL) {
1163 *(void **)result = result;
1164 result = (void **)result + 1;
1165 }
1166 } else if (size == 0) {
1167 // If size is 0, realloc behaves like free.
1168 // The thread must be registered by the call to kmpc_malloc() or
1169 // kmpc_calloc() before.
1170 // So it should be safe to call __kmp_get_thread(), not
1171 // __kmp_entry_thread().
1172 KMP_ASSERT(*((void **)ptr - 1));
1173 brel(__kmp_get_thread(), *((void **)ptr - 1));
1174 } else {
1175 result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
1176 (bufsize)(size + sizeof(ptr)));
1177 if (result != NULL) {
1178 *(void **)result = result;
1179 result = (void **)result + 1;
1180 }
1181 }
1182 return result;
1183}
1184
// NOTE: the library must have already been initialized by a previous allocate
// Free a block obtained from kmpc_malloc/kmpc_calloc/kmpc_realloc or
// kmpc_aligned_malloc. NULL is ignored; no-op before serial init.
// NOTE(review): 'th' is the calling thread's kmp_info_t — its declaration
// is not visible in this excerpt; confirm it comes from __kmp_get_thread().
void kmpc_free(void *ptr) {
  if (!__kmp_init_serial) {
    return;
  }
  if (ptr != NULL) {
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    // extract allocated pointer and free it
    KMP_ASSERT(*((void **)ptr - 1));
    brel(th, *((void **)ptr - 1));
  }
}
1198
1200 void *ptr;
1201 KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1202 (int)size KMP_SRC_LOC_PARM));
1203 ptr = bget(th, (bufsize)size);
1204 KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
1205 return ptr;
1206}
1207
1208void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
1209 size_t elsize KMP_SRC_LOC_DECL) {
1210 void *ptr;
1211 KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1212 (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
1213 ptr = bgetz(th, (bufsize)(nelem * elsize));
1214 KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
1215 return ptr;
1216}
1217
1219 size_t size KMP_SRC_LOC_DECL) {
1220 KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1221 ptr, (int)size KMP_SRC_LOC_PARM));
1222 ptr = bgetr(th, ptr, (bufsize)size);
1223 KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
1224 return ptr;
1225}
1226
1228 KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1229 ptr KMP_SRC_LOC_PARM));
1230 if (ptr != NULL) {
1231 __kmp_bget_dequeue(th); /* Release any queued buffers */
1232 brel(th, ptr);
1233 }
1234 KE_TRACE(30, ("<- __kmp_thread_free()\n"));
1235}
1236
1237/* OMP 5.0 Memory Management support */
1238/* memkind experimental API: */
1239// memkind_alloc
1240static void *(*kmp_mk_alloc)(void *k, size_t sz);
1241// memkind_free
1242static void (*kmp_mk_free)(void *kind, void *ptr);
1243// kinds we are going to use
1244static void **mk_default;
1245static void **mk_interleave;
1246static void **mk_hbw_interleave;
1247static void **mk_hbw_preferred;
1248static void **mk_dax_kmem;
1249static void **mk_dax_kmem_all;
1250#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1251static const char *kmp_mk_lib_name;
1252static void *h_memkind;
1253// memkind_check_available
1254static int (*kmp_mk_check)(void *kind);
1255static void **mk_hbw;
1256static void **mk_hugetlb;
1257static void **mk_hbw_hugetlb;
1258static void **mk_hbw_preferred_hugetlb;
1259static void **mk_dax_kmem_preferred;
1260#endif
1261static void *(*kmp_target_alloc_host)(size_t size, int device);
1262static void *(*kmp_target_alloc_shared)(size_t size, int device);
1263static void *(*kmp_target_alloc_device)(size_t size, int device);
1264static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device);
1265static void *(*kmp_target_unlock_mem)(void *ptr, int device);
1266static void *(*kmp_target_free_host)(void *ptr, int device);
1267static void *(*kmp_target_free_shared)(void *ptr, int device);
1268static void *(*kmp_target_free_device)(void *ptr, int device);
1270
1271#define KMP_IS_TARGET_MEM_SPACE(MS) \
1272 (MS == llvm_omp_target_host_mem_space || \
1273 MS == llvm_omp_target_shared_mem_space || \
1274 MS == llvm_omp_target_device_mem_space)
1275
1276#define KMP_IS_TARGET_MEM_ALLOC(MA) \
1277 (MA == llvm_omp_target_host_mem_alloc || \
1278 MA == llvm_omp_target_shared_mem_alloc || \
1279 MA == llvm_omp_target_device_mem_alloc)
1280
1281#define KMP_IS_PREDEF_MEM_SPACE(MS) \
1282 (MS == omp_null_mem_space || MS == omp_default_mem_space || \
1283 MS == omp_large_cap_mem_space || MS == omp_const_mem_space || \
1284 MS == omp_high_bw_mem_space || MS == omp_low_lat_mem_space || \
1285 KMP_IS_TARGET_MEM_SPACE(MS))
1286
1287/// Support OMP 6.0 target memory management
1288/// Expected offload runtime entries.
1289///
1290/// Returns number of resources and list of unique resource IDs in "resouces".
1291/// Runtime needs to invoke this twice to get the number of resources, allocate
1292/// space for the resource IDs, and finally let offload runtime write resource
1293/// IDs in "resources".
1294/// int __tgt_get_mem_resources(int num_devices, const int *devices,
1295/// int host_access, omp_memspace_handle_t memspace,
1296/// int *resources);
1297///
1298/// Redirects omp_alloc call to offload runtime.
1299/// void *__tgt_omp_alloc(size_t size, omp_allocator_handle_t allocator);
1300///
1301/// Redirects omp_free call to offload runtime.
1302/// void __tgt_omp_free(void *ptr, omp_allocator_handle_t);
1304 bool supported = false;
1305 using get_mem_resources_t = int (*)(int, const int *, int,
1306 omp_memspace_handle_t, int *);
1307 using omp_alloc_t = void *(*)(size_t, omp_allocator_handle_t);
1308 using omp_free_t = void (*)(void *, omp_allocator_handle_t);
1309 get_mem_resources_t tgt_get_mem_resources = nullptr;
1310 omp_alloc_t tgt_omp_alloc = nullptr;
1311 omp_free_t tgt_omp_free = nullptr;
1312
1313public:
1314 /// Initialize interface with offload runtime
1315 void init() {
1316 tgt_get_mem_resources =
1317 (get_mem_resources_t)KMP_DLSYM("__tgt_get_mem_resources");
1318 tgt_omp_alloc = (omp_alloc_t)KMP_DLSYM("__tgt_omp_alloc");
1319 tgt_omp_free = (omp_free_t)KMP_DLSYM("__tgt_omp_free");
1320 supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free;
1321 }
1322 /// Obtain resource information from offload runtime. We assume offload
1323 /// runtime backends maintain a list of unique resource IDS.
1324 int get_mem_resources(int ndevs, const int *devs, int host,
1325 omp_memspace_handle_t memspace, int *resources) {
1326 if (supported)
1327 return tgt_get_mem_resources(ndevs, devs, host, memspace, resources);
1328 return 0;
1329 }
1330 /// Invoke offload runtime's memory allocation routine
1331 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
1332 if (supported)
1333 return tgt_omp_alloc(size, allocator);
1334 return nullptr;
1335 }
1336 /// Invoke offload runtime's memory deallocation routine
1337 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
1338 if (supported)
1339 tgt_omp_free(ptr, allocator);
1340 }
1342
1343extern "C" int omp_get_num_devices(void);
1344
1345/// Maintain a list of target memory spaces that are identified with the
1346/// requested information. There will be only one unique memory space object
1347/// that matches the input.
1349 kmp_memspace_t *memspace_list = nullptr;
1350 KMP_LOCK_INIT(mtx);
1351 /// Find memory space that matches the provided input
1352 kmp_memspace_t *find(int num_resources, const int *resources,
1353 omp_memspace_handle_t memspace) {
1354 kmp_memspace_t *ms = memspace_list;
1355 while (ms) {
1356 if (ms->num_resources == num_resources && ms->memspace == memspace &&
1357 !memcmp(ms->resources, resources, sizeof(int) * num_resources))
1358 break;
1359 ms = ms->next;
1360 }
1361 return ms;
1362 }
  /// Return memory space for the provided input. It tries to find existing
  /// memory space that exactly matches the provided input or create one if
  /// not found.
  omp_memspace_handle_t get(int num_resources, const int *resources,
                            omp_memspace_handle_t memspace) {
    int gtid = __kmp_entry_gtid();
    __kmp_acquire_lock(&mtx, gtid);
    // Sort absolute IDs in the resource list so that lookup is
    // order-insensitive: the same set of IDs always unifies to one entry.
    int *sorted_resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
    KMP_MEMCPY(sorted_resources, resources, num_resources * sizeof(int));
    qsort(sorted_resources, (size_t)num_resources, sizeof(int),
          [](const void *a, const void *b) {
            const int val_a = *(const int *)a;
            const int val_b = *(const int *)b;
            return (val_a > val_b) ? 1 : ((val_a < val_b) ? -1 : 0);
          });
    kmp_memspace_t *ms = find(num_resources, sorted_resources, memspace);
    if (ms) {
      // Matching entry already exists; the sorted copy is not needed.
      __kmp_free(sorted_resources);
      __kmp_release_lock(&mtx, gtid);
      return ms;
    }
    // NOTE(review): in the full source a fresh kmp_memspace_t is allocated
    // and assigned to 'ms' here; that statement is not visible in this
    // excerpt.
    ms->memspace = memspace;
    ms->num_resources = num_resources;
    ms->resources = sorted_resources; // list takes ownership of the copy
    ms->next = memspace_list;
    memspace_list = ms;
    __kmp_release_lock(&mtx, gtid);
    return ms;
  }
1394
1395public:
1396 /// Initialize memory space list
1397 void init() { __kmp_init_lock(&mtx); }
1398 /// Release resources for the memory space list
1399 void fini() {
1400 kmp_memspace_t *ms = memspace_list;
1401 while (ms) {
1402 if (ms->resources)
1403 __kmp_free(ms->resources);
1404 kmp_memspace_t *tmp = ms;
1405 ms = ms->next;
1406 __kmp_free(tmp);
1407 }
1408 __kmp_destroy_lock(&mtx);
1409 }
  /// Return memory space for the provided input
  omp_memspace_handle_t get_memspace(int num_devices, const int *devices,
                                     int host_access,
                                     omp_memspace_handle_t memspace) {
    int actual_num_devices = num_devices;
    int *actual_devices = const_cast<int *>(devices);
    if (actual_num_devices == 0) {
      // A count of 0 means "use all available devices".
      actual_num_devices = omp_get_num_devices();
      if (actual_num_devices <= 0)
        return omp_null_mem_space; // no devices -> no memory space
    }
    if (actual_devices == NULL) {
      // Prepare list of all devices in this case.
      actual_devices = (int *)__kmp_allocate(sizeof(int) * actual_num_devices);
      for (int i = 0; i < actual_num_devices; i++)
        actual_devices[i] = i;
    }
    // Get the number of available resources first
    int num_resources = __kmp_tgt_allocator.get_mem_resources(
        actual_num_devices, actual_devices, host_access, memspace, NULL);
    if (num_resources <= 0)
      // NOTE(review): the synthesized actual_devices list appears to leak
      // on this early return — verify against upstream.
      return omp_null_mem_space; // No available resources

    // NOTE(review): 'ms' is declared (initialized to omp_null_mem_space)
    // in the full source; the declaration is not visible in this excerpt.
    if (num_resources > 0) {
      int *resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
      // Let offload runtime write the resource IDs
      num_resources = __kmp_tgt_allocator.get_mem_resources(
          actual_num_devices, actual_devices, host_access, memspace, resources);
      ms = get(num_resources, resources, memspace);
      __kmp_free(resources);
    }
    if (!devices && actual_devices)
      __kmp_free(actual_devices); // free the list synthesized above
    return ms;
  }
1446 /// Return sub memory space from the parent memory space
1447 omp_memspace_handle_t get_memspace(int num_resources, const int *resources,
1450 return get(num_resources, resources, ms->memspace);
1451 }
1453
1454#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1455static inline void chk_kind(void ***pkind) {
1456 KMP_DEBUG_ASSERT(pkind);
1457 if (*pkind) // symbol found
1458 if (kmp_mk_check(**pkind)) // kind not available or error
1459 *pkind = NULL;
1460}
1461#endif
1462
1464// as of 2018-07-31 memkind does not support Windows*, exclude it for now
1465#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1466 // use of statically linked memkind is problematic, as it depends on libnuma
1467 kmp_mk_lib_name = "libmemkind.so";
1468 h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
1469 if (h_memkind) {
1470 kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
1471 kmp_mk_alloc =
1472 (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
1473 kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
1474 mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
1475 if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
1476 !kmp_mk_check(*mk_default)) {
1478 mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
1479 chk_kind(&mk_interleave);
1480 mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
1481 chk_kind(&mk_hbw);
1482 mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
1483 chk_kind(&mk_hbw_interleave);
1484 mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
1485 chk_kind(&mk_hbw_preferred);
1486 mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
1487 chk_kind(&mk_hugetlb);
1488 mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
1489 chk_kind(&mk_hbw_hugetlb);
1490 mk_hbw_preferred_hugetlb =
1491 (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
1492 chk_kind(&mk_hbw_preferred_hugetlb);
1493 mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM");
1494 chk_kind(&mk_dax_kmem);
1495 mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL");
1496 chk_kind(&mk_dax_kmem_all);
1497 mk_dax_kmem_preferred =
1498 (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED");
1499 chk_kind(&mk_dax_kmem_preferred);
1500 KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
1501 return; // success
1502 }
1503 dlclose(h_memkind); // failure
1504 }
1505 h_memkind = NULL;
1506 kmp_mk_check = NULL;
1507 mk_hbw = NULL;
1508 mk_hugetlb = NULL;
1509 mk_hbw_hugetlb = NULL;
1510 mk_hbw_preferred_hugetlb = NULL;
1511 mk_dax_kmem_preferred = NULL;
1512 kmp_mk_lib_name = "";
1513#endif // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN)
1514 kmp_mk_alloc = NULL;
1515 kmp_mk_free = NULL;
1516 mk_default = NULL;
1517 mk_interleave = NULL;
1518 mk_hbw_interleave = NULL;
1519 mk_hbw_preferred = NULL;
1520 mk_dax_kmem = NULL;
1521 mk_dax_kmem_all = NULL;
1522}
1523
1525#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1527 KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
1528 if (h_memkind) {
1529 dlclose(h_memkind);
1530 h_memkind = NULL;
1531 }
1532 kmp_mk_check = NULL;
1533 mk_hbw = NULL;
1534 mk_hugetlb = NULL;
1535 mk_hbw_hugetlb = NULL;
1536 mk_hbw_preferred_hugetlb = NULL;
1537 mk_dax_kmem_preferred = NULL;
1538#endif
1539#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
1540 kmp_mk_alloc = NULL;
1541 kmp_mk_free = NULL;
1542 mk_default = NULL;
1543 mk_interleave = NULL;
1544 mk_hbw_interleave = NULL;
1545 mk_hbw_preferred = NULL;
1546 mk_dax_kmem = NULL;
1547 mk_dax_kmem_all = NULL;
1548#endif
1549}
1550
1551#if KMP_HWLOC_ENABLED
1552static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
1553#if HWLOC_API_VERSION >= 0x00020300
1554 const hwloc_topology_support *support;
1555 support = hwloc_topology_get_support(__kmp_hwloc_topology);
1556 if (support) {
1557 if (policy == HWLOC_MEMBIND_BIND)
1558 return (support->membind->alloc_membind &&
1559 support->membind->bind_membind);
1560 if (policy == HWLOC_MEMBIND_INTERLEAVE)
1561 return (support->membind->alloc_membind &&
1562 support->membind->interleave_membind);
1563 }
1564 return false;
1565#else
1566 return false;
1567#endif // KMP_HWLOC_ENABLED
1568}
1569
1570void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
1571 hwloc_membind_policy_t policy) {
1572#if HWLOC_API_VERSION >= 0x00020300
1573 void *ptr = NULL;
1574 hwloc_obj_t node;
1575 struct hwloc_location initiator;
1576 int ret;
1577 // TODO: We should make this more efficient by getting rid of the OS syscall
1578 // 'hwloc_bitmap_alloc' and 'hwloc_get_cpubind' to get affinity and instead
1579 // use th_affin_mask field when it's capable of getting the underlying
1580 // mask implementation.
1581 hwloc_cpuset_t mask = hwloc_bitmap_alloc();
1582 ret = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
1583 if (ret < 0) {
1584 hwloc_bitmap_free(mask);
1585 return ptr;
1586 }
1587 initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET;
1588 initiator.location.cpuset = mask;
1589 ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0,
1590 &node, NULL);
1591 if (ret < 0) {
1592 return ptr;
1593 }
1594 return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy,
1595 HWLOC_MEMBIND_BYNODESET);
1596#else
1597 return NULL;
1598#endif
1599}
1600
1601void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
1602 hwloc_membind_policy_t policy) {
1603#if HWLOC_API_VERSION >= 0x00020300
1604 void *ptr = NULL;
1605 if (ms == omp_high_bw_mem_space) {
1606 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
1607 } else if (ms == omp_large_cap_mem_space) {
1608 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, size, policy);
1609 } else {
1610 ptr = hwloc_alloc(__kmp_hwloc_topology, size);
1611 }
1612 return ptr;
1613#else
1614 return NULL;
1615#endif
1616}
1617#endif // KMP_HWLOC_ENABLED
1618
1620 *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
1621 *(void **)(&kmp_target_alloc_shared) =
1622 KMP_DLSYM("llvm_omp_target_alloc_shared");
1623 *(void **)(&kmp_target_alloc_device) =
1624 KMP_DLSYM("llvm_omp_target_alloc_device");
1625 *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
1626 *(void **)(&kmp_target_free_shared) =
1627 KMP_DLSYM("llvm_omp_target_free_shared");
1628 *(void **)(&kmp_target_free_device) =
1629 KMP_DLSYM("llvm_omp_target_free_device");
1634 // lock/pin and unlock/unpin target calls
1635 *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
1636 *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
1637 __kmp_tgt_allocator.init();
1639}
1640
1641/// Finalize target memory support
1643
1645 int ntraits,
1646 omp_alloctrait_t traits[]) {
1647 kmp_allocator_t *al;
1648 int i;
1649 al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
1650 al->memspace = ms; // not used currently
1651
1652 // Assign default values if applicable
1653 al->alignment = 1;
1654 al->pinned = false;
1656 al->pin_device = -1;
1657 al->preferred_device = -1;
1660
1661 for (i = 0; i < ntraits; ++i) {
1662 switch (traits[i].key) {
1663 case omp_atk_sync_hint:
1664 case omp_atk_access:
1665 break;
1666 case omp_atk_pinned:
1667 al->pinned = true;
1668 break;
1669 case omp_atk_alignment:
1670 __kmp_type_convert(traits[i].value, &(al->alignment));
1672 break;
1673 case omp_atk_pool_size:
1674 al->pool_size = traits[i].value;
1675 break;
1676 case omp_atk_fallback:
1677 al->fb = (omp_alloctrait_value_t)traits[i].value;
1679 al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
1680 al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
1681 break;
1682 case omp_atk_fb_data:
1683 al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
1684 break;
1685 case omp_atk_partition:
1686#if KMP_HWLOC_ENABLED
1687 al->membind = (omp_alloctrait_value_t)traits[i].value;
1688 KMP_DEBUG_ASSERT(al->membind == omp_atv_environment ||
1689 al->membind == omp_atv_nearest ||
1690 al->membind == omp_atv_blocked ||
1691 al->membind == omp_atv_interleaved);
1692#endif // KMP_HWLOC_ENABLED
1693 al->memkind = RCAST(void **, traits[i].value);
1694 break;
1695 case omp_atk_pin_device:
1696 __kmp_type_convert(traits[i].value, &(al->pin_device));
1697 break;
1699 __kmp_type_convert(traits[i].value, &(al->preferred_device));
1700 break;
1703 break;
1706 break;
1707 case omp_atk_part_size:
1708 __kmp_type_convert(traits[i].value, &(al->part_size));
1709 break;
1710 default:
1711 KMP_ASSERT2(0, "Unexpected allocator trait");
1712 }
1713 }
1714
1715 if (al->memspace > kmp_max_mem_space) {
1716 // Memory space has been allocated for targets.
1717 return (omp_allocator_handle_t)al;
1718 }
1719
1721
1722 if (al->fb == 0) {
1723 // set default allocator
1726 } else if (al->fb == omp_atv_allocator_fb) {
1727 KMP_ASSERT(al->fb_data != NULL);
1728 } else if (al->fb == omp_atv_default_mem_fb) {
1730 }
1732 // Let's use memkind library if available
1733 if (ms == omp_high_bw_mem_space) {
1734 if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
1736 } else if (mk_hbw_preferred) {
1737 // AC: do not try to use MEMKIND_HBW for now, because memkind library
1738 // cannot reliably detect exhaustion of HBW memory.
1739 // It could be possible using hbw_verify_memory_region() but memkind
1740 // manual says: "Using this function in production code may result in
1741 // serious performance penalty".
1743 } else {
1744 // HBW is requested but not available --> return NULL allocator
1745 __kmp_free(al);
1746 return omp_null_allocator;
1747 }
1748 } else if (ms == omp_large_cap_mem_space) {
1749 if (mk_dax_kmem_all) {
1750 // All pmem nodes are visited
1752 } else if (mk_dax_kmem) {
1753 // Only closest pmem node is visited
1754 al->memkind = mk_dax_kmem;
1755 } else {
1756 __kmp_free(al);
1757 return omp_null_allocator;
1758 }
1759 } else {
1760 if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
1761 al->memkind = mk_interleave;
1762 } else {
1763 al->memkind = mk_default;
1764 }
1765 }
1767 __kmp_free(al);
1768 return omp_null_allocator;
1769 } else {
1770 if (!__kmp_hwloc_available &&
1772 // cannot detect HBW memory presence without memkind library
1773 __kmp_free(al);
1774 return omp_null_allocator;
1775 }
1776 }
1777 return (omp_allocator_handle_t)al;
1778}
1779
1781 if (allocator > kmp_max_mem_alloc)
1782 __kmp_free(allocator);
1783}
1784
1786 if (allocator == omp_null_allocator)
1787 allocator = omp_default_mem_alloc;
1788 __kmp_threads[gtid]->th.th_def_allocator = allocator;
1789}
1790
1792 return __kmp_threads[gtid]->th.th_def_allocator;
1793}
1794
1796 omp_memspace_handle_t memspace,
1797 int host) {
1798 if (!__kmp_init_serial)
1800 // Only accept valid device description and predefined memory space
1801 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1802 return omp_null_mem_space;
1803
1804 return __kmp_tgt_memspace_list.get_memspace(ndevs, devs, host, memspace);
1805}
1806
1808__kmp_get_devices_allocator(int ndevs, const int *devs,
1809 omp_memspace_handle_t memspace, int host) {
1810 if (!__kmp_init_serial)
1812 // Only accept valid device description and predefined memory space
1813 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1814 return omp_null_allocator;
1815
1816 omp_memspace_handle_t mspace =
1817 __kmp_get_devices_memspace(ndevs, devs, memspace, host);
1818 if (mspace == omp_null_mem_space)
1819 return omp_null_allocator;
1820
1821 return __kmpc_init_allocator(__kmp_entry_gtid(), mspace, 0, NULL);
1822}
1823
1825 if (!__kmp_init_serial)
1827 if (memspace == omp_null_mem_space)
1828 return 0;
1829 if (memspace < kmp_max_mem_space)
1830 return 1; // return 1 for predefined memory space
1831 kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
1832 return ms->num_resources;
1833}
1834
1836 int num_resources, int *resources) {
1837 if (!__kmp_init_serial)
1839 if (memspace == omp_null_mem_space || memspace < kmp_max_mem_space)
1840 return memspace; // return input memory space for predefined memory space
1841 kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
1842 if (num_resources == 0 || ms->num_resources < num_resources || !resources)
1843 return omp_null_mem_space; // input memory space cannot satisfy the request
1844
1845 // The stored resource ID is an absolute ID only known to the offload backend,
1846 // and the returned memory space will still keep the property.
1847 int *resources_abs = (int *)__kmp_allocate(sizeof(int) * num_resources);
1848
1849 // Collect absolute resource ID from the relative ID
1850 for (int i = 0; i < num_resources; i++)
1851 resources_abs[i] = ms->resources[resources[i]];
1852
1853 omp_memspace_handle_t submemspace = __kmp_tgt_memspace_list.get_memspace(
1854 num_resources, resources_abs, memspace);
1855 __kmp_free(resources_abs);
1856
1857 return submemspace;
1858}
1859
1860typedef struct kmp_mem_desc { // Memory block descriptor
1861 void *ptr_alloc; // Pointer returned by allocator
1862 size_t size_a; // Size of allocated memory block (initial+descriptor+align)
1863 size_t size_orig; // Original size requested
1864 void *ptr_align; // Pointer to aligned memory, returned
1867constexpr size_t alignment = SizeQuant;
1868
1869// external interfaces are wrappers over internal implementation
1870void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
1871 KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
1872 void *ptr = __kmp_alloc(gtid, 0, size, allocator);
1873 KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
1874 return ptr;
1875}
1876
1877void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
1878 omp_allocator_handle_t allocator) {
1879 KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
1880 (int)size, allocator));
1881 void *ptr = __kmp_alloc(gtid, algn, size, allocator);
1882 KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
1883 return ptr;
1884}
1885
1886void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
1887 omp_allocator_handle_t allocator) {
1888 KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
1889 (int)size, allocator));
1890 void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
1891 KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
1892 return ptr;
1893}
1894
1895void *__kmpc_realloc(int gtid, void *ptr, size_t size,
1896 omp_allocator_handle_t allocator,
1897 omp_allocator_handle_t free_allocator) {
1898 KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
1899 allocator, free_allocator));
1900 void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
1901 KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
1902 return nptr;
1903}
1904
1905void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
1906 KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1907 ___kmpc_free(gtid, ptr, allocator);
1908 KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
1909 return;
1910}
1911
1912// internal implementation, called from inside the library
// Internal allocation entry point used from inside the runtime (and by the
// __kmpc_alloc/omp_alloc wrappers). Allocates `size` bytes with alignment of
// at least `algn` from the requested OpenMP allocator, reserving room in
// front of the returned pointer for a hidden kmp_mem_desc_t descriptor so
// that ___kmpc_free() can recover the real allocation address and the owning
// allocator. Returns NULL when size == 0 or the allocation (and all
// configured fallbacks) fail.
// NOTE(review): several physical source lines are missing from this view
// (the embedded line numbers jump at 1917, 1945, 1955, 1971/1974/1976, 1987,
// 2010, 2013/2015, 2081, 2094/2097/2099, 2116, 2135, 2169, 2175, 2178,
// 2180-2181, 2194, 2202), so some statements below appear truncated —
// dangling initializers and else-branches without their if. Verify against
// upstream kmp_alloc.cpp before editing this function.
1913 void *__kmp_alloc(int gtid, size_t algn, size_t size,
1914 omp_allocator_handle_t allocator) {
1915 void *ptr = NULL;
1916 kmp_allocator_t *al;
1918 if (size == 0)
1919 return NULL;
// omp_null_allocator means "use the encountering thread's default allocator".
1920 if (allocator == omp_null_allocator)
1921 allocator = __kmp_threads[gtid]->th.th_def_allocator;
1922 kmp_int32 default_device =
1923 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1924
1925 al = RCAST(kmp_allocator_t *, allocator);
1926
// Effective alignment = max(default alignment, allocator alignment trait
// for custom allocators, caller's algn). size_a reserves room for the
// descriptor plus alignment slack on top of the user-visible size.
1927 int sz_desc = sizeof(kmp_mem_desc_t);
1928 kmp_mem_desc_t desc;
1929 kmp_uintptr_t addr; // address returned by allocator
1930 kmp_uintptr_t addr_align; // address to return to caller
1931 kmp_uintptr_t addr_descr; // address of memory block descriptor
1932 size_t align = alignment; // default alignment
1933 if (allocator > kmp_max_mem_alloc && al->alignment > align)
1934 align = al->alignment; // alignment required by allocator trait
1935 if (align < algn)
1936 align = algn; // max of allocator trait, parameter and sizeof(void*)
1937 desc.size_orig = size;
1938 desc.size_a = size + sz_desc + align;
// The pinned trait only exists on custom (handle > kmp_max_mem_alloc)
// allocators; it triggers kmp_target_lock_mem on the final pointer below.
1939 bool is_pinned = false;
1940 if (allocator > kmp_max_mem_alloc)
1941 is_pinned = al->pinned;
1942
1943 // Use default allocator if hwloc and libmemkind are not available
// NOTE(review): the initializer of use_default_allocator (line 1945) is
// missing from this view.
1944 int use_default_allocator =
1946
// Memspaces above kmp_max_mem_space were created for target devices:
// delegate the whole allocation to the offload runtime's allocator.
1947 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
1948 // Memspace has been allocated for targets.
1949 return __kmp_tgt_allocator.omp_alloc(size, allocator);
1950 }
1951
// Predefined llvm_omp_target_* allocators route to the target memory entry
// points. NOTE(review): an enclosing condition (line 1955, presumably a
// check that the target alloc entry points were resolved) is missing here;
// the dangling "} else { KMP_INFORM(TargetMemNotAvailable); }" belongs to it.
1952 if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
1953 // Use size input directly as the memory may not be accessible on host.
1954 // Use default device for now.
1956 kmp_int32 device =
1957 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1958 if (allocator == llvm_omp_target_host_mem_alloc)
1959 ptr = kmp_target_alloc_host(size, device);
1960 else if (allocator == llvm_omp_target_shared_mem_alloc)
1961 ptr = kmp_target_alloc_shared(size, device);
1962 else // allocator == llvm_omp_target_device_mem_alloc
1963 ptr = kmp_target_alloc_device(size, device);
1964 return ptr;
1965 } else {
1966 KMP_INFORM(TargetMemNotAvailable);
1967 }
1968 }
1969
// Custom allocators defined on a target memory space also use the target
// entry points. NOTE(review): the inner condition lines (1971, 1974, 1976)
// selecting host/shared/device are missing from this view.
1970 if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
1972 kmp_int32 device =
1973 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1975 ptr = kmp_target_alloc_host(size, device);
1977 ptr = kmp_target_alloc_shared(size, device);
1978 else // al->memspace == llvm_omp_target_device_mem_space
1979 ptr = kmp_target_alloc_device(size, device);
1980 return ptr;
1981 } else {
1982 KMP_INFORM(TargetMemNotAvailable);
1983 }
1984 }
1985
// ---- Host path #1: hwloc-backed allocation (when membind is supported) ----
1986#if KMP_HWLOC_ENABLED
1988 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) {
1989 if (allocator < kmp_max_mem_alloc) {
1990 // pre-defined allocator
1991 if (allocator == omp_high_bw_mem_alloc) {
1992 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH,
1993 desc.size_a, HWLOC_MEMBIND_BIND);
1994 if (ptr == NULL)
1995 use_default_allocator = true;
1996 } else if (allocator == omp_large_cap_mem_alloc) {
1997 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY,
1998 desc.size_a, HWLOC_MEMBIND_BIND);
1999 if (ptr == NULL)
2000 use_default_allocator = true;
2001 } else {
2002 use_default_allocator = true;
2003 }
2004 if (use_default_allocator) {
2005 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2006 }
2007 } else if (al->pool_size > 0) {
2008 // custom allocator with pool size requested
// NOTE(review): the atomic pool-usage accounting expression (line 2010,
// and the over-limit rollback at 2013/2015) is missing from this view.
2009 kmp_uint64 used =
2011 if (used + desc.size_a > al->pool_size) {
2012 // not enough space, need to go fallback path
2014 if (al->fb == omp_atv_default_mem_fb) {
2016 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2017 } else if (al->fb == omp_atv_abort_fb) {
2018 KMP_ASSERT(0); // abort fallback requested
2019 } else if (al->fb == omp_atv_allocator_fb) {
2020 KMP_ASSERT(al != al->fb_data);
2021 al = al->fb_data;
2022 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2023 } // else ptr == NULL;
2024 } else {
2025 // pool has enough space
2026 if (al->membind == omp_atv_interleaved) {
2027 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2028 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2029 HWLOC_MEMBIND_INTERLEAVE);
2030 }
2031 } else if (al->membind == omp_atv_environment) {
2032 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2033 HWLOC_MEMBIND_DEFAULT);
2034 } else {
2035 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2036 }
2037 if (ptr == NULL) {
2038 if (al->fb == omp_atv_default_mem_fb) {
2040 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2041 } else if (al->fb == omp_atv_abort_fb) {
2042 KMP_ASSERT(0); // abort fallback requested
2043 } else if (al->fb == omp_atv_allocator_fb) {
2044 KMP_ASSERT(al != al->fb_data);
2045 al = al->fb_data;
2046 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2047 }
2048 }
2049 }
2050 } else {
2051 // custom allocator, pool size not requested
2052 if (al->membind == omp_atv_interleaved) {
2053 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2054 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2055 HWLOC_MEMBIND_INTERLEAVE);
2056 }
2057 } else if (al->membind == omp_atv_environment) {
2058 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2059 HWLOC_MEMBIND_DEFAULT);
2060 } else {
2061 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2062 }
2063 if (ptr == NULL) {
2064 if (al->fb == omp_atv_default_mem_fb) {
2066 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2067 } else if (al->fb == omp_atv_abort_fb) {
2068 KMP_ASSERT(0); // abort fallback requested
2069 } else if (al->fb == omp_atv_allocator_fb) {
2070 KMP_ASSERT(al != al->fb_data);
2071 al = al->fb_data;
2072 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2073 }
2074 }
2075 }
2076 } else { // alloc membind not supported, use hwloc_alloc
2077 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2078 }
2079 } else {
2080#endif // KMP_HWLOC_ENABLED
// ---- Host path #2: libmemkind-backed allocation ----
// NOTE(review): the enclosing condition (line 2081, presumably the memkind
// availability check) is missing from this view.
2082 if (allocator < kmp_max_mem_alloc) {
2083 // pre-defined allocator
2084 if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
2085 ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
2086 } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2087 ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
2088 } else {
2089 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2090 }
2091 } else if (al->pool_size > 0) {
2092 // custom allocator with pool size requested
// NOTE(review): pool accounting lines 2094/2097/2099 missing from view.
2093 kmp_uint64 used =
2095 if (used + desc.size_a > al->pool_size) {
2096 // not enough space, need to go fallback path
2098 if (al->fb == omp_atv_default_mem_fb) {
2100 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2101 } else if (al->fb == omp_atv_abort_fb) {
2102 KMP_ASSERT(0); // abort fallback requested
2103 } else if (al->fb == omp_atv_allocator_fb) {
2104 KMP_ASSERT(al != al->fb_data);
2105 al = al->fb_data;
2106 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2107 if (is_pinned && kmp_target_lock_mem)
2108 kmp_target_lock_mem(ptr, size, default_device);
2109 return ptr;
2110 } // else ptr == NULL;
2111 } else {
2112 // pool has enough space
2113 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2114 if (ptr == NULL) {
2115 if (al->fb == omp_atv_default_mem_fb) {
2117 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2118 } else if (al->fb == omp_atv_abort_fb) {
2119 KMP_ASSERT(0); // abort fallback requested
2120 } else if (al->fb == omp_atv_allocator_fb) {
2121 KMP_ASSERT(al != al->fb_data);
2122 al = al->fb_data;
2123 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2124 if (is_pinned && kmp_target_lock_mem)
2125 kmp_target_lock_mem(ptr, size, default_device);
2126 return ptr;
2127 }
2128 }
2129 }
2130 } else {
2131 // custom allocator, pool size not requested
2132 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2133 if (ptr == NULL) {
2134 if (al->fb == omp_atv_default_mem_fb) {
2136 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2137 } else if (al->fb == omp_atv_abort_fb) {
2138 KMP_ASSERT(0); // abort fallback requested
2139 } else if (al->fb == omp_atv_allocator_fb) {
2140 KMP_ASSERT(al != al->fb_data);
2141 al = al->fb_data;
2142 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2143 if (is_pinned && kmp_target_lock_mem)
2144 kmp_target_lock_mem(ptr, size, default_device);
2145 return ptr;
2146 }
2147 }
2148 }
// ---- Host path #3: plain malloc-backed default path ----
// Pre-defined allocators with no backing library only warn and fall back to
// the default allocator.
2149 } else if (allocator < kmp_max_mem_alloc) {
2150 // pre-defined allocator
2151 if (allocator == omp_high_bw_mem_alloc) {
2152 KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
2153 } else if (allocator == omp_large_cap_mem_alloc) {
2154 KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
2155 } else if (allocator == omp_const_mem_alloc) {
2156 KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
2157 } else if (allocator == omp_low_lat_mem_alloc) {
2158 KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
2159 } else if (allocator == omp_cgroup_mem_alloc) {
2160 KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
2161 } else if (allocator == omp_pteam_mem_alloc) {
2162 KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
2163 } else if (allocator == omp_thread_mem_alloc) {
2164 KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
2165 } else { // default allocator requested
2166 use_default_allocator = true;
2167 }
// NOTE(review): the actual default allocation call (line 2169) is missing
// from this view.
2168 if (use_default_allocator) {
2170 use_default_allocator = false;
2171 }
2172 } else if (al->pool_size > 0) {
2173 // custom allocator with pool size requested
// NOTE(review): pool accounting (2175), rollback (2178), and the fallback
// allocation call (2180-2181) are missing from this view.
2174 kmp_uint64 used =
2176 if (used + desc.size_a > al->pool_size) {
2177 // not enough space, need to go fallback path
2179 if (al->fb == omp_atv_default_mem_fb) {
2182 } else if (al->fb == omp_atv_abort_fb) {
2183 KMP_ASSERT(0); // abort fallback requested
2184 } else if (al->fb == omp_atv_allocator_fb) {
2185 KMP_ASSERT(al != al->fb_data);
2186 al = al->fb_data;
2187 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2188 if (is_pinned && kmp_target_lock_mem)
2189 kmp_target_lock_mem(ptr, size, default_device);
2190 return ptr;
2191 } // else ptr == NULL
2192 } else {
2193 // pool has enough space
// NOTE(review): the allocation call (line 2194) is missing from this view.
2195 if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2196 KMP_ASSERT(0); // abort fallback requested
2197 } // no sense to look for another fallback because of same internal
2198 // alloc
2199 }
2200 } else {
2201 // custom allocator, pool size not requested
// NOTE(review): the allocation call (line 2202) is missing from this view.
2203 if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2204 KMP_ASSERT(0); // abort fallback requested
2205 } // no sense to look for another fallback because of same internal alloc
2206 }
2207#if KMP_HWLOC_ENABLED
2208 }
2209#endif // KMP_HWLOC_ENABLED
2210 KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
2211 if (ptr == NULL)
2212 return NULL;
2213
2214 if (is_pinned && kmp_target_lock_mem)
2215 kmp_target_lock_mem(ptr, desc.size_a, default_device);
2216
// Round up past the descriptor to the alignment boundary; the descriptor
// is stored immediately before the pointer returned to the caller.
2217 addr = (kmp_uintptr_t)ptr;
2218 addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
2219 addr_descr = addr_align - sz_desc;
2220
2221 desc.ptr_alloc = ptr;
2222 desc.ptr_align = (void *)addr_align;
2223 desc.allocator = al;
2224 *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
2225 KMP_MB();
2226
2227 return desc.ptr_align;
2228}
2229
2230void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
2231 omp_allocator_handle_t allocator) {
2232 void *ptr = NULL;
2233 kmp_allocator_t *al;
2235
2236 if (allocator == omp_null_allocator)
2237 allocator = __kmp_threads[gtid]->th.th_def_allocator;
2238
2239 al = RCAST(kmp_allocator_t *, allocator);
2240
2241 if (nmemb == 0 || size == 0)
2242 return ptr;
2243
2244 if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
2245 if (al->fb == omp_atv_abort_fb) {
2246 KMP_ASSERT(0);
2247 }
2248 return ptr;
2249 }
2250
2251 ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
2252
2253 if (ptr) {
2254 memset(ptr, 0x00, nmemb * size);
2255 }
2256 return ptr;
2257}
2258
2259void *__kmp_realloc(int gtid, void *ptr, size_t size,
2260 omp_allocator_handle_t allocator,
2261 omp_allocator_handle_t free_allocator) {
2262 void *nptr = NULL;
2264
2265 if (size == 0) {
2266 if (ptr != NULL)
2267 ___kmpc_free(gtid, ptr, free_allocator);
2268 return nptr;
2269 }
2270
2271 nptr = __kmp_alloc(gtid, 0, size, allocator);
2272
2273 if (nptr != NULL && ptr != NULL) {
2274 kmp_mem_desc_t desc;
2275 kmp_uintptr_t addr_align; // address to return to caller
2276 kmp_uintptr_t addr_descr; // address of memory block descriptor
2277
2278 addr_align = (kmp_uintptr_t)ptr;
2279 addr_descr = addr_align - sizeof(kmp_mem_desc_t);
2280 desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
2281
2282 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
2283 KMP_DEBUG_ASSERT(desc.size_orig > 0);
2284 KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
2285 KMP_MEMCPY((char *)nptr, (char *)ptr,
2286 (size_t)((size < desc.size_orig) ? size : desc.size_orig));
2287 }
2288
2289 if (nptr != NULL) {
2290 ___kmpc_free(gtid, ptr, free_allocator);
2291 }
2292
2293 return nptr;
2294}
2295
// Free a block previously produced by __kmp_alloc()/__kmp_calloc():
// recover the descriptor stored in front of `ptr`, unlock pinned target
// memory if needed, and release through the backing allocator (target
// runtime, hwloc, memkind, or plain free). NULL is a no-op.
// NOTE(review): several lines are missing from this view (2301-2302: the
// initialization of `al`/`oal` from `allocator`; 2312/2314: parts of the
// target-allocator condition; 2346, 2349, 2356, 2359-2364, 2369, 2378,
// 2381-2382: the pool-usage accounting and the actual kmp_mk_free/free
// calls). Verify against upstream kmp_alloc.cpp before editing.
2296 void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
2297 if (ptr == NULL)
2298 return;
2299
// NOTE(review): `al` (and presumably `oal`) are assigned on the missing
// lines 2301-2302; as shown, `al` would be read uninitialized below.
2300 kmp_allocator_t *al;
2303 kmp_mem_desc_t desc;
2304 kmp_uintptr_t addr_align; // address to return to caller
2305 kmp_uintptr_t addr_descr; // address of memory block descriptor
2306
// Target memspaces are delegated wholesale to the offload runtime.
2307 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
2308 __kmp_tgt_allocator.omp_free(ptr, allocator);
2309 return;
2310 }
2311
// Predefined llvm_omp_target_* allocators (condition truncated — lines
// 2312 and 2314 missing) release through the target free entry points.
2313 (allocator > kmp_max_mem_alloc &&
2315 kmp_int32 device =
2316 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2317 if (allocator == llvm_omp_target_host_mem_alloc) {
2318 kmp_target_free_host(ptr, device);
2319 } else if (allocator == llvm_omp_target_shared_mem_alloc) {
2320 kmp_target_free_shared(ptr, device);
2321 } else if (allocator == llvm_omp_target_device_mem_alloc) {
2322 kmp_target_free_device(ptr, device);
2323 }
2324 return;
2325 }
2326
// Read back the hidden descriptor stored just before the user pointer.
2327 addr_align = (kmp_uintptr_t)ptr;
2328 addr_descr = addr_align - sizeof(kmp_mem_desc_t);
2329 desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
2330
2331 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
2332 if (allocator) {
2333 KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
2334 }
// The descriptor's allocator (which may be a fallback of the one the
// caller passed) is the one that actually owns the block.
2335 al = desc.allocator;
2336 oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
2337 KMP_DEBUG_ASSERT(al);
2338
// Unlock pinned target memory before releasing the host block.
2339 if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
2340 kmp_int32 device =
2341 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2342 kmp_target_unlock_mem(desc.ptr_alloc, device);
2343 }
2344
2345#if KMP_HWLOC_ENABLED
// NOTE(review): the hwloc-availability condition (line 2346) and the
// pool-usage decrement expression (line 2349) are missing from this view.
2347 if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2348 kmp_uint64 used =
2350 (void)used; // to suppress compiler warning
2351 KMP_DEBUG_ASSERT(used >= desc.size_a);
2352 }
2353 hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a);
2354 } else {
2355#endif // KMP_HWLOC_ENABLED
// memkind path. NOTE(review): the memkind-availability condition (2356)
// and the kmp_mk_free calls for the predefined allocators (2359-2360,
// 2362, 2364) are missing from this view.
2357 if (oal < kmp_max_mem_alloc) {
2358 // pre-defined allocator
2361 } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2363 } else {
2365 }
2366 } else {
2367 if (al->pool_size > 0) { // custom allocator with pool size requested
// NOTE(review): pool-usage decrement (line 2369) missing from this view.
2368 kmp_uint64 used =
2370 (void)used; // to suppress compiler warning
2371 KMP_DEBUG_ASSERT(used >= desc.size_a);
2372 }
2373 kmp_mk_free(*al->memkind, desc.ptr_alloc);
2374 }
// Default path (no hwloc/memkind). NOTE(review): the pool decrement
// (2378) and the final free call (2382) are missing from this view.
2375 } else {
2376 if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2377 kmp_uint64 used =
2379 (void)used; // to suppress compiler warning
2380 KMP_DEBUG_ASSERT(used >= desc.size_a);
2381 }
2383 }
2384#if KMP_HWLOC_ENABLED
2385 }
2386#endif // KMP_HWLOC_ENABLED
2387}
2388
2389/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
2390 memory leaks, but it may be useful for debugging memory corruptions, used
2391 freed pointers, etc. */
2392/* #define LEAK_MEMORY */
// Hidden header written sizeof(kmp_mem_descr_t) bytes before every pointer
// handed out by ___kmp_allocate_align(); ___kmp_free() and the fast-memory
// routines read it back to recover the raw malloc() pointer and sizes.
2393 struct kmp_mem_descr { // Memory block descriptor.
2394 void *ptr_allocated; // Pointer returned by malloc(), subject for free().
2395 size_t size_allocated; // Size of allocated memory block.
2396 void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
2397 size_t size_aligned; // Size of aligned memory block.
2398};
2400
2401/* Allocate memory on requested boundary, fill allocated memory with 0x00.
2402 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
2403 error. Must use __kmp_free when freeing memory allocated by this routine! */
// Allocate `size` bytes aligned to `alignment` (a power of two), zero-fill
// the aligned region, and stash a kmp_mem_descr_t just before the returned
// pointer so ___kmp_free() can locate the raw malloc() pointer. Aborts via
// KMP_FATAL on out-of-memory; never returns NULL.
2404 static void *___kmp_allocate_align(size_t size,
2405 size_t alignment KMP_SRC_LOC_DECL) {
2406 /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
2407 requested to return properly aligned pointer. Original pointer returned
2408 by malloc() and size of allocated block is saved in descriptor just
2409 before the aligned pointer. This information used by __kmp_free() -- it
2410 has to pass to free() original pointer, not aligned one.
2411
2412 +---------+------------+-----------------------------------+---------+
2413 | padding | descriptor | aligned block | padding |
2414 +---------+------------+-----------------------------------+---------+
2415 ^ ^
2416 | |
2417 | +- Aligned pointer returned to caller
2418 +- Pointer returned by malloc()
2419
2420 Aligned block is filled with zeros, paddings are filled with 0xEF. */
2421
2422 kmp_mem_descr_t descr;
2423 kmp_uintptr_t addr_allocated; // Address returned by malloc().
2424 kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
2425 kmp_uintptr_t addr_descr; // Address of memory block descriptor.
2426
2427 KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
2428 (int)size, (int)alignment KMP_SRC_LOC_PARM));
2429
2430 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too
2431 KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
2432 // Make sure kmp_uintptr_t is enough to store addresses.
2433
// Over-allocate: descriptor + worst-case alignment slack.
2434 descr.size_aligned = size;
2435 descr.size_allocated =
2436 descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
2437
2438#if KMP_DEBUG
2439 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
// NOTE(review): the release-build allocation call (line 2441, presumably
// malloc_src_loc without source-location tracking) is missing from this view.
2440#else
2442#endif
2443 KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
2444 descr.ptr_allocated));
2445 if (descr.ptr_allocated == NULL) {
2446 KMP_FATAL(OutOfHeapMemory);
2447 }
2448
// Round up past the descriptor to the next alignment boundary; the
// descriptor goes immediately before the aligned pointer.
2449 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2450 addr_aligned =
2451 (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
2452 addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
2453
2454 descr.ptr_aligned = (void *)addr_aligned;
2455
2456 KE_TRACE(26, (" ___kmp_allocate_align: "
2457 "ptr_allocated=%p, size_allocated=%d, "
2458 "ptr_aligned=%p, size_aligned=%d\n",
2459 descr.ptr_allocated, (int)descr.size_allocated,
2460 descr.ptr_aligned, (int)descr.size_aligned));
2461
2462 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
2463 KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
2464 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2465 addr_allocated + descr.size_allocated);
2466 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
2467#ifdef KMP_DEBUG
2468 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2469// Fill allocated memory block with 0xEF.
2470#endif
2471 memset(descr.ptr_aligned, 0x00, descr.size_aligned);
2472 // Fill the aligned memory block (which is intended for using by caller) with
2473 // 0x00. Do not
2474 // put this filling under KMP_DEBUG condition! Many callers expect zeroed
2475 // memory. (Padding
2476 // bytes remain filled with 0xEF in debugging library.)
2477 *((kmp_mem_descr_t *)addr_descr) = descr;
2478
2479 KMP_MB();
2480
2481 KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
2482 return descr.ptr_aligned;
2483} // func ___kmp_allocate_align
2484
2485/* Allocate memory on cache line boundary, fill allocated memory with 0x00.
2486 Do not call this func directly! Use __kmp_allocate macro instead.
2487 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
2488 error. Must use __kmp_free when freeing memory allocated by this routine! */
// NOTE(review): the function header (line 2489, presumably
// `void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {`) and the call that
// assigns `ptr` (line 2493, presumably ___kmp_allocate_align with the
// cache-line alignment) are missing from this view; only the body fragment
// below survived the extraction.
2490 void *ptr;
2491 KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
2492 (int)size KMP_SRC_LOC_PARM));
2494 KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
2495 return ptr;
2496} // func ___kmp_allocate
2497
2498/* Allocate memory on page boundary, fill allocated memory with 0x00.
2499 Does not call this func directly! Use __kmp_page_allocate macro instead.
2500 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
2501 error. Must use __kmp_free when freeing memory allocated by this routine! */
// NOTE(review): the function header (line 2502, presumably
// `void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {`) and the call
// that assigns `ptr` (line 2508, presumably ___kmp_allocate_align with
// page_size alignment) are missing from this view; only the body fragment
// below survived the extraction.
2503 int page_size = 8 * 1024;
2504 void *ptr;
2505
2506 KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
2507 (int)size KMP_SRC_LOC_PARM));
2509 KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
2510 return ptr;
2511} // ___kmp_page_allocate
2512
2513/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
2514 In debug mode, fill the memory block with 0xEF before call to free(). */
// Free a block allocated by ___kmp_allocate_align(): read the descriptor
// stored just before `ptr`, validate it in debug builds (and poison the
// whole block with 0xEF), then free the original malloc() pointer.
// NOTE(review): the function header (line 2515, presumably
// `void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {`), a debug assert (2538)
// and the release-build free call (2550) are missing from this view.
2516 kmp_mem_descr_t descr;
2517#if KMP_DEBUG
2518 kmp_uintptr_t addr_allocated; // Address returned by malloc().
2519 kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
2520#endif
2521 KE_TRACE(25,
2522 ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
2523 KMP_ASSERT(ptr != NULL);
2524
// Descriptor lives immediately before the pointer the caller holds.
2525 descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
2526
2527 KE_TRACE(26, (" __kmp_free: "
2528 "ptr_allocated=%p, size_allocated=%d, "
2529 "ptr_aligned=%p, size_aligned=%d\n",
2530 descr.ptr_allocated, (int)descr.size_allocated,
2531 descr.ptr_aligned, (int)descr.size_aligned));
2532#if KMP_DEBUG
2533 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2534 addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
2535 KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
2536 KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
2537 KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
2539 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2540 addr_allocated + descr.size_allocated);
2541 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2542// Fill memory block with 0xEF, it helps catch using freed memory.
2543#endif
2544
2545#ifndef LEAK_MEMORY
2546 KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
2547#ifdef KMP_DEBUG
2548 _free_src_loc(descr.ptr_allocated, _file_, _line_);
2549#else
2551#endif
2552#endif
2553 KMP_MB();
2554 KE_TRACE(25, ("<- __kmp_free() returns\n"));
2555} // func ___kmp_free
2556
2557#if USE_FAST_MEMORY == 3
2558// Allocate fast memory by first scanning the thread's free lists
2559// If a chunk the right size exists, grab it off the free list.
2560// Otherwise allocate normally using kmp_thread_malloc.
2561
2562// AC: How to choose the limit? Just get 16 for now...
2563#define KMP_FREE_LIST_LIMIT 16
2564
2565// Always use 128 bytes for determining buckets for caching memory blocks
2566#define DCACHE_LINE 128
2567
// Fast per-thread allocator: round the request up to one of four cache-line
// bucket sizes (2/4/16/64 lines of DCACHE_LINE bytes) and try to pop a
// cached block from the thread's free lists (self list first, then the
// lock-free sync list filled by other threads). Falls through to bget() for
// larger requests or on a cache miss. The block's descriptor records the
// raw bget pointer and the owning thread.
// NOTE(review): two lines are missing from this view — the KE_TRACE
// argument line (2577) and the compare-and-swap loop header (2612,
// presumably `while (ptr == NULL || !KMP_COMPARE_AND_STORE_PTR(...)`).
2568 void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
2569 void *ptr;
2570 size_t num_lines, idx;
2571 int index;
2572 void *alloc_ptr;
2573 size_t alloc_size;
2574 kmp_mem_descr_t *descr;
2575
2576 KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
2578
// Map the request size onto a bucket index; each `idx >>= 2` test covers
// the next power-of-4 range of cache-line counts.
2579 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
2580 idx = num_lines - 1;
2581 KMP_DEBUG_ASSERT(idx >= 0);
2582 if (idx < 2) {
2583 index = 0; // idx is [ 0, 1 ], use first free list
2584 num_lines = 2; // 1, 2 cache lines or less than cache line
2585 } else if ((idx >>= 2) == 0) {
2586 index = 1; // idx is [ 2, 3 ], use second free list
2587 num_lines = 4; // 3, 4 cache lines
2588 } else if ((idx >>= 2) == 0) {
2589 index = 2; // idx is [ 4, 15 ], use third free list
2590 num_lines = 16; // 5, 6, ..., 16 cache lines
2591 } else if ((idx >>= 2) == 0) {
2592 index = 3; // idx is [ 16, 63 ], use fourth free list
2593 num_lines = 64; // 17, 18, ..., 64 cache lines
2594 } else {
2595 goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
2596 }
2597
// Fast path 1: pop from the thread-private (no-sync) free list.
2598 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
2599 if (ptr != NULL) {
2600 // pop the head of no-sync free list
2601 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2602 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2603 sizeof(kmp_mem_descr_t)))
2604 ->ptr_aligned);
2605 goto end;
2606 }
// Fast path 2: steal the whole sync list (filled by other threads) with a
// CAS, keep the head, and move the rest onto the no-sync list.
2607 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2608 if (ptr != NULL) {
2609 // no-sync free list is empty, use sync free list (filled in by other
2610 // threads only)
2611 // pop the head of the sync free list, push NULL instead
2613 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
2614 KMP_CPU_PAUSE();
2615 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2616 }
2617 // push the rest of chain into no-sync free list (can be NULL if there was
2618 // the only block)
2619 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2620 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2621 sizeof(kmp_mem_descr_t)))
2622 ->ptr_aligned);
2623 goto end;
2624 }
2625
2626alloc_call:
2627 // haven't found block in the free lists, thus allocate it
2628 size = num_lines * DCACHE_LINE;
2629
2630 alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
2631 KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
2632 "alloc_size %d\n",
2633 __kmp_gtid_from_thread(this_thr), alloc_size));
2634 alloc_ptr = bget(this_thr, (bufsize)alloc_size);
2635
2636 // align ptr to DCACHE_LINE
2637 ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
2638 DCACHE_LINE) &
2639 ~(DCACHE_LINE - 1));
2640 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2641
2642 descr->ptr_allocated = alloc_ptr; // remember allocated pointer
2643 // we don't need size_allocated
2644 descr->ptr_aligned = (void *)this_thr; // remember allocating thread
2645 // (it is already saved in bget buffer,
2646 // but we may want to use another allocator in future)
2647 descr->size_aligned = size;
2648
2649end:
2650 KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
2651 __kmp_gtid_from_thread(this_thr), ptr));
2652 return ptr;
2653} // func __kmp_fast_allocate
2654
2655// Free fast memory and place it on the thread's free list if it is of
2656// the correct size.
// Return a ___kmp_fast_allocate()d block to the free-list cache. Blocks of
// a recognized bucket size go back to a free list: the self list if this
// thread allocated the block, otherwise a bounded "other" queue that is
// eventually pushed onto the owner's lock-free sync list. Oversized blocks
// go straight back to bget via brel().
// NOTE(review): three lines are missing from this view — the KE_TRACE
// argument line (2665), the opening of the queue-length KMP_DEBUG_ASSERT
// (2722), and the compare-and-swap loop header (2739, presumably
// `while (!KMP_COMPARE_AND_STORE_PTR(...)`).
2657 void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
2658 kmp_mem_descr_t *descr;
2659 kmp_info_t *alloc_thr;
2660 size_t size;
2661 size_t idx;
2662 int index;
2663
2664 KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
2666 KMP_ASSERT(ptr != NULL);
2667
2668 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2669
2670 KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
2671 (int)descr->size_aligned));
2672
2673 size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
2674
// Map the recorded size back to its bucket; anything else is oversized.
2675 idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
2676 if (idx == size) {
2677 index = 0; // 2 cache lines
2678 } else if ((idx <<= 1) == size) {
2679 index = 1; // 4 cache lines
2680 } else if ((idx <<= 2) == size) {
2681 index = 2; // 16 cache lines
2682 } else if ((idx <<= 2) == size) {
2683 index = 3; // 64 cache lines
2684 } else {
2685 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
2686 goto free_call; // 65 or more cache lines ( > 8KB )
2687 }
2688
2689 alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
2690 if (alloc_thr == this_thr) {
2691 // push block to self no-sync free list, linking previous head (LIFO)
2692 *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
2693 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
2694 } else {
2695 void *head = this_thr->th.th_free_lists[index].th_free_list_other;
2696 if (head == NULL) {
2697 // Create new free list
2698 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2699 *((void **)ptr) = NULL; // mark the tail of the list
2700 descr->size_allocated = (size_t)1; // head of the list keeps its length
2701 } else {
2702 // need to check existed "other" list's owner thread and size of queue
2703 kmp_mem_descr_t *dsc =
2704 (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
2705 // allocating thread, same for all queue nodes
2706 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
2707 size_t q_sz =
2708 dsc->size_allocated + 1; // new size in case we add current task
2709 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
2710 // we can add current task to "other" list, no sync needed
2711 *((void **)ptr) = head;
2712 descr->size_allocated = q_sz;
2713 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2714 } else {
2715 // either queue blocks owner is changing or size limit exceeded
2716 // return old queue to allocating thread (q_th) synchronously,
2717 // and start new list for alloc_thr's tasks
2718 void *old_ptr;
2719 void *tail = head;
2720 void *next = *((void **)head);
// Debug-only walk to the tail, checking each node's stored length
// decreases by one per hop (assert opening on missing line 2722).
2721 while (next != NULL) {
2723 // queue size should decrease by 1 each step through the list
2724 ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
2725 ->size_allocated +
2726 1 ==
2727 ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
2728 ->size_allocated);
2729 tail = next; // remember tail node
2730 next = *((void **)next);
2731 }
2732 KMP_DEBUG_ASSERT(q_th != NULL);
2733 // push block to owner's sync free list
2734 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2735 /* the next pointer must be set before setting free_list to ptr to avoid
2736 exposing a broken list to other threads, even for an instant. */
2737 *((void **)tail) = old_ptr;
2738
2740 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
2741 KMP_CPU_PAUSE();
2742 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2743 *((void **)tail) = old_ptr;
2744 }
2745
2746 // start new list of not-self tasks
2747 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2748 *((void **)ptr) = NULL;
2749 descr->size_allocated = (size_t)1; // head of queue keeps its length
2750 }
2751 }
2752 }
2753 goto end;
2754
2755free_call:
2756 KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2757 __kmp_gtid_from_thread(this_thr), size));
2758 __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
2759 brel(this_thr, descr->ptr_allocated);
2760
2761end:
2762 KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
2763
2764} // func __kmp_fast_free
2765
2766// Initialize the thread free lists related to fast memory
2767// Only do this when a thread is initially created.
2768void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
2769 KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
2770
2771 memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
2772}
2773
2774// Free the memory in the thread free lists related to fast memory
2775// Only do this when a thread is being reaped (destroyed).
2776void __kmp_free_fast_memory(kmp_info_t *th) {
2777 // Suppose we use BGET underlying allocator, walk through its structures...
2778 int bin;
2779 thr_data_t *thr = get_thr_data(th);
2780 void **lst = NULL;
2781
2782 KE_TRACE(
2783 5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
2784
2785 __kmp_bget_dequeue(th); // Release any queued buffers
2786
2787 // Dig through free lists and extract all allocated blocks
2788 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
2789 bfhead_t *b = thr->freelist[bin].ql.flink;
2790 while (b != &thr->freelist[bin]) {
2791 if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address
2792 *((void **)b) =
2793 lst; // link the list (override bthr, but keep flink yet)
2794 lst = (void **)b; // push b into lst
2795 }
2796 b = b->ql.flink; // get next buffer
2797 }
2798 }
2799 while (lst != NULL) {
2800 void *next = *lst;
2801 KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2802 lst, next, th, __kmp_gtid_from_thread(th)));
2803 (*thr->relfcn)(lst);
2804#if BufStats
2805 // count blocks to prevent problems in __kmp_finalize_bget()
2806 thr->numprel++; /* Nr of expansion block releases */
2807 thr->numpblk--; /* Total number of blocks */
2808#endif
2809 lst = (void **)next;
2810 }
2811
2812 KE_TRACE(
2813 5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
2814}
2815
2816#endif // USE_FAST_MEMORY
char buf[BUFFER_SIZE]
int result[2]
Support OMP 6.0 target memory management Expected offload runtime entries.
void init()
Initialize interface with offload runtime.
int get_mem_resources(int ndevs, const int *devs, int host, omp_memspace_handle_t memspace, int *resources)
Obtain resource information from offload runtime.
void * omp_alloc(size_t size, omp_allocator_handle_t allocator)
Invoke offload runtime's memory allocation routine.
void omp_free(void *ptr, omp_allocator_handle_t allocator)
Invoke offload runtime's memory deallocation routine.
Maintain a list of target memory spaces that are identified with the requested information.
omp_memspace_handle_t get_memspace(int num_resources, const int *resources, omp_memspace_handle_t parent)
Return sub memory space from the parent memory space.
void init()
Initialize memory space list.
omp_memspace_handle_t get_memspace(int num_devices, const int *devices, int host_access, omp_memspace_handle_t memspace)
Return memory space for the provided input.
void fini()
Release resources for the memory space list.
int64_t kmp_int64
Definition common.h:10
void
Definition ittnotify.h:3324
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int mask
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp end
void * addr
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t count
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id parent
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t mode
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync p
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id head
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d int
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id tail
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle * key
int __kmp_memkind_available
void * omp_memspace_handle_t
Definition kmp.h:1068
void * omp_allocator_handle_t
Definition kmp.h:1086
#define __kmp_free(ptr)
Definition kmp.h:3762
#define KMP_CPU_PAUSE()
Definition kmp.h:1598
omp_allocator_handle_t const omp_cgroup_mem_alloc
@ omp_atk_fallback
Definition kmp.h:1029
@ omp_atk_pinned
Definition kmp.h:1031
@ omp_atk_access
Definition kmp.h:1027
@ omp_atk_part_size
Definition kmp.h:1038
@ omp_atk_pin_device
Definition kmp.h:1033
@ omp_atk_preferred_device
Definition kmp.h:1034
@ omp_atk_alignment
Definition kmp.h:1026
@ omp_atk_atomic_scope
Definition kmp.h:1037
@ omp_atk_target_access
Definition kmp.h:1036
@ omp_atk_pool_size
Definition kmp.h:1028
@ omp_atk_fb_data
Definition kmp.h:1030
@ omp_atk_partition
Definition kmp.h:1032
@ omp_atk_sync_hint
Definition kmp.h:1025
size_t __kmp_align_alloc
omp_memspace_handle_t const llvm_omp_target_host_mem_space
#define __kmp_get_thread()
Definition kmp.h:3610
omp_allocator_handle_t const omp_default_mem_alloc
omp_alloctrait_value_t
Definition kmp.h:1041
@ omp_atv_interleaved
Definition kmp.h:1060
@ omp_atv_environment
Definition kmp.h:1057
@ omp_atv_blocked
Definition kmp.h:1059
@ omp_atv_default_mem_fb
Definition kmp.h:1053
@ omp_atv_allocator_fb
Definition kmp.h:1056
@ omp_atv_null_fb
Definition kmp.h:1054
@ omp_atv_nearest
Definition kmp.h:1058
@ omp_atv_single
Definition kmp.h:1062
@ omp_atv_device
Definition kmp.h:1049
@ omp_atv_abort_fb
Definition kmp.h:1055
#define __kmp_entry_gtid()
Definition kmp.h:3607
omp_allocator_handle_t const omp_large_cap_mem_alloc
omp_allocator_handle_t const omp_low_lat_mem_alloc
omp_allocator_handle_t const omp_high_bw_mem_alloc
omp_memspace_handle_t const kmp_max_mem_space
static kmp_info_t * __kmp_entry_thread()
Definition kmp.h:3737
#define __kmp_thread_malloc(th, size)
Definition kmp.h:3782
omp_memspace_handle_t const omp_null_mem_space
omp_memspace_handle_t const omp_large_cap_mem_space
kmp_info_t ** __kmp_threads
void __kmp_finalize_bget(kmp_info_t *th)
omp_memspace_handle_t const llvm_omp_target_shared_mem_space
omp_memspace_handle_t const omp_high_bw_mem_space
omp_allocator_handle_t const omp_const_mem_alloc
omp_allocator_handle_t const omp_pteam_mem_alloc
omp_allocator_handle_t const llvm_omp_target_host_mem_alloc
omp_allocator_handle_t const kmp_max_mem_alloc
#define __kmp_allocate(size)
Definition kmp.h:3760
size_t __kmp_malloc_pool_incr
void __kmp_serial_initialize(void)
omp_allocator_handle_t const omp_thread_mem_alloc
void __kmp_initialize_bget(kmp_info_t *th)
static kmp_info_t * __kmp_thread_from_gtid(int gtid)
Definition kmp.h:3640
static int __kmp_gtid_from_thread(const kmp_info_t *thr)
Definition kmp.h:3635
omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc
volatile int __kmp_init_serial
static void __kmp_type_convert(T1 src, T2 *dest)
Definition kmp.h:4891
bool __kmp_hwloc_available
union KMP_ALIGN_CACHE kmp_info kmp_info_t
omp_allocator_handle_t const llvm_omp_target_device_mem_alloc
omp_allocator_handle_t const omp_null_allocator
#define __kmp_thread_free(th, ptr)
Definition kmp.h:3788
void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL)
void * __kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator)
static void ** mk_hbw_interleave
struct kmp_mem_desc kmp_mem_desc_t
static void ** mk_interleave
#define IS_POWER_OF_TWO(n)
void __kmp_init_target_mem()
static void ** mk_dax_kmem
static void *(* kmp_target_unlock_mem)(void *ptr, int device)
void __kmp_fini_target_mem()
Finalize target memory support.
static void ** mk_dax_kmem_all
void kmpc_free(void *ptr)
static void *(* kmp_target_free_shared)(void *ptr, int device)
omp_allocator_handle_t __kmpc_get_default_allocator(int gtid)
#define KMP_IS_PREDEF_MEM_SPACE(MS)
void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator)
#define KMP_IS_TARGET_MEM_ALLOC(MA)
void * __kmpc_calloc(int gtid, size_t nmemb, size_t size, omp_allocator_handle_t allocator)
void * kmpc_malloc(size_t size)
static void ** mk_hbw_preferred
static bool __kmp_target_mem_available
void * kmpc_calloc(size_t nelem, size_t elsize)
omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace, int num_resources, int *resources)
static void *(* kmp_target_alloc_shared)(size_t size, int device)
static void *(* kmp_target_free_device)(void *ptr, int device)
static void *(* kmp_mk_alloc)(void *k, size_t sz)
omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs, const int *devs, omp_memspace_handle_t memspace, int host)
struct kmp_mem_descr kmp_mem_descr_t
#define KMP_IS_TARGET_MEM_SPACE(MS)
void * ___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL)
void __kmp_init_memkind()
class kmp_tgt_allocator_t __kmp_tgt_allocator
void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator)
void * ___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL)
void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator)
int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace)
void * __kmp_realloc(int gtid, void *ptr, size_t size, omp_allocator_handle_t allocator, omp_allocator_handle_t free_allocator)
static void *(* kmp_target_free_host)(void *ptr, int device)
void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator)
void * kmpc_realloc(void *ptr, size_t size)
static void *(* kmp_target_lock_mem)(void *ptr, size_t size, int device)
void * __kmpc_aligned_alloc(int gtid, size_t algn, size_t size, omp_allocator_handle_t allocator)
void * ___kmp_thread_realloc(kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL)
constexpr size_t alignment
void * ___kmp_allocate(size_t size KMP_SRC_LOC_DECL)
static void ** mk_default
class kmp_tgt_memspace_list_t __kmp_tgt_memspace_list
omp_allocator_handle_t __kmp_get_devices_allocator(int ndevs, const int *devs, omp_memspace_handle_t memspace, int host)
void * __kmpc_realloc(int gtid, void *ptr, size_t size, omp_allocator_handle_t allocator, omp_allocator_handle_t free_allocator)
int omp_get_num_devices(void)
omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms, int ntraits, omp_alloctrait_t traits[])
static void(* kmp_mk_free)(void *kind, void *ptr)
void * __kmp_alloc(int gtid, size_t algn, size_t size, omp_allocator_handle_t allocator)
void * ___kmp_thread_calloc(kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL)
void * kmpc_aligned_malloc(size_t size, size_t alignment)
void __kmp_fini_memkind()
static void *(* kmp_target_alloc_device)(size_t size, int device)
static void * ___kmp_allocate_align(size_t size, size_t alignment KMP_SRC_LOC_DECL)
static void *(* kmp_target_alloc_host)(size_t size, int device)
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL)
void * __kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size, omp_allocator_handle_t allocator)
#define KE_TRACE(d, x)
Definition kmp_debug.h:161
#define KMP_ASSERT(cond)
Definition kmp_debug.h:59
#define KC_TRACE(d, x)
Definition kmp_debug.h:159
#define KMP_DEBUG_ASSERT(cond)
Definition kmp_debug.h:61
#define KMP_ASSERT2(cond, msg)
Definition kmp_debug.h:60
unsigned long long kmp_uint64
#define KMP_INFORM(...)
Definition kmp_i18n.h:142
#define KMP_WARNING(...)
Definition kmp_i18n.h:144
#define KMP_FATAL(...)
Definition kmp_i18n.h:146
void __kmp_printf_no_lock(char const *format,...)
Definition kmp_io.cpp:197
static void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck)
Definition kmp_lock.h:535
static int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid)
Definition kmp_lock.h:559
static void __kmp_init_lock(kmp_lock_t *lck)
Definition kmp_lock.h:571
#define KMP_LOCK_INIT(lock)
Definition kmp_lock.h:557
static int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck)
Definition kmp_lock.h:527
static void __kmp_destroy_lock(kmp_lock_t *lck)
Definition kmp_lock.h:575
static void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid)
Definition kmp_lock.h:567
static void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck)
Definition kmp_lock.h:539
#define KMP_TEST_THEN_ADD64(p, v)
Definition kmp_os.h:778
#define TCW_PTR(a, b)
Definition kmp_os.h:1169
#define TCR_SYNC_PTR(a)
Definition kmp_os.h:1170
#define KMP_ALIGN(bytes)
Definition kmp_os.h:393
#define TCR_PTR(a)
Definition kmp_os.h:1168
#define RCAST(type, var)
Definition kmp_os.h:292
#define CACHE_LINE
Definition kmp_os.h:340
#define CCAST(type, var)
Definition kmp_os.h:291
#define KMP_MB()
Definition kmp_os.h:1068
unsigned long kmp_uintptr_t
Definition kmp_os.h:205
#define KMP_DLSYM(name)
Definition kmp_os.h:1304
#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv)
Definition kmp_os.h:822
#define KMP_MEMCPY
#define i
Definition kmp_stub.cpp:87
#define _malloc_src_loc(size)
#define KMP_SRC_LOC_DECL
#define malloc_src_loc(args)
#define _free_src_loc(ptr)
#define KMP_SRC_LOC_PARM
#define free_src_loc(args)
int a
int32_t kmp_int32
volatile int release
return ret
Memory allocator information is shared with offload runtime.
Definition kmp.h:1117
size_t alignment
Definition kmp.h:1120
kmp_uint64 pool_size
Definition kmp.h:1123
omp_alloctrait_value_t target_access
Definition kmp.h:1129
omp_alloctrait_value_t atomic_scope
Definition kmp.h:1130
kmp_allocator_t * fb_data
Definition kmp.h:1122
kmp_uint64 pool_used
Definition kmp.h:1124
int pin_device
Definition kmp.h:1127
omp_alloctrait_value_t fb
Definition kmp.h:1121
void ** memkind
Definition kmp.h:1119
int preferred_device
Definition kmp.h:1128
omp_alloctrait_value_t partition
Definition kmp.h:1126
omp_memspace_handle_t memspace
Definition kmp.h:1118
bool pinned
Definition kmp.h:1125
size_t part_size
Definition kmp.h:1131
kmp_allocator_t * allocator
size_t size_aligned
void * ptr_allocated
size_t size_allocated
Memory space informaition is shared with offload runtime.
Definition kmp.h:1109
int num_resources
Definition kmp.h:1111
int * resources
Definition kmp.h:1112
kmp_memspace_t * next
Definition kmp.h:1113
omp_memspace_handle_t memspace
Definition kmp.h:1110
omp_uintptr_t value
Definition kmp.h:1083