18#if HWLOC_API_VERSION > 0x00020300
19#define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
20#elif HWLOC_API_VERSION == 0x00020300
21#define KMP_HWLOC_LOCATION_TYPE_CPUSET \
22 hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
24enum hwloc_memattr_id_e {
25 HWLOC_MEMATTR_ID_BANDWIDTH,
26 HWLOC_MEMATTR_ID_CAPACITY
// Callback types accepted by bectl() and stored in thr_data_t
// (compfcn / acqfcn / relfcn).
//
// bget_compact_t: called by bget() with the requested size and an
// incrementing sequence number; returns an int status.
36typedef int (*bget_compact_t)(size_t,
int);
// bget_acquire_t: called with a byte count and returns a pointer to new
// storage (malloc is cast to this type when the pool is configured).
37typedef void *(*bget_acquire_t)(size_t);
// bget_release_t: called with a pointer previously obtained for the pool
// (free is cast to this type when the pool is configured).
38typedef void (*bget_release_t)(
void *);
43#if KMP_ARCH_X86 || KMP_ARCH_ARM
49typedef ssize_t bufsize;
54typedef enum bget_mode {
60static void bpool(
kmp_info_t *th,
void *buffer, bufsize len);
63static void *bgetr(
kmp_info_t *th,
void *buffer, bufsize newsize);
65static void bectl(
kmp_info_t *th, bget_compact_t compact,
66 bget_acquire_t acquire, bget_release_t
release,
76#if KMP_ARCH_X86 || KMP_ARCH_SPARC || !KMP_HAVE_QUAD
79#define AlignType double
84#define AlignType _Quad
120static bufsize bget_bin_size[] = {
130 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20,
// Number of free-list bins: the element count of the bget_bin_size[] table,
// converted to int so it can be used with the signed bin indices in this file.
138#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
145typedef struct qlinks {
146 struct bfhead *flink;
147 struct bfhead *blink;
151typedef struct bhead2 {
162 char b_pad[
sizeof(bhead2_t) + (SizeQuant - (
sizeof(bhead2_t) % SizeQuant))];
// View the address p as a pointer to a bhead_t header.
165#define BH(p) ((bhead_t *)(p))
168typedef struct bdhead {
// View the address p as a pointer to a bdhead_t header.
172#define BDH(p) ((bdhead_t *)(p))
175typedef struct bfhead {
// View the address p as a pointer to a bfhead_t header.
179#define BFH(p) ((bfhead_t *)(p))
181typedef struct thr_data {
182 bfhead_t freelist[MAX_BGET_BINS];
187 long numpget, numprel;
188 long numdget, numdrel;
192 bget_compact_t compfcn;
193 bget_acquire_t acqfcn;
194 bget_release_t relfcn;
// QLSize: size in bytes of the free-list link pair (qlinks_t: flink + blink).
207#define QLSize (sizeof(qlinks_t))
// SizeQ: the larger of SizeQuant and QLSize. bget() compares the requested
// size against this value before rounding it.
208#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
211 ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
219 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
222static int bget_get_bin(bufsize
size) {
224 int lo = 0, hi = MAX_BGET_BINS - 1;
228 while ((hi - lo) > 1) {
229 int mid = (lo + hi) >> 1;
230 if (
size < bget_bin_size[mid])
245 data = (thr_data_t *)((!th->th.th_local.bget_data)
247 : th->th.th_local.bget_data);
251 for (
i = 0;
i < MAX_BGET_BINS; ++
i) {
252 data->freelist[
i].ql.flink = &
data->freelist[
i];
253 data->freelist[
i].ql.blink = &
data->freelist[
i];
256 th->th.th_local.bget_data =
data;
257 th->th.th_local.bget_list = 0;
258#if !USE_CMP_XCHG_FOR_BGET
259#ifdef USE_QUEUING_LOCK_FOR_BGET
267static thr_data_t *get_thr_data(
kmp_info_t *th) {
270 data = (thr_data_t *)th->th.th_local.bget_data;
278static void __kmp_bget_dequeue(
kmp_info_t *th) {
282#if USE_CMP_XCHG_FOR_BGET
284 volatile void *old_value =
TCR_SYNC_PTR(th->th.th_local.bget_list);
286 CCAST(
void *, old_value),
nullptr)) {
290 p =
CCAST(
void *, old_value);
293#ifdef USE_QUEUING_LOCK_FOR_BGET
299 p = (
void *)th->th.th_local.bget_list;
300 th->th.th_local.bget_list = 0;
302#ifdef USE_QUEUING_LOCK_FOR_BGET
312 bfhead_t *
b = BFH(((
char *)
p) -
sizeof(bhead_t));
319 p = (
void *)
b->ql.flink;
328#ifdef USE_QUEUING_LOCK_FOR_BGET
333 bfhead_t *
b = BFH(((
char *)
buf) -
sizeof(bhead_t));
341 KC_TRACE(10, (
"__kmp_bget_enqueue: moving buffer to T#%d list\n",
344#if USE_CMP_XCHG_FOR_BGET
346 volatile void *old_value =
TCR_PTR(th->th.th_local.bget_list);
349 b->ql.flink = BFH(
CCAST(
void *, old_value));
354 old_value =
TCR_PTR(th->th.th_local.bget_list);
357 b->ql.flink = BFH(
CCAST(
void *, old_value));
361#ifdef USE_QUEUING_LOCK_FOR_BGET
367 b->ql.flink = BFH(th->th.th_local.bget_list);
368 th->th.th_local.bget_list = (
void *)
buf;
370#ifdef USE_QUEUING_LOCK_FOR_BGET
379static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *
b) {
385 bin = bget_get_bin(
b->bh.bb.bsize);
388 &thr->freelist[bin]);
390 &thr->freelist[bin]);
392 b->ql.flink = &thr->freelist[bin];
393 b->ql.blink = thr->freelist[bin].ql.blink;
395 thr->freelist[bin].ql.blink =
b;
396 b->ql.blink->ql.flink =
b;
400static void __kmp_bget_remove_from_freelist(bfhead_t *
b) {
404 b->ql.blink->ql.flink =
b->ql.flink;
405 b->ql.flink->ql.blink =
b->ql.blink;
409static void bcheck(
kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
410 thr_data_t *thr = get_thr_data(th);
413 *total_free = *max_free = 0;
415 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
418 best = &thr->freelist[bin];
421 while (
b != &thr->freelist[bin]) {
422 *total_free += (
b->bh.bb.bsize -
sizeof(bhead_t));
423 if ((best == &thr->freelist[bin]) || (
b->bh.bb.bsize < best->bh.bb.bsize))
430 if (*max_free < best->bh.bb.bsize)
431 *max_free = best->bh.bb.bsize;
434 if (*max_free > (bufsize)
sizeof(bhead_t))
435 *max_free -=
sizeof(bhead_t);
439static void *bget(
kmp_info_t *th, bufsize requested_size) {
440 thr_data_t *thr = get_thr_data(th);
441 bufsize
size = requested_size;
449 if (
size < 0 ||
size +
sizeof(bhead_t) > MaxSize) {
453 __kmp_bget_dequeue(th);
455 if (
size < (bufsize)SizeQ) {
458#if defined(SizeQuant) && (SizeQuant > 1)
459 size = (
size + (SizeQuant - 1)) & (~(SizeQuant - 1));
462 size +=
sizeof(bhead_t);
466 use_blink = (thr->mode == bget_mode_lifo);
475 for (bin = bget_get_bin(
size); bin < MAX_BGET_BINS; ++bin) {
477 b = (use_blink ? thr->freelist[bin].ql.blink
478 : thr->freelist[bin].ql.flink);
480 if (thr->mode == bget_mode_best) {
481 best = &thr->freelist[bin];
485 while (
b != &thr->freelist[bin]) {
486 if (
b->bh.bb.bsize >= (bufsize)
size) {
487 if ((best == &thr->freelist[bin]) ||
488 (
b->bh.bb.bsize < best->bh.bb.bsize)) {
494 b = (use_blink ?
b->ql.blink :
b->ql.flink);
499 while (
b != &thr->freelist[bin]) {
500 if ((bufsize)
b->bh.bb.bsize >= (bufsize)
size) {
509 if ((
b->bh.bb.bsize - (bufsize)
size) >
510 (bufsize)(SizeQ + (
sizeof(bhead_t)))) {
513 ba = BH(((
char *)
b) + (
b->bh.bb.bsize - (bufsize)
size));
514 bn = BH(((
char *)ba) +
size);
519 b->bh.bb.bsize -= (bufsize)
size;
522 ba->bb.prevfree =
b->bh.bb.bsize;
525 ba->bb.bsize = -
size;
534 __kmp_bget_remove_from_freelist(
b);
535 __kmp_bget_insert_into_freelist(thr,
b);
537 thr->totalloc += (size_t)
size;
540 buf = (
void *)((((
char *)ba) +
sizeof(bhead_t)));
546 ba = BH(((
char *)
b) +
b->bh.bb.bsize);
553 __kmp_bget_remove_from_freelist(
b);
555 thr->totalloc += (size_t)
b->bh.bb.bsize;
559 b->bh.bb.bsize = -(
b->bh.bb.bsize);
568 buf = (
void *)&(
b->ql);
575 b = (use_blink ?
b->ql.blink :
b->ql.flink);
583 if ((thr->compfcn == 0) || (!(*thr->compfcn)(
size, ++compactseq))) {
591 if (thr->acqfcn != 0) {
592 if (
size > (bufsize)(thr->exp_incr -
sizeof(bhead_t))) {
597 size +=
sizeof(bdhead_t) -
sizeof(bhead_t);
602 bdh = BDH((*thr->acqfcn)((bufsize)
size));
606 bdh->bh.bb.bsize = 0;
611 bdh->bh.bb.prevfree = 0;
614 thr->totalloc += (size_t)
size;
618 buf = (
void *)(bdh + 1);
628 KE_TRACE(10, (
"%%%%%% MALLOCB( %d )\n", (
int)thr->exp_incr));
631 newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
633 if (newpool != NULL) {
634 bpool(th, newpool, thr->exp_incr);
652 char *
buf = (
char *)bget(th,
size);
658 b = BH(
buf -
sizeof(bhead_t));
659 rsize = -(
b->bb.bsize);
663 bd = BDH(
buf -
sizeof(bdhead_t));
664 rsize = bd->tsize - (bufsize)
sizeof(bdhead_t);
666 rsize -=
sizeof(bhead_t);
671 (
void)memset(
buf, 0, (bufsize)rsize);
673 return ((
void *)
buf);
686 nbuf = bget(th,
size);
693 b = BH(((
char *)
buf) -
sizeof(bhead_t));
694 osize = -
b->bb.bsize;
699 bd = BDH(((
char *)
buf) -
sizeof(bdhead_t));
700 osize = bd->tsize - (bufsize)
sizeof(bdhead_t);
702 osize -=
sizeof(bhead_t);
708 (
size_t)((
size < osize) ?
size : osize));
716 thr_data_t *thr = get_thr_data(th);
723 b = BFH(((
char *)
buf) -
sizeof(bhead_t));
725 if (
b->bh.bb.bsize == 0) {
728 bdh = BDH(((
char *)
buf) -
sizeof(bdhead_t));
731 thr->totalloc -= (size_t)bdh->tsize;
736 (
void)memset((
char *)
buf, 0x55, (
size_t)(bdh->tsize -
sizeof(bdhead_t)));
739 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)bdh));
742 (*thr->relfcn)((
void *)bdh);
750 __kmp_bget_enqueue(bth,
buf
751#ifdef USE_QUEUING_LOCK_FOR_BGET
760 if (
b->bh.bb.bsize >= 0) {
771 thr->totalloc += (size_t)
b->bh.bb.bsize;
776 if (
b->bh.bb.prevfree != 0) {
781 bufsize
size =
b->bh.bb.bsize;
786 b = BFH(((
char *)
b) -
b->bh.bb.prevfree);
787 b->bh.bb.bsize -=
size;
790 __kmp_bget_remove_from_freelist(
b);
795 b->bh.bb.bsize = -
b->bh.bb.bsize;
799 __kmp_bget_insert_into_freelist(thr,
b);
805 bn = BFH(((
char *)
b) +
b->bh.bb.bsize);
806 if (bn->bh.bb.bsize > 0) {
813 __kmp_bget_remove_from_freelist(bn);
815 b->bh.bb.bsize += bn->bh.bb.bsize;
819 __kmp_bget_remove_from_freelist(
b);
820 __kmp_bget_insert_into_freelist(thr,
b);
828 bn = BFH(((
char *)
b) +
b->bh.bb.bsize);
831 (
void)memset(((
char *)
b) +
sizeof(bfhead_t), 0x55,
832 (
size_t)(
b->bh.bb.bsize -
sizeof(bfhead_t)));
839 bn->bh.bb.prevfree =
b->bh.bb.bsize;
845 if (thr->relfcn != 0 &&
846 b->bh.bb.bsize == (bufsize)(thr->pool_len -
sizeof(bhead_t))) {
858 __kmp_bget_remove_from_freelist(
b);
860 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)
b));
869 if (thr->last_pool ==
b)
879static void bectl(
kmp_info_t *th, bget_compact_t compact,
880 bget_acquire_t acquire, bget_release_t
release,
882 thr_data_t *thr = get_thr_data(th);
884 thr->compfcn = compact;
885 thr->acqfcn = acquire;
887 thr->exp_incr = pool_incr;
893 thr_data_t *thr = get_thr_data(th);
894 bfhead_t *
b = BFH(
buf);
897 __kmp_bget_dequeue(th);
900 len &= ~((bufsize)(SizeQuant - 1));
902 if (thr->pool_len == 0) {
904 }
else if (len != thr->pool_len) {
921 b->bh.bb.prevfree = 0;
930 len -=
sizeof(bhead_t);
931 b->bh.bb.bsize = (bufsize)len;
938 __kmp_bget_insert_into_freelist(thr,
b);
941 (
void)memset(((
char *)
b) +
sizeof(bfhead_t), 0x55,
942 (
size_t)(len -
sizeof(bfhead_t)));
944 bn = BH(((
char *)
b) + len);
945 bn->bb.prevfree = (bufsize)len;
949 bn->bb.bsize = ESent;
954 int bin = 0,
count = 0;
956 thr_data_t *thr = get_thr_data(th);
960 " get=%" KMP_INT64_SPEC
" rel=%" KMP_INT64_SPEC
961 " pblk=%" KMP_INT64_SPEC
" pget=%" KMP_INT64_SPEC
962 " prel=%" KMP_INT64_SPEC
" dget=%" KMP_INT64_SPEC
963 " drel=%" KMP_INT64_SPEC
"\n",
970 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
973 for (
b = thr->freelist[bin].ql.flink;
b != &thr->freelist[bin];
975 bufsize bs =
b->bh.bb.bsize;
984 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid,
b,
988 char *lerr = ((
char *)
b) +
sizeof(bfhead_t);
989 if ((bs >
sizeof(bfhead_t)) &&
991 (memcmp(lerr, lerr + 1, (
size_t)(bs - (
sizeof(bfhead_t) + 1))) !=
994 "free block have been overstored.)\n",
1011 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
1022 thr = (thr_data_t *)th->th.th_local.bget_data;
1031 if (thr->relfcn != 0 &&
b != 0 && thr->numpblk != 0 &&
1032 b->bh.bb.bsize == (bufsize)(thr->pool_len -
sizeof(bhead_t))) {
1039 __kmp_bget_remove_from_freelist(
b);
1041 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)
b));
1051 if (th->th.th_local.bget_data != NULL) {
1053 th->th.th_local.bget_data = NULL;
1057void kmpc_set_poolsize(
size_t size) {
1059 (bget_release_t)free, (bufsize)
size);
1062size_t kmpc_get_poolsize(
void) {
1070void kmpc_set_poolmode(
int mode) {
1073 if (
mode == bget_mode_fifo ||
mode == bget_mode_lifo ||
1074 mode == bget_mode_best) {
1076 p->mode = (bget_mode_t)
mode;
1080int kmpc_get_poolmode(
void) {
1088void kmpc_get_poolstat(
size_t *maxmem,
size_t *allmem) {
1092 __kmp_bget_dequeue(th);
1100void kmpc_poolprint(
void) {
1103 __kmp_bget_dequeue(th);
1115 *(
void **)ptr = ptr;
1116 ptr = (
void **)ptr + 1;
// True when n is a (non-zero) power of two.
//
// The bit trick (n & (n - 1)) == 0 is also satisfied by n == 0, which is not
// a power of two; using 0 as an alignment would yield an all-zero mask
// ~(n - 1). Zero is therefore rejected explicitly.
// Note: n is evaluated more than once, so pass an expression without side
// effects.
// NOTE(review): confirm no caller relies on 0 being accepted.
#define IS_POWER_OF_TWO(n) (((n) != 0) && (((n) & ((n)-1)) == 0))
1125 void *ptr_allocated;
1134 if (ptr_allocated != NULL) {
1138 *((
void **)ptr - 1) = ptr_allocated;
1150 *(
void **)ptr = ptr;
1151 ptr = (
void **)ptr + 1;
1166 }
else if (
size == 0) {
1176 (bufsize)(
size +
sizeof(ptr)));
1192 __kmp_bget_dequeue(th);
1195 brel(th, *((
void **)ptr - 1));
1201 KE_TRACE(30, (
"-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1203 ptr = bget(th, (bufsize)
size);
1204 KE_TRACE(30, (
"<- __kmp_thread_malloc() returns %p\n", ptr));
1211 KE_TRACE(30, (
"-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1213 ptr = bgetz(th, (bufsize)(nelem * elsize));
1214 KE_TRACE(30, (
"<- __kmp_thread_calloc() returns %p\n", ptr));
1220 KE_TRACE(30, (
"-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1222 ptr = bgetr(th, ptr, (bufsize)
size);
1223 KE_TRACE(30, (
"<- __kmp_thread_realloc() returns %p\n", ptr));
1228 KE_TRACE(30, (
"-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1231 __kmp_bget_dequeue(th);
1234 KE_TRACE(30, (
"<- __kmp_thread_free()\n"));
1240static void *(*kmp_mk_alloc)(
void *k,
size_t sz);
1250#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1251static const char *kmp_mk_lib_name;
1252static void *h_memkind;
1254static int (*kmp_mk_check)(
void *kind);
1255static void **mk_hbw;
1256static void **mk_hugetlb;
1257static void **mk_hbw_hugetlb;
1258static void **mk_hbw_preferred_hugetlb;
1259static void **mk_dax_kmem_preferred;
// File-scope function pointers for target (device) memory entry points.
// Each takes a device number; the alloc variants take a byte count, the
// lock/unlock/free variants take the pointer being operated on.
// Presumably bound at runtime to the llvm_omp_target_* symbols that this file
// looks up with KMP_DLSYM -- the assignments are outside this view; confirm.
1261static void *(*kmp_target_alloc_host)(
size_t size,
int device);
1262static void *(*kmp_target_alloc_shared)(
size_t size,
int device);
1263static void *(*kmp_target_alloc_device)(
size_t size,
int device);
1264static void *(*kmp_target_lock_mem)(
void *ptr,
size_t size,
int device);
// NOTE(review): the unlock and free pointers below are declared as returning
// void * just like the allocators. Verify this matches the prototypes of the
// routines they are bound to; a deallocation routine would normally return
// void, and calling through a mismatched function-pointer type is undefined.
1265static void *(*kmp_target_unlock_mem)(
void *ptr,
int device);
1266static void *(*kmp_target_free_host)(
void *ptr,
int device);
1267static void *(*kmp_target_free_shared)(
void *ptr,
int device);
1268static void *(*kmp_target_free_device)(
void *ptr,
int device);
// True when MS is one of the three LLVM target memory spaces
// (host, shared, device).
// The argument is parenthesized at every use so that compound expressions
// (e.g. a conditional expression) compare as a whole instead of being split
// by the precedence of == and ||. MS is evaluated up to three times.
#define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
  ((MS) == llvm_omp_target_host_mem_space ||                                   \
   (MS) == llvm_omp_target_shared_mem_space ||                                 \
   (MS) == llvm_omp_target_device_mem_space)
// True when MA is one of the three LLVM target memory allocators
// (host, shared, device).
// The argument is parenthesized at every use so that compound expressions
// compare as a whole instead of being split by the precedence of == and ||.
// MA is evaluated up to three times.
#define KMP_IS_TARGET_MEM_ALLOC(MA)                                            \
  ((MA) == llvm_omp_target_host_mem_alloc ||                                   \
   (MA) == llvm_omp_target_shared_mem_alloc ||                                 \
   (MA) == llvm_omp_target_device_mem_alloc)
// True when MS is one of the predefined memory spaces: the null, default,
// large-capacity, constant, high-bandwidth or low-latency space, or any
// target memory space accepted by KMP_IS_TARGET_MEM_SPACE.
// The argument is parenthesized at every use (including when forwarded) so
// that compound expressions compare as a whole instead of being split by the
// precedence of == and ||. MS is evaluated several times.
#define KMP_IS_PREDEF_MEM_SPACE(MS)                                            \
  ((MS) == omp_null_mem_space || (MS) == omp_default_mem_space ||              \
   (MS) == omp_large_cap_mem_space || (MS) == omp_const_mem_space ||           \
   (MS) == omp_high_bw_mem_space || (MS) == omp_low_lat_mem_space ||           \
   KMP_IS_TARGET_MEM_SPACE((MS)))
1304 bool supported =
false;
1305 using get_mem_resources_t =
int (*)(
int,
const int *,
int,
1309 get_mem_resources_t tgt_get_mem_resources =
nullptr;
1310 omp_alloc_t tgt_omp_alloc =
nullptr;
1311 omp_free_t tgt_omp_free =
nullptr;
1316 tgt_get_mem_resources =
1317 (get_mem_resources_t)
KMP_DLSYM(
"__tgt_get_mem_resources");
1318 tgt_omp_alloc = (omp_alloc_t)
KMP_DLSYM(
"__tgt_omp_alloc");
1319 tgt_omp_free = (omp_free_t)
KMP_DLSYM(
"__tgt_omp_free");
1320 supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free;
1327 return tgt_get_mem_resources(ndevs, devs, host, memspace, resources);
1333 return tgt_omp_alloc(
size, allocator);
1339 tgt_omp_free(ptr, allocator);
1357 !memcmp(ms->
resources, resources,
sizeof(
int) * num_resources))
1371 int *sorted_resources = (
int *)
__kmp_allocate(
sizeof(
int) * num_resources);
1372 KMP_MEMCPY(sorted_resources, resources, num_resources *
sizeof(
int));
1373 qsort(sorted_resources, (
size_t)num_resources,
sizeof(
int),
1374 [](
const void *
a,
const void *
b) {
1375 const int val_a = *(
const int *)
a;
1376 const int val_b = *(
const int *)
b;
1377 return (val_a > val_b) ? 1 : ((val_a < val_b) ? -1 : 0);
1379 kmp_memspace_t *ms = find(num_resources, sorted_resources, memspace);
1386 ms->memspace = memspace;
1387 ms->num_resources = num_resources;
1388 ms->resources = sorted_resources;
1389 ms->next = memspace_list;
1414 int actual_num_devices = num_devices;
1415 int *actual_devices =
const_cast<int *
>(devices);
1416 if (actual_num_devices == 0) {
1418 if (actual_num_devices <= 0)
1421 if (actual_devices == NULL) {
1423 actual_devices = (
int *)
__kmp_allocate(
sizeof(
int) * actual_num_devices);
1424 for (
int i = 0;
i < actual_num_devices;
i++)
1425 actual_devices[
i] =
i;
1429 actual_num_devices, actual_devices, host_access, memspace, NULL);
1430 if (num_resources <= 0)
1434 if (num_resources > 0) {
1435 int *resources = (
int *)
__kmp_allocate(
sizeof(
int) * num_resources);
1438 actual_num_devices, actual_devices, host_access, memspace, resources);
1439 ms = get(num_resources, resources, memspace);
1442 if (!devices && actual_devices)
1450 return get(num_resources, resources, ms->
memspace);
1454#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1455static inline void chk_kind(
void ***pkind) {
1458 if (kmp_mk_check(**pkind))
1465#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1467 kmp_mk_lib_name =
"libmemkind.so";
1468 h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
1470 kmp_mk_check = (
int (*)(
void *))dlsym(h_memkind,
"memkind_check_available");
1472 (
void *(*)(
void *,
size_t))dlsym(h_memkind,
"memkind_malloc");
1473 kmp_mk_free = (
void (*)(
void *,
void *))dlsym(h_memkind,
"memkind_free");
1474 mk_default = (
void **)dlsym(h_memkind,
"MEMKIND_DEFAULT");
1478 mk_interleave = (
void **)dlsym(h_memkind,
"MEMKIND_INTERLEAVE");
1480 mk_hbw = (
void **)dlsym(h_memkind,
"MEMKIND_HBW");
1486 mk_hugetlb = (
void **)dlsym(h_memkind,
"MEMKIND_HUGETLB");
1487 chk_kind(&mk_hugetlb);
1488 mk_hbw_hugetlb = (
void **)dlsym(h_memkind,
"MEMKIND_HBW_HUGETLB");
1489 chk_kind(&mk_hbw_hugetlb);
1490 mk_hbw_preferred_hugetlb =
1491 (
void **)dlsym(h_memkind,
"MEMKIND_HBW_PREFERRED_HUGETLB");
1492 chk_kind(&mk_hbw_preferred_hugetlb);
1493 mk_dax_kmem = (
void **)dlsym(h_memkind,
"MEMKIND_DAX_KMEM");
1497 mk_dax_kmem_preferred =
1498 (
void **)dlsym(h_memkind,
"MEMKIND_DAX_KMEM_PREFERRED");
1499 chk_kind(&mk_dax_kmem_preferred);
1500 KE_TRACE(25, (
"__kmp_init_memkind: memkind library initialized\n"));
1506 kmp_mk_check = NULL;
1509 mk_hbw_hugetlb = NULL;
1510 mk_hbw_preferred_hugetlb = NULL;
1511 mk_dax_kmem_preferred = NULL;
1512 kmp_mk_lib_name =
"";
1525#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1527 KE_TRACE(25, (
"__kmp_fini_memkind: finalize memkind library\n"));
1532 kmp_mk_check = NULL;
1535 mk_hbw_hugetlb = NULL;
1536 mk_hbw_preferred_hugetlb = NULL;
1537 mk_dax_kmem_preferred = NULL;
1539#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
1551#if KMP_HWLOC_ENABLED
1552static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
1553#if HWLOC_API_VERSION >= 0x00020300
1554 const hwloc_topology_support *support;
1555 support = hwloc_topology_get_support(__kmp_hwloc_topology);
1557 if (policy == HWLOC_MEMBIND_BIND)
1558 return (support->membind->alloc_membind &&
1559 support->membind->bind_membind);
1560 if (policy == HWLOC_MEMBIND_INTERLEAVE)
1561 return (support->membind->alloc_membind &&
1562 support->membind->interleave_membind);
1570void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr,
size_t size,
1571 hwloc_membind_policy_t policy) {
1572#if HWLOC_API_VERSION >= 0x00020300
1575 struct hwloc_location initiator;
1581 hwloc_cpuset_t
mask = hwloc_bitmap_alloc();
1582 ret = hwloc_get_cpubind(__kmp_hwloc_topology,
mask, HWLOC_CPUBIND_THREAD);
1584 hwloc_bitmap_free(
mask);
1587 initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET;
1588 initiator.location.cpuset =
mask;
1589 ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0,
1594 return hwloc_alloc_membind(__kmp_hwloc_topology,
size, node->nodeset, policy,
1595 HWLOC_MEMBIND_BYNODESET);
1602 hwloc_membind_policy_t policy) {
1603#if HWLOC_API_VERSION >= 0x00020300
1606 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH,
size, policy);
1608 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY,
size, policy);
1610 ptr = hwloc_alloc(__kmp_hwloc_topology,
size);
1622 KMP_DLSYM(
"llvm_omp_target_alloc_shared");
1624 KMP_DLSYM(
"llvm_omp_target_alloc_device");
1627 KMP_DLSYM(
"llvm_omp_target_free_shared");
1629 KMP_DLSYM(
"llvm_omp_target_free_device");
1661 for (
i = 0;
i < ntraits; ++
i) {
1662 switch (traits[
i].
key) {
1686#if KMP_HWLOC_ENABLED
1836 int num_resources,
int *resources) {
1842 if (num_resources == 0 || ms->
num_resources < num_resources || !resources)
1847 int *resources_abs = (
int *)
__kmp_allocate(
sizeof(
int) * num_resources);
1850 for (
int i = 0;
i < num_resources;
i++)
1854 num_resources, resources_abs, memspace);
1871 KE_TRACE(25, (
"__kmpc_alloc: T#%d (%d, %p)\n", gtid, (
int)
size, allocator));
1873 KE_TRACE(25, (
"__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
1879 KE_TRACE(25, (
"__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (
int)algn,
1880 (
int)
size, allocator));
1882 KE_TRACE(25, (
"__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
1888 KE_TRACE(25, (
"__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (
int)nmemb,
1889 (
int)
size, allocator));
1891 KE_TRACE(25, (
"__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
1898 KE_TRACE(25, (
"__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (
int)
size,
1899 allocator, free_allocator));
1901 KE_TRACE(25, (
"__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
1906 KE_TRACE(25, (
"__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1908 KE_TRACE(10, (
"__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
1923 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1939 bool is_pinned =
false;
1944 int use_default_allocator =
1957 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1973 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1986#if KMP_HWLOC_ENABLED
1988 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) {
1992 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH,
1993 desc.
size_a, HWLOC_MEMBIND_BIND);
1995 use_default_allocator =
true;
1997 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY,
1998 desc.
size_a, HWLOC_MEMBIND_BIND);
2000 use_default_allocator =
true;
2002 use_default_allocator =
true;
2004 if (use_default_allocator) {
2005 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.
size_a);
2016 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.
size_a);
2027 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2029 HWLOC_MEMBIND_INTERLEAVE);
2033 HWLOC_MEMBIND_DEFAULT);
2035 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.
size_a);
2040 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.
size_a);
2053 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2055 HWLOC_MEMBIND_INTERLEAVE);
2059 HWLOC_MEMBIND_DEFAULT);
2061 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.
size_a);
2066 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.
size_a);
2077 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.
size_a);
2152 KMP_WARNING(OmpNoAllocator,
"omp_high_bw_mem_alloc");
2154 KMP_WARNING(OmpNoAllocator,
"omp_large_cap_mem_alloc");
2156 KMP_WARNING(OmpNoAllocator,
"omp_const_mem_alloc");
2158 KMP_WARNING(OmpNoAllocator,
"omp_low_lat_mem_alloc");
2160 KMP_WARNING(OmpNoAllocator,
"omp_cgroup_mem_alloc");
2162 KMP_WARNING(OmpNoAllocator,
"omp_pteam_mem_alloc");
2164 KMP_WARNING(OmpNoAllocator,
"omp_thread_mem_alloc");
2166 use_default_allocator =
true;
2168 if (use_default_allocator) {
2170 use_default_allocator =
false;
2207#if KMP_HWLOC_ENABLED
2210 KE_TRACE(10, (
"__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.
size_a));
2218 addr_align = (
addr + sz_desc + align - 1) & ~(align - 1);
2219 addr_descr = addr_align - sz_desc;
2241 if (nmemb == 0 ||
size == 0)
2254 memset(ptr, 0x00, nmemb *
size);
2273 if (nptr != NULL && ptr != NULL) {
2316 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2341 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2345#if KMP_HWLOC_ENABLED
2384#if KMP_HWLOC_ENABLED
2427 KE_TRACE(25, (
"-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
2456 KE_TRACE(26, (
" ___kmp_allocate_align: "
2457 "ptr_allocated=%p, size_allocated=%d, "
2458 "ptr_aligned=%p, size_aligned=%d\n",
2491 KE_TRACE(25, (
"-> __kmp_allocate( %d ) called from %s:%d\n",
2494 KE_TRACE(25, (
"<- __kmp_allocate() returns %p\n", ptr));
2503 int page_size = 8 * 1024;
2506 KE_TRACE(25, (
"-> __kmp_page_allocate( %d ) called from %s:%d\n",
2509 KE_TRACE(25, (
"<- __kmp_page_allocate( %d ) returns %p\n", (
int)
size, ptr));
2528 "ptr_allocated=%p, size_allocated=%d, "
2529 "ptr_aligned=%p, size_aligned=%d\n",
2554 KE_TRACE(25, (
"<- __kmp_free() returns\n"));
2557#if USE_FAST_MEMORY == 3
2563#define KMP_FREE_LIST_LIMIT 16
2566#define DCACHE_LINE 128
2570 size_t num_lines, idx;
2576 KE_TRACE(25, (
"-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
2579 num_lines = (
size + DCACHE_LINE - 1) / DCACHE_LINE;
2580 idx = num_lines - 1;
2585 }
else if ((idx >>= 2) == 0) {
2588 }
else if ((idx >>= 2) == 0) {
2591 }
else if ((idx >>= 2) == 0) {
2598 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
2601 this_thr->th.th_free_lists[index].th_free_list_self = *((
void **)ptr);
2607 ptr =
TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2613 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr,
nullptr)) {
2615 ptr =
TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2619 this_thr->th.th_free_lists[index].th_free_list_self = *((
void **)ptr);
2628 size = num_lines * DCACHE_LINE;
2631 KE_TRACE(25, (
"__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
2634 alloc_ptr = bget(this_thr, (bufsize)alloc_size);
2639 ~(DCACHE_LINE - 1));
2650 KE_TRACE(25, (
"<- __kmp_fast_allocate( T#%d ) returns %p\n",
2664 KE_TRACE(25, (
"-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
2670 KE_TRACE(26, (
" __kmp_fast_free: size_aligned=%d\n",
2675 idx = DCACHE_LINE * 2;
2678 }
else if ((idx <<= 1) ==
size) {
2680 }
else if ((idx <<= 2) ==
size) {
2682 }
else if ((idx <<= 2) ==
size) {
2690 if (alloc_thr == this_thr) {
2692 *((
void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
2693 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
2695 void *
head = this_thr->th.th_free_lists[index].th_free_list_other;
2698 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2699 *((
void **)ptr) = NULL;
2709 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
2711 *((
void **)ptr) =
head;
2713 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2720 void *next = *((
void **)
head);
2721 while (next != NULL) {
2730 next = *((
void **)next);
2734 old_ptr =
TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2737 *((
void **)
tail) = old_ptr;
2740 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr,
head)) {
2742 old_ptr =
TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2743 *((
void **)
tail) = old_ptr;
2747 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2748 *((
void **)ptr) = NULL;
2756 KE_TRACE(25, (
"__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2758 __kmp_bget_dequeue(this_thr);
2762 KE_TRACE(25, (
"<- __kmp_fast_free() returns\n"));
2768void __kmp_initialize_fast_memory(
kmp_info_t *this_thr) {
2769 KE_TRACE(10, (
"__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
2771 memset(this_thr->th.th_free_lists, 0, NUM_LISTS *
sizeof(kmp_free_list_t));
2779 thr_data_t *thr = get_thr_data(th);
2785 __kmp_bget_dequeue(th);
2788 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
2789 bfhead_t *
b = thr->freelist[bin].ql.flink;
2790 while (
b != &thr->freelist[bin]) {
2799 while (lst != NULL) {
2801 KE_TRACE(10, (
"__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2803 (*thr->relfcn)(lst);
2809 lst = (
void **)next;
Support OMP 6.0 target memory management Expected offload runtime entries.
void init()
Initialize interface with offload runtime.
int get_mem_resources(int ndevs, const int *devs, int host, omp_memspace_handle_t memspace, int *resources)
Obtain resource information from offload runtime.
void * omp_alloc(size_t size, omp_allocator_handle_t allocator)
Invoke offload runtime's memory allocation routine.
void omp_free(void *ptr, omp_allocator_handle_t allocator)
Invoke offload runtime's memory deallocation routine.
Maintain a list of target memory spaces that are identified with the requested information.
omp_memspace_handle_t get_memspace(int num_resources, const int *resources, omp_memspace_handle_t parent)
Return sub memory space from the parent memory space.
void init()
Initialize memory space list.
omp_memspace_handle_t get_memspace(int num_devices, const int *devices, int host_access, omp_memspace_handle_t memspace)
Return memory space for the provided input.
void fini()
Release resources for the memory space list.
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int mask
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp end
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t count
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id parent
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t mode
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync p
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id head
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d int
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id tail
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle * key
int __kmp_memkind_available
void * omp_memspace_handle_t
void * omp_allocator_handle_t
omp_allocator_handle_t const omp_cgroup_mem_alloc
@ omp_atk_preferred_device
omp_memspace_handle_t const llvm_omp_target_host_mem_space
#define __kmp_get_thread()
omp_allocator_handle_t const omp_default_mem_alloc
#define __kmp_entry_gtid()
omp_allocator_handle_t const omp_large_cap_mem_alloc
omp_allocator_handle_t const omp_low_lat_mem_alloc
omp_allocator_handle_t const omp_high_bw_mem_alloc
omp_memspace_handle_t const kmp_max_mem_space
static kmp_info_t * __kmp_entry_thread()
#define __kmp_thread_malloc(th, size)
omp_memspace_handle_t const omp_null_mem_space
omp_memspace_handle_t const omp_large_cap_mem_space
kmp_info_t ** __kmp_threads
void __kmp_finalize_bget(kmp_info_t *th)
omp_memspace_handle_t const llvm_omp_target_shared_mem_space
omp_memspace_handle_t const omp_high_bw_mem_space
omp_allocator_handle_t const omp_const_mem_alloc
omp_allocator_handle_t const omp_pteam_mem_alloc
omp_allocator_handle_t const llvm_omp_target_host_mem_alloc
omp_allocator_handle_t const kmp_max_mem_alloc
#define __kmp_allocate(size)
size_t __kmp_malloc_pool_incr
void __kmp_serial_initialize(void)
omp_allocator_handle_t const omp_thread_mem_alloc
void __kmp_initialize_bget(kmp_info_t *th)
static kmp_info_t * __kmp_thread_from_gtid(int gtid)
static int __kmp_gtid_from_thread(const kmp_info_t *thr)
omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc
volatile int __kmp_init_serial
static void __kmp_type_convert(T1 src, T2 *dest)
bool __kmp_hwloc_available
union KMP_ALIGN_CACHE kmp_info kmp_info_t
omp_allocator_handle_t const llvm_omp_target_device_mem_alloc
omp_allocator_handle_t const omp_null_allocator
#define __kmp_thread_free(th, ptr)
void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL)
void * __kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator)
static void ** mk_hbw_interleave
struct kmp_mem_desc kmp_mem_desc_t
static void ** mk_interleave
#define IS_POWER_OF_TWO(n)
void __kmp_init_target_mem()
static void ** mk_dax_kmem
static void *(* kmp_target_unlock_mem)(void *ptr, int device)
void __kmp_fini_target_mem()
Finalize target memory support.
static void ** mk_dax_kmem_all
void kmpc_free(void *ptr)
static void *(* kmp_target_free_shared)(void *ptr, int device)
omp_allocator_handle_t __kmpc_get_default_allocator(int gtid)
#define KMP_IS_PREDEF_MEM_SPACE(MS)
void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator)
#define KMP_IS_TARGET_MEM_ALLOC(MA)
void * __kmpc_calloc(int gtid, size_t nmemb, size_t size, omp_allocator_handle_t allocator)
void * kmpc_malloc(size_t size)
static void ** mk_hbw_preferred
static bool __kmp_target_mem_available
void * kmpc_calloc(size_t nelem, size_t elsize)
omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace, int num_resources, int *resources)
static void *(* kmp_target_alloc_shared)(size_t size, int device)
static void *(* kmp_target_free_device)(void *ptr, int device)
static void *(* kmp_mk_alloc)(void *k, size_t sz)
omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs, const int *devs, omp_memspace_handle_t memspace, int host)
struct kmp_mem_descr kmp_mem_descr_t
#define KMP_IS_TARGET_MEM_SPACE(MS)
void * ___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL)
void __kmp_init_memkind()
class kmp_tgt_allocator_t __kmp_tgt_allocator
void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator)
void * ___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL)
void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator)
int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace)
void * __kmp_realloc(int gtid, void *ptr, size_t size, omp_allocator_handle_t allocator, omp_allocator_handle_t free_allocator)
static void *(* kmp_target_free_host)(void *ptr, int device)
void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator)
void * kmpc_realloc(void *ptr, size_t size)
static void *(* kmp_target_lock_mem)(void *ptr, size_t size, int device)
void * __kmpc_aligned_alloc(int gtid, size_t algn, size_t size, omp_allocator_handle_t allocator)
void * ___kmp_thread_realloc(kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL)
constexpr size_t alignment
void * ___kmp_allocate(size_t size KMP_SRC_LOC_DECL)
static void ** mk_default
class kmp_tgt_memspace_list_t __kmp_tgt_memspace_list
omp_allocator_handle_t __kmp_get_devices_allocator(int ndevs, const int *devs, omp_memspace_handle_t memspace, int host)
void * __kmpc_realloc(int gtid, void *ptr, size_t size, omp_allocator_handle_t allocator, omp_allocator_handle_t free_allocator)
int omp_get_num_devices(void)
omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms, int ntraits, omp_alloctrait_t traits[])
static void(* kmp_mk_free)(void *kind, void *ptr)
void * __kmp_alloc(int gtid, size_t algn, size_t size, omp_allocator_handle_t allocator)
void * ___kmp_thread_calloc(kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL)
void * kmpc_aligned_malloc(size_t size, size_t alignment)
void __kmp_fini_memkind()
static void *(* kmp_target_alloc_device)(size_t size, int device)
static void * ___kmp_allocate_align(size_t size, size_t alignment KMP_SRC_LOC_DECL)
static void *(* kmp_target_alloc_host)(size_t size, int device)
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL)
void * __kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size, omp_allocator_handle_t allocator)
#define KMP_DEBUG_ASSERT(cond)
#define KMP_ASSERT2(cond, msg)
unsigned long long kmp_uint64
void __kmp_printf_no_lock(char const *format,...)
static void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck)
static int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid)
static void __kmp_init_lock(kmp_lock_t *lck)
#define KMP_LOCK_INIT(lock)
static int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck)
static void __kmp_destroy_lock(kmp_lock_t *lck)
static void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid)
static void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck)
#define KMP_TEST_THEN_ADD64(p, v)
unsigned long kmp_uintptr_t
#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv)
#define _malloc_src_loc(size)
#define malloc_src_loc(args)
#define _free_src_loc(ptr)
#define free_src_loc(args)
Memory allocator information is shared with offload runtime.
omp_alloctrait_value_t target_access
omp_alloctrait_value_t atomic_scope
kmp_allocator_t * fb_data
omp_alloctrait_value_t fb
omp_alloctrait_value_t partition
omp_memspace_handle_t memspace
kmp_allocator_t * allocator
Memory space information is shared with offload runtime.
omp_memspace_handle_t memspace